Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/kernel/device/metal/compat.h69
-rw-r--r--intern/cycles/util/types.h12
-rw-r--r--intern/cycles/util/types_float2.h3
-rw-r--r--intern/cycles/util/types_float2_impl.h6
-rw-r--r--intern/cycles/util/types_float3.h5
-rw-r--r--intern/cycles/util/types_float3_impl.h41
-rw-r--r--intern/cycles/util/types_float4.h7
-rw-r--r--intern/cycles/util/types_float4_impl.h40
-rw-r--r--intern/cycles/util/types_int3.h5
-rw-r--r--intern/cycles/util/types_int3_impl.h41
-rw-r--r--intern/cycles/util/types_int4.h9
-rw-r--r--intern/cycles/util/types_int4_impl.h57
12 files changed, 159 insertions, 136 deletions
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 80ee8ef5b57..b20cfca9a9c 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -189,35 +189,46 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
} volume_write_lambda_pass{kg, this, state};
/* make_type definitions with Metal style element initializers */
-#ifdef make_float2
-# undef make_float2
-#endif
-#ifdef make_float3
-# undef make_float3
-#endif
-#ifdef make_float4
-# undef make_float4
-#endif
-#ifdef make_int2
-# undef make_int2
-#endif
-#ifdef make_int3
-# undef make_int3
-#endif
-#ifdef make_int4
-# undef make_int4
-#endif
-#ifdef make_uchar4
-# undef make_uchar4
-#endif
-
-#define make_float2(x, y) float2(x, y)
-#define make_float3(x, y, z) float3(x, y, z)
-#define make_float4(x, y, z, w) float4(x, y, z, w)
-#define make_int2(x, y) int2(x, y)
-#define make_int3(x, y, z) int3(x, y, z)
-#define make_int4(x, y, z, w) int4(x, y, z, w)
-#define make_uchar4(x, y, z, w) uchar4(x, y, z, w)
+ccl_device_forceinline float2 make_float2(const float x, const float y)
+{
+ return float2(x, y);
+}
+
+ccl_device_forceinline float3 make_float3(const float x, const float y, const float z)
+{
+ return float3(x, y, z);
+}
+
+ccl_device_forceinline float4 make_float4(const float x,
+ const float y,
+ const float z,
+ const float w)
+{
+ return float4(x, y, z, w);
+}
+
+ccl_device_forceinline int2 make_int2(const int x, const int y)
+{
+ return int2(x, y);
+}
+
+ccl_device_forceinline int3 make_int3(const int x, const int y, const int z)
+{
+ return int3(x, y, z);
+}
+
+ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
+{
+ return int4(x, y, z, w);
+}
+
+ccl_device_forceinline uchar4 make_uchar4(const uchar x,
+ const uchar y,
+ const uchar z,
+ const uchar w)
+{
+ return uchar4(x, y, z, w);
+}
/* Math functions */
diff --git a/intern/cycles/util/types.h b/intern/cycles/util/types.h
index 26031d9e0fd..d0d8c2941d7 100644
--- a/intern/cycles/util/types.h
+++ b/intern/cycles/util/types.h
@@ -71,6 +71,18 @@ ccl_device_inline bool is_power_of_two(size_t x)
CCL_NAMESPACE_END
+/* Device side printf only tested on CUDA, may work on more GPU devices. */
+#if !defined(__KERNEL_GPU__) || defined(__KERNEL_CUDA__)
+# define __KERNEL_PRINTF__
+#endif
+
+ccl_device_inline void print_float(ccl_private const char *label, const float a)
+{
+#ifdef __KERNEL_PRINTF__
+ printf("%s: %.8f\n", label, (double)a);
+#endif
+}
+
/* Most GPU APIs matching native vector types, so we only need to implement them for
* CPU and oneAPI. */
#if defined(__KERNEL_GPU__) && !defined(__KERNEL_ONEAPI__)
diff --git a/intern/cycles/util/types_float2.h b/intern/cycles/util/types_float2.h
index f37aa1b4ad2..ea510ef832c 100644
--- a/intern/cycles/util/types_float2.h
+++ b/intern/cycles/util/types_float2.h
@@ -20,7 +20,8 @@ struct float2 {
};
ccl_device_inline float2 make_float2(float x, float y);
-ccl_device_inline void print_float2(const char *label, const float2 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+ccl_device_inline void print_float2(ccl_private const char *label, const float2 a);
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float2_impl.h b/intern/cycles/util/types_float2_impl.h
index 9d1820fe17d..7ba7dee2e3a 100644
--- a/intern/cycles/util/types_float2_impl.h
+++ b/intern/cycles/util/types_float2_impl.h
@@ -31,11 +31,13 @@ ccl_device_inline float2 make_float2(float x, float y)
float2 a = {x, y};
return a;
}
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
-ccl_device_inline void print_float2(const char *label, const float2 &a)
+ccl_device_inline void print_float2(ccl_private const char *label, const float2 a)
{
+#ifdef __KERNEL_PRINTF__
printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y);
+#endif
}
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h
index 4e43e928657..87c6b1d3654 100644
--- a/intern/cycles/util/types_float3.h
+++ b/intern/cycles/util/types_float3.h
@@ -47,11 +47,12 @@ struct ccl_try_align(16) float3
# endif
};
-ccl_device_inline float3 make_float3(float f);
ccl_device_inline float3 make_float3(float x, float y, float z);
-ccl_device_inline void print_float3(const char *label, const float3 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+ccl_device_inline float3 make_float3(float f);
+ccl_device_inline void print_float3(ccl_private const char *label, const float3 a);
+
/* Smaller float3 for storage. For math operations this must be converted to float3, so that on the
* CPU SIMD instructions can be used. */
#if defined(__KERNEL_METAL__)
diff --git a/intern/cycles/util/types_float3_impl.h b/intern/cycles/util/types_float3_impl.h
index cbd3f76dae4..da76ab2ab2a 100644
--- a/intern/cycles/util/types_float3_impl.h
+++ b/intern/cycles/util/types_float3_impl.h
@@ -56,38 +56,35 @@ __forceinline float &float3::operator[](int i)
}
# endif
-ccl_device_inline float3 make_float3(float f)
+ccl_device_inline float3 make_float3(float x, float y, float z)
{
-# ifdef __KERNEL_GPU__
- float3 a = {f, f, f};
+# if defined(__KERNEL_GPU__)
+ return {x, y, z};
+# elif defined(__KERNEL_SSE__)
+ return float3(_mm_set_ps(0.0f, z, y, x));
# else
-# ifdef __KERNEL_SSE__
- float3 a(_mm_set1_ps(f));
-# else
- float3 a = {f, f, f, f};
-# endif
+ return {x, y, z, 0.0f};
# endif
- return a;
}
-ccl_device_inline float3 make_float3(float x, float y, float z)
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+
+ccl_device_inline float3 make_float3(float f)
{
-# ifdef __KERNEL_GPU__
- float3 a = {x, y, z};
-# else
-# ifdef __KERNEL_SSE__
- float3 a(_mm_set_ps(0.0f, z, y, x));
-# else
- float3 a = {x, y, z, 0.0f};
-# endif
-# endif
- return a;
+#if defined(__KERNEL_GPU__)
+ return make_float3(f, f, f);
+#elif defined(__KERNEL_SSE__)
+ return float3(_mm_set1_ps(f));
+#else
+ return {f, f, f, f};
+#endif
}
-ccl_device_inline void print_float3(const char *label, const float3 &a)
+ccl_device_inline void print_float3(ccl_private const char *label, const float3 a)
{
+#ifdef __KERNEL_PRINTF__
printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z);
+#endif
}
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float4.h b/intern/cycles/util/types_float4.h
index 6b301c47362..a347cfce9a1 100644
--- a/intern/cycles/util/types_float4.h
+++ b/intern/cycles/util/types_float4.h
@@ -40,10 +40,11 @@ struct ccl_try_align(16) float4
# endif
};
-ccl_device_inline float4 make_float4(float f);
ccl_device_inline float4 make_float4(float x, float y, float z, float w);
-ccl_device_inline float4 make_float4(const int4 &i);
-ccl_device_inline void print_float4(const char *label, const float4 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+ccl_device_inline float4 make_float4(float f);
+ccl_device_inline float4 make_float4(const int4 i);
+ccl_device_inline void print_float4(ccl_private const char *label, const float4 a);
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float4_impl.h b/intern/cycles/util/types_float4_impl.h
index 77b4fbff788..420d9316926 100644
--- a/intern/cycles/util/types_float4_impl.h
+++ b/intern/cycles/util/types_float4_impl.h
@@ -52,40 +52,40 @@ __forceinline float &float4::operator[](int i)
}
# endif
-ccl_device_inline float4 make_float4(float f)
+ccl_device_inline float4 make_float4(float x, float y, float z, float w)
{
# ifdef __KERNEL_SSE__
- float4 a(_mm_set1_ps(f));
+ return float4(_mm_set_ps(w, z, y, x));
# else
- float4 a = {f, f, f, f};
+ return {x, y, z, w};
# endif
- return a;
}
-ccl_device_inline float4 make_float4(float x, float y, float z, float w)
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+
+ccl_device_inline float4 make_float4(float f)
{
-# ifdef __KERNEL_SSE__
- float4 a(_mm_set_ps(w, z, y, x));
-# else
- float4 a = {x, y, z, w};
-# endif
- return a;
+#ifdef __KERNEL_SSE__
+ return float4(_mm_set1_ps(f));
+#else
+ return make_float4(f, f, f, f);
+#endif
}
-ccl_device_inline float4 make_float4(const int4 &i)
+ccl_device_inline float4 make_float4(const int4 i)
{
-# ifdef __KERNEL_SSE__
- float4 a(_mm_cvtepi32_ps(i.m128));
-# else
- float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w};
-# endif
- return a;
+#ifdef __KERNEL_SSE__
+ return float4(_mm_cvtepi32_ps(i.m128));
+#else
+ return make_float4((float)i.x, (float)i.y, (float)i.z, (float)i.w);
+#endif
}
-ccl_device_inline void print_float4(const char *label, const float4 &a)
+ccl_device_inline void print_float4(ccl_private const char *label, const float4 a)
{
+#ifdef __KERNEL_PRINTF__
printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w);
+#endif
}
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int3.h b/intern/cycles/util/types_int3.h
index 3196b158ee9..e059ddd3660 100644
--- a/intern/cycles/util/types_int3.h
+++ b/intern/cycles/util/types_int3.h
@@ -44,9 +44,10 @@ struct ccl_try_align(16) int3
# endif
};
-ccl_device_inline int3 make_int3(int i);
ccl_device_inline int3 make_int3(int x, int y, int z);
-ccl_device_inline void print_int3(const char *label, const int3 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+ccl_device_inline int3 make_int3(int i);
+ccl_device_inline void print_int3(ccl_private const char *label, const int3 a);
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int3_impl.h b/intern/cycles/util/types_int3_impl.h
index abc0f4e4309..830dfa3c658 100644
--- a/intern/cycles/util/types_int3_impl.h
+++ b/intern/cycles/util/types_int3_impl.h
@@ -56,38 +56,35 @@ __forceinline int &int3::operator[](int i)
}
# endif
-ccl_device_inline int3 make_int3(int i)
+ccl_device_inline int3 make_int3(int x, int y, int z)
{
-# ifdef __KERNEL_GPU__
- int3 a = {i, i, i};
+# if defined(__KERNEL_GPU__)
+ return {x, y, z};
+# elif defined(__KERNEL_SSE__)
+ return int3(_mm_set_epi32(0, z, y, x));
# else
-# ifdef __KERNEL_SSE__
- int3 a(_mm_set1_epi32(i));
-# else
- int3 a = {i, i, i, i};
-# endif
+ return {x, y, z, 0};
# endif
- return a;
}
-ccl_device_inline int3 make_int3(int x, int y, int z)
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+
+ccl_device_inline int3 make_int3(int i)
{
-# ifdef __KERNEL_GPU__
- int3 a = {x, y, z};
-# else
-# ifdef __KERNEL_SSE__
- int3 a(_mm_set_epi32(0, z, y, x));
-# else
- int3 a = {x, y, z, 0};
-# endif
-# endif
- return a;
+#if defined(__KERNEL_GPU__)
+ return make_int3(i, i, i);
+#elif defined(__KERNEL_SSE__)
+ return int3(_mm_set1_epi32(i));
+#else
+ return {i, i, i, i};
+#endif
}
-ccl_device_inline void print_int3(const char *label, const int3 &a)
+ccl_device_inline void print_int3(ccl_private const char *label, const int3 a)
{
+#ifdef __KERNEL_PRINTF__
printf("%s: %d %d %d\n", label, a.x, a.y, a.z);
+#endif
}
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int4.h b/intern/cycles/util/types_int4.h
index 0ac53ffc09c..1a13c03e60e 100644
--- a/intern/cycles/util/types_int4.h
+++ b/intern/cycles/util/types_int4.h
@@ -42,11 +42,12 @@ struct ccl_try_align(16) int4
# endif
};
-ccl_device_inline int4 make_int4(int i);
ccl_device_inline int4 make_int4(int x, int y, int z, int w);
-ccl_device_inline int4 make_int4(const float3 &f);
-ccl_device_inline int4 make_int4(const float4 &f);
-ccl_device_inline void print_int4(const char *label, const int4 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+ccl_device_inline int4 make_int4(int i);
+ccl_device_inline int4 make_int4(const float3 f);
+ccl_device_inline int4 make_int4(const float4 f);
+ccl_device_inline void print_int4(ccl_private const char *label, const int4 a);
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int4_impl.h b/intern/cycles/util/types_int4_impl.h
index 2aab29df941..067794e67b4 100644
--- a/intern/cycles/util/types_int4_impl.h
+++ b/intern/cycles/util/types_int4_impl.h
@@ -56,52 +56,51 @@ __forceinline int &int4::operator[](int i)
}
# endif
-ccl_device_inline int4 make_int4(int i)
+ccl_device_inline int4 make_int4(int x, int y, int z, int w)
{
# ifdef __KERNEL_SSE__
- int4 a(_mm_set1_epi32(i));
+ return int4(_mm_set_epi32(w, z, y, x));
# else
- int4 a = {i, i, i, i};
+ return {x, y, z, w};
# endif
- return a;
}
-ccl_device_inline int4 make_int4(int x, int y, int z, int w)
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+
+ccl_device_inline int4 make_int4(int i)
{
-# ifdef __KERNEL_SSE__
- int4 a(_mm_set_epi32(w, z, y, x));
-# else
- int4 a = {x, y, z, w};
-# endif
- return a;
+#ifdef __KERNEL_SSE__
+ return int4(_mm_set1_epi32(i));
+#else
+ return make_int4(i, i, i, i);
+#endif
}
-ccl_device_inline int4 make_int4(const float3 &f)
+ccl_device_inline int4 make_int4(const float3 f)
{
-# ifdef __KERNEL_SSE__
- int4 a(_mm_cvtps_epi32(f.m128));
-# elif defined(__KERNEL_ONEAPI__)
- int4 a = {(int)f.x, (int)f.y, (int)f.z, 0};
-# else
- int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
-# endif
- return a;
+#if defined(__KERNEL_GPU__)
+ return make_int4((int)f.x, (int)f.y, (int)f.z, 0);
+#elif defined(__KERNEL_SSE__)
+ return int4(_mm_cvtps_epi32(f.m128));
+#else
+ return make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w);
+#endif
}
-ccl_device_inline int4 make_int4(const float4 &f)
+ccl_device_inline int4 make_int4(const float4 f)
{
-# ifdef __KERNEL_SSE__
- int4 a(_mm_cvtps_epi32(f.m128));
-# else
- int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
-# endif
- return a;
+#ifdef __KERNEL_SSE__
+ return int4(_mm_cvtps_epi32(f.m128));
+#else
+ return make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w);
+#endif
}
-ccl_device_inline void print_int4(const char *label, const int4 &a)
+ccl_device_inline void print_int4(ccl_private const char *label, const int4 a)
{
+#ifdef __KERNEL_PRINTF__
printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w);
+#endif
}
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
CCL_NAMESPACE_END