diff options
-rw-r--r-- | intern/cycles/kernel/device/metal/compat.h | 69 | ||||
-rw-r--r-- | intern/cycles/util/types.h | 12 | ||||
-rw-r--r-- | intern/cycles/util/types_float2.h | 3 | ||||
-rw-r--r-- | intern/cycles/util/types_float2_impl.h | 6 | ||||
-rw-r--r-- | intern/cycles/util/types_float3.h | 5 | ||||
-rw-r--r-- | intern/cycles/util/types_float3_impl.h | 41 | ||||
-rw-r--r-- | intern/cycles/util/types_float4.h | 7 | ||||
-rw-r--r-- | intern/cycles/util/types_float4_impl.h | 40 | ||||
-rw-r--r-- | intern/cycles/util/types_int3.h | 5 | ||||
-rw-r--r-- | intern/cycles/util/types_int3_impl.h | 41 | ||||
-rw-r--r-- | intern/cycles/util/types_int4.h | 9 | ||||
-rw-r--r-- | intern/cycles/util/types_int4_impl.h | 57 |
12 files changed, 159 insertions, 136 deletions
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 80ee8ef5b57..b20cfca9a9c 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -189,35 +189,46 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \ } volume_write_lambda_pass{kg, this, state}; /* make_type definitions with Metal style element initializers */ -#ifdef make_float2 -# undef make_float2 -#endif -#ifdef make_float3 -# undef make_float3 -#endif -#ifdef make_float4 -# undef make_float4 -#endif -#ifdef make_int2 -# undef make_int2 -#endif -#ifdef make_int3 -# undef make_int3 -#endif -#ifdef make_int4 -# undef make_int4 -#endif -#ifdef make_uchar4 -# undef make_uchar4 -#endif - -#define make_float2(x, y) float2(x, y) -#define make_float3(x, y, z) float3(x, y, z) -#define make_float4(x, y, z, w) float4(x, y, z, w) -#define make_int2(x, y) int2(x, y) -#define make_int3(x, y, z) int3(x, y, z) -#define make_int4(x, y, z, w) int4(x, y, z, w) -#define make_uchar4(x, y, z, w) uchar4(x, y, z, w) +ccl_device_forceinline float2 make_float2(const float x, const float y) +{ + return float2(x, y); +} + +ccl_device_forceinline float3 make_float3(const float x, const float y, const float z) +{ + return float3(x, y, z); +} + +ccl_device_forceinline float4 make_float4(const float x, + const float y, + const float z, + const float w) +{ + return float4(x, y, z, w); +} + +ccl_device_forceinline int2 make_int2(const int x, const int y) +{ + return int2(x, y); +} + +ccl_device_forceinline int3 make_int3(const int x, const int y, const int z) +{ + return int3(x, y, z); +} + +ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w) +{ + return int4(x, y, z, w); +} + +ccl_device_forceinline uchar4 make_uchar4(const uchar x, + const uchar y, + const uchar z, + const uchar w) +{ + return uchar4(x, y, z, w); +} /* Math functions */ diff --git a/intern/cycles/util/types.h b/intern/cycles/util/types.h index 26031d9e0fd..d0d8c2941d7 100644 --- a/intern/cycles/util/types.h +++ b/intern/cycles/util/types.h @@ -71,6 +71,18 @@ ccl_device_inline bool is_power_of_two(size_t x) CCL_NAMESPACE_END +/* Device side printf only tested on CUDA, may work on more GPU devices. */ +#if !defined(__KERNEL_GPU__) || defined(__KERNEL_CUDA__) +# define __KERNEL_PRINTF__ +#endif + +ccl_device_inline void print_float(ccl_private const char *label, const float a) +{ +#ifdef __KERNEL_PRINTF__ + printf("%s: %.8f\n", label, (double)a); +#endif +} + /* Most GPU APIs matching native vector types, so we only need to implement them for * CPU and oneAPI. */ #if defined(__KERNEL_GPU__) && !defined(__KERNEL_ONEAPI__) diff --git a/intern/cycles/util/types_float2.h b/intern/cycles/util/types_float2.h index f37aa1b4ad2..ea510ef832c 100644 --- a/intern/cycles/util/types_float2.h +++ b/intern/cycles/util/types_float2.h @@ -20,7 +20,8 @@ struct float2 { }; ccl_device_inline float2 make_float2(float x, float y); -ccl_device_inline void print_float2(const char *label, const float2 &a); #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ +ccl_device_inline void print_float2(ccl_private const char *label, const float2 a); + CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float2_impl.h b/intern/cycles/util/types_float2_impl.h index 9d1820fe17d..7ba7dee2e3a 100644 --- a/intern/cycles/util/types_float2_impl.h +++ b/intern/cycles/util/types_float2_impl.h @@ -31,11 +31,13 @@ ccl_device_inline float2 make_float2(float x, float y) float2 a = {x, y}; return a; } +#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ -ccl_device_inline void print_float2(const char *label, const float2 &a) +ccl_device_inline void print_float2(ccl_private const char *label, const float2 a) { +#ifdef __KERNEL_PRINTF__ printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); +#endif } -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h index 4e43e928657..87c6b1d3654 100644 --- a/intern/cycles/util/types_float3.h +++ b/intern/cycles/util/types_float3.h @@ -47,11 +47,12 @@ struct ccl_try_align(16) float3 # endif }; -ccl_device_inline float3 make_float3(float f); ccl_device_inline float3 make_float3(float x, float y, float z); -ccl_device_inline void print_float3(const char *label, const float3 &a); #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ +ccl_device_inline float3 make_float3(float f); +ccl_device_inline void print_float3(ccl_private const char *label, const float3 a); + /* Smaller float3 for storage. For math operations this must be converted to float3, so that on the * CPU SIMD instructions can be used. */ #if defined(__KERNEL_METAL__) diff --git a/intern/cycles/util/types_float3_impl.h b/intern/cycles/util/types_float3_impl.h index cbd3f76dae4..da76ab2ab2a 100644 --- a/intern/cycles/util/types_float3_impl.h +++ b/intern/cycles/util/types_float3_impl.h @@ -56,38 +56,35 @@ __forceinline float &float3::operator[](int i) } # endif -ccl_device_inline float3 make_float3(float f) +ccl_device_inline float3 make_float3(float x, float y, float z) { -# ifdef __KERNEL_GPU__ - float3 a = {f, f, f}; +# if defined(__KERNEL_GPU__) + return {x, y, z}; +# elif defined(__KERNEL_SSE__) + return float3(_mm_set_ps(0.0f, z, y, x)); # else -# ifdef __KERNEL_SSE__ - float3 a(_mm_set1_ps(f)); -# else - float3 a = {f, f, f, f}; -# endif + return {x, y, z, 0.0f}; # endif - return a; } -ccl_device_inline float3 make_float3(float x, float y, float z) +#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ + +ccl_device_inline float3 make_float3(float f) { -# ifdef __KERNEL_GPU__ - float3 a = {x, y, z}; -# else -# ifdef __KERNEL_SSE__ - float3 a(_mm_set_ps(0.0f, z, y, x)); -# else - float3 a = {x, y, z, 0.0f}; -# endif -# endif - return a; +#if defined(__KERNEL_GPU__) + return make_float3(f, f, f); +#elif defined(__KERNEL_SSE__) + return float3(_mm_set1_ps(f)); +#else + return {f, f, f, f}; +#endif } -ccl_device_inline void print_float3(const char *label, const float3 &a) +ccl_device_inline void print_float3(ccl_private const char *label, const float3 a) { +#ifdef __KERNEL_PRINTF__ printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); +#endif } -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float4.h b/intern/cycles/util/types_float4.h index 6b301c47362..a347cfce9a1 100644 --- a/intern/cycles/util/types_float4.h +++ b/intern/cycles/util/types_float4.h @@ -40,10 +40,11 @@ struct ccl_try_align(16) float4 # endif }; -ccl_device_inline float4 make_float4(float f); ccl_device_inline float4 make_float4(float x, float y, float z, float w); -ccl_device_inline float4 make_float4(const int4 &i); -ccl_device_inline void print_float4(const char *label, const float4 &a); #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ +ccl_device_inline float4 make_float4(float f); +ccl_device_inline float4 make_float4(const int4 i); +ccl_device_inline void print_float4(ccl_private const char *label, const float4 a); + CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float4_impl.h b/intern/cycles/util/types_float4_impl.h index 77b4fbff788..420d9316926 100644 --- a/intern/cycles/util/types_float4_impl.h +++ b/intern/cycles/util/types_float4_impl.h @@ -52,40 +52,40 @@ __forceinline float &float4::operator[](int i) } # endif -ccl_device_inline float4 make_float4(float f) +ccl_device_inline float4 make_float4(float x, float y, float z, float w) { # ifdef __KERNEL_SSE__ - float4 a(_mm_set1_ps(f)); + return float4(_mm_set_ps(w, z, y, x)); # else - float4 a = {f, f, f, f}; + return {x, y, z, w}; # endif - return a; } -ccl_device_inline float4 make_float4(float x, float y, float z, float w) +#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ + +ccl_device_inline float4 make_float4(float f) { -# ifdef __KERNEL_SSE__ - float4 a(_mm_set_ps(w, z, y, x)); -# else - float4 a = {x, y, z, w}; -# endif - return a; +#ifdef __KERNEL_SSE__ + return float4(_mm_set1_ps(f)); +#else + return make_float4(f, f, f, f); +#endif } -ccl_device_inline float4 make_float4(const int4 &i) +ccl_device_inline float4 make_float4(const int4 i) { -# ifdef __KERNEL_SSE__ - float4 a(_mm_cvtepi32_ps(i.m128)); -# else - float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; -# endif - return a; +#ifdef __KERNEL_SSE__ + return float4(_mm_cvtepi32_ps(i.m128)); +#else + return make_float4((float)i.x, (float)i.y, (float)i.z, (float)i.w); +#endif } -ccl_device_inline void print_float4(const char *label, const float4 &a) +ccl_device_inline void print_float4(ccl_private const char *label, const float4 a) { +#ifdef __KERNEL_PRINTF__ printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w); +#endif } -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int3.h b/intern/cycles/util/types_int3.h index 3196b158ee9..e059ddd3660 100644 --- a/intern/cycles/util/types_int3.h +++ b/intern/cycles/util/types_int3.h @@ -44,9 +44,10 @@ struct ccl_try_align(16) int3 # endif }; -ccl_device_inline int3 make_int3(int i); ccl_device_inline int3 make_int3(int x, int y, int z); -ccl_device_inline void print_int3(const char *label, const int3 &a); #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ +ccl_device_inline int3 make_int3(int i); +ccl_device_inline void print_int3(ccl_private const char *label, const int3 a); + CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int3_impl.h b/intern/cycles/util/types_int3_impl.h index abc0f4e4309..830dfa3c658 100644 --- a/intern/cycles/util/types_int3_impl.h +++ b/intern/cycles/util/types_int3_impl.h @@ -56,38 +56,35 @@ __forceinline int &int3::operator[](int i) } # endif -ccl_device_inline int3 make_int3(int i) +ccl_device_inline int3 make_int3(int x, int y, int z) { -# ifdef __KERNEL_GPU__ - int3 a = {i, i, i}; +# if defined(__KERNEL_GPU__) + return {x, y, z}; +# elif defined(__KERNEL_SSE__) + return int3(_mm_set_epi32(0, z, y, x)); # else -# ifdef __KERNEL_SSE__ - int3 a(_mm_set1_epi32(i)); -# else - int3 a = {i, i, i, i}; -# endif + return {x, y, z, 0}; # endif - return a; } -ccl_device_inline int3 make_int3(int x, int y, int z) +#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ + +ccl_device_inline int3 make_int3(int i) { -# ifdef __KERNEL_GPU__ - int3 a = {x, y, z}; -# else -# ifdef __KERNEL_SSE__ - int3 a(_mm_set_epi32(0, z, y, x)); -# else - int3 a = {x, y, z, 0}; -# endif -# endif - return a; +#if defined(__KERNEL_GPU__) + return make_int3(i, i, i); +#elif defined(__KERNEL_SSE__) + return int3(_mm_set1_epi32(i)); +#else + return {i, i, i, i}; +#endif } -ccl_device_inline void print_int3(const char *label, const int3 &a) +ccl_device_inline void print_int3(ccl_private const char *label, const int3 a) { +#ifdef __KERNEL_PRINTF__ printf("%s: %d %d %d\n", label, a.x, a.y, a.z); +#endif } -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int4.h b/intern/cycles/util/types_int4.h index 0ac53ffc09c..1a13c03e60e 100644 --- a/intern/cycles/util/types_int4.h +++ b/intern/cycles/util/types_int4.h @@ -42,11 +42,12 @@ struct ccl_try_align(16) int4 # endif }; -ccl_device_inline int4 make_int4(int i); ccl_device_inline int4 make_int4(int x, int y, int z, int w); -ccl_device_inline int4 make_int4(const float3 &f); -ccl_device_inline int4 make_int4(const float4 &f); -ccl_device_inline void print_int4(const char *label, const int4 &a); #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ +ccl_device_inline int4 make_int4(int i); +ccl_device_inline int4 make_int4(const float3 f); +ccl_device_inline int4 make_int4(const float4 f); +ccl_device_inline void print_int4(ccl_private const char *label, const int4 a); + CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int4_impl.h b/intern/cycles/util/types_int4_impl.h index 2aab29df941..067794e67b4 100644 --- a/intern/cycles/util/types_int4_impl.h +++ b/intern/cycles/util/types_int4_impl.h @@ -56,52 +56,51 @@ __forceinline int &int4::operator[](int i) } # endif -ccl_device_inline int4 make_int4(int i) +ccl_device_inline int4 make_int4(int x, int y, int z, int w) { # ifdef __KERNEL_SSE__ - int4 a(_mm_set1_epi32(i)); + return int4(_mm_set_epi32(w, z, y, x)); # else - int4 a = {i, i, i, i}; + return {x, y, z, w}; # endif - return a; } -ccl_device_inline int4 make_int4(int x, int y, int z, int w) +#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ + +ccl_device_inline int4 make_int4(int i) { -# ifdef __KERNEL_SSE__ - int4 a(_mm_set_epi32(w, z, y, x)); -# else - int4 a = {x, y, z, w}; -# endif - return a; +#ifdef __KERNEL_SSE__ + return int4(_mm_set1_epi32(i)); +#else + return make_int4(i, i, i, i); +#endif } -ccl_device_inline int4 make_int4(const float3 &f) +ccl_device_inline int4 make_int4(const float3 f) { -# ifdef __KERNEL_SSE__ - int4 a(_mm_cvtps_epi32(f.m128)); -# elif defined(__KERNEL_ONEAPI__) - int4 a = {(int)f.x, (int)f.y, (int)f.z, 0}; -# else - int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; -# endif - return a; +#if defined(__KERNEL_GPU__) + return make_int4((int)f.x, (int)f.y, (int)f.z, 0); +#elif defined(__KERNEL_SSE__) + return int4(_mm_cvtps_epi32(f.m128)); +#else + return make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w); +#endif } -ccl_device_inline int4 make_int4(const float4 &f) +ccl_device_inline int4 make_int4(const float4 f) { -# ifdef __KERNEL_SSE__ - int4 a(_mm_cvtps_epi32(f.m128)); -# else - int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; -# endif - return a; +#ifdef __KERNEL_SSE__ + return int4(_mm_cvtps_epi32(f.m128)); +#else + return make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w); +#endif } -ccl_device_inline void print_int4(const char *label, const int4 &a) +ccl_device_inline void print_int4(ccl_private const char *label, const int4 a) { +#ifdef __KERNEL_PRINTF__ printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); +#endif } -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ CCL_NAMESPACE_END |