Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht@blender.org>2022-07-29 15:40:50 +0300
committerBrecht Van Lommel <brecht@blender.org>2022-08-09 17:07:23 +0300
commit1988665c3c310cbfb66b4e499408fa48c522545c (patch)
tree5e5771dd13165d9d7ab67fe8837ac088de522eec /intern/cycles/util
parent9990792e87238386375c97f431f648c1334bd587 (diff)
Cleanup: make vector types make/print functions consistent between CPU and GPU
Now all the same ones are available on CPU and GPU, which was previously not possible due to the lack of operator overloading in OpenCL. Print functions are no-ops on some GPUs. Ref D15535
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/types.h12
-rw-r--r--intern/cycles/util/types_float2.h3
-rw-r--r--intern/cycles/util/types_float2_impl.h6
-rw-r--r--intern/cycles/util/types_float3.h5
-rw-r--r--intern/cycles/util/types_float3_impl.h41
-rw-r--r--intern/cycles/util/types_float4.h7
-rw-r--r--intern/cycles/util/types_float4_impl.h40
-rw-r--r--intern/cycles/util/types_int3.h5
-rw-r--r--intern/cycles/util/types_int3_impl.h41
-rw-r--r--intern/cycles/util/types_int4.h9
-rw-r--r--intern/cycles/util/types_int4_impl.h57
11 files changed, 119 insertions, 107 deletions
diff --git a/intern/cycles/util/types.h b/intern/cycles/util/types.h
index 26031d9e0fd..d0d8c2941d7 100644
--- a/intern/cycles/util/types.h
+++ b/intern/cycles/util/types.h
@@ -71,6 +71,18 @@ ccl_device_inline bool is_power_of_two(size_t x)
CCL_NAMESPACE_END
+/* Device side printf only tested on CUDA, may work on more GPU devices. */
+#if !defined(__KERNEL_GPU__) || defined(__KERNEL_CUDA__)
+# define __KERNEL_PRINTF__
+#endif
+
+ccl_device_inline void print_float(ccl_private const char *label, const float a)
+{
+#ifdef __KERNEL_PRINTF__
+ printf("%s: %.8f\n", label, (double)a);
+#endif
+}
+
/* Most GPU APIs matching native vector types, so we only need to implement them for
* CPU and oneAPI. */
#if defined(__KERNEL_GPU__) && !defined(__KERNEL_ONEAPI__)
diff --git a/intern/cycles/util/types_float2.h b/intern/cycles/util/types_float2.h
index f37aa1b4ad2..ea510ef832c 100644
--- a/intern/cycles/util/types_float2.h
+++ b/intern/cycles/util/types_float2.h
@@ -20,7 +20,8 @@ struct float2 {
};
ccl_device_inline float2 make_float2(float x, float y);
-ccl_device_inline void print_float2(const char *label, const float2 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+ccl_device_inline void print_float2(ccl_private const char *label, const float2 a);
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float2_impl.h b/intern/cycles/util/types_float2_impl.h
index 9d1820fe17d..7ba7dee2e3a 100644
--- a/intern/cycles/util/types_float2_impl.h
+++ b/intern/cycles/util/types_float2_impl.h
@@ -31,11 +31,13 @@ ccl_device_inline float2 make_float2(float x, float y)
float2 a = {x, y};
return a;
}
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
-ccl_device_inline void print_float2(const char *label, const float2 &a)
+ccl_device_inline void print_float2(ccl_private const char *label, const float2 a)
{
+#ifdef __KERNEL_PRINTF__
printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y);
+#endif
}
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h
index 4e43e928657..87c6b1d3654 100644
--- a/intern/cycles/util/types_float3.h
+++ b/intern/cycles/util/types_float3.h
@@ -47,11 +47,12 @@ struct ccl_try_align(16) float3
# endif
};
-ccl_device_inline float3 make_float3(float f);
ccl_device_inline float3 make_float3(float x, float y, float z);
-ccl_device_inline void print_float3(const char *label, const float3 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+ccl_device_inline float3 make_float3(float f);
+ccl_device_inline void print_float3(ccl_private const char *label, const float3 a);
+
/* Smaller float3 for storage. For math operations this must be converted to float3, so that on the
* CPU SIMD instructions can be used. */
#if defined(__KERNEL_METAL__)
diff --git a/intern/cycles/util/types_float3_impl.h b/intern/cycles/util/types_float3_impl.h
index cbd3f76dae4..da76ab2ab2a 100644
--- a/intern/cycles/util/types_float3_impl.h
+++ b/intern/cycles/util/types_float3_impl.h
@@ -56,38 +56,35 @@ __forceinline float &float3::operator[](int i)
}
# endif
-ccl_device_inline float3 make_float3(float f)
+ccl_device_inline float3 make_float3(float x, float y, float z)
{
-# ifdef __KERNEL_GPU__
- float3 a = {f, f, f};
+# if defined(__KERNEL_GPU__)
+ return {x, y, z};
+# elif defined(__KERNEL_SSE__)
+ return float3(_mm_set_ps(0.0f, z, y, x));
# else
-# ifdef __KERNEL_SSE__
- float3 a(_mm_set1_ps(f));
-# else
- float3 a = {f, f, f, f};
-# endif
+ return {x, y, z, 0.0f};
# endif
- return a;
}
-ccl_device_inline float3 make_float3(float x, float y, float z)
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+
+ccl_device_inline float3 make_float3(float f)
{
-# ifdef __KERNEL_GPU__
- float3 a = {x, y, z};
-# else
-# ifdef __KERNEL_SSE__
- float3 a(_mm_set_ps(0.0f, z, y, x));
-# else
- float3 a = {x, y, z, 0.0f};
-# endif
-# endif
- return a;
+#if defined(__KERNEL_GPU__)
+ return make_float3(f, f, f);
+#elif defined(__KERNEL_SSE__)
+ return float3(_mm_set1_ps(f));
+#else
+ return {f, f, f, f};
+#endif
}
-ccl_device_inline void print_float3(const char *label, const float3 &a)
+ccl_device_inline void print_float3(ccl_private const char *label, const float3 a)
{
+#ifdef __KERNEL_PRINTF__
printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z);
+#endif
}
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float4.h b/intern/cycles/util/types_float4.h
index 6b301c47362..a347cfce9a1 100644
--- a/intern/cycles/util/types_float4.h
+++ b/intern/cycles/util/types_float4.h
@@ -40,10 +40,11 @@ struct ccl_try_align(16) float4
# endif
};
-ccl_device_inline float4 make_float4(float f);
ccl_device_inline float4 make_float4(float x, float y, float z, float w);
-ccl_device_inline float4 make_float4(const int4 &i);
-ccl_device_inline void print_float4(const char *label, const float4 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+ccl_device_inline float4 make_float4(float f);
+ccl_device_inline float4 make_float4(const int4 i);
+ccl_device_inline void print_float4(ccl_private const char *label, const float4 a);
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float4_impl.h b/intern/cycles/util/types_float4_impl.h
index 77b4fbff788..420d9316926 100644
--- a/intern/cycles/util/types_float4_impl.h
+++ b/intern/cycles/util/types_float4_impl.h
@@ -52,40 +52,40 @@ __forceinline float &float4::operator[](int i)
}
# endif
-ccl_device_inline float4 make_float4(float f)
+ccl_device_inline float4 make_float4(float x, float y, float z, float w)
{
# ifdef __KERNEL_SSE__
- float4 a(_mm_set1_ps(f));
+ return float4(_mm_set_ps(w, z, y, x));
# else
- float4 a = {f, f, f, f};
+ return {x, y, z, w};
# endif
- return a;
}
-ccl_device_inline float4 make_float4(float x, float y, float z, float w)
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+
+ccl_device_inline float4 make_float4(float f)
{
-# ifdef __KERNEL_SSE__
- float4 a(_mm_set_ps(w, z, y, x));
-# else
- float4 a = {x, y, z, w};
-# endif
- return a;
+#ifdef __KERNEL_SSE__
+ return float4(_mm_set1_ps(f));
+#else
+ return make_float4(f, f, f, f);
+#endif
}
-ccl_device_inline float4 make_float4(const int4 &i)
+ccl_device_inline float4 make_float4(const int4 i)
{
-# ifdef __KERNEL_SSE__
- float4 a(_mm_cvtepi32_ps(i.m128));
-# else
- float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w};
-# endif
- return a;
+#ifdef __KERNEL_SSE__
+ return float4(_mm_cvtepi32_ps(i.m128));
+#else
+ return make_float4((float)i.x, (float)i.y, (float)i.z, (float)i.w);
+#endif
}
-ccl_device_inline void print_float4(const char *label, const float4 &a)
+ccl_device_inline void print_float4(ccl_private const char *label, const float4 a)
{
+#ifdef __KERNEL_PRINTF__
printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w);
+#endif
}
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int3.h b/intern/cycles/util/types_int3.h
index 3196b158ee9..e059ddd3660 100644
--- a/intern/cycles/util/types_int3.h
+++ b/intern/cycles/util/types_int3.h
@@ -44,9 +44,10 @@ struct ccl_try_align(16) int3
# endif
};
-ccl_device_inline int3 make_int3(int i);
ccl_device_inline int3 make_int3(int x, int y, int z);
-ccl_device_inline void print_int3(const char *label, const int3 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+ccl_device_inline int3 make_int3(int i);
+ccl_device_inline void print_int3(ccl_private const char *label, const int3 a);
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int3_impl.h b/intern/cycles/util/types_int3_impl.h
index abc0f4e4309..830dfa3c658 100644
--- a/intern/cycles/util/types_int3_impl.h
+++ b/intern/cycles/util/types_int3_impl.h
@@ -56,38 +56,35 @@ __forceinline int &int3::operator[](int i)
}
# endif
-ccl_device_inline int3 make_int3(int i)
+ccl_device_inline int3 make_int3(int x, int y, int z)
{
-# ifdef __KERNEL_GPU__
- int3 a = {i, i, i};
+# if defined(__KERNEL_GPU__)
+ return {x, y, z};
+# elif defined(__KERNEL_SSE__)
+ return int3(_mm_set_epi32(0, z, y, x));
# else
-# ifdef __KERNEL_SSE__
- int3 a(_mm_set1_epi32(i));
-# else
- int3 a = {i, i, i, i};
-# endif
+ return {x, y, z, 0};
# endif
- return a;
}
-ccl_device_inline int3 make_int3(int x, int y, int z)
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+
+ccl_device_inline int3 make_int3(int i)
{
-# ifdef __KERNEL_GPU__
- int3 a = {x, y, z};
-# else
-# ifdef __KERNEL_SSE__
- int3 a(_mm_set_epi32(0, z, y, x));
-# else
- int3 a = {x, y, z, 0};
-# endif
-# endif
- return a;
+#if defined(__KERNEL_GPU__)
+ return make_int3(i, i, i);
+#elif defined(__KERNEL_SSE__)
+ return int3(_mm_set1_epi32(i));
+#else
+ return {i, i, i, i};
+#endif
}
-ccl_device_inline void print_int3(const char *label, const int3 &a)
+ccl_device_inline void print_int3(ccl_private const char *label, const int3 a)
{
+#ifdef __KERNEL_PRINTF__
printf("%s: %d %d %d\n", label, a.x, a.y, a.z);
+#endif
}
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int4.h b/intern/cycles/util/types_int4.h
index 0ac53ffc09c..1a13c03e60e 100644
--- a/intern/cycles/util/types_int4.h
+++ b/intern/cycles/util/types_int4.h
@@ -42,11 +42,12 @@ struct ccl_try_align(16) int4
# endif
};
-ccl_device_inline int4 make_int4(int i);
ccl_device_inline int4 make_int4(int x, int y, int z, int w);
-ccl_device_inline int4 make_int4(const float3 &f);
-ccl_device_inline int4 make_int4(const float4 &f);
-ccl_device_inline void print_int4(const char *label, const int4 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+ccl_device_inline int4 make_int4(int i);
+ccl_device_inline int4 make_int4(const float3 f);
+ccl_device_inline int4 make_int4(const float4 f);
+ccl_device_inline void print_int4(ccl_private const char *label, const int4 a);
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int4_impl.h b/intern/cycles/util/types_int4_impl.h
index 2aab29df941..067794e67b4 100644
--- a/intern/cycles/util/types_int4_impl.h
+++ b/intern/cycles/util/types_int4_impl.h
@@ -56,52 +56,51 @@ __forceinline int &int4::operator[](int i)
}
# endif
-ccl_device_inline int4 make_int4(int i)
+ccl_device_inline int4 make_int4(int x, int y, int z, int w)
{
# ifdef __KERNEL_SSE__
- int4 a(_mm_set1_epi32(i));
+ return int4(_mm_set_epi32(w, z, y, x));
# else
- int4 a = {i, i, i, i};
+ return {x, y, z, w};
# endif
- return a;
}
-ccl_device_inline int4 make_int4(int x, int y, int z, int w)
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+
+ccl_device_inline int4 make_int4(int i)
{
-# ifdef __KERNEL_SSE__
- int4 a(_mm_set_epi32(w, z, y, x));
-# else
- int4 a = {x, y, z, w};
-# endif
- return a;
+#ifdef __KERNEL_SSE__
+ return int4(_mm_set1_epi32(i));
+#else
+ return make_int4(i, i, i, i);
+#endif
}
-ccl_device_inline int4 make_int4(const float3 &f)
+ccl_device_inline int4 make_int4(const float3 f)
{
-# ifdef __KERNEL_SSE__
- int4 a(_mm_cvtps_epi32(f.m128));
-# elif defined(__KERNEL_ONEAPI__)
- int4 a = {(int)f.x, (int)f.y, (int)f.z, 0};
-# else
- int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
-# endif
- return a;
+#if defined(__KERNEL_GPU__)
+ return make_int4((int)f.x, (int)f.y, (int)f.z, 0);
+#elif defined(__KERNEL_SSE__)
+ return int4(_mm_cvtps_epi32(f.m128));
+#else
+ return make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w);
+#endif
}
-ccl_device_inline int4 make_int4(const float4 &f)
+ccl_device_inline int4 make_int4(const float4 f)
{
-# ifdef __KERNEL_SSE__
- int4 a(_mm_cvtps_epi32(f.m128));
-# else
- int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
-# endif
- return a;
+#ifdef __KERNEL_SSE__
+ return int4(_mm_cvtps_epi32(f.m128));
+#else
+ return make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w);
+#endif
}
-ccl_device_inline void print_int4(const char *label, const int4 &a)
+ccl_device_inline void print_int4(ccl_private const char *label, const int4 a)
{
+#ifdef __KERNEL_PRINTF__
printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w);
+#endif
}
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
CCL_NAMESPACE_END