git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--  intern/cycles/util/util_half.h  | 170
-rw-r--r--  intern/cycles/util/util_image.h |   4
-rw-r--r--  intern/cycles/util/util_math.h  |  30
3 files changed, 112 insertions(+), 92 deletions(-)
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 81723abe1e2..0db5acd319a 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -59,99 +59,16 @@ struct half4 {
half x, y, z, w;
};
-#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
+/* Conversion to/from half float for image textures.
+ *
+ * Simplified float to half for fast sampling on processors without a native
+ * instruction, also eliminating any NaN and inf values. */
-ccl_device_inline void float4_store_half(ccl_private half *h, float4 f)
+ccl_device_inline half float_to_half_image(float f)
{
- h[0] = __float2half(f.x);
- h[1] = __float2half(f.y);
- h[2] = __float2half(f.z);
- h[3] = __float2half(f.w);
-}
-
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
+ return __float2half(f);
#else
-
-ccl_device_inline void float4_store_half(ccl_private half *h, float4 f)
-{
-
-# ifndef __KERNEL_SSE2__
- for (int i = 0; i < 4; i++) {
- /* optimized float to half for pixels:
- * assumes no negative, no nan, no inf, and sets denormal to 0 */
- union {
- uint i;
- float f;
- } in;
- in.f = (f[i] > 0.0f) ? ((f[i] < 65504.0f) ? f[i] : 65504.0f) : 0.0f;
- int x = in.i;
-
- int absolute = x & 0x7FFFFFFF;
- int Z = absolute + 0xC8000000;
- int result = (absolute < 0x38800000) ? 0 : Z;
- int rshift = (result >> 13);
-
- h[i] = (rshift & 0x7FFF);
- }
-# else
- /* same as above with SSE */
- ssef x = min(max(load4f(f), 0.0f), 65504.0f);
-
-# ifdef __KERNEL_AVX2__
- ssei rpack = _mm_cvtps_ph(x, 0);
-# else
- ssei absolute = cast(x) & 0x7FFFFFFF;
- ssei Z = absolute + 0xC8000000;
- ssei result = andnot(absolute < 0x38800000, Z);
- ssei rshift = (result >> 13) & 0x7FFF;
- ssei rpack = _mm_packs_epi32(rshift, rshift);
-# endif
-
- _mm_storel_pi((__m64 *)h, _mm_castsi128_ps(rpack));
-# endif
-}
-
-# ifndef __KERNEL_HIP__
-
-ccl_device_inline float half_to_float(half h)
-{
- float f;
-
- *((int *)&f) = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13);
-
- return f;
-}
-# else
-
-ccl_device_inline float half_to_float(std::uint32_t a) noexcept
-{
-
- std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
-
- std::uint32_t v = __float_as_uint(__uint_as_float(u) *
- __uint_as_float(0x77800000U) /*0x1.0p+112f*/) +
- 0x38000000U;
-
- u = (a & 0x7fff) != 0 ? v : u;
-
- return __uint_as_float(u) * __uint_as_float(0x07800000U) /*0x1.0p-112f*/;
-}
-
-# endif /* __KERNEL_HIP__ */
-
-ccl_device_inline float4 half4_to_float4(half4 h)
-{
- float4 f;
-
- f.x = half_to_float(h.x);
- f.y = half_to_float(h.y);
- f.z = half_to_float(h.z);
- f.w = half_to_float(h.w);
-
- return f;
-}
-
-ccl_device_inline half float_to_half(float f)
-{
const uint u = __float_as_uint(f);
/* Sign bit, shifted to its position. */
uint sign_bit = u & 0x80000000;
@@ -170,10 +87,83 @@ ccl_device_inline half float_to_half(float f)
value_bits = (exponent_bits == 0 ? 0 : value_bits);
/* Re-insert sign bit and return. */
return (value_bits | sign_bit);
+#endif
+}
+
+ccl_device_inline float half_to_float_image(half h)
+{
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
+ return __half2float(h);
+#else
+ const int x = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13);
+ return __int_as_float(x);
+#endif
}
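
As an aside for readers tracing the bit manipulation above: the expression rebiases the 5-bit half exponent (bias 15) to the 8-bit float exponent (bias 127), since (127 - 15) << 10 == 0x1C000. A minimal standalone C sketch, not part of the patch, exercising the same expression with __int_as_float stood in for by a memcpy bit cast:

#include <stdio.h>
#include <string.h>

/* Same expression as the CPU path of half_to_float_image above. */
static float half_bits_to_float(unsigned short h)
{
  unsigned x = ((unsigned)(h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) |
               ((h & 0x03FF) << 13);
  float f;
  memcpy(&f, &x, sizeof(f)); /* portable stand-in for __int_as_float */
  return f;
}

int main(void)
{
  printf("%f\n", half_bits_to_float(0x3C00)); /* prints 1.000000 */
  printf("%f\n", half_bits_to_float(0xC000)); /* prints -2.000000 */
  return 0;
}
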
+ccl_device_inline float4 half4_to_float4_image(const half4 h)
+{
+ /* Unable to use this because it gives different results than half_to_float_image;
+  * can we modify float_to_half_image so the conversion results are identical? */
+#if 0 /* defined(__KERNEL_AVX2__) */
+ /* CPU: AVX. */
+ __m128i x = _mm_castpd_si128(_mm_load_sd((const double *)&h));
+ return float4(_mm_cvtph_ps(x));
#endif
+ const float4 f = make_float4(half_to_float_image(h.x),
+ half_to_float_image(h.y),
+ half_to_float_image(h.z),
+ half_to_float_image(h.w));
+ return f;
+}
+
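The question raised in the comment is mechanically checkable, since there are only 65536 half bit patterns. A hedged test sketch, standalone and assuming an x86 host compiled with -mf16c; _cvtsh_ss is the scalar F16C conversion underlying _mm_cvtph_ps, and half_to_float_image_ref is a host-side copy of the scalar path above:

#include <immintrin.h>
#include <stdio.h>
#include <string.h>

static float half_to_float_image_ref(unsigned short h)
{
  unsigned x = ((unsigned)(h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) |
               ((h & 0x03FF) << 13);
  float f;
  memcpy(&f, &x, sizeof(f));
  return f;
}

int main(void)
{
  int mismatches = 0;
  for (unsigned i = 0; i < 0x10000; i++) {
    float a = half_to_float_image_ref((unsigned short)i);
    float b = _cvtsh_ss((unsigned short)i); /* hardware F16C conversion */
    unsigned ua, ub;
    memcpy(&ua, &a, 4);
    memcpy(&ub, &b, 4);
    mismatches += (ua != ub);
  }
  /* Mismatches are expected for denormals, inf and NaN, which the
   * image path deliberately simplifies. */
  printf("%d of 65536 patterns differ\n", mismatches);
  return 0;
}
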
+/* Conversion to half float texture for display.
+ *
+ * Simplified float to half for fast display texture conversion on processors
+ * without a native instruction. Assumes no negative, no NaN, no inf, and sets
+ * denormal to 0. */
+
+ccl_device_inline half float_to_half_display(const float f)
+{
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
+ return __float2half(f);
+#else
+ const int x = __float_as_int((f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f);
+ const int absolute = x & 0x7FFFFFFF;
+ const int Z = absolute + 0xC8000000;
+ const int result = (absolute < 0x38800000) ? 0 : Z;
+ const int rshift = (result >> 13);
+ return (rshift & 0x7FFF);
+#endif
+}
+
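The scalar display path is small enough to trace by hand. A standalone C sketch (__float_as_int replaced by a memcpy bit cast) showing the clamp to the largest finite half (65504), the exponent rebias via the 0xC8000000 addend, and the flush-to-zero of values below 2^-14:

#include <stdio.h>
#include <string.h>

static unsigned short float_to_half_display_ref(float f)
{
  const float c = (f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f;
  int x;
  memcpy(&x, &c, sizeof(x)); /* stand-in for __float_as_int */
  const int absolute = x & 0x7FFFFFFF;
  /* Adding 0xC8000000 subtracts (127 - 15) << 23 from the exponent,
   * so the subsequent >> 13 lands on the half bit layout. */
  const int Z = absolute + 0xC8000000;
  const int result = (absolute < 0x38800000) ? 0 : Z; /* flush < 2^-14 */
  return (unsigned short)((result >> 13) & 0x7FFF);
}

int main(void)
{
  printf("0x%04X\n", float_to_half_display_ref(1.0f));  /* 0x3C00 */
  printf("0x%04X\n", float_to_half_display_ref(1e-8f)); /* 0x0000 (flushed) */
  printf("0x%04X\n", float_to_half_display_ref(1e9f));  /* 0x7BFF (clamped) */
  return 0;
}
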
+ccl_device_inline half4 float4_to_half4_display(const float4 f)
+{
+#ifdef __KERNEL_SSE2__
+ /* CPU: SSE and AVX. */
+ ssef x = min(max(load4f(f), 0.0f), 65504.0f);
+# ifdef __KERNEL_AVX2__
+ ssei rpack = _mm_cvtps_ph(x, 0);
+# else
+ ssei absolute = cast(x) & 0x7FFFFFFF;
+ ssei Z = absolute + 0xC8000000;
+ ssei result = andnot(absolute < 0x38800000, Z);
+ ssei rshift = (result >> 13) & 0x7FFF;
+ ssei rpack = _mm_packs_epi32(rshift, rshift);
+# endif
+ half4 h;
+ _mm_storel_pi((__m64 *)&h, _mm_castsi128_ps(rpack));
+ return h;
+#else
+ /* GPU and scalar fallback. */
+ const half4 h = {float_to_half_display(f.x),
+ float_to_half_display(f.y),
+ float_to_half_display(f.z),
+ float_to_half_display(f.w)};
+ return h;
+#endif
+}
+
CCL_NAMESPACE_END
#endif /* __UTIL_HALF_H__ */
diff --git a/intern/cycles/util/util_image.h b/intern/cycles/util/util_image.h
index 27ec7ffb423..b082b971613 100644
--- a/intern/cycles/util/util_image.h
+++ b/intern/cycles/util/util_image.h
@@ -56,7 +56,7 @@ template<> inline float util_image_cast_to_float(uint16_t value)
}
template<> inline float util_image_cast_to_float(half value)
{
- return half_to_float(value);
+ return half_to_float_image(value);
}
/* Cast float value to output pixel type. */
@@ -88,7 +88,7 @@ template<> inline uint16_t util_image_cast_from_float(float value)
}
template<> inline half util_image_cast_from_float(float value)
{
- return float_to_half(value);
+ return float_to_half_image(value);
}
CCL_NAMESPACE_END
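
For context, these specializations are what a generic pixel loop dispatches through. A hypothetical usage sketch (scanline_to_float is illustrative and assumes util_image.h is included; only the cast template comes from the header):

/* Convert a scanline of storage type T to float; with T = half this
 * now routes through half_to_float_image(). */
template<typename T> void scanline_to_float(const T *src, float *dst, const int width)
{
  for (int x = 0; x < width; x++) {
    dst[x] = util_image_cast_to_float(src[x]);
  }
}
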
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index f834011a032..535b6881d3f 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -268,6 +268,36 @@ ccl_device_inline float4 __int4_as_float4(int4 i)
#endif
}
+template<typename T> ccl_device_inline uint pointer_pack_to_uint_0(T *ptr)
+{
+ return ((uint64_t)ptr) & 0xFFFFFFFF;
+}
+
+template<typename T> ccl_device_inline uint pointer_pack_to_uint_1(T *ptr)
+{
+ return (((uint64_t)ptr) >> 32) & 0xFFFFFFFF;
+}
+
+template<typename T> ccl_device_inline T *pointer_unpack_from_uint(const uint a, const uint b)
+{
+ return (T *)(((uint64_t)b << 32) | a);
+}
+
+ccl_device_inline uint uint16_pack_to_uint(const uint a, const uint b)
+{
+ return (a << 16) | b;
+}
+
+ccl_device_inline uint uint16_unpack_from_uint_0(const uint i)
+{
+ return i >> 16;
+}
+
+ccl_device_inline uint uint16_unpack_from_uint_1(const uint i)
+{
+ return i & 0xFFFF;
+}
+
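These helpers exist to squeeze a 64-bit pointer (or two 16-bit values) through 32-bit uint payloads. A minimal host-side round-trip sketch, standalone C++ with the ccl_device_inline qualifiers dropped:

#include <cassert>
#include <cstdint>

typedef unsigned int uint;

template<typename T> uint pointer_pack_to_uint_0(T *ptr)
{
  return ((uint64_t)ptr) & 0xFFFFFFFF;
}
template<typename T> uint pointer_pack_to_uint_1(T *ptr)
{
  return (((uint64_t)ptr) >> 32) & 0xFFFFFFFF;
}
template<typename T> T *pointer_unpack_from_uint(const uint a, const uint b)
{
  return (T *)(((uint64_t)b << 32) | a);
}

int main()
{
  int value = 42;
  /* Low 32 bits in a, high 32 bits in b. */
  const uint a = pointer_pack_to_uint_0(&value);
  const uint b = pointer_pack_to_uint_1(&value);
  assert(pointer_unpack_from_uint<int>(a, b) == &value);
  return 0;
}

The uint16 variants follow the same pattern within a single word: pack stores a in the high 16 bits, so unpack_0 recovers a and unpack_1 recovers b.
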
/* Versions of functions which are safe for fast math. */
ccl_device_inline bool isnan_safe(float f)
{