Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/util/util_half.h')
-rw-r--r--intern/cycles/util/util_half.h46
1 files changed, 18 insertions, 28 deletions
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index a8d4ee75e20..d9edfec5da3 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -28,14 +28,8 @@ CCL_NAMESPACE_BEGIN
/* Half Floats */
-#ifdef __KERNEL_OPENCL__
-
-# define float4_store_half(h, f, scale) vstore_half4(f *(scale), 0, h);
-
-#else
-
/* CUDA has its own half data type, no need to define then */
-# ifndef __KERNEL_CUDA__
+#ifndef __KERNEL_CUDA__
/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from
* unsigned shorts. */
class half {
@@ -59,27 +53,27 @@ class half {
private:
unsigned short v;
};
-# endif
+#endif
struct half4 {
half x, y, z, w;
};
-# ifdef __KERNEL_CUDA__
+#ifdef __KERNEL_CUDA__
-ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f)
{
- h[0] = __float2half(f.x * scale);
- h[1] = __float2half(f.y * scale);
- h[2] = __float2half(f.z * scale);
- h[3] = __float2half(f.w * scale);
+ h[0] = __float2half(f.x);
+ h[1] = __float2half(f.y);
+ h[2] = __float2half(f.z);
+ h[3] = __float2half(f.w);
}
-# else
+#else
-ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f)
{
-# ifndef __KERNEL_SSE2__
+# ifndef __KERNEL_SSE2__
for (int i = 0; i < 4; i++) {
/* optimized float to half for pixels:
* assumes no negative, no nan, no inf, and sets denormal to 0 */
@@ -87,8 +81,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
uint i;
float f;
} in;
- float fscale = f[i] * scale;
- in.f = (fscale > 0.0f) ? ((fscale < 65504.0f) ? fscale : 65504.0f) : 0.0f;
+ in.f = (f[i] > 0.0f) ? ((f[i] < 65504.0f) ? f[i] : 65504.0f) : 0.0f;
int x = in.i;
int absolute = x & 0x7FFFFFFF;
@@ -98,23 +91,22 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
h[i] = (rshift & 0x7FFF);
}
-# else
+# else
/* same as above with SSE */
- ssef fscale = load4f(f) * scale;
- ssef x = min(max(fscale, 0.0f), 65504.0f);
+ ssef x = min(max(load4f(f), 0.0f), 65504.0f);
-# ifdef __KERNEL_AVX2__
+# ifdef __KERNEL_AVX2__
ssei rpack = _mm_cvtps_ph(x, 0);
-# else
+# else
ssei absolute = cast(x) & 0x7FFFFFFF;
ssei Z = absolute + 0xC8000000;
ssei result = andnot(absolute < 0x38800000, Z);
ssei rshift = (result >> 13) & 0x7FFF;
ssei rpack = _mm_packs_epi32(rshift, rshift);
-# endif
+# endif
_mm_storel_pi((__m64 *)h, _mm_castsi128_ps(rpack));
-# endif
+# endif
}
ccl_device_inline float half_to_float(half h)
@@ -160,8 +152,6 @@ ccl_device_inline half float_to_half(float f)
return (value_bits | sign_bit);
}
-# endif
-
#endif
CCL_NAMESPACE_END