Cycles: Fix wrong render results with texture limit and half-float textures

author: Sergey Sharybin <sergey.vfx@gmail.com> 2017-02-23 16:46:22 +0300
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2017-02-23 17:07:45 +0300
commit: 909c8ec07aa87f35107ce6e30e7e3fa8fc5b019f (patch)
tree: 9218ffcf04963879e46163917b3d26c04eaf9733 /intern
parent: b95645c1aec371be44460c82d3b559647b1a4491 (diff)
2 files changed, 80 insertions, 4 deletions
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 5db3384cda4..7285c6ef600 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -110,6 +110,28 @@ ccl_device_inline float4 half4_to_float4(half4 h)
 	return f;
 }
 
+ccl_device_inline half float_to_half(float f)
+{
+	const uint u = __float_as_uint(f);
+	/* Sign bit, shifted to it's position. */
+	uint sign_bit = u & 0x80000000;
+	sign_bit >>= 16;
+	/* Exponent. */
+	uint exponent_bits = u & 0x7f800000;
+	/* Non-sign bits. */
+	uint value_bits = u & 0x7fffffff;
+	value_bits >>= 13;  /* Align mantissa on MSB. */
+	value_bits -= 0x1c000;  /* Adjust bias. */
+	/* Flush-to-zero. */
+	value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits;
+	/* Clamp-to-max. */
+	value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits;
+	/* Denormals-as-zero. */
+	value_bits = (exponent_bits == 0 ? 0 : value_bits);
+	/* Re-insert sign bit and return. */
+	return (value_bits | sign_bit);
+}
+
 #endif
 
 #endif
diff --git a/intern/cycles/util/util_image_impl.h b/intern/cycles/util/util_image_impl.h
index 73ecfda0855..4daf1eaac22 100644
--- a/intern/cycles/util/util_image_impl.h
+++ b/intern/cycles/util/util_image_impl.h
@@ -19,6 +19,7 @@
 
 #include "util_algorithm.h"
 #include "util_debug.h"
+#include "util_half.h"
 #include "util_image.h"
 
 CCL_NAMESPACE_BEGIN
@@ -38,6 +39,52 @@ const T *util_image_read(const vector<T>& pixels,
 	return &pixels[index];
 }
 
+/* Cast input pixel from unknown storage to float. */
+template<typename T>
+inline float cast_to_float(T value);
+
+template<>
+inline float cast_to_float(float value)
+{
+	return value;
+}
+template<>
+inline float cast_to_float(uchar value)
+{
+	return (float)value / 255.0f;
+}
+template<>
+inline float cast_to_float(half value)
+{
+	return half_to_float(value);
+}
+
+/* Cast float value to output pixel type. */
+template<typename T>
+inline T cast_from_float(float value);
+
+template<>
+inline float cast_from_float(float value)
+{
+	return value;
+}
+template<>
+inline uchar cast_from_float(float value)
+{
+	if(value < 0.0f) {
+		return 0;
+	}
+	else if(value > (1.0f - 0.5f / 255.0f)) {
+		return 255;
+	}
+	return (uchar)((255.0f * value) + 0.5f);
+}
+template<>
+inline half cast_from_float(float value)
+{
+	return float_to_half(value);
+}
+
 template<typename T>
 void util_image_downscale_sample(const vector<T>& pixels,
                                  const size_t width,
@@ -71,15 +118,22 @@ void util_image_downscale_sample(const vector<T>& pixels,
 				                                 components,
 				                                 nx, ny, nz);
 				for(size_t k = 0; k < components; ++k) {
-					accum[k] += pixel[k];
+					accum[k] += cast_to_float(pixel[k]);
 				}
 				++count;
 			}
 		}
 	}
-	const float inv_count = 1.0f / (float)count;
-	for(size_t k = 0; k < components; ++k) {
-		result[k] = T(accum[k] * inv_count);
+	if(count != 0) {
+		const float inv_count = 1.0f / (float)count;
+		for(size_t k = 0; k < components; ++k) {
+			result[k] = cast_from_float<T>(accum[k] * inv_count);
+		}
+	}
+	else {
+		for(size_t k = 0; k < components; ++k) {
+			result[k] = T(0.0f);
+		}
 	}
 }
author	Sergey Sharybin <sergey.vfx@gmail.com>	2017-02-23 16:46:22 +0300
committer	Sergey Sharybin <sergey.vfx@gmail.com>	2017-02-23 17:07:45 +0300
commit	909c8ec07aa87f35107ce6e30e7e3fa8fc5b019f (patch)
tree	9218ffcf04963879e46163917b3d26c04eaf9733 /intern
parent	b95645c1aec371be44460c82d3b559647b1a4491 (diff)