Cycles: Enable half float support (4 channels and 1 channel) on CUDA.

At the moment, OpenEXR half-float files benefit from this and now use only half of the memory. More space for HDRs! Part of my GSoC 2016.
author: Thomas Dinges <blender@dingto.org> 2016-08-11 23:47:53 +0300
committer: Thomas Dinges <blender@dingto.org> 2016-08-11 23:47:53 +0300
commit: 9d236ac06c2b6511365eb53f84bbe366c76acc72 (patch)
tree: ecf56359ed8beb9c96d215e2e1c19a5f45784709 /intern/cycles/util
parent: 5ac7ef873b7b5895b3482387ba05168e694c6525 (diff)
2 files changed, 10 insertions, 6 deletions
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index ae85ab3a915..5db3384cda4 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -33,17 +33,21 @@ CCL_NAMESPACE_BEGIN
 
 #else
 
+/* CUDA has its own half data type, so there is no need to define it then. */
+#ifndef __KERNEL_CUDA__
 typedef unsigned short half;
+#endif
+
 struct half4 { half x, y, z, w; };
 
 #ifdef __KERNEL_CUDA__
 
 ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
 {
-	h[0] = __float2half_rn(f.x * scale);
-	h[1] = __float2half_rn(f.y * scale);
-	h[2] = __float2half_rn(f.z * scale);
-	h[3] = __float2half_rn(f.w * scale);
+	h[0] = __float2half(f.x * scale);
+	h[1] = __float2half(f.y * scale);
+	h[2] = __float2half(f.z * scale);
+	h[3] = __float2half(f.w * scale);
 }
 
 #else
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index ec3ee2b8191..be1177d3be9 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -52,10 +52,10 @@ CCL_NAMESPACE_BEGIN
 /* CUDA (Kepler, Geforce 6xx and above) */
 #define TEX_NUM_FLOAT4_CUDA_KEPLER		1024
 #define TEX_NUM_BYTE4_CUDA_KEPLER		1024
-#define TEX_NUM_HALF4_CUDA_KEPLER		0
+#define TEX_NUM_HALF4_CUDA_KEPLER		1024
 #define TEX_NUM_FLOAT_CUDA_KEPLER		1024
 #define TEX_NUM_BYTE_CUDA_KEPLER		1024
-#define TEX_NUM_HALF_CUDA_KEPLER		0
+#define TEX_NUM_HALF_CUDA_KEPLER		1024
 #define TEX_START_FLOAT4_CUDA_KEPLER	0
 #define TEX_START_BYTE4_CUDA_KEPLER		TEX_NUM_FLOAT4_CUDA_KEPLER
 #define TEX_START_HALF4_CUDA_KEPLER		(TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)
author	Thomas Dinges <blender@dingto.org>	2016-08-11 23:47:53 +0300
committer	Thomas Dinges <blender@dingto.org>	2016-08-11 23:47:53 +0300
commit	9d236ac06c2b6511365eb53f84bbe366c76acc72 (patch)
tree	ecf56359ed8beb9c96d215e2e1c19a5f45784709 /intern/cycles/util
parent	5ac7ef873b7b5895b3482387ba05168e694c6525 (diff)