Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Dinges <blender@dingto.org>2016-08-11 23:47:53 +0300
committerThomas Dinges <blender@dingto.org>2016-08-11 23:47:53 +0300
commit9d236ac06c2b6511365eb53f84bbe366c76acc72 (patch)
treeecf56359ed8beb9c96d215e2e1c19a5f45784709 /intern/cycles/util
parent5ac7ef873b7b5895b3482387ba05168e694c6525 (diff)
Cycles: Enable half float support (4 channels and 1 channel) on CUDA.
Atm OpenEXR half files benefit from this and will use only 1/2 of the memory now. More space for HDRs! Part of my GSoC 2016.
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/util_half.h12
-rw-r--r--intern/cycles/util/util_texture.h4
2 files changed, 10 insertions, 6 deletions
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index ae85ab3a915..5db3384cda4 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -33,17 +33,21 @@ CCL_NAMESPACE_BEGIN
#else
+/* CUDA has its own half data type, no need to define then */
+#ifndef __KERNEL_CUDA__
typedef unsigned short half;
+#endif
+
struct half4 { half x, y, z, w; };
#ifdef __KERNEL_CUDA__
ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
{
- h[0] = __float2half_rn(f.x * scale);
- h[1] = __float2half_rn(f.y * scale);
- h[2] = __float2half_rn(f.z * scale);
- h[3] = __float2half_rn(f.w * scale);
+ h[0] = __float2half(f.x * scale);
+ h[1] = __float2half(f.y * scale);
+ h[2] = __float2half(f.z * scale);
+ h[3] = __float2half(f.w * scale);
}
#else
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index ec3ee2b8191..be1177d3be9 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -52,10 +52,10 @@ CCL_NAMESPACE_BEGIN
/* CUDA (Kepler, Geforce 6xx and above) */
#define TEX_NUM_FLOAT4_CUDA_KEPLER 1024
#define TEX_NUM_BYTE4_CUDA_KEPLER 1024
-#define TEX_NUM_HALF4_CUDA_KEPLER 0
+#define TEX_NUM_HALF4_CUDA_KEPLER 1024
#define TEX_NUM_FLOAT_CUDA_KEPLER 1024
#define TEX_NUM_BYTE_CUDA_KEPLER 1024
-#define TEX_NUM_HALF_CUDA_KEPLER 0
+#define TEX_NUM_HALF_CUDA_KEPLER 1024
#define TEX_START_FLOAT4_CUDA_KEPLER 0
#define TEX_START_BYTE4_CUDA_KEPLER TEX_NUM_FLOAT4_CUDA_KEPLER
#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)