diff options
author | Thomas Dinges <blender@dingto.org> | 2016-08-11 23:47:53 +0300 |
---|---|---|
committer | Thomas Dinges <blender@dingto.org> | 2016-08-11 23:47:53 +0300 |
commit | 9d236ac06c2b6511365eb53f84bbe366c76acc72 (patch) | |
tree | ecf56359ed8beb9c96d215e2e1c19a5f45784709 /intern/cycles/util | |
parent | 5ac7ef873b7b5895b3482387ba05168e694c6525 (diff) |
Cycles: Enable half float support (4 channels and 1 channel) on CUDA.
At the moment, OpenEXR half-float files benefit from this and will now use only half of the memory. More space for HDRs!
Part of my GSoC 2016.
Diffstat (limited to 'intern/cycles/util')
-rw-r--r-- | intern/cycles/util/util_half.h | 12 | ||||
-rw-r--r-- | intern/cycles/util/util_texture.h | 4 |
2 files changed, 10 insertions, 6 deletions
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index ae85ab3a915..5db3384cda4 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -33,17 +33,21 @@ CCL_NAMESPACE_BEGIN
 #else
+/* CUDA has its own half data type, no need to define then */
+#ifndef __KERNEL_CUDA__
 typedef unsigned short half;
+#endif
+
 struct half4 { half x, y, z, w; };
 #ifdef __KERNEL_CUDA__
 ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
 {
- h[0] = __float2half_rn(f.x * scale);
- h[1] = __float2half_rn(f.y * scale);
- h[2] = __float2half_rn(f.z * scale);
- h[3] = __float2half_rn(f.w * scale);
+ h[0] = __float2half(f.x * scale);
+ h[1] = __float2half(f.y * scale);
+ h[2] = __float2half(f.z * scale);
+ h[3] = __float2half(f.w * scale);
 }
 #else
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index ec3ee2b8191..be1177d3be9 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -52,10 +52,10 @@ CCL_NAMESPACE_BEGIN
 /* CUDA (Kepler, Geforce 6xx and above) */
 #define TEX_NUM_FLOAT4_CUDA_KEPLER 1024
 #define TEX_NUM_BYTE4_CUDA_KEPLER 1024
-#define TEX_NUM_HALF4_CUDA_KEPLER 0
+#define TEX_NUM_HALF4_CUDA_KEPLER 1024
 #define TEX_NUM_FLOAT_CUDA_KEPLER 1024
 #define TEX_NUM_BYTE_CUDA_KEPLER 1024
-#define TEX_NUM_HALF_CUDA_KEPLER 0
+#define TEX_NUM_HALF_CUDA_KEPLER 1024
 #define TEX_START_FLOAT4_CUDA_KEPLER 0
 #define TEX_START_BYTE4_CUDA_KEPLER TEX_NUM_FLOAT4_CUDA_KEPLER
 #define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER) |