Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorThomas Dinges <blender@dingto.org>2016-08-11 23:47:53 +0300
committerThomas Dinges <blender@dingto.org>2016-08-11 23:47:53 +0300
commit9d236ac06c2b6511365eb53f84bbe366c76acc72 (patch)
treeecf56359ed8beb9c96d215e2e1c19a5f45784709 /intern
parent5ac7ef873b7b5895b3482387ba05168e694c6525 (diff)
Cycles: Enable half float support (4 channels and 1 channel) on CUDA.
Atm OpenEXR half files benefit from this and will use only 1/2 of the memory now. More space for HDRs! Part of my GSoC 2016.
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/device/device_cuda.cpp9
-rw-r--r--intern/cycles/kernel/kernel_compat_cuda.h1
-rw-r--r--intern/cycles/kernel/svm/svm_image.h4
-rw-r--r--intern/cycles/util/util_half.h12
-rw-r--r--intern/cycles/util/util_texture.h4
5 files changed, 21 insertions, 9 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index a85f34082db..76e52498b42 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -576,6 +576,7 @@ public:
case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
+ case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
default: assert(0); return;
}
@@ -747,8 +748,12 @@ public:
}
/* Resize once */
- if(flat_slot >= bindless_mapping.size())
- bindless_mapping.resize(4096); /*TODO(dingto): Make this a variable */
+ if(flat_slot >= bindless_mapping.size()) {
+ /* Allocate some slots in advance, to reduce amount
+ * of re-allocations.
+ */
+ bindless_mapping.resize(flat_slot + 128);
+ }
/* Set Mapping and tag that we need to (re-)upload to device */
bindless_mapping.get_data()[flat_slot] = (uint)tex;
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 063220b542e..d656fac81e6 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -31,6 +31,7 @@
#endif
#include <cuda.h>
+#include <cuda_fp16.h>
#include <float.h>
/* Qualifier wrappers for different names on different devices */
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index f359829374d..9050ce93951 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Float4 textures on various devices. */
#if defined(__KERNEL_CPU__)
-# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CPU
+# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CPU
#elif defined(__KERNEL_CUDA__)
# if __CUDA_ARCH__ < 300
# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CUDA
@@ -277,8 +277,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
}
# else
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
+ /* float4, byte4 and half4 */
if(id < TEX_START_FLOAT_CUDA_KEPLER)
r = kernel_tex_image_interp_float4(tex, x, y);
+ /* float, byte and half */
else {
float f = kernel_tex_image_interp_float(tex, x, y);
r = make_float4(f, f, f, 1.0);
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index ae85ab3a915..5db3384cda4 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -33,17 +33,21 @@ CCL_NAMESPACE_BEGIN
#else
+/* CUDA has its own half data type, no need to define then */
+#ifndef __KERNEL_CUDA__
typedef unsigned short half;
+#endif
+
struct half4 { half x, y, z, w; };
#ifdef __KERNEL_CUDA__
ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
{
- h[0] = __float2half_rn(f.x * scale);
- h[1] = __float2half_rn(f.y * scale);
- h[2] = __float2half_rn(f.z * scale);
- h[3] = __float2half_rn(f.w * scale);
+ h[0] = __float2half(f.x * scale);
+ h[1] = __float2half(f.y * scale);
+ h[2] = __float2half(f.z * scale);
+ h[3] = __float2half(f.w * scale);
}
#else
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index ec3ee2b8191..be1177d3be9 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -52,10 +52,10 @@ CCL_NAMESPACE_BEGIN
/* CUDA (Kepler, Geforce 6xx and above) */
#define TEX_NUM_FLOAT4_CUDA_KEPLER 1024
#define TEX_NUM_BYTE4_CUDA_KEPLER 1024
-#define TEX_NUM_HALF4_CUDA_KEPLER 0
+#define TEX_NUM_HALF4_CUDA_KEPLER 1024
#define TEX_NUM_FLOAT_CUDA_KEPLER 1024
#define TEX_NUM_BYTE_CUDA_KEPLER 1024
-#define TEX_NUM_HALF_CUDA_KEPLER 0
+#define TEX_NUM_HALF_CUDA_KEPLER 1024
#define TEX_START_FLOAT4_CUDA_KEPLER 0
#define TEX_START_BYTE4_CUDA_KEPLER TEX_NUM_FLOAT4_CUDA_KEPLER
#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)