From ec8ae4d5e9f735ab5aeb149dea8aa47ab8f8f977 Mon Sep 17 00:00:00 2001 From: Mai Lavelle Date: Tue, 8 Aug 2017 07:12:04 -0400 Subject: Cycles: Pack kernel textures into buffers for OpenCL Image textures were being packed into a single buffer for OpenCL, which limited the amount of memory available for images to the size of one buffer (usually 4gb on AMD hardware). By packing textures into multiple buffers that limit is removed, while simultaneously reducing the number of buffers that need to be passed to each kernel. Benchmarks were within 2%. Fixes T51554. Differential Revision: https://developer.blender.org/D2745 --- intern/cycles/kernel/kernel_globals.h | 68 ++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) (limited to 'intern/cycles/kernel/kernel_globals.h') diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index f95f0d98c52..c078f09e1d7 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -23,6 +23,10 @@ # include "util/util_vector.h" #endif +#ifdef __KERNEL_OPENCL__ +# include "util/util_atomic.h" +#endif + CCL_NAMESPACE_BEGIN /* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in @@ -109,11 +113,22 @@ typedef struct KernelGlobals { #ifdef __KERNEL_OPENCL__ +# define KERNEL_TEX(type, ttype, name) \ +typedef type name##_t; +# include "kernel/kernel_textures.h" + +typedef struct tex_info_t { + uint buffer, padding; + ulong offset; + uint width, height, depth, options; +} tex_info_t; + typedef ccl_addr_space struct KernelGlobals { ccl_constant KernelData *data; + ccl_global char *buffers[8]; # define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name; + tex_info_t name; # include "kernel/kernel_textures.h" # ifdef __SPLIT_KERNEL__ @@ -122,6 +137,57 @@ typedef ccl_addr_space struct KernelGlobals { # endif } KernelGlobals; +#define KERNEL_BUFFER_PARAMS \ + ccl_global char *buffer0, \ + ccl_global char *buffer1, \ + ccl_global char *buffer2, \ + ccl_global char *buffer3, \ + ccl_global char *buffer4, \ + ccl_global char *buffer5, \ + ccl_global char *buffer6, \ + ccl_global char *buffer7 + +#define KERNEL_BUFFER_ARGS buffer0, buffer1, buffer2, buffer3, buffer4, buffer5, buffer6, buffer7 + +ccl_device_inline void kernel_set_buffer_pointers(KernelGlobals *kg, KERNEL_BUFFER_PARAMS) +{ +#ifdef __SPLIT_KERNEL__ + if(ccl_local_id(0) + ccl_local_id(1) == 0) +#endif + { + kg->buffers[0] = buffer0; + kg->buffers[1] = buffer1; + kg->buffers[2] = buffer2; + kg->buffers[3] = buffer3; + kg->buffers[4] = buffer4; + kg->buffers[5] = buffer5; + kg->buffers[6] = buffer6; + kg->buffers[7] = buffer7; + } + +# ifdef __SPLIT_KERNEL__ + ccl_barrier(CCL_LOCAL_MEM_FENCE); +# endif +} + +ccl_device_inline void kernel_set_buffer_info(KernelGlobals *kg) +{ +# ifdef __SPLIT_KERNEL__ + if(ccl_local_id(0) + ccl_local_id(1) == 0) +# endif + { + ccl_global tex_info_t *info = (ccl_global tex_info_t*)kg->buffers[0]; + +# define KERNEL_TEX(type, ttype, name) \ + kg->name = *(info++); +# include "kernel/kernel_textures.h" + } + +# ifdef __SPLIT_KERNEL__ + ccl_barrier(CCL_LOCAL_MEM_FENCE); +# endif +} + #endif /* __KERNEL_OPENCL__ */ /* Interpolated lookup table access */ -- cgit v1.2.3 From 176ad9ecdd4efca4dbe33b9c3a003c16e3706433 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Wed, 9 Aug 2017 13:44:21 +0200 Subject: Cycles: Remove ulong usage This is a bit confusing, especially when one mixes OpenCL code where ulong equals to uint64_t with CPU side code where ulong is expected to be something else from the naming. This commit makes it so we use explicit name, common on all platforms. --- intern/cycles/kernel/kernel_globals.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'intern/cycles/kernel/kernel_globals.h') diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index c078f09e1d7..9d55183d94b 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -119,7 +119,7 @@ typedef type name##_t; typedef struct tex_info_t { uint buffer, padding; - ulong offset; + uint64_t offset; uint width, height, depth, options; } tex_info_t; -- cgit v1.2.3