From 1cad64900e3f052fa895a4ac2a994d87b0c3fce1 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Wed, 8 Mar 2017 13:34:29 +0100 Subject: Cycles: Define ccl_local variables in kernel functions Declaring ccl_local in a device function is not supported by certain compilers. --- intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 21 +++++++++++++------- intern/cycles/kernel/kernels/cuda/kernel_split.cu | 23 +++++++++++++++------- .../kernel/kernels/opencl/kernel_buffer_update.cl | 3 ++- .../kernels/opencl/kernel_direct_lighting.cl | 3 ++- ...holdout_emission_blurring_pathtermination_ao.cl | 5 ++++- .../kernels/opencl/kernel_next_iteration_setup.cl | 3 ++- .../kernel/kernels/opencl/kernel_queue_enqueue.cl | 3 ++- .../kernel/kernels/opencl/kernel_shader_eval.cl | 3 ++- .../kernels/opencl/kernel_subsurface_scatter.cl | 3 ++- intern/cycles/kernel/split/kernel_buffer_update.h | 8 ++++---- .../cycles/kernel/split/kernel_direct_lighting.h | 8 ++++---- ..._holdout_emission_blurring_pathtermination_ao.h | 14 ++++++------- .../kernel/split/kernel_indirect_background.h | 1 - .../kernel/split/kernel_next_iteration_setup.h | 8 ++++---- intern/cycles/kernel/split/kernel_queue_enqueue.h | 21 ++++++++++---------- intern/cycles/kernel/split/kernel_shader_eval.h | 8 ++++---- .../cycles/kernel/split/kernel_split_data_types.h | 11 +++++++++++ .../kernel/split/kernel_subsurface_scatter.h | 9 ++++----- 18 files changed, 94 insertions(+), 61 deletions(-) diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index ba6b1033915..e220d857384 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -168,21 +168,28 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, kernel_##name(kg); \ } +#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ + { \ + ccl_local type locals; \ + kernel_##name(kg, &locals); \ + } + DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) -DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) -DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) -DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) -DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) -DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) -DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) -DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)) { diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index fbdf79697d5..4479a044921 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -93,21 +93,30 @@ kernel_cuda_path_trace_data_init( kernel_##name(NULL); \ } +#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ + extern "C" __global__ void \ + CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ + kernel_cuda_##name() \ + { \ + ccl_local type locals; \ + kernel_##name(NULL, &locals); \ + } + DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) -DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) -DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) -DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) -DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) -DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) -DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) -DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl index 3c25d1d85a2..b61f1cda330 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl @@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_buffer_update( ccl_global char *kg, ccl_constant KernelData *data) { - kernel_buffer_update((KernelGlobals*)kg); + ccl_local unsigned int local_queue_atomics; + kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl index 942a80f94f5..374be6cbd05 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_direct_lighting( ccl_global char *kg, ccl_constant KernelData *data) { - kernel_direct_lighting((KernelGlobals*)kg); + ccl_local unsigned int local_queue_atomics; + kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl index 209080fecd6..351687e2036 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -22,5 +22,8 @@ __kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao ccl_global char *kg, ccl_constant KernelData *data) { - kernel_holdout_emission_blurring_pathtermination_ao((KernelGlobals*)kg); + ccl_local BackgroundAOLocals locals; + kernel_holdout_emission_blurring_pathtermination_ao( + (KernelGlobals*)kg, + &locals); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl index 2a007e39c33..fd49ed5def8 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_next_iteration_setup( ccl_global char *kg, ccl_constant KernelData *data) { - kernel_next_iteration_setup((KernelGlobals*)kg); + ccl_local unsigned int local_queue_atomics; + kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl index 19074db1b81..6dd9d39c4e2 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_queue_enqueue( ccl_global char *kg, ccl_constant KernelData *data) { - kernel_queue_enqueue((KernelGlobals*)kg); + ccl_local QueueEnqueueLocals locals; + kernel_queue_enqueue((KernelGlobals*)kg, &locals); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl index 534d37f695b..71ac2886978 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_shader_eval( ccl_global char *kg, ccl_constant KernelData *data) { - kernel_shader_eval((KernelGlobals*)kg); + ccl_local unsigned int local_queue_atomics; + kernel_shader_eval((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl index 34a01bbdfe3..853bba2efc5 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl @@ -22,5 +22,6 @@ __kernel void kernel_ocl_path_trace_subsurface_scatter( ccl_global char *kg, ccl_constant KernelData *data) { - kernel_subsurface_scatter((KernelGlobals*)kg); + ccl_local unsigned int local_queue_atomics; + kernel_subsurface_scatter((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index e8f574c5546..f36899b884a 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -38,11 +38,11 @@ CCL_NAMESPACE_BEGIN * RAY_REGENERATED rays. * - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty. */ -ccl_device void kernel_buffer_update(KernelGlobals *kg) +ccl_device void kernel_buffer_update(KernelGlobals *kg, + ccl_local_param unsigned int *local_queue_atomics) { - ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics = 0; + *local_queue_atomics = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -188,7 +188,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg) QUEUE_ACTIVE_AND_REGENERATED_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics, + local_queue_atomics, kernel_split_state.queue_data, kernel_split_params.queue_index); } diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h index dfe461fb357..3d062cf0e2b 100644 --- a/intern/cycles/kernel/split/kernel_direct_lighting.h +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -40,11 +40,11 @@ CCL_NAMESPACE_BEGIN * shadow_blocked function must be executed, after this kernel call * Before this kernel call the QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty. */ -ccl_device void kernel_direct_lighting(KernelGlobals *kg) +ccl_device void kernel_direct_lighting(KernelGlobals *kg, + ccl_local_param unsigned int *local_queue_atomics) { - ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics = 0; + *local_queue_atomics = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -130,7 +130,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg) QUEUE_SHADOW_RAY_CAST_DL_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics, + local_queue_atomics, kernel_split_state.queue_data, kernel_split_params.queue_index); #endif diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index bb948ad24b0..e4bf513ffdc 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -52,13 +52,13 @@ CCL_NAMESPACE_BEGIN * - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with * flag RAY_SHADOW_RAY_CAST_AO */ -ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobals *kg) +ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( + KernelGlobals *kg, + ccl_local_param BackgroundAOLocals *locals) { - ccl_local unsigned int local_queue_atomics_bg; - ccl_local unsigned int local_queue_atomics_ao; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics_bg = 0; - local_queue_atomics_ao = 0; + locals->queue_atomics_bg = 0; + locals->queue_atomics_ao = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -253,7 +253,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics_bg, + &locals->queue_atomics_bg, kernel_split_state.queue_data, kernel_split_params.queue_index); @@ -263,7 +263,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal QUEUE_SHADOW_RAY_CAST_AO_RAYS, enqueue_flag_AO_SHADOW_RAY_CAST, kernel_split_params.queue_size, - &local_queue_atomics_ao, + &locals->queue_atomics_bg, kernel_split_state.queue_data, kernel_split_params.queue_index); #endif diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h index 96ca0f094b1..100f5996f83 100644 --- a/intern/cycles/kernel/split/kernel_indirect_background.h +++ b/intern/cycles/kernel/split/kernel_indirect_background.h @@ -18,7 +18,6 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_indirect_background(KernelGlobals *kg) { - ccl_global char *ray_state = kernel_split_state.ray_state; int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h index ad1f6c78e8f..056fb1d8c08 100644 --- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -44,11 +44,11 @@ CCL_NAMESPACE_BEGIN * - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with * RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays. */ -ccl_device void kernel_next_iteration_setup(KernelGlobals *kg) +ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, + ccl_local_param unsigned int *local_queue_atomics) { - ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics = 0; + *local_queue_atomics = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -161,7 +161,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg) QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics, + local_queue_atomics, kernel_split_state.queue_data, kernel_split_params.queue_index); } diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h index f4a4657d23f..e2e841f36d3 100644 --- a/intern/cycles/kernel/split/kernel_queue_enqueue.h +++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h @@ -35,17 +35,16 @@ CCL_NAMESPACE_BEGIN * - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with * RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays. */ -ccl_device void kernel_queue_enqueue(KernelGlobals *kg) +ccl_device void kernel_queue_enqueue(KernelGlobals *kg, + ccl_local_param QueueEnqueueLocals *locals) { /* We have only 2 cases (Hit/Not-Hit) */ - ccl_local unsigned int local_queue_atomics[2]; - int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); if(lidx == 0) { - local_queue_atomics[0] = 0; - local_queue_atomics[1] = 0; + locals->queue_atomics[0] = 0; + locals->queue_atomics[1] = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -62,18 +61,18 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg) unsigned int my_lqidx; if(queue_number != -1) { - my_lqidx = get_local_queue_index(queue_number, local_queue_atomics); + my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics); } ccl_barrier(CCL_LOCAL_MEM_FENCE); if(lidx == 0) { - local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = + locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS, - local_queue_atomics, + locals->queue_atomics, kernel_split_params.queue_index); - local_queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = + locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - local_queue_atomics, + locals->queue_atomics, kernel_split_params.queue_index); } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -83,7 +82,7 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg) my_gqidx = get_global_queue_index(queue_number, kernel_split_params.queue_size, my_lqidx, - local_queue_atomics); + locals->queue_atomics); kernel_split_state.queue_data[my_gqidx] = ray_index; } } diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h index 43872c6f388..fc966b77b2c 100644 --- a/intern/cycles/kernel/split/kernel_shader_eval.h +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -22,12 +22,12 @@ CCL_NAMESPACE_BEGIN * It also identifies the rays of state RAY_TO_REGENERATE and enqueues them * in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */ -ccl_device void kernel_shader_eval(KernelGlobals *kg) +ccl_device void kernel_shader_eval(KernelGlobals *kg, + ccl_local_param unsigned int *local_queue_atomics) { /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */ - ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics = 0; + *local_queue_atomics = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -47,7 +47,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg) QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics, + local_queue_atomics, kernel_split_state.queue_data, kernel_split_params.queue_index); diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index 365d78c9f99..748197b7183 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -111,6 +111,17 @@ __device__ SplitParams __split_param_data; # define kernel_split_params (__split_param_data) #endif /* __KERNEL_CUDA__ */ +/* Local storage for queue_enqueue kernel. */ +typedef struct QueueEnqueueLocals { + uint queue_atomics[2]; +} QueueEnqueueLocals; + +/* Local storage for holdout_emission_blurring_pathtermination_ao kernel. */ +typedef struct BackgroundAOLocals { + uint queue_atomics_bg; + uint queue_atomics_ao; +} BackgroundAOLocals; + CCL_NAMESPACE_END #endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */ diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h index e282ac00a63..709a296c9a0 100644 --- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -17,13 +17,12 @@ CCL_NAMESPACE_BEGIN -ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) +ccl_device void kernel_subsurface_scatter(KernelGlobals *kg, + ccl_local_param unsigned int* local_queue_atomics) { #ifdef __SUBSURFACE__ - - ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics = 0; + *local_queue_atomics = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -89,7 +88,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics, + local_queue_atomics, kernel_split_state.queue_data, kernel_split_params.queue_index); -- cgit v1.2.3