diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-03-16 17:42:49 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-03-16 17:42:49 +0300 |
commit | fc61cdf1428dc74f4ef9b834de9082f910ce08e0 (patch) | |
tree | ba4f4dc67a11c83c3dea90803e578053f0aa734b /intern | |
parent | dd3ae7bad70ecfd466791f371b138b5b958e0a19 (diff) | |
parent | 0434053f1329384c312fd07812bce48ec9c28f50 (diff) |
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern')
34 files changed, 137 insertions, 114 deletions
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index a09d93c625e..3faae4039e3 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -346,9 +346,18 @@ public: virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask */*task*/) { + cl_device_type type; + clGetDeviceInfo(device->cdDevice, CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL); + + /* Use small global size on CPU devices as it seems to be much faster. */ + if(type == CL_DEVICE_TYPE_CPU) { + VLOG(1) << "Global size: (64, 64)."; + return make_int2(64, 64); + } + cl_ulong max_buffer_size; clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL); - VLOG(1) << "Maximum device allocation side: " + VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(max_buffer_size) << " bytes. (" << string_human_readable_size(max_buffer_size) << ")."; diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index ba6b1033915..e220d857384 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -168,21 +168,28 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, kernel_##name(kg); \ } +#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ + { \ + ccl_local type locals; \ + kernel_##name(kg, &locals); \ + } + DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) -DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) -DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) -DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) -DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) -DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) -DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) -DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)) { diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index fbdf79697d5..4479a044921 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -93,21 +93,30 @@ kernel_cuda_path_trace_data_init( kernel_##name(NULL); \ } +#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ + extern "C" __global__ void \ + CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ + kernel_cuda_##name() \ + { \ + ccl_local type locals; \ + kernel_##name(NULL, &locals); \ + } + DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) -DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) -DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) -DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) -DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) -DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) -DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) -DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl index d3058501f27..b61f1cda330 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl @@ -19,8 +19,9 @@ #include "split/kernel_buffer_update.h" __kernel void kernel_ocl_path_trace_buffer_update( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_buffer_update(kg); + ccl_local unsigned int local_queue_atomics; + kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl index 1e3c4fa28c7..54d4a577e10 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl @@ -19,7 +19,7 @@ #include "split/kernel_data_init.h" __kernel void kernel_ocl_path_trace_data_init( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data, ccl_global void *split_data_buffer, int num_elements, @@ -40,7 +40,7 @@ __kernel void kernel_ocl_path_trace_data_init( unsigned int num_samples, /* Total number of samples per pixel */ ccl_global float *buffer) { - kernel_data_init(kg, + kernel_data_init((KernelGlobals*)kg, data, split_data_buffer, num_elements, diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl index 5d2f46b319d..374be6cbd05 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -19,8 +19,9 @@ #include "split/kernel_direct_lighting.h" __kernel void kernel_ocl_path_trace_direct_lighting( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_direct_lighting(kg); + ccl_local unsigned int local_queue_atomics; + kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl index 6380e9cb746..08187b0e03e 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl @@ -19,8 +19,8 @@ #include "split/kernel_do_volume.h" __kernel void kernel_ocl_path_trace_do_volume( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_do_volume(kg); + kernel_do_volume((KernelGlobals*)kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl index 7724b8a0bdf..351687e2036 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -19,8 +19,11 @@ #include "split/kernel_holdout_emission_blurring_pathtermination_ao.h" __kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_holdout_emission_blurring_pathtermination_ao(kg); + ccl_local BackgroundAOLocals locals; + kernel_holdout_emission_blurring_pathtermination_ao( + (KernelGlobals*)kg, + &locals); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl index 671501bf237..b18fba4c01f 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl @@ -19,8 +19,8 @@ #include "split/kernel_indirect_background.h" __kernel void kernel_ocl_path_trace_indirect_background( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_indirect_background(kg); + kernel_indirect_background((KernelGlobals*)kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl index b5e52e81ebf..ce2e96ad789 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl @@ -19,8 +19,8 @@ #include "split/kernel_indirect_subsurface.h" __kernel void kernel_ocl_path_trace_indirect_subsurface( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_indirect_subsurface(kg); + kernel_indirect_subsurface((KernelGlobals*)kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl index 2b84d0ea43e..830e4e373ac 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -19,8 +19,8 @@ #include "split/kernel_lamp_emission.h" __kernel void kernel_ocl_path_trace_lamp_emission( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_lamp_emission(kg); + kernel_lamp_emission((KernelGlobals*)kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl index e87e367fb9c..fd49ed5def8 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -19,8 +19,9 @@ #include "split/kernel_next_iteration_setup.h" __kernel void kernel_ocl_path_trace_next_iteration_setup( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_next_iteration_setup(kg); + ccl_local unsigned int local_queue_atomics; + kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl index 7e9e4a02529..8194f5d22ca 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl @@ -19,8 +19,8 @@ #include "split/kernel_path_init.h" __kernel void kernel_ocl_path_trace_path_init( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_path_init(kg); + kernel_path_init((KernelGlobals*)kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl index 9ceb6a5c3d8..6dd9d39c4e2 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -19,8 +19,9 @@ #include "split/kernel_queue_enqueue.h" __kernel void kernel_ocl_path_trace_queue_enqueue( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_queue_enqueue(kg); + ccl_local QueueEnqueueLocals locals; + kernel_queue_enqueue((KernelGlobals*)kg, &locals); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl index 4e083e87d1c..c675640c599 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl @@ -19,8 +19,8 @@ #include "split/kernel_scene_intersect.h" __kernel void kernel_ocl_path_trace_scene_intersect( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_scene_intersect(kg); + kernel_scene_intersect((KernelGlobals*)kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl index a2b48b15928..71ac2886978 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -19,8 +19,9 @@ #include "split/kernel_shader_eval.h" __kernel void kernel_ocl_path_trace_shader_eval( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_shader_eval(kg); + ccl_local unsigned int local_queue_atomics; + kernel_shader_eval((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl index 1c96d67fec2..37824097031 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl @@ -19,8 +19,8 @@ #include "split/kernel_shadow_blocked_ao.h" __kernel void kernel_ocl_path_trace_shadow_blocked_ao( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_shadow_blocked_ao(kg); + kernel_shadow_blocked_ao((KernelGlobals*)kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl index 2231f767c0c..4889f49d8dc 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl @@ -19,8 +19,8 @@ #include "split/kernel_shadow_blocked_dl.h" __kernel void kernel_ocl_path_trace_shadow_blocked_dl( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_shadow_blocked_dl(kg); + kernel_shadow_blocked_dl((KernelGlobals*)kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl index 4c9bf63ef51..b23ff33786d 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl @@ -18,12 +18,12 @@ #include "split/kernel_split_common.h" __kernel void kernel_ocl_path_trace_state_buffer_size( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data, uint num_threads, ccl_global uint64_t *size) { - kg->data = data; - *size = split_data_buffer_size(kg, num_threads); + ((KernelGlobals*)kg)->data = data; + *size = split_data_buffer_size((KernelGlobals*)kg, num_threads); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl index 8dae79bacb0..853bba2efc5 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl @@ -19,8 +19,9 @@ #include "split/kernel_subsurface_scatter.h" __kernel void kernel_ocl_path_trace_subsurface_scatter( - KernelGlobals *kg, + ccl_global char *kg, ccl_constant KernelData *data) { - kernel_subsurface_scatter(kg); + ccl_local unsigned int local_queue_atomics; + kernel_subsurface_scatter((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index e8f574c5546..f36899b884a 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -38,11 +38,11 @@ CCL_NAMESPACE_BEGIN * RAY_REGENERATED rays. * - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty. */ -ccl_device void kernel_buffer_update(KernelGlobals *kg) +ccl_device void kernel_buffer_update(KernelGlobals *kg, + ccl_local_param unsigned int *local_queue_atomics) { - ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics = 0; + *local_queue_atomics = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -188,7 +188,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg) QUEUE_ACTIVE_AND_REGENERATED_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics, + local_queue_atomics, kernel_split_state.queue_data, kernel_split_params.queue_index); } diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h index dfe461fb357..3d062cf0e2b 100644 --- a/intern/cycles/kernel/split/kernel_direct_lighting.h +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -40,11 +40,11 @@ CCL_NAMESPACE_BEGIN * shadow_blocked function must be executed, after this kernel call * Before this kernel call the QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty. */ -ccl_device void kernel_direct_lighting(KernelGlobals *kg) +ccl_device void kernel_direct_lighting(KernelGlobals *kg, + ccl_local_param unsigned int *local_queue_atomics) { - ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics = 0; + *local_queue_atomics = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -130,7 +130,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg) QUEUE_SHADOW_RAY_CAST_DL_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics, + local_queue_atomics, kernel_split_state.queue_data, kernel_split_params.queue_index); #endif diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h index 18da6e8aa3a..b1df45d6bb2 100644 --- a/intern/cycles/kernel/split/kernel_do_volume.h +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -25,10 +25,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg) kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; } /* Fetch use_queues_flag. */ - ccl_local char local_use_queues_flag; - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_use_queues_flag = *kernel_split_params.use_queues_flag; - } + char local_use_queues_flag = *kernel_split_params.use_queues_flag; ccl_barrier(CCL_LOCAL_MEM_FENCE); int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index bb948ad24b0..e4bf513ffdc 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -52,13 +52,13 @@ CCL_NAMESPACE_BEGIN * - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with * flag RAY_SHADOW_RAY_CAST_AO */ -ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobals *kg) +ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( + KernelGlobals *kg, + ccl_local_param BackgroundAOLocals *locals) { - ccl_local unsigned int local_queue_atomics_bg; - ccl_local unsigned int local_queue_atomics_ao; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics_bg = 0; - local_queue_atomics_ao = 0; + locals->queue_atomics_bg = 0; + locals->queue_atomics_ao = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -253,7 +253,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics_bg, + &locals->queue_atomics_bg, kernel_split_state.queue_data, kernel_split_params.queue_index); @@ -263,7 +263,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal QUEUE_SHADOW_RAY_CAST_AO_RAYS, enqueue_flag_AO_SHADOW_RAY_CAST, kernel_split_params.queue_size, - &local_queue_atomics_ao, + &locals->queue_atomics_bg, kernel_split_state.queue_data, kernel_split_params.queue_index); #endif diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h index 96ca0f094b1..100f5996f83 100644 --- a/intern/cycles/kernel/split/kernel_indirect_background.h +++ b/intern/cycles/kernel/split/kernel_indirect_background.h @@ -18,7 +18,6 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_indirect_background(KernelGlobals *kg) { - ccl_global char *ray_state = kernel_split_state.ray_state; int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h index f61643cceef..c669d79ddcd 100644 --- a/intern/cycles/kernel/split/kernel_lamp_emission.h +++ b/intern/cycles/kernel/split/kernel_lamp_emission.h @@ -29,10 +29,7 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg) } #endif /* Fetch use_queues_flag. */ - ccl_local char local_use_queues_flag; - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_use_queues_flag = *kernel_split_params.use_queues_flag; - } + char local_use_queues_flag = *kernel_split_params.use_queues_flag; ccl_barrier(CCL_LOCAL_MEM_FENCE); int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h index ad1f6c78e8f..056fb1d8c08 100644 --- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -44,11 +44,11 @@ CCL_NAMESPACE_BEGIN * - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with * RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays. */ -ccl_device void kernel_next_iteration_setup(KernelGlobals *kg) +ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, + ccl_local_param unsigned int *local_queue_atomics) { - ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics = 0; + *local_queue_atomics = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -161,7 +161,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg) QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics, + local_queue_atomics, kernel_split_state.queue_data, kernel_split_params.queue_index); } diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h index f4a4657d23f..e2e841f36d3 100644 --- a/intern/cycles/kernel/split/kernel_queue_enqueue.h +++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h @@ -35,17 +35,16 @@ CCL_NAMESPACE_BEGIN * - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with * RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays. */ -ccl_device void kernel_queue_enqueue(KernelGlobals *kg) +ccl_device void kernel_queue_enqueue(KernelGlobals *kg, + ccl_local_param QueueEnqueueLocals *locals) { /* We have only 2 cases (Hit/Not-Hit) */ - ccl_local unsigned int local_queue_atomics[2]; - int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); if(lidx == 0) { - local_queue_atomics[0] = 0; - local_queue_atomics[1] = 0; + locals->queue_atomics[0] = 0; + locals->queue_atomics[1] = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -62,18 +61,18 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg) unsigned int my_lqidx; if(queue_number != -1) { - my_lqidx = get_local_queue_index(queue_number, local_queue_atomics); + my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics); } ccl_barrier(CCL_LOCAL_MEM_FENCE); if(lidx == 0) { - local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = + locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS, - local_queue_atomics, + locals->queue_atomics, kernel_split_params.queue_index); - local_queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = + locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - local_queue_atomics, + locals->queue_atomics, kernel_split_params.queue_index); } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -83,7 +82,7 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg) my_gqidx = get_global_queue_index(queue_number, kernel_split_params.queue_size, my_lqidx, - local_queue_atomics); + locals->queue_atomics); kernel_split_state.queue_data[my_gqidx] = ray_index; } } diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h index 33d0df7a2cb..66f549f59b7 100644 --- a/intern/cycles/kernel/split/kernel_scene_intersect.h +++ b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -26,10 +26,7 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_scene_intersect(KernelGlobals *kg) { /* Fetch use_queues_flag */ - ccl_local char local_use_queues_flag; - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_use_queues_flag = *kernel_split_params.use_queues_flag; - } + char local_use_queues_flag = *kernel_split_params.use_queues_flag; ccl_barrier(CCL_LOCAL_MEM_FENCE); int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h index 43872c6f388..fc966b77b2c 100644 --- a/intern/cycles/kernel/split/kernel_shader_eval.h +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -22,12 +22,12 @@ CCL_NAMESPACE_BEGIN * It also identifies the rays of state RAY_TO_REGENERATE and enqueues them * in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */ -ccl_device void kernel_shader_eval(KernelGlobals *kg) +ccl_device void kernel_shader_eval(KernelGlobals *kg, + ccl_local_param unsigned int *local_queue_atomics) { /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */ - ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics = 0; + *local_queue_atomics = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -47,7 +47,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg) QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics, + local_queue_atomics, kernel_split_state.queue_data, kernel_split_params.queue_index); diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h index a1eb0d1eccd..4243e18de72 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h @@ -19,12 +19,7 @@ CCL_NAMESPACE_BEGIN /* Shadow ray cast for AO. */ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg) { - int lidx = ccl_local_id(1) * ccl_local_id(0) + ccl_local_id(0); - - ccl_local unsigned int ao_queue_length; - if(lidx == 0) { - ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; - } + unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; ccl_barrier(CCL_LOCAL_MEM_FENCE); int ray_index = QUEUE_EMPTY_SLOT; diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h index 2e5629944dc..bb8f0157965 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h @@ -19,12 +19,7 @@ CCL_NAMESPACE_BEGIN /* Shadow ray cast for direct visible light. */ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) { - int lidx = ccl_local_id(1) * ccl_local_id(0) + ccl_local_id(0); - - ccl_local unsigned int dl_queue_length; - if(lidx == 0) { - dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; - } + unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; ccl_barrier(CCL_LOCAL_MEM_FENCE); int ray_index = QUEUE_EMPTY_SLOT; diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index 365d78c9f99..748197b7183 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -111,6 +111,17 @@ __device__ SplitParams __split_param_data; # define kernel_split_params (__split_param_data) #endif /* __KERNEL_CUDA__ */ +/* Local storage for queue_enqueue kernel. */ +typedef struct QueueEnqueueLocals { + uint queue_atomics[2]; +} QueueEnqueueLocals; + +/* Local storage for holdout_emission_blurring_pathtermination_ao kernel. */ +typedef struct BackgroundAOLocals { + uint queue_atomics_bg; + uint queue_atomics_ao; +} BackgroundAOLocals; + CCL_NAMESPACE_END #endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */ diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h index e282ac00a63..709a296c9a0 100644 --- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -17,13 +17,12 @@ CCL_NAMESPACE_BEGIN -ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) +ccl_device void kernel_subsurface_scatter(KernelGlobals *kg, + ccl_local_param unsigned int* local_queue_atomics) { #ifdef __SUBSURFACE__ - - ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - local_queue_atomics = 0; + *local_queue_atomics = 0; } ccl_barrier(CCL_LOCAL_MEM_FENCE); @@ -89,7 +88,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, enqueue_flag, kernel_split_params.queue_size, - &local_queue_atomics, + local_queue_atomics, kernel_split_state.queue_data, kernel_split_params.queue_index); |