Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2017-03-16 17:42:49 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2017-03-16 17:42:49 +0300
commitfc61cdf1428dc74f4ef9b834de9082f910ce08e0 (patch)
treeba4f4dc67a11c83c3dea90803e578053f0aa734b /intern
parentdd3ae7bad70ecfd466791f371b138b5b958e0a19 (diff)
parent0434053f1329384c312fd07812bce48ec9c28f50 (diff)
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp11
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h21
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl5
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_data_init.cl4
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl5
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl4
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl7
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl4
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl4
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl4
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl5
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_path_init.cl4
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl5
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl4
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl5
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl4
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl4
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl6
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl5
-rw-r--r--intern/cycles/kernel/split/kernel_buffer_update.h8
-rw-r--r--intern/cycles/kernel/split/kernel_direct_lighting.h8
-rw-r--r--intern/cycles/kernel/split/kernel_do_volume.h5
-rw-r--r--intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h14
-rw-r--r--intern/cycles/kernel/split/kernel_indirect_background.h1
-rw-r--r--intern/cycles/kernel/split/kernel_lamp_emission.h5
-rw-r--r--intern/cycles/kernel/split/kernel_next_iteration_setup.h8
-rw-r--r--intern/cycles/kernel/split/kernel_queue_enqueue.h21
-rw-r--r--intern/cycles/kernel/split/kernel_scene_intersect.h5
-rw-r--r--intern/cycles/kernel/split/kernel_shader_eval.h8
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_ao.h7
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_dl.h7
-rw-r--r--intern/cycles/kernel/split/kernel_split_data_types.h11
-rw-r--r--intern/cycles/kernel/split/kernel_subsurface_scatter.h9
34 files changed, 137 insertions, 114 deletions
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index a09d93c625e..3faae4039e3 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -346,9 +346,18 @@ public:
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask */*task*/)
{
+ cl_device_type type;
+ clGetDeviceInfo(device->cdDevice, CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL);
+
+ /* Use small global size on CPU devices as it seems to be much faster. */
+ if(type == CL_DEVICE_TYPE_CPU) {
+ VLOG(1) << "Global size: (64, 64).";
+ return make_int2(64, 64);
+ }
+
cl_ulong max_buffer_size;
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
- VLOG(1) << "Maximum device allocation side: "
+ VLOG(1) << "Maximum device allocation size: "
<< string_human_readable_number(max_buffer_size) << " bytes. ("
<< string_human_readable_size(max_buffer_size) << ").";
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index ba6b1033915..e220d857384 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -168,21 +168,28 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
kernel_##name(kg); \
}
+#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
+ { \
+ ccl_local type locals; \
+ kernel_##name(kg, &locals); \
+ }
+
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
-DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
-DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
-DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
-DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
-DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func))
{
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index fbdf79697d5..4479a044921 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -93,21 +93,30 @@ kernel_cuda_path_trace_data_init(
kernel_##name(NULL); \
}
+#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
+ extern "C" __global__ void \
+ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \
+ kernel_cuda_##name() \
+ { \
+ ccl_local type locals; \
+ kernel_##name(NULL, &locals); \
+ }
+
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
-DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
-DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
-DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
-DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
-DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
index d3058501f27..b61f1cda330 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
@@ -19,8 +19,9 @@
#include "split/kernel_buffer_update.h"
__kernel void kernel_ocl_path_trace_buffer_update(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_buffer_update(kg);
+ ccl_local unsigned int local_queue_atomics;
+ kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
index 1e3c4fa28c7..54d4a577e10 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
@@ -19,7 +19,7 @@
#include "split/kernel_data_init.h"
__kernel void kernel_ocl_path_trace_data_init(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global void *split_data_buffer,
int num_elements,
@@ -40,7 +40,7 @@ __kernel void kernel_ocl_path_trace_data_init(
unsigned int num_samples, /* Total number of samples per pixel */
ccl_global float *buffer)
{
- kernel_data_init(kg,
+ kernel_data_init((KernelGlobals*)kg,
data,
split_data_buffer,
num_elements,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
index 5d2f46b319d..374be6cbd05 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
@@ -19,8 +19,9 @@
#include "split/kernel_direct_lighting.h"
__kernel void kernel_ocl_path_trace_direct_lighting(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_direct_lighting(kg);
+ ccl_local unsigned int local_queue_atomics;
+ kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
index 6380e9cb746..08187b0e03e 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
@@ -19,8 +19,8 @@
#include "split/kernel_do_volume.h"
__kernel void kernel_ocl_path_trace_do_volume(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_do_volume(kg);
+ kernel_do_volume((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
index 7724b8a0bdf..351687e2036 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
@@ -19,8 +19,11 @@
#include "split/kernel_holdout_emission_blurring_pathtermination_ao.h"
__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_holdout_emission_blurring_pathtermination_ao(kg);
+ ccl_local BackgroundAOLocals locals;
+ kernel_holdout_emission_blurring_pathtermination_ao(
+ (KernelGlobals*)kg,
+ &locals);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
index 671501bf237..b18fba4c01f 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
@@ -19,8 +19,8 @@
#include "split/kernel_indirect_background.h"
__kernel void kernel_ocl_path_trace_indirect_background(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_indirect_background(kg);
+ kernel_indirect_background((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
index b5e52e81ebf..ce2e96ad789 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
@@ -19,8 +19,8 @@
#include "split/kernel_indirect_subsurface.h"
__kernel void kernel_ocl_path_trace_indirect_subsurface(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_indirect_subsurface(kg);
+ kernel_indirect_subsurface((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
index 2b84d0ea43e..830e4e373ac 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
@@ -19,8 +19,8 @@
#include "split/kernel_lamp_emission.h"
__kernel void kernel_ocl_path_trace_lamp_emission(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_lamp_emission(kg);
+ kernel_lamp_emission((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
index e87e367fb9c..fd49ed5def8 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
@@ -19,8 +19,9 @@
#include "split/kernel_next_iteration_setup.h"
__kernel void kernel_ocl_path_trace_next_iteration_setup(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_next_iteration_setup(kg);
+ ccl_local unsigned int local_queue_atomics;
+ kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
index 7e9e4a02529..8194f5d22ca 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
@@ -19,8 +19,8 @@
#include "split/kernel_path_init.h"
__kernel void kernel_ocl_path_trace_path_init(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_path_init(kg);
+ kernel_path_init((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
index 9ceb6a5c3d8..6dd9d39c4e2 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
@@ -19,8 +19,9 @@
#include "split/kernel_queue_enqueue.h"
__kernel void kernel_ocl_path_trace_queue_enqueue(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_queue_enqueue(kg);
+ ccl_local QueueEnqueueLocals locals;
+ kernel_queue_enqueue((KernelGlobals*)kg, &locals);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
index 4e083e87d1c..c675640c599 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
@@ -19,8 +19,8 @@
#include "split/kernel_scene_intersect.h"
__kernel void kernel_ocl_path_trace_scene_intersect(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_scene_intersect(kg);
+ kernel_scene_intersect((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
index a2b48b15928..71ac2886978 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
@@ -19,8 +19,9 @@
#include "split/kernel_shader_eval.h"
__kernel void kernel_ocl_path_trace_shader_eval(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_shader_eval(kg);
+ ccl_local unsigned int local_queue_atomics;
+ kernel_shader_eval((KernelGlobals*)kg, &local_queue_atomics);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
index 1c96d67fec2..37824097031 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
@@ -19,8 +19,8 @@
#include "split/kernel_shadow_blocked_ao.h"
__kernel void kernel_ocl_path_trace_shadow_blocked_ao(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_shadow_blocked_ao(kg);
+ kernel_shadow_blocked_ao((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
index 2231f767c0c..4889f49d8dc 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
@@ -19,8 +19,8 @@
#include "split/kernel_shadow_blocked_dl.h"
__kernel void kernel_ocl_path_trace_shadow_blocked_dl(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_shadow_blocked_dl(kg);
+ kernel_shadow_blocked_dl((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
index 4c9bf63ef51..b23ff33786d 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
@@ -18,12 +18,12 @@
#include "split/kernel_split_common.h"
__kernel void kernel_ocl_path_trace_state_buffer_size(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data,
uint num_threads,
ccl_global uint64_t *size)
{
- kg->data = data;
- *size = split_data_buffer_size(kg, num_threads);
+ ((KernelGlobals*)kg)->data = data;
+ *size = split_data_buffer_size((KernelGlobals*)kg, num_threads);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
index 8dae79bacb0..853bba2efc5 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
@@ -19,8 +19,9 @@
#include "split/kernel_subsurface_scatter.h"
__kernel void kernel_ocl_path_trace_subsurface_scatter(
- KernelGlobals *kg,
+ ccl_global char *kg,
ccl_constant KernelData *data)
{
- kernel_subsurface_scatter(kg);
+ ccl_local unsigned int local_queue_atomics;
+ kernel_subsurface_scatter((KernelGlobals*)kg, &local_queue_atomics);
}
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index e8f574c5546..f36899b884a 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -38,11 +38,11 @@ CCL_NAMESPACE_BEGIN
* RAY_REGENERATED rays.
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty.
*/
-ccl_device void kernel_buffer_update(KernelGlobals *kg)
+ccl_device void kernel_buffer_update(KernelGlobals *kg,
+ ccl_local_param unsigned int *local_queue_atomics)
{
- ccl_local unsigned int local_queue_atomics;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- local_queue_atomics = 0;
+ *local_queue_atomics = 0;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -188,7 +188,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg)
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
enqueue_flag,
kernel_split_params.queue_size,
- &local_queue_atomics,
+ local_queue_atomics,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
}
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index dfe461fb357..3d062cf0e2b 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -40,11 +40,11 @@ CCL_NAMESPACE_BEGIN
* shadow_blocked function must be executed, after this kernel call
* Before this kernel call the QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty.
*/
-ccl_device void kernel_direct_lighting(KernelGlobals *kg)
+ccl_device void kernel_direct_lighting(KernelGlobals *kg,
+ ccl_local_param unsigned int *local_queue_atomics)
{
- ccl_local unsigned int local_queue_atomics;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- local_queue_atomics = 0;
+ *local_queue_atomics = 0;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -130,7 +130,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg)
QUEUE_SHADOW_RAY_CAST_DL_RAYS,
enqueue_flag,
kernel_split_params.queue_size,
- &local_queue_atomics,
+ local_queue_atomics,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
#endif
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 18da6e8aa3a..b1df45d6bb2 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -25,10 +25,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
}
/* Fetch use_queues_flag. */
- ccl_local char local_use_queues_flag;
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- local_use_queues_flag = *kernel_split_params.use_queues_flag;
- }
+ char local_use_queues_flag = *kernel_split_params.use_queues_flag;
ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index bb948ad24b0..e4bf513ffdc 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -52,13 +52,13 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with
* flag RAY_SHADOW_RAY_CAST_AO
*/
-ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobals *kg)
+ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
+ KernelGlobals *kg,
+ ccl_local_param BackgroundAOLocals *locals)
{
- ccl_local unsigned int local_queue_atomics_bg;
- ccl_local unsigned int local_queue_atomics_ao;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- local_queue_atomics_bg = 0;
- local_queue_atomics_ao = 0;
+ locals->queue_atomics_bg = 0;
+ locals->queue_atomics_ao = 0;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -253,7 +253,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
enqueue_flag,
kernel_split_params.queue_size,
- &local_queue_atomics_bg,
+ &locals->queue_atomics_bg,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
@@ -263,7 +263,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal
QUEUE_SHADOW_RAY_CAST_AO_RAYS,
enqueue_flag_AO_SHADOW_RAY_CAST,
kernel_split_params.queue_size,
- &local_queue_atomics_ao,
+ &locals->queue_atomics_bg,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
#endif
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index 96ca0f094b1..100f5996f83 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -18,7 +18,6 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_indirect_background(KernelGlobals *kg)
{
-
ccl_global char *ray_state = kernel_split_state.ray_state;
int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index f61643cceef..c669d79ddcd 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -29,10 +29,7 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
}
#endif
/* Fetch use_queues_flag. */
- ccl_local char local_use_queues_flag;
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- local_use_queues_flag = *kernel_split_params.use_queues_flag;
- }
+ char local_use_queues_flag = *kernel_split_params.use_queues_flag;
ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index ad1f6c78e8f..056fb1d8c08 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -44,11 +44,11 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
* RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays.
*/
-ccl_device void kernel_next_iteration_setup(KernelGlobals *kg)
+ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
+ ccl_local_param unsigned int *local_queue_atomics)
{
- ccl_local unsigned int local_queue_atomics;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- local_queue_atomics = 0;
+ *local_queue_atomics = 0;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -161,7 +161,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg)
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
enqueue_flag,
kernel_split_params.queue_size,
- &local_queue_atomics,
+ local_queue_atomics,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
}
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index f4a4657d23f..e2e841f36d3 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -35,17 +35,16 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
* RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays.
*/
-ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
+ccl_device void kernel_queue_enqueue(KernelGlobals *kg,
+ ccl_local_param QueueEnqueueLocals *locals)
{
/* We have only 2 cases (Hit/Not-Hit) */
- ccl_local unsigned int local_queue_atomics[2];
-
int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
if(lidx == 0) {
- local_queue_atomics[0] = 0;
- local_queue_atomics[1] = 0;
+ locals->queue_atomics[0] = 0;
+ locals->queue_atomics[1] = 0;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -62,18 +61,18 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
unsigned int my_lqidx;
if(queue_number != -1) {
- my_lqidx = get_local_queue_index(queue_number, local_queue_atomics);
+ my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics);
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
if(lidx == 0) {
- local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] =
+ locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] =
get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- local_queue_atomics,
+ locals->queue_atomics,
kernel_split_params.queue_index);
- local_queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] =
+ locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] =
get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- local_queue_atomics,
+ locals->queue_atomics,
kernel_split_params.queue_index);
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -83,7 +82,7 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
my_gqidx = get_global_queue_index(queue_number,
kernel_split_params.queue_size,
my_lqidx,
- local_queue_atomics);
+ locals->queue_atomics);
kernel_split_state.queue_data[my_gqidx] = ray_index;
}
}
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index 33d0df7a2cb..66f549f59b7 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -26,10 +26,7 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_scene_intersect(KernelGlobals *kg)
{
/* Fetch use_queues_flag */
- ccl_local char local_use_queues_flag;
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- local_use_queues_flag = *kernel_split_params.use_queues_flag;
- }
+ char local_use_queues_flag = *kernel_split_params.use_queues_flag;
ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 43872c6f388..fc966b77b2c 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -22,12 +22,12 @@ CCL_NAMESPACE_BEGIN
* It also identifies the rays of state RAY_TO_REGENERATE and enqueues them
* in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
*/
-ccl_device void kernel_shader_eval(KernelGlobals *kg)
+ccl_device void kernel_shader_eval(KernelGlobals *kg,
+ ccl_local_param unsigned int *local_queue_atomics)
{
/* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
- ccl_local unsigned int local_queue_atomics;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- local_queue_atomics = 0;
+ *local_queue_atomics = 0;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -47,7 +47,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
enqueue_flag,
kernel_split_params.queue_size,
- &local_queue_atomics,
+ local_queue_atomics,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index a1eb0d1eccd..4243e18de72 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -19,12 +19,7 @@ CCL_NAMESPACE_BEGIN
/* Shadow ray cast for AO. */
ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
{
- int lidx = ccl_local_id(1) * ccl_local_id(0) + ccl_local_id(0);
-
- ccl_local unsigned int ao_queue_length;
- if(lidx == 0) {
- ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
- }
+ unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = QUEUE_EMPTY_SLOT;
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index 2e5629944dc..bb8f0157965 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -19,12 +19,7 @@ CCL_NAMESPACE_BEGIN
/* Shadow ray cast for direct visible light. */
ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
{
- int lidx = ccl_local_id(1) * ccl_local_id(0) + ccl_local_id(0);
-
- ccl_local unsigned int dl_queue_length;
- if(lidx == 0) {
- dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
- }
+ unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = QUEUE_EMPTY_SLOT;
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 365d78c9f99..748197b7183 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -111,6 +111,17 @@ __device__ SplitParams __split_param_data;
# define kernel_split_params (__split_param_data)
#endif /* __KERNEL_CUDA__ */
+/* Local storage for queue_enqueue kernel. */
+typedef struct QueueEnqueueLocals {
+ uint queue_atomics[2];
+} QueueEnqueueLocals;
+
+/* Local storage for holdout_emission_blurring_pathtermination_ao kernel. */
+typedef struct BackgroundAOLocals {
+ uint queue_atomics_bg;
+ uint queue_atomics_ao;
+} BackgroundAOLocals;
+
CCL_NAMESPACE_END
#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index e282ac00a63..709a296c9a0 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -17,13 +17,12 @@
CCL_NAMESPACE_BEGIN
-ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
+ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
+ ccl_local_param unsigned int* local_queue_atomics)
{
#ifdef __SUBSURFACE__
-
- ccl_local unsigned int local_queue_atomics;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- local_queue_atomics = 0;
+ *local_queue_atomics = 0;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -89,7 +88,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
enqueue_flag,
kernel_split_params.queue_size,
- &local_queue_atomics,
+ local_queue_atomics,
kernel_split_state.queue_data,
kernel_split_params.queue_index);