Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHristo Gueorguiev <prem.nirved@gmail.com>2017-03-08 19:39:40 +0300
committerHristo Gueorguiev <prem.nirved@gmail.com>2017-03-08 19:39:40 +0300
commita4f8766265bf660536dd29468b3d220514760282 (patch)
tree5b8aa66c9c37256955416d4eda6a04945dd317da
parent2e42f8e73f9a25f53231a37ca3224b006a01a0d5 (diff)
Cycles: split kernel_shadow_blocked to AO & DL parts
Reduces memory allocation for split kernel. This allows for faster rendering due to bigger global size, specially when GPU memory is limited. Perfromance results: R9 290 total render time Before After Change BMW 4:37 4:34 -1.1 % Classroom 14:43 14:30 -1.5 % Fishy Cat 11:20 11:04 -2.4 % Koro 12:11 12:04 -1.0 % Pabellon Barcelona 22:01 20:44 -5.8 % Pabellon Barcelona(*) 15:32 15:09 -2.5 % (*) without glossy connected to volume
-rw-r--r--intern/cycles/device/device_split_kernel.cpp18
-rw-r--r--intern/cycles/device/device_split_kernel.h3
-rw-r--r--intern/cycles/kernel/CMakeLists.txt9
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h3
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h9
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl (renamed from intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl)6
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl26
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split.cl3
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_ao.h (renamed from intern/cycles/kernel/split/kernel_shadow_blocked.h)42
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_dl.h91
-rw-r--r--intern/cycles/kernel/split/kernel_split_data.h2
11 files changed, 157 insertions, 55 deletions
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 52ef9fdd57a..aa85d384745 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -42,7 +42,8 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
kernel_holdout_emission_blurring_pathtermination_ao = NULL;
kernel_subsurface_scatter = NULL;
kernel_direct_lighting = NULL;
- kernel_shadow_blocked = NULL;
+ kernel_shadow_blocked_ao = NULL;
+ kernel_shadow_blocked_dl = NULL;
kernel_next_iteration_setup = NULL;
kernel_indirect_subsurface = NULL;
kernel_buffer_update = NULL;
@@ -66,7 +67,8 @@ DeviceSplitKernel::~DeviceSplitKernel()
delete kernel_holdout_emission_blurring_pathtermination_ao;
delete kernel_subsurface_scatter;
delete kernel_direct_lighting;
- delete kernel_shadow_blocked;
+ delete kernel_shadow_blocked_ao;
+ delete kernel_shadow_blocked_dl;
delete kernel_next_iteration_setup;
delete kernel_indirect_subsurface;
delete kernel_buffer_update;
@@ -90,7 +92,8 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
LOAD_KERNEL(subsurface_scatter);
LOAD_KERNEL(direct_lighting);
- LOAD_KERNEL(shadow_blocked);
+ LOAD_KERNEL(shadow_blocked_ao);
+ LOAD_KERNEL(shadow_blocked_dl);
LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(indirect_subsurface);
LOAD_KERNEL(buffer_update);
@@ -222,12 +225,6 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
bool activeRaysAvailable = true;
while(activeRaysAvailable) {
- /* Twice the global work size of other kernels for
- * ckPathTraceKernel_shadow_blocked_direct_lighting. */
- size_t global_size_shadow_blocked[2];
- global_size_shadow_blocked[0] = global_size[0] * 2;
- global_size_shadow_blocked[1] = global_size[1];
-
/* Do path-iteration in host [Enqueue Path-iteration kernels. */
for(int PathIter = 0; PathIter < 16; PathIter++) {
ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
@@ -239,7 +236,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
- ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size_shadow_blocked, local_size);
+ ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index abaf350cbbb..ae61f9e38c1 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -65,7 +65,8 @@ private:
SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
SplitKernelFunction *kernel_subsurface_scatter;
SplitKernelFunction *kernel_direct_lighting;
- SplitKernelFunction *kernel_shadow_blocked;
+ SplitKernelFunction *kernel_shadow_blocked_ao;
+ SplitKernelFunction *kernel_shadow_blocked_dl;
SplitKernelFunction *kernel_next_iteration_setup;
SplitKernelFunction *kernel_indirect_subsurface;
SplitKernelFunction *kernel_buffer_update;
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index a9fb7b74815..e285c382946 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -27,7 +27,8 @@ set(SRC
kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
kernels/opencl/kernel_subsurface_scatter.cl
kernels/opencl/kernel_direct_lighting.cl
- kernels/opencl/kernel_shadow_blocked.cl
+ kernels/opencl/kernel_shadow_blocked_ao.cl
+ kernels/opencl/kernel_shadow_blocked_dl.cl
kernels/opencl/kernel_next_iteration_setup.cl
kernels/opencl/kernel_indirect_subsurface.cl
kernels/opencl/kernel_buffer_update.cl
@@ -214,7 +215,8 @@ set(SRC_SPLIT_HEADERS
split/kernel_queue_enqueue.h
split/kernel_scene_intersect.h
split/kernel_shader_eval.h
- split/kernel_shadow_blocked.h
+ split/kernel_shadow_blocked_ao.h
+ split/kernel_shadow_blocked_dl.h
split/kernel_split_common.h
split/kernel_split_data.h
split/kernel_subsurface_scatter.h
@@ -421,7 +423,8 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.c
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked_dl.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 8ce420d8a48..896b80d783e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -81,7 +81,8 @@ DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval)
DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
-DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
+DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
+DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 8c519a21d95..ba6b1033915 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -48,7 +48,8 @@
# include "split/kernel_holdout_emission_blurring_pathtermination_ao.h"
# include "split/kernel_subsurface_scatter.h"
# include "split/kernel_direct_lighting.h"
-# include "split/kernel_shadow_blocked.h"
+# include "split/kernel_shadow_blocked_ao.h"
+# include "split/kernel_shadow_blocked_dl.h"
# include "split/kernel_next_iteration_setup.h"
# include "split/kernel_indirect_subsurface.h"
# include "split/kernel_buffer_update.h"
@@ -177,7 +178,8 @@ DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
-DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
+DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
+DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
@@ -204,7 +206,8 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
REGISTER(holdout_emission_blurring_pathtermination_ao);
REGISTER(subsurface_scatter);
REGISTER(direct_lighting);
- REGISTER(shadow_blocked);
+ REGISTER(shadow_blocked_ao);
+ REGISTER(shadow_blocked_dl);
REGISTER(next_iteration_setup);
REGISTER(indirect_subsurface);
REGISTER(buffer_update);
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
index 3693f7f9c9d..1c96d67fec2 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
@@ -16,11 +16,11 @@
#include "kernel_compat_opencl.h"
#include "split/kernel_split_common.h"
-#include "split/kernel_shadow_blocked.h"
+#include "split/kernel_shadow_blocked_ao.h"
-__kernel void kernel_ocl_path_trace_shadow_blocked(
+__kernel void kernel_ocl_path_trace_shadow_blocked_ao(
KernelGlobals *kg,
ccl_constant KernelData *data)
{
- kernel_shadow_blocked(kg);
+ kernel_shadow_blocked_ao(kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
new file mode 100644
index 00000000000..2231f767c0c
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_compat_opencl.h"
+#include "split/kernel_split_common.h"
+#include "split/kernel_shadow_blocked_dl.h"
+
+__kernel void kernel_ocl_path_trace_shadow_blocked_dl(
+ KernelGlobals *kg,
+ ccl_constant KernelData *data)
+{
+ kernel_shadow_blocked_dl(kg);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
index 7a947c48e60..2d9e64824e7 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
@@ -27,7 +27,8 @@
#include "kernel_holdout_emission_blurring_pathtermination_ao.cl"
#include "kernel_subsurface_scatter.cl"
#include "kernel_direct_lighting.cl"
-#include "kernel_shadow_blocked.cl"
+#include "kernel_shadow_blocked_ao.cl"
+#include "kernel_shadow_blocked_dl.cl"
#include "kernel_next_iteration_setup.cl"
#include "kernel_indirect_subsurface.cl"
#include "kernel_buffer_update.cl"
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index 52f7002acb3..e153c16bd68 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -36,42 +36,28 @@ CCL_NAMESPACE_BEGIN
*
* Note on sd_shadow : sd_shadow is neither input nor output to this kernel. sd_shadow is filled and consumed in this kernel itself.
* Note on queues :
- * The kernel fetches from QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS queues. We will empty
- * these queues this kernel.
+ * The kernel fetches from QUEUE_SHADOW_RAY_CAST_AO_RAYS queue. We will empty this queues in this kernel.
* State of queues when this kernel is called :
* state of queues QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be same
* before and after this kernel call.
- * QUEUE_SHADOW_RAY_CAST_AO_RAYS & QUEUE_SHADOW_RAY_CAST_DL_RAYS will be filled with rays marked with flags RAY_SHADOW_RAY_CAST_AO
- * and RAY_SHADOW_RAY_CAST_DL respectively, during kernel entry.
- * QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty at kernel exit.
+ * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flags RAY_SHADOW_RAY_CAST_AO during kernel entry.
+ * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty at kernel exit.
*/
-ccl_device void kernel_shadow_blocked(KernelGlobals *kg)
+ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
{
int lidx = ccl_local_id(1) * ccl_local_id(0) + ccl_local_id(0);
ccl_local unsigned int ao_queue_length;
- ccl_local unsigned int dl_queue_length;
if(lidx == 0) {
ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
- dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
- /* flag determining if the current ray is to process shadow ray for AO or DL */
- char shadow_blocked_type = -1;
-
int ray_index = QUEUE_EMPTY_SLOT;
int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index < ao_queue_length + dl_queue_length) {
- if(thread_index < ao_queue_length) {
- ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS,
- kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
- shadow_blocked_type = RAY_SHADOW_RAY_CAST_AO;
- } else {
- ray_index = get_ray_index(kg, thread_index - ao_queue_length, QUEUE_SHADOW_RAY_CAST_DL_RAYS,
- kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
- shadow_blocked_type = RAY_SHADOW_RAY_CAST_DL;
- }
+ if(thread_index < ao_queue_length) {
+ ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+ kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
}
if(ray_index == QUEUE_EMPTY_SLOT)
@@ -80,22 +66,14 @@ ccl_device void kernel_shadow_blocked(KernelGlobals *kg)
/* Flag determining if we need to update L. */
char update_path_radiance = 0;
- if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
- IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
- {
+ if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ccl_global Ray *light_ray_dl_global = &kernel_split_state.light_ray[ray_index];
- ccl_global Ray *light_ray_ao_global = &kernel_split_state.ao_light_ray[ray_index];
-
- ccl_global Ray *light_ray_global =
- shadow_blocked_type == RAY_SHADOW_RAY_CAST_AO
- ? light_ray_ao_global
- : light_ray_dl_global;
+ ccl_global Ray *light_ray_global = &kernel_split_state.ao_light_ray[ray_index];
float3 shadow;
Ray ray = *light_ray_global;
update_path_radiance = !(shadow_blocked(kg,
- &kernel_split_state.sd_DL_shadow[thread_index],
+ &kernel_split_state.sd_DL_shadow[ray_index],
state,
&ray,
&shadow));
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
new file mode 100644
index 00000000000..cfd8d78c2de
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Note on kernel_shadow_blocked kernel.
+ * This is the ninth kernel in the ray tracing logic. This is the eighth
+ * of the path iteration kernels. This kernel takes care of "shadow ray cast"
+ * logic of the direct lighting and AO part of ray tracing.
+ *
+ * The input and output are as follows,
+ *
+ * PathState_coop ----------------------------------|--- kernel_shadow_blocked --|
+ * LightRay_dl_coop --------------------------------| |--- LightRay_dl_coop
+ * LightRay_ao_coop --------------------------------| |--- LightRay_ao_coop
+ * ray_state ---------------------------------------| |--- ray_state
+ * Queue_data(QUEUE_SHADOW_RAY_CAST_AO_RAYS & | |--- Queue_data (QUEUE_SHADOW_RAY_CAST_AO_RAYS & QUEUE_SHADOW_RAY_CAST_AO_RAYS)
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| |
+ * Queue_index(QUEUE_SHADOW_RAY_CAST_AO_RAYS&
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| |
+ * kg (globals) ------------------------------------| |
+ * queuesize ---------------------------------------| |
+ *
+ * Note on sd_shadow : sd_shadow is neither input nor output to this kernel. sd_shadow is filled and consumed in this kernel itself.
+ * Note on queues :
+ * The kernel fetches from QUEUE_SHADOW_RAY_CAST_DL_RAYS queue. We will empty this queue in this kernel.
+ * State of queues when this kernel is called :
+ * state of queues QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be same
+ * before and after this kernel call.
+ * QUEUE_SHADOW_RAY_CAST_DL_RAYS will be filled with rays marked with flags RAY_SHADOW_RAY_CAST_DL, during kernel entry.
+ * QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty at kernel exit.
+ */
+ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
+{
+ int lidx = ccl_local_id(1) * ccl_local_id(0) + ccl_local_id(0);
+
+ ccl_local unsigned int dl_queue_length;
+ if(lidx == 0) {
+ dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ int ray_index = QUEUE_EMPTY_SLOT;
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if(thread_index < dl_queue_length) {
+ ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+ kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
+ }
+
+ if(ray_index == QUEUE_EMPTY_SLOT)
+ return;
+
+ /* Flag determining if we need to update L. */
+ char update_path_radiance = 0;
+
+ if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global Ray *light_ray_global = &kernel_split_state.light_ray[ray_index];
+
+ float3 shadow;
+ Ray ray = *light_ray_global;
+ update_path_radiance = !(shadow_blocked(kg,
+ &kernel_split_state.sd_DL_shadow[ray_index],
+ state,
+ &ray,
+ &shadow));
+
+ *light_ray_global = ray;
+ /* We use light_ray_global's P and t to store shadow and
+ * update_path_radiance.
+ */
+ light_ray_global->P = shadow;
+ light_ray_global->t = update_path_radiance;
+ }
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index ab22c2e4018..772a3ab2fb9 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -78,7 +78,7 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
- SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 2) \
+ SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
SPLIT_DATA_DEBUG_ENTRIES \
/* struct that holds pointers to data in the shared state buffer */