Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHristo Gueorguiev <prem.nirved@gmail.com>2017-03-08 17:42:26 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2017-03-09 19:09:37 +0300
commit57e26627c485baab63e108821b2712d5e234ae7c (patch)
treee3ee5aa4bf759121559c69504b77d600552bf527 /intern/cycles/kernel/split
parent6c942db30dee14eb37229879656fa049a9ac6df6 (diff)
Cycles: SSS and Volume rendering in split kernel
Decoupled ray marching is not supported yet. Transparent shadows are always enabled for volume rendering. Changes in kernel/bvh and kernel/geom are from Sergey. This simiplifies code significantly, and prepares it for record-all transparent shadow function in split kernel.
Diffstat (limited to 'intern/cycles/kernel/split')
-rw-r--r--intern/cycles/kernel/split/kernel_buffer_update.h (renamed from intern/cycles/kernel/split/kernel_background_buffer_update.h)25
-rw-r--r--intern/cycles/kernel/split/kernel_do_volume.h97
-rw-r--r--intern/cycles/kernel/split/kernel_indirect_background.h87
-rw-r--r--intern/cycles/kernel/split/kernel_indirect_subsurface.h77
-rw-r--r--intern/cycles/kernel/split/kernel_lamp_emission.h9
-rw-r--r--intern/cycles/kernel/split/kernel_path_init.h4
-rw-r--r--intern/cycles/kernel/split/kernel_queue_enqueue.h6
-rw-r--r--intern/cycles/kernel/split/kernel_scene_intersect.h13
-rw-r--r--intern/cycles/kernel/split/kernel_shader_eval.h4
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked.h4
-rw-r--r--intern/cycles/kernel/split/kernel_split_common.h7
-rw-r--r--intern/cycles/kernel/split/kernel_split_data.h20
-rw-r--r--intern/cycles/kernel/split/kernel_split_data_types.h11
-rw-r--r--intern/cycles/kernel/split/kernel_subsurface_scatter.h86
14 files changed, 411 insertions, 39 deletions
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index 04aaf1bbaad..e42605c88e7 100644
--- a/intern/cycles/kernel/split/kernel_background_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -69,7 +69,7 @@ CCL_NAMESPACE_BEGIN
* QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
* QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty
*/
-ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
+ccl_device void kernel_buffer_update(KernelGlobals *kg)
{
ccl_local unsigned int local_queue_atomics;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
@@ -141,26 +141,6 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride;
buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride;
- if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
- /* eval background shader if nothing hit */
- if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
- *L_transparent = (*L_transparent) + average((*throughput));
-#ifdef __PASSES__
- if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
-#endif
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
-#ifdef __BACKGROUND__
- /* sample background shader */
- float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
- path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
-#endif
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- }
- }
-
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
float3 L_sum = path_radiance_clamp_and_sum(kg, L);
kernel_write_light_passes(kg, buffer, L, sample);
@@ -207,6 +187,9 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
*L_transparent = 0.0f;
path_radiance_init(L, kernel_data.film.use_light_pass);
path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng, sample, ray);
+#ifdef __SUBSURFACE__
+ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+#endif
#ifdef __KERNEL_DEBUG__
debug_data_init(debug_data);
#endif
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
new file mode 100644
index 00000000000..18da6e8aa3a
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+
+ccl_device void kernel_do_volume(KernelGlobals *kg)
+{
+#ifdef __VOLUME__
+ /* We will empty this queue in this kernel. */
+ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ }
+ /* Fetch use_queues_flag. */
+ ccl_local char local_use_queues_flag;
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ local_use_queues_flag = *kernel_split_params.use_queues_flag;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if(local_use_queues_flag) {
+ ray_index = get_ray_index(kg, ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+ }
+
+ if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+
+ bool hit = ! IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd_input = &kernel_split_state.sd_DL_shadow[ray_index];
+
+ /* Sanitize volume stack. */
+ if(!hit) {
+ kernel_volume_clean_stack(kg, state->volume_stack);
+ }
+ /* volume attenuation, emission, scatter */
+ if(state->volume_stack[0].shader != SHADER_NONE) {
+ Ray volume_ray = *ray;
+ volume_ray.t = (hit)? isect->t: FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+
+ {
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, state, sd, &volume_ray, L, throughput, rng, heterogeneous);
+
+# ifdef __VOLUME_SCATTER__
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, rng, sd, sd_input, *throughput, state, L);
+
+ /* indirect light bounce */
+ if(kernel_path_volume_bounce(kg, rng, sd, throughput, state, L, ray))
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED);
+ else
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+# endif
+ }
+ }
+ }
+
+#endif
+}
+
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
new file mode 100644
index 00000000000..e314a98105e
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_indirect_background(KernelGlobals *kg)
+{
+ /*
+ ccl_local unsigned int local_queue_atomics;
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ // */
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg, ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
+
+#ifdef __COMPUTE_DEVICE_GPU__
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+ * active since we have barrier() calls later in the kernel. CPU devices,
+ * expect all threads to execute barrier statement.
+ */
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+#endif
+
+#ifndef __COMPUTE_DEVICE_GPU__
+ if(ray_index != QUEUE_EMPTY_SLOT) {
+#endif
+
+
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index];
+
+ if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ /* eval background shader if nothing hit */
+ if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
+ *L_transparent = (*L_transparent) + average((*throughput));
+#ifdef __PASSES__
+ if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
+#endif
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+
+ if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+#ifdef __BACKGROUND__
+ /* sample background shader */
+ float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
+ path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
+#endif
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+ }
+
+#ifndef __COMPUTE_DEVICE_GPU__
+ }
+#endif
+
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
new file mode 100644
index 00000000000..a56e85abeb9
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
+{
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if(thread_index == 0) {
+ /* We will empty both queues in this kernel. */
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
+
+ int ray_index;
+ get_ray_index(kg, thread_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ ray_index = get_ray_index(kg, thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+#ifdef __SUBSURFACE__
+
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+
+ if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ kernel_path_subsurface_accum_indirect(ss_indirect, L);
+
+ /* Trace indirect subsurface rays by restarting the loop. this uses less
+ * stack memory than invoking kernel_path_indirect.
+ */
+ if(ss_indirect->num_rays) {
+ kernel_path_subsurface_setup_indirect(kg,
+ ss_indirect,
+ state,
+ ray,
+ L,
+ throughput);
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+ }
+
+#endif /* __SUBSURFACE__ */
+
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index 261625da31d..84de231b78c 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -38,10 +38,12 @@ CCL_NAMESPACE_BEGIN
*/
ccl_device void kernel_lamp_emission(KernelGlobals *kg)
{
+#ifndef __VOLUME__
/* We will empty this queue in this kernel. */
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
}
+#endif
/* Fetch use_queues_flag. */
ccl_local char local_use_queues_flag;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
@@ -55,7 +57,12 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
- 1);
+#ifndef __VOLUME__
+ 1
+#else
+ 0
+#endif
+ );
if(ray_index == QUEUE_EMPTY_SLOT) {
return;
}
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index fe3c9e1e8a2..f44aff30fa9 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -82,6 +82,10 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
&kernel_split_state.rng[ray_index],
my_sample,
&kernel_split_state.ray[ray_index]);
+#ifdef __SUBSURFACE__
+ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+#endif
+
#ifdef __KERNEL_DEBUG__
debug_data_init(&kernel_split_state.debug_data[ray_index]);
#endif
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index 66aad705bd4..70ec92b394b 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -63,10 +63,12 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
int queue_number = -1;
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) {
queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
}
- else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
}
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index a7e0c7692a2..144cba67e23 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -93,7 +93,7 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
#ifdef __KERNEL_DEBUG__
DebugData *debug_data = &kernel_split_state.debug_data[ray_index];
#endif
- Intersection *isect = &kernel_split_state.isect[ray_index];
+ Intersection isect;
PathState state = kernel_split_state.path_state[ray_index];
Ray ray = kernel_split_state.ray[ray_index];
@@ -116,16 +116,17 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
lcg_state = lcg_state_init(&rng, &state, 0x51633e2d);
}
- bool hit = scene_intersect(kg, ray, visibility, isect, &lcg_state, difl, extmax);
+ bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
#else
- bool hit = scene_intersect(kg, ray, visibility, isect, NULL, 0.0f, 0.0f);
+ bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
#endif
+ kernel_split_state.isect[ray_index] = isect;
#ifdef __KERNEL_DEBUG__
if(state.flag & PATH_RAY_CAMERA) {
- debug_data->num_bvh_traversed_nodes += isect->num_traversed_nodes;
- debug_data->num_bvh_traversed_instances += isect->num_traversed_instances;
- debug_data->num_bvh_intersections += isect->num_intersections;
+ debug_data->num_bvh_traversed_nodes += isect.num_traversed_nodes;
+ debug_data->num_bvh_traversed_instances += isect.num_traversed_instances;
+ debug_data->num_bvh_intersections += isect.num_intersections;
}
debug_data->num_ray_bounces++;
#endif
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 35ee19ddf1b..4bd5c8b6eb0 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -76,14 +76,14 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
/* Continue on with shader evaluation. */
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
- Intersection *isect = &kernel_split_state.isect[ray_index];
+ Intersection isect = kernel_split_state.isect[ray_index];
ccl_global uint *rng = &kernel_split_state.rng[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
Ray ray = kernel_split_state.ray[ray_index];
shader_setup_from_ray(kg,
&kernel_split_state.sd[ray_index],
- isect,
+ &isect,
&ray);
float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
shader_eval_surface(kg, &kernel_split_state.sd[ray_index], rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked.h
index d532c7cf55b..52f7002acb3 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h
@@ -93,12 +93,14 @@ ccl_device void kernel_shadow_blocked(KernelGlobals *kg)
: light_ray_dl_global;
float3 shadow;
+ Ray ray = *light_ray_global;
update_path_radiance = !(shadow_blocked(kg,
&kernel_split_state.sd_DL_shadow[thread_index],
state,
- light_ray_global,
+ &ray,
&shadow));
+ *light_ray_global = ray;
/* We use light_ray_global's P and t to store shadow and
* update_path_radiance.
*/
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index dd0c3f9c941..5c12fe426ac 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -52,11 +52,11 @@
#include "kernel_passes.h"
#ifdef __SUBSURFACE__
-#include "kernel_subsurface.h"
+# include "kernel_subsurface.h"
#endif
#ifdef __VOLUME__
-#include "kernel_volume.h"
+# include "kernel_volume.h"
#endif
#include "kernel_path_state.h"
@@ -65,9 +65,10 @@
#include "kernel_path_common.h"
#include "kernel_path_surface.h"
#include "kernel_path_volume.h"
+#include "kernel_path_subsurface.h"
#ifdef __KERNEL_DEBUG__
-#include "kernel_debug.h"
+# include "kernel_debug.h"
#endif
#include "kernel_queues.h"
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 5380c0c5de6..81dcdbaedde 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -31,6 +31,14 @@ ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_el
size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
+#ifdef __SUBSURFACE__
+ size += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16); /* ss_rays */
+#endif
+
+#ifdef __VOLUME__
+ size += align_up(2 * num_elements * sizeof(PathState), 16); /* state_shadow */
+#endif
+
return size;
}
@@ -46,9 +54,19 @@ ccl_device_inline void split_data_init(KernelGlobals *kg,
#define SPLIT_DATA_ENTRY(type, name, num) \
split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16);
- SPLIT_DATA_ENTRIES
+ SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
+#ifdef __SUBSURFACE__
+ split_data->ss_rays = (ccl_global SubsurfaceIndirectRays*)p;
+ p += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16);
+#endif
+
+#ifdef __VOLUME__
+ split_data->state_shadow = (ccl_global PathState*)p;
+ p += align_up(2 * num_elements * sizeof(PathState), 16);
+#endif
+
split_data->ray_state = ray_state;
}
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 62e3ea45ae2..b39ed4995dc 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -68,14 +68,13 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
- SPLIT_DATA_ENTRY(Intersection, isect, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
SPLIT_DATA_ENTRY(ccl_global float3, ao_alpha, 1) \
SPLIT_DATA_ENTRY(ccl_global float3, ao_bsdf, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, ao_light_ray, 1) \
SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
- SPLIT_DATA_ENTRY(Intersection, isect_shadow, 2) \
SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
@@ -88,6 +87,14 @@ typedef struct SplitData {
SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
+#ifdef __SUBSURFACE__
+ ccl_global SubsurfaceIndirectRays *ss_rays;
+#endif
+
+#ifdef __VOLUME__
+ ccl_global PathState *state_shadow;
+#endif
+
/* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
* the host easily) but is still used the same as the other data so we have it here in this struct as well
*/
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
new file mode 100644
index 00000000000..fcdd805f27b
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -0,0 +1,86 @@
+
+
+CCL_NAMESPACE_BEGIN
+
+
+ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
+{
+#ifdef __SUBSURFACE__
+
+ ccl_local unsigned int local_queue_atomics;
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg, ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
+
+#ifdef __COMPUTE_DEVICE_GPU__
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+ * active since we have barrier() calls later in the kernel. CPU devices,
+ * expect all threads to execute barrier statement.
+ */
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+#endif
+
+#ifndef __COMPUTE_DEVICE_GPU__
+ if(ray_index != QUEUE_EMPTY_SLOT) {
+#endif
+
+
+ char enqueue_flag = 0;
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ if(sd->flag & SD_BSSRDF) {
+ if(kernel_path_subsurface_scatter(kg,
+ sd,
+ emission_sd,
+ L,
+ state,
+ rng,
+ ray,
+ throughput,
+ ss_indirect)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ enqueue_flag = 1;
+ }
+ }
+ }
+
+#ifndef __COMPUTE_DEVICE_GPU__
+ }
+#endif
+
+ /* Enqueue RAY_UPDATE_BUFFER rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ &local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+#endif /* __SUBSURFACE__ */
+
+}
+
+CCL_NAMESPACE_END