diff options
author | Hristo Gueorguiev <prem.nirved@gmail.com> | 2017-03-08 17:42:26 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-03-09 19:09:37 +0300 |
commit | 57e26627c485baab63e108821b2712d5e234ae7c (patch) | |
tree | e3ee5aa4bf759121559c69504b77d600552bf527 /intern/cycles/kernel/split | |
parent | 6c942db30dee14eb37229879656fa049a9ac6df6 (diff) |
Cycles: SSS and Volume rendering in split kernel
Decoupled ray marching is not supported yet.
Transparent shadows are always enabled for volume rendering.
Changes in kernel/bvh and kernel/geom are from Sergey.
This simplifies code significantly, and prepares it for
record-all transparent shadow function in split kernel.
Diffstat (limited to 'intern/cycles/kernel/split')
-rw-r--r-- | intern/cycles/kernel/split/kernel_buffer_update.h (renamed from intern/cycles/kernel/split/kernel_background_buffer_update.h) | 25 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_do_volume.h | 97 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_indirect_background.h | 87 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_indirect_subsurface.h | 77 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_lamp_emission.h | 9 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_path_init.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_queue_enqueue.h | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_scene_intersect.h | 13 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_shader_eval.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_shadow_blocked.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_split_common.h | 7 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_split_data.h | 20 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_split_data_types.h | 11 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_subsurface_scatter.h | 86 |
14 files changed, 411 insertions, 39 deletions
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index 04aaf1bbaad..e42605c88e7 100644 --- a/intern/cycles/kernel/split/kernel_background_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -69,7 +69,7 @@ CCL_NAMESPACE_BEGIN * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty */ -ccl_device void kernel_background_buffer_update(KernelGlobals *kg) +ccl_device void kernel_buffer_update(KernelGlobals *kg) { ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { @@ -141,26 +141,6 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg) rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride; buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride; - if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { - /* eval background shader if nothing hit */ - if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) { - *L_transparent = (*L_transparent) + average((*throughput)); -#ifdef __PASSES__ - if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) -#endif - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); - } - - if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { -#ifdef __BACKGROUND__ - /* sample background shader */ - float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray); - path_radiance_accum_background(L, (*throughput), L_background, state->bounce); -#endif - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); - } - } - if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { float3 L_sum = path_radiance_clamp_and_sum(kg, L); kernel_write_light_passes(kg, buffer, L, sample); @@ -207,6 +187,9 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg) *L_transparent = 0.0f; 
path_radiance_init(L, kernel_data.film.use_light_pass); path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng, sample, ray); +#ifdef __SUBSURFACE__ + kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); +#endif #ifdef __KERNEL_DEBUG__ debug_data_init(debug_data); #endif diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h new file mode 100644 index 00000000000..18da6e8aa3a --- /dev/null +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -0,0 +1,97 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + + +ccl_device void kernel_do_volume(KernelGlobals *kg) +{ +#ifdef __VOLUME__ + /* We will empty this queue in this kernel. */ + if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + } + /* Fetch use_queues_flag. 
*/ + ccl_local char local_use_queues_flag; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_use_queues_flag = *kernel_split_params.use_queues_flag; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if(local_use_queues_flag) { + ray_index = get_ray_index(kg, ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } + + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) { + + bool hit = ! IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND); + + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global RNG *rng = &kernel_split_state.rng[ray_index]; + ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; + ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd_input = &kernel_split_state.sd_DL_shadow[ray_index]; + + /* Sanitize volume stack. */ + if(!hit) { + kernel_volume_clean_stack(kg, state->volume_stack); + } + /* volume attenuation, emission, scatter */ + if(state->volume_stack[0].shader != SHADER_NONE) { + Ray volume_ray = *ray; + volume_ray.t = (hit)? 
isect->t: FLT_MAX; + + bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); + + { + /* integrate along volume segment with distance sampling */ + VolumeIntegrateResult result = kernel_volume_integrate( + kg, state, sd, &volume_ray, L, throughput, rng, heterogeneous); + +# ifdef __VOLUME_SCATTER__ + if(result == VOLUME_PATH_SCATTERED) { + /* direct lighting */ + kernel_path_volume_connect_light(kg, rng, sd, sd_input, *throughput, state, L); + + /* indirect light bounce */ + if(kernel_path_volume_bounce(kg, rng, sd, throughput, state, L, ray)) + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED); + else + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER); + } +# endif + } + } + } + +#endif +} + + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h new file mode 100644 index 00000000000..e314a98105e --- /dev/null +++ b/intern/cycles/kernel/split/kernel_indirect_background.h @@ -0,0 +1,87 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CCL_NAMESPACE_BEGIN + +ccl_device void kernel_indirect_background(KernelGlobals *kg) +{ + /* + ccl_local unsigned int local_queue_atomics; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + // */ + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + + + ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index]; + + if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { + /* eval background shader if nothing hit */ + if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) { + *L_transparent = (*L_transparent) + average((*throughput)); +#ifdef __PASSES__ + if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) +#endif + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } + + if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { +#ifdef __BACKGROUND__ + /* sample background shader */ + float3 L_background = indirect_background(kg, 
&kernel_split_state.sd_DL_shadow[ray_index], state, ray); + path_radiance_accum_background(L, (*throughput), L_background, state->bounce); +#endif + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } + } + +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h new file mode 100644 index 00000000000..a56e85abeb9 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h @@ -0,0 +1,77 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +ccl_device void kernel_indirect_subsurface(KernelGlobals *kg) +{ + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if(thread_index == 0) { + /* We will empty both queues in this kernel. 
*/ + kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; + } + + int ray_index; + get_ray_index(kg, thread_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + ray_index = get_ray_index(kg, thread_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + +#ifdef __SUBSURFACE__ + + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + + ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + + if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { + ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; + kernel_path_subsurface_accum_indirect(ss_indirect, L); + + /* Trace indirect subsurface rays by restarting the loop. this uses less + * stack memory than invoking kernel_path_indirect. + */ + if(ss_indirect->num_rays) { + kernel_path_subsurface_setup_indirect(kg, + ss_indirect, + state, + ray, + L, + throughput); + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + else { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } + } + +#endif /* __SUBSURFACE__ */ + +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h index 261625da31d..84de231b78c 100644 --- a/intern/cycles/kernel/split/kernel_lamp_emission.h +++ b/intern/cycles/kernel/split/kernel_lamp_emission.h @@ -38,10 +38,12 @@ CCL_NAMESPACE_BEGIN */ ccl_device void kernel_lamp_emission(KernelGlobals *kg) { +#ifndef __VOLUME__ /* We will empty this queue in this kernel. 
*/ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; } +#endif /* Fetch use_queues_flag. */ ccl_local char local_use_queues_flag; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { @@ -55,7 +57,12 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg) QUEUE_ACTIVE_AND_REGENERATED_RAYS, kernel_split_state.queue_data, kernel_split_params.queue_size, - 1); +#ifndef __VOLUME__ + 1 +#else + 0 +#endif + ); if(ray_index == QUEUE_EMPTY_SLOT) { return; } diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h index fe3c9e1e8a2..f44aff30fa9 100644 --- a/intern/cycles/kernel/split/kernel_path_init.h +++ b/intern/cycles/kernel/split/kernel_path_init.h @@ -82,6 +82,10 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { &kernel_split_state.rng[ray_index], my_sample, &kernel_split_state.ray[ray_index]); +#ifdef __SUBSURFACE__ + kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); +#endif + #ifdef __KERNEL_DEBUG__ debug_data_init(&kernel_split_state.debug_data[ray_index]); #endif diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h index 66aad705bd4..70ec92b394b 100644 --- a/intern/cycles/kernel/split/kernel_queue_enqueue.h +++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h @@ -63,10 +63,12 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg) int queue_number = -1; - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) { + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) { queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; } - else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { + else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(kernel_split_state.ray_state, ray_index, 
RAY_REGENERATED)) { queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS; } diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h index a7e0c7692a2..144cba67e23 100644 --- a/intern/cycles/kernel/split/kernel_scene_intersect.h +++ b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -93,7 +93,7 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg) #ifdef __KERNEL_DEBUG__ DebugData *debug_data = &kernel_split_state.debug_data[ray_index]; #endif - Intersection *isect = &kernel_split_state.isect[ray_index]; + Intersection isect; PathState state = kernel_split_state.path_state[ray_index]; Ray ray = kernel_split_state.ray[ray_index]; @@ -116,16 +116,17 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg) lcg_state = lcg_state_init(&rng, &state, 0x51633e2d); } - bool hit = scene_intersect(kg, ray, visibility, isect, &lcg_state, difl, extmax); + bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax); #else - bool hit = scene_intersect(kg, ray, visibility, isect, NULL, 0.0f, 0.0f); + bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f); #endif + kernel_split_state.isect[ray_index] = isect; #ifdef __KERNEL_DEBUG__ if(state.flag & PATH_RAY_CAMERA) { - debug_data->num_bvh_traversed_nodes += isect->num_traversed_nodes; - debug_data->num_bvh_traversed_instances += isect->num_traversed_instances; - debug_data->num_bvh_intersections += isect->num_intersections; + debug_data->num_bvh_traversed_nodes += isect.num_traversed_nodes; + debug_data->num_bvh_traversed_instances += isect.num_traversed_instances; + debug_data->num_bvh_intersections += isect.num_intersections; } debug_data->num_ray_bounces++; #endif diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h index 35ee19ddf1b..4bd5c8b6eb0 100644 --- a/intern/cycles/kernel/split/kernel_shader_eval.h +++ 
b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -76,14 +76,14 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg) /* Continue on with shader evaluation. */ if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { - Intersection *isect = &kernel_split_state.isect[ray_index]; + Intersection isect = kernel_split_state.isect[ray_index]; ccl_global uint *rng = &kernel_split_state.rng[ray_index]; ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; Ray ray = kernel_split_state.ray[ray_index]; shader_setup_from_ray(kg, &kernel_split_state.sd[ray_index], - isect, + &isect, &ray); float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF); shader_eval_surface(kg, &kernel_split_state.sd[ray_index], rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN); diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked.h index d532c7cf55b..52f7002acb3 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h @@ -93,12 +93,14 @@ ccl_device void kernel_shadow_blocked(KernelGlobals *kg) : light_ray_dl_global; float3 shadow; + Ray ray = *light_ray_global; update_path_radiance = !(shadow_blocked(kg, &kernel_split_state.sd_DL_shadow[thread_index], state, - light_ray_global, + &ray, &shadow)); + *light_ray_global = ray; /* We use light_ray_global's P and t to store shadow and * update_path_radiance. 
*/ diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index dd0c3f9c941..5c12fe426ac 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -52,11 +52,11 @@ #include "kernel_passes.h" #ifdef __SUBSURFACE__ -#include "kernel_subsurface.h" +# include "kernel_subsurface.h" #endif #ifdef __VOLUME__ -#include "kernel_volume.h" +# include "kernel_volume.h" #endif #include "kernel_path_state.h" @@ -65,9 +65,10 @@ #include "kernel_path_common.h" #include "kernel_path_surface.h" #include "kernel_path_volume.h" +#include "kernel_path_subsurface.h" #ifdef __KERNEL_DEBUG__ -#include "kernel_debug.h" +# include "kernel_debug.h" #endif #include "kernel_queues.h" diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h index 5380c0c5de6..81dcdbaedde 100644 --- a/intern/cycles/kernel/split/kernel_split_data.h +++ b/intern/cycles/kernel/split/kernel_split_data.h @@ -31,6 +31,14 @@ ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_el size = size SPLIT_DATA_ENTRIES; #undef SPLIT_DATA_ENTRY +#ifdef __SUBSURFACE__ + size += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16); /* ss_rays */ +#endif + +#ifdef __VOLUME__ + size += align_up(2 * num_elements * sizeof(PathState), 16); /* state_shadow */ +#endif + return size; } @@ -46,9 +54,19 @@ ccl_device_inline void split_data_init(KernelGlobals *kg, #define SPLIT_DATA_ENTRY(type, name, num) \ split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16); - SPLIT_DATA_ENTRIES + SPLIT_DATA_ENTRIES; #undef SPLIT_DATA_ENTRY +#ifdef __SUBSURFACE__ + split_data->ss_rays = (ccl_global SubsurfaceIndirectRays*)p; + p += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16); +#endif + +#ifdef __VOLUME__ + split_data->state_shadow = (ccl_global PathState*)p; + p += align_up(2 * num_elements * sizeof(PathState), 
16); +#endif + split_data->ray_state = ray_state; } diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index 62e3ea45ae2..b39ed4995dc 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -68,14 +68,13 @@ typedef struct SplitParams { SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ - SPLIT_DATA_ENTRY(Intersection, isect, 1) \ + SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ SPLIT_DATA_ENTRY(ccl_global float3, ao_alpha, 1) \ SPLIT_DATA_ENTRY(ccl_global float3, ao_bsdf, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, ao_light_ray, 1) \ SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ - SPLIT_DATA_ENTRY(Intersection, isect_shadow, 2) \ SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? 
*/ \ SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ @@ -88,6 +87,14 @@ typedef struct SplitData { SPLIT_DATA_ENTRIES #undef SPLIT_DATA_ENTRY +#ifdef __SUBSURFACE__ + ccl_global SubsurfaceIndirectRays *ss_rays; +#endif + +#ifdef __VOLUME__ + ccl_global PathState *state_shadow; +#endif + /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from * the host easily) but is still used the same as the other data so we have it here in this struct as well */ diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h new file mode 100644 index 00000000000..fcdd805f27b --- /dev/null +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -0,0 +1,86 @@ + + +CCL_NAMESPACE_BEGIN + + +ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) +{ +#ifdef __SUBSURFACE__ + + ccl_local unsigned int local_queue_atomics; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. 
+ */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + + + char enqueue_flag = 0; + ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global RNG *rng = &kernel_split_state.rng[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; + ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + if(sd->flag & SD_BSSRDF) { + if(kernel_path_subsurface_scatter(kg, + sd, + emission_sd, + L, + state, + rng, + ray, + throughput, + ss_indirect)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + enqueue_flag = 1; + } + } + } + +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + + /* Enqueue RAY_UPDATE_BUFFER rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + &local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); + +#endif /* __SUBSURFACE__ */ + +} + +CCL_NAMESPACE_END |