Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/split/kernel_subsurface_scatter.h')
-rw-r--r-- intern/cycles/kernel/split/kernel_subsurface_scatter.h 334
1 file changed, 279 insertions, 55 deletions
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 0b4d50c70ee..d5083b23f80 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -16,82 +16,306 @@
CCL_NAMESPACE_BEGIN
+#if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__)
-ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
- ccl_local_param unsigned int* local_queue_atomics)
+ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, int ray_index)
{
-#ifdef __SUBSURFACE__
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+ /* Purpose: arm the subsurface indirect-light loop for ray_index. After the
+ * indirect loop init call above, every subsurface resume cursor in the
+ * per-ray branched state is zeroed, so the matching _iter function starts
+ * at the first closure, first sample and first hit. */
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ /* Closure and sample cursors: _iter starts its two outer loops from these. */
+ branched_state->ss_next_closure = 0;
+ branched_state->ss_next_sample = 0;
+ /* No hits recorded yet; the hit loop in _iter runs from next_hit to num_hits. */
+ branched_state->num_hits = 0;
+ branched_state->next_hit = 0;
+ /* Add RAY_BRANCHED_SUBSURFACE_INDIRECT to the flags stored for this ray. */
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
+}
+/* Resumable subsurface indirect-light loop for the ray at ray_index.
+ *
+ * Walks three nested loops: BSSRDF closures of the saved shader data,
+ * subsurface samples per closure, and hits per sample. The loops start from
+ * the cursors stored in the branched state (ss_next_closure, ss_next_sample,
+ * next_hit), so a later call continues where an earlier one stopped.
+ *
+ * Returns true in two cases: the surface indirect-light iter call returned
+ * true (the cursors are saved first), or shared_sample_count is still
+ * positive after all closures were processed. Returns false once the loops
+ * are exhausted and kernel_split_branched_path_indirect_loop_end was called.
+ */
+ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ /* Read from the shader data kept in the branched state; the per-ray
+ * kernel_split_state.sd entry is overwritten for every hit further down. */
+ ShaderData *sd = &branched_state->sd;
+ RNG rng = kernel_split_state.rng[ray_index]; /* local copy; this function never stores it back */
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ /* Closure loop: begins at the saved closure cursor. */
+ for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+
+ if(!CLOSURE_IS_BSSRDF(sc->type))
+ continue;
+
+ /* set up random number generator: seeded only while ss_next_sample, next_hit,
+ * next_closure and next_sample are all zero, so calls resumed with a non-zero
+ * cursor keep the stored lcg_state */
+ if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
+ branched_state->next_closure == 0 && branched_state->next_sample == 0)
+ {
+ branched_state->lcg_state = lcg_state_init(&rng,
+ branched_state->path_state.rng_offset,
+ branched_state->path_state.sample,
+ 0x68bc21eb);
+ }
+ int num_samples = kernel_data.integrator.subsurface_samples;
+ float num_samples_inv = 1.0f/num_samples; /* NOTE(review): assumes subsurface_samples > 0 -- confirm */
+ RNG bssrdf_rng = cmj_hash(rng, i); /* hashed with the closure index i */
+
+ /* do subsurface scatter step with copy of shader data, this will
+ * replace the BSSRDF with a diffuse BSDF closure */
+ for(int j = branched_state->ss_next_sample; j < num_samples; j++) {
+ ccl_global SubsurfaceIntersection *ss_isect = &branched_state->ss_isect;
+ float bssrdf_u, bssrdf_v;
+ path_branched_rng_2D(kg,
+ &bssrdf_rng,
+ &branched_state->path_state,
+ j,
+ num_samples,
+ PRNG_BSDF_U,
+ &bssrdf_u,
+ &bssrdf_v);
+
+ /* intersection is expensive so avoid doing multiple times for the same input */
+ if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ RNG lcg_state = branched_state->lcg_state; /* private copy, stored back after the call */
+ SubsurfaceIntersection ss_isect_private; /* filled by the call, then copied into the branched state */
+
+ branched_state->num_hits = subsurface_scatter_multi_intersect(kg,
+ &ss_isect_private,
+ sd,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ true);
+
+ branched_state->lcg_state = lcg_state;
+ *ss_isect = ss_isect_private;
+ }
+ /* Ray copy whose direction and length are re-aimed at each hit point below. */
+#ifdef __VOLUME__
+ Ray volume_ray = branched_state->ray;
+ bool need_update_volume_stack =
+ kernel_data.integrator.use_volumes &&
+ sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
+#endif /* __VOLUME__ */
+
+ /* compute lighting with the BSDF closure */
+ for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
+ ShaderData *bssrdf_sd = &kernel_split_state.sd[ray_index];
+ *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
+ * important as the indirect path will write into bssrdf_sd */
+ /* Round-trip the intersection data through a private copy around the setup call. */
+ SubsurfaceIntersection ss_isect_private = *ss_isect;
+ subsurface_scatter_multi_setup(kg,
+ &ss_isect_private,
+ hit,
+ bssrdf_sd,
+ &branched_state->path_state,
+ branched_state->path_state.flag,
+ sc,
+ true);
+ *ss_isect = ss_isect_private;
+ /* Overwrite the per-ray path state with the copy kept in the branched state. */
+ ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
+ *hit_state = branched_state->path_state;
+ /* Branch that path state for sample j out of num_samples. */
+ path_state_branch(hit_state, j, num_samples);
+
+#ifdef __VOLUME__
+ if(need_update_volume_stack) {
+ /* Setup ray from previous surface point to the new one. */
+ float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
+ volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
+
+ /* this next part is expensive as it does scene intersection so only do once */
+ if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ branched_state->volume_stack[k] = hit_state->volume_stack[k];
+ }
+
+ kernel_volume_stack_update_for_subsurface(kg,
+ emission_sd,
+ &volume_ray,
+ branched_state->volume_stack);
+ }
+ /* Copy the stack held in the branched state into the path state used for this hit. */
+ for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ hit_state->volume_stack[k] = branched_state->volume_stack[k];
+ }
+ }
+#endif /* __VOLUME__ */
+
+#ifdef __EMISSION__
+ if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ /* direct light */
+ if(kernel_data.integrator.use_direct_light) {
+ int all = (kernel_data.integrator.sample_all_lights_direct) ||
+ (branched_state->path_state.flag & PATH_RAY_SHADOW_CATCHER);
+ kernel_branched_path_surface_connect_light(kg,
+ &rng,
+ bssrdf_sd,
+ emission_sd,
+ hit_state,
+ branched_state->throughput,
+ num_samples_inv,
+ L,
+ all);
+ }
+ }
+#endif /* __EMISSION__ */
+
+ /* indirect light */
+ if(kernel_split_branched_path_surface_indirect_light_iter(kg,
+ ray_index,
+ num_samples_inv,
+ bssrdf_sd,
+ false,
+ false))
+ {
+ branched_state->ss_next_closure = i;
+ branched_state->ss_next_sample = j;
+ branched_state->next_hit = hit;
+ /* Cursors saved: the next call re-enters the three loops at (i, j, hit). */
+ return true;
+ }
+ /* Hit complete: zero next_closure, which the first-pass guards above test. */
+ branched_state->next_closure = 0;
+ }
+ /* All hits of sample j consumed: later samples start at hit 0. */
+ branched_state->next_hit = 0;
+ }
+ /* All samples of closure i consumed: later closures start at sample 0. */
+ branched_state->ss_next_sample = 0;
+ }
+ /* Park the closure cursor at num_closure so a later call skips the closure loop. */
+ branched_state->ss_next_closure = sd->num_closure;
+ /* Keep reporting true while shared_sample_count is positive. */
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if(branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+ /* Nothing pending: run the indirect loop end step for this ray. */
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ return false;
+}
+
+#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
+
+ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
+{
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if(thread_index == 0) {
+ /* We will empty both queues in this kernel. */
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
}
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
- 0);
-
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
- char enqueue_flag = 0;
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
+ 1);
+ get_ray_index(kg, thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+#ifdef __SUBSURFACE__
ccl_global char *ray_state = kernel_split_state.ray_state;
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
- ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+
if(sd->flag & SD_BSSRDF) {
- if(kernel_path_subsurface_scatter(kg,
- sd,
- emission_sd,
- L,
- state,
- &rng,
- ray,
- throughput,
- ss_indirect)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched) {
+#endif
+ if(kernel_path_subsurface_scatter(kg,
+ sd,
+ emission_sd,
+ L,
+ state,
+ &rng,
+ ray,
+ throughput,
+ ss_indirect))
+ {
+ kernel_split_path_end(kg, ray_index);
+ }
+#ifdef __BRANCHED_PATH__
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ float bssrdf_probability;
+ ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
+
+ /* modify throughput for picking bssrdf or bsdf */
+ *throughput *= bssrdf_probability;
+
+ /* do bssrdf scatter step if we picked a bssrdf closure */
+ if(sc) {
+ uint lcg_state = lcg_state_init(&rng, state->rng_offset, state->sample, 0x68bc21eb);
+ float bssrdf_u, bssrdf_v;
+ path_state_rng_2D(kg,
+ &rng,
+ state,
+ PRNG_BSDF_U,
+ &bssrdf_u, &bssrdf_v);
+ subsurface_scatter_step(kg,
+ sd,
+ state,
+ state->flag,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ false);
+ }
+ }
+ else {
+ kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
+
+ if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
}
+#endif
}
kernel_split_state.rng[ray_index] = rng;
}
-#ifndef __COMPUTE_DEVICE_GPU__
+# ifdef __BRANCHED_PATH__
+ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
}
-#endif
- /* Enqueue RAY_UPDATE_BUFFER rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* iter loop */
+ ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
#endif /* __SUBSURFACE__ */