git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/kernel/split')
-rw-r--r--  intern/cycles/kernel/split/kernel_branched.h | 350
-rw-r--r--  intern/cycles/kernel/split/kernel_buffer_update.h | 227
-rw-r--r--  intern/cycles/kernel/split/kernel_data_init.h | 122
-rw-r--r--  intern/cycles/kernel/split/kernel_direct_lighting.h | 195
-rw-r--r--  intern/cycles/kernel/split/kernel_do_volume.h | 325
-rw-r--r--  intern/cycles/kernel/split/kernel_enqueue_inactive.h | 36
-rw-r--r--  intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h | 186
-rw-r--r--  intern/cycles/kernel/split/kernel_indirect_background.h | 72
-rw-r--r--  intern/cycles/kernel/split/kernel_indirect_subsurface.h | 79
-rw-r--r--  intern/cycles/kernel/split/kernel_lamp_emission.h | 62
-rw-r--r--  intern/cycles/kernel/split/kernel_next_iteration_setup.h | 350
-rw-r--r--  intern/cycles/kernel/split/kernel_path_init.h | 88
-rw-r--r--  intern/cycles/kernel/split/kernel_queue_enqueue.h | 85
-rw-r--r--  intern/cycles/kernel/split/kernel_scene_intersect.h | 83
-rw-r--r--  intern/cycles/kernel/split/kernel_shader_eval.h | 57
-rw-r--r--  intern/cycles/kernel/split/kernel_shader_setup.h | 78
-rw-r--r--  intern/cycles/kernel/split/kernel_shader_sort.h | 134
-rw-r--r--  intern/cycles/kernel/split/kernel_shadow_blocked_ao.h | 49
-rw-r--r--  intern/cycles/kernel/split/kernel_shadow_blocked_dl.h | 135
-rw-r--r--  intern/cycles/kernel/split/kernel_split_common.h | 75
-rw-r--r--  intern/cycles/kernel/split/kernel_split_data.h | 39
-rw-r--r--  intern/cycles/kernel/split/kernel_split_data_types.h | 170
-rw-r--r--  intern/cycles/kernel/split/kernel_subsurface_scatter.h | 475
23 files changed, 1713 insertions(+), 1759 deletions(-)
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index ed0a82067f1..e08d87ab618 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -19,215 +19,213 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__
/* sets up the various state needed to do an indirect loop */
-ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg,
+ int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- /* save a copy of the state to restore later */
-#define BRANCHED_STORE(name) \
- branched_state->name = kernel_split_state.name[ray_index];
+ /* save a copy of the state to restore later */
+# define BRANCHED_STORE(name) branched_state->name = kernel_split_state.name[ray_index];
- BRANCHED_STORE(path_state);
- BRANCHED_STORE(throughput);
- BRANCHED_STORE(ray);
- BRANCHED_STORE(isect);
- BRANCHED_STORE(ray_state);
+ BRANCHED_STORE(path_state);
+ BRANCHED_STORE(throughput);
+ BRANCHED_STORE(ray);
+ BRANCHED_STORE(isect);
+ BRANCHED_STORE(ray_state);
- *kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index);
- for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
- kernel_split_sd(branched_state_sd, ray_index)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
- }
+ *kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index);
+ for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
+ kernel_split_sd(branched_state_sd, ray_index)->closure[i] =
+ kernel_split_sd(sd, ray_index)->closure[i];
+ }
-#undef BRANCHED_STORE
+# undef BRANCHED_STORE
- /* set loop counters to intial position */
- branched_state->next_closure = 0;
- branched_state->next_sample = 0;
+  /* set loop counters to initial position */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = 0;
}
/* ends an indirect loop and restores the previous state */
-ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg,
+ int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- /* restore state */
-#define BRANCHED_RESTORE(name) \
- kernel_split_state.name[ray_index] = branched_state->name;
+ /* restore state */
+# define BRANCHED_RESTORE(name) kernel_split_state.name[ray_index] = branched_state->name;
- BRANCHED_RESTORE(path_state);
- BRANCHED_RESTORE(throughput);
- BRANCHED_RESTORE(ray);
- BRANCHED_RESTORE(isect);
- BRANCHED_RESTORE(ray_state);
+ BRANCHED_RESTORE(path_state);
+ BRANCHED_RESTORE(throughput);
+ BRANCHED_RESTORE(ray);
+ BRANCHED_RESTORE(isect);
+ BRANCHED_RESTORE(ray_state);
- *kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index);
- for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
- kernel_split_sd(sd, ray_index)->closure[i] = kernel_split_sd(branched_state_sd, ray_index)->closure[i];
- }
+ *kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index);
+ for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
+ kernel_split_sd(sd, ray_index)->closure[i] =
+ kernel_split_sd(branched_state_sd, ray_index)->closure[i];
+ }
-#undef BRANCHED_RESTORE
+# undef BRANCHED_RESTORE
- /* leave indirect loop */
- REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
+ /* leave indirect loop */
+ REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
}
-ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, int ray_index)
+ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg,
+ int ray_index)
{
- ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global char *ray_state = kernel_split_state.ray_state;
- int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
- kernel_split_state.queue_data, kernel_split_params.queue_size, kernel_split_params.queue_index);
+ int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ kernel_split_params.queue_index);
- if(!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
- return false;
- }
+ if (!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
+ return false;
+ }
-#define SPLIT_DATA_ENTRY(type, name, num) \
- if(num) { \
- kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \
- }
- SPLIT_DATA_ENTRIES_BRANCHED_SHARED
-#undef SPLIT_DATA_ENTRY
+# define SPLIT_DATA_ENTRY(type, name, num) \
+ if (num) { \
+ kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \
+ }
+ SPLIT_DATA_ENTRIES_BRANCHED_SHARED
+# undef SPLIT_DATA_ENTRY
- *kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index);
- for(int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) {
- kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
- }
+ *kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index);
+ for (int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) {
+ kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
+ }
- kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
- kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
- kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
+ kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
+ kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
+ kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
- path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
- path_radiance_copy_indirect(inactive_L, L);
+ path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
+ path_radiance_copy_indirect(inactive_L, L);
- ray_state[inactive_ray] = RAY_REGENERATED;
- ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
- ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
+ ray_state[inactive_ray] = RAY_REGENERATED;
+ ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
+ ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
- atomic_fetch_and_inc_uint32((ccl_global uint*)&kernel_split_state.branched_state[ray_index].shared_sample_count);
+ atomic_fetch_and_inc_uint32(
+ (ccl_global uint *)&kernel_split_state.branched_state[ray_index].shared_sample_count);
- return true;
+ return true;
}
/* bounce off surface and integrate indirect light */
-ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg,
- int ray_index,
- float num_samples_adjust,
- ShaderData *saved_sd,
- bool reset_path_state,
- bool wait_for_shared)
+ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(
+ KernelGlobals *kg,
+ int ray_index,
+ float num_samples_adjust,
+ ShaderData *saved_sd,
+ bool reset_path_state,
+ bool wait_for_shared)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
-
- ShaderData *sd = saved_sd;
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- float3 throughput = branched_state->throughput;
- ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
-
- float sum_sample_weight = 0.0f;
-#ifdef __DENOISING_FEATURES__
- if(ps->denoising_feature_weight > 0.0f) {
- for(int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
-
- /* transparency is not handled here, but in outer loop */
- if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
- continue;
- }
-
- sum_sample_weight += sc->sample_weight;
- }
- }
- else {
- sum_sample_weight = 1.0f;
- }
-#endif /* __DENOISING_FEATURES__ */
-
- for(int i = branched_state->next_closure; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
-
- if(!CLOSURE_IS_BSDF(sc->type))
- continue;
- /* transparency is not handled here, but in outer loop */
- if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
- continue;
-
- int num_samples;
-
- if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
- num_samples = kernel_data.integrator.diffuse_samples;
- else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
- num_samples = 1;
- else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
- num_samples = kernel_data.integrator.glossy_samples;
- else
- num_samples = kernel_data.integrator.transmission_samples;
-
- num_samples = ceil_to_int(num_samples_adjust*num_samples);
-
- float num_samples_inv = num_samples_adjust/num_samples;
-
- for(int j = branched_state->next_sample; j < num_samples; j++) {
- if(reset_path_state) {
- *ps = branched_state->path_state;
- }
-
- ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
-
- ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
- *tp = throughput;
-
- ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
-
- if(!kernel_branched_path_surface_bounce(kg,
- sd,
- sc,
- j,
- num_samples,
- tp,
- ps,
- &L->state,
- bsdf_ray,
- sum_sample_weight))
- {
- continue;
- }
-
- ps->rng_hash = branched_state->path_state.rng_hash;
-
- /* update state for next iteration */
- branched_state->next_closure = i;
- branched_state->next_sample = j+1;
-
- /* start the indirect path */
- *tp *= num_samples_inv;
-
- if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
- continue;
- }
-
- return true;
- }
-
- branched_state->next_sample = 0;
- }
-
- branched_state->next_closure = sd->num_closure;
-
- if(wait_for_shared) {
- branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
- if(branched_state->waiting_on_shared_samples) {
- return true;
- }
- }
-
- return false;
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = saved_sd;
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ float3 throughput = branched_state->throughput;
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+
+ float sum_sample_weight = 0.0f;
+# ifdef __DENOISING_FEATURES__
+ if (ps->denoising_feature_weight > 0.0f) {
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ /* transparency is not handled here, but in outer loop */
+ if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+ continue;
+ }
+
+ sum_sample_weight += sc->sample_weight;
+ }
+ }
+ else {
+ sum_sample_weight = 1.0f;
+ }
+# endif /* __DENOISING_FEATURES__ */
+
+ for (int i = branched_state->next_closure; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ if (!CLOSURE_IS_BSDF(sc->type))
+ continue;
+ /* transparency is not handled here, but in outer loop */
+ if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
+ continue;
+
+ int num_samples;
+
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+ num_samples = kernel_data.integrator.diffuse_samples;
+ else if (CLOSURE_IS_BSDF_BSSRDF(sc->type))
+ num_samples = 1;
+ else if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
+ num_samples = kernel_data.integrator.glossy_samples;
+ else
+ num_samples = kernel_data.integrator.transmission_samples;
+
+ num_samples = ceil_to_int(num_samples_adjust * num_samples);
+
+ float num_samples_inv = num_samples_adjust / num_samples;
+
+ for (int j = branched_state->next_sample; j < num_samples; j++) {
+ if (reset_path_state) {
+ *ps = branched_state->path_state;
+ }
+
+ ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
+
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = throughput;
+
+ ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
+
+ if (!kernel_branched_path_surface_bounce(
+ kg, sd, sc, j, num_samples, tp, ps, &L->state, bsdf_ray, sum_sample_weight)) {
+ continue;
+ }
+
+ ps->rng_hash = branched_state->path_state.rng_hash;
+
+ /* update state for next iteration */
+ branched_state->next_closure = i;
+ branched_state->next_sample = j + 1;
+
+ /* start the indirect path */
+ *tp *= num_samples_inv;
+
+ if (kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+
+ return true;
+ }
+
+ branched_state->next_sample = 0;
+ }
+
+ branched_state->next_closure = sd->num_closure;
+
+ if (wait_for_shared) {
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if (branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+ }
+
+ return false;
}
-#endif /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
CCL_NAMESPACE_END
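
The BRANCHED_STORE/BRANCHED_RESTORE pair above is an X-macro: one #define turns each field name into an assignment, so the field list is written once per direction and the save and restore sides cannot drift apart. A minimal standalone sketch of the same pattern, with hypothetical state fields standing in for the Cycles split-state arrays:

#include <cstdio>

/* Hypothetical per-ray state standing in for the kernel_split_state arrays. */
struct RayState {
  float throughput;
  int bounce;
};

static RayState state[4]; /* live state, one slot per ray */
static RayState saved[4]; /* copy taken at loop init */

static void indirect_loop_init(int ray_index)
{
/* save a copy of the state to restore later */
#define BRANCHED_STORE(name) saved[ray_index].name = state[ray_index].name;
  BRANCHED_STORE(throughput);
  BRANCHED_STORE(bounce);
#undef BRANCHED_STORE
}

static void indirect_loop_end(int ray_index)
{
/* restore state */
#define BRANCHED_RESTORE(name) state[ray_index].name = saved[ray_index].name;
  BRANCHED_RESTORE(throughput);
  BRANCHED_RESTORE(bounce);
#undef BRANCHED_RESTORE
}

int main()
{
  state[0].throughput = 1.0f;
  state[0].bounce = 0;
  indirect_loop_init(0);
  state[0].throughput = 0.25f; /* the indirect loop mutates the live state */
  state[0].bounce = 3;
  indirect_loop_end(0); /* original values come back */
  printf("%.2f %d\n", state[0].throughput, state[0].bounce); /* prints: 1.00 0 */
  return 0;
}

Compiled as plain C++, this prints the pre-branch values, showing that the live state survives whatever the indirect loop did to it in between.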
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index 18eec6372f1..e77743350dc 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -41,132 +41,133 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_buffer_update(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(ray_index == 0) {
- /* We will empty this queue in this kernel. */
- kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- }
- char enqueue_flag = 0;
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (ray_index == 0) {
+ /* We will empty this queue in this kernel. */
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
+ char enqueue_flag = 0;
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+   * active since we have barrier() calls later in the kernel. CPU devices
+   * expect all threads to execute the barrier statement.
+ */
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
#endif
#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
+ if (ray_index != QUEUE_EMPTY_SLOT) {
#endif
- ccl_global char *ray_state = kernel_split_state.ray_state;
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- bool ray_was_updated = false;
-
- if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
- ray_was_updated = true;
- uint sample = state->sample;
- uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
- ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
-
- /* accumulate result in output buffer */
- kernel_write_result(kg, buffer, sample, L);
-
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
- }
-
- if(kernel_data.film.cryptomatte_passes) {
- /* Make sure no thread is writing to the buffers. */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) {
- uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
- ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
- ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
- kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
- }
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
- /* We have completed current work; So get next work */
- ccl_global uint *work_pools = kernel_split_params.work_pools;
- uint total_work_size = kernel_split_params.total_work_size;
- uint work_index;
-
- if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
- /* If work is invalid, this means no more work is available and the thread may exit */
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
- ccl_global WorkTile *tile = &kernel_split_params.tile;
- uint x, y, sample;
- get_work_pixel(tile, work_index, &x, &y, &sample);
-
- /* Store buffer offset for writing to passes. */
- uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
- kernel_split_state.buffer_offset[ray_index] = buffer_offset;
-
- /* Initialize random numbers and ray. */
- uint rng_hash;
- kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
-
- if(ray->t != 0.0f) {
- /* Initialize throughput, path radiance, Ray, PathState;
- * These rays proceed with path-iteration.
- */
- *throughput = make_float3(1.0f, 1.0f, 1.0f);
- path_radiance_init(L, kernel_data.film.use_light_pass);
- path_state_init(kg,
- AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
- state,
- rng_hash,
- sample,
- ray);
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ bool ray_was_updated = false;
+
+ if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ray_was_updated = true;
+ uint sample = state->sample;
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+ /* accumulate result in output buffer */
+ kernel_write_result(kg, buffer, sample, L);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+ }
+
+ if (kernel_data.film.cryptomatte_passes) {
+ /* Make sure no thread is writing to the buffers. */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) {
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+ kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
+ }
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+    /* We have completed the current work, so get the next work */
+ ccl_global uint *work_pools = kernel_split_params.work_pools;
+ uint total_work_size = kernel_split_params.total_work_size;
+ uint work_index;
+
+ if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
+ /* If work is invalid, this means no more work is available and the thread may exit */
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
+
+ /* Store buffer offset for writing to passes. */
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+ kernel_split_state.buffer_offset[ray_index] = buffer_offset;
+
+ /* Initialize random numbers and ray. */
+ uint rng_hash;
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
+
+ if (ray->t != 0.0f) {
+ /* Initialize throughput, path radiance, Ray, PathState;
+ * These rays proceed with path-iteration.
+ */
+ *throughput = make_float3(1.0f, 1.0f, 1.0f);
+ path_radiance_init(L, kernel_data.film.use_light_pass);
+ path_state_init(kg,
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+ state,
+ rng_hash,
+ sample,
+ ray);
#ifdef __SUBSURFACE__
- kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
#endif
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- enqueue_flag = 1;
- }
- else {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
- }
- }
- }
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ enqueue_flag = 1;
+ }
+ else {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+ }
+ }
+ }
#ifndef __COMPUTE_DEVICE_GPU__
- }
+ }
#endif
- /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
- * These rays will be made active during next SceneIntersectkernel.
- */
- enqueue_ray_index_local(ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+   * These rays will be made active during the next SceneIntersect kernel.
+ */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
}
CCL_NAMESPACE_END
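
kernel_buffer_update regenerates a finished ray by decoding a flat work index into a pixel and sample via get_work_pixel, then deriving where that pixel's passes live as (tile->offset + x + y * tile->stride) * pass_stride. A sketch of that arithmetic under one plausible sample-major ordering (the decode layout is an assumption here, not necessarily the exact get_work_pixel implementation):

#include <cstdio>

/* Hypothetical tile description mirroring the WorkTile fields used above. */
struct Tile {
  unsigned x, y, w, h;     /* tile rectangle inside the frame */
  unsigned offset, stride; /* placement inside the full render buffer */
};

/* Decode a flat work index into (x, y, sample); sample-major order assumed. */
static void get_work_pixel(
    const Tile *tile, unsigned work_index, unsigned *x, unsigned *y, unsigned *sample)
{
  unsigned pixels = tile->w * tile->h;
  *sample = work_index / pixels; /* all pixels of sample s come before s + 1 */
  unsigned pixel = work_index % pixels;
  *x = tile->x + pixel % tile->w;
  *y = tile->y + pixel / tile->w;
}

int main()
{
  Tile tile = {16, 32, 8, 8, 0, 1024};
  const unsigned pass_stride = 4; /* floats stored per pixel */

  unsigned x, y, sample;
  get_work_pixel(&tile, 70, &x, &y, &sample); /* 70 = 1 * 64 + 6 -> sample 1 */

  /* Same offset arithmetic as in kernel_buffer_update above. */
  unsigned buffer_offset = (tile.offset + x + y * tile.stride) * pass_stride;
  printf("x=%u y=%u sample=%u offset=%u\n", x, y, sample, buffer_offset);
  return 0;
}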
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 77fb61b80a8..52930843f56 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -28,82 +28,88 @@ ccl_device void kernel_data_init(
#else
void KERNEL_FUNCTION_FULL_NAME(data_init)(
#endif
- KernelGlobals *kg,
- ccl_constant KernelData *data,
- ccl_global void *split_data_buffer,
- int num_elements,
- ccl_global char *ray_state,
+ KernelGlobals *kg,
+ ccl_constant KernelData *data,
+ ccl_global void *split_data_buffer,
+ int num_elements,
+ ccl_global char *ray_state,
#ifdef __KERNEL_OPENCL__
- KERNEL_BUFFER_PARAMS,
+ KERNEL_BUFFER_PARAMS,
#endif
- int start_sample,
- int end_sample,
- int sx, int sy, int sw, int sh, int offset, int stride,
- ccl_global int *Queue_index, /* Tracks the number of elements in queues */
- int queuesize, /* size (capacity) of the queue */
- ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
- ccl_global unsigned int *work_pools, /* Work pool for each work group */
- unsigned int num_samples,
- ccl_global float *buffer)
+ int start_sample,
+ int end_sample,
+ int sx,
+ int sy,
+ int sw,
+ int sh,
+ int offset,
+ int stride,
+ ccl_global int *Queue_index, /* Tracks the number of elements in queues */
+ int queuesize, /* size (capacity) of the queue */
+ ccl_global char *
+ use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
+ ccl_global unsigned int *work_pools, /* Work pool for each work group */
+ unsigned int num_samples,
+ ccl_global float *buffer)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, data_init);
+ STUB_ASSERT(KERNEL_ARCH, data_init);
#else
-#ifdef __KERNEL_OPENCL__
- kg->data = data;
-#endif
+# ifdef __KERNEL_OPENCL__
+ kg->data = data;
+# endif
- kernel_split_params.tile.x = sx;
- kernel_split_params.tile.y = sy;
- kernel_split_params.tile.w = sw;
- kernel_split_params.tile.h = sh;
+ kernel_split_params.tile.x = sx;
+ kernel_split_params.tile.y = sy;
+ kernel_split_params.tile.w = sw;
+ kernel_split_params.tile.h = sh;
- kernel_split_params.tile.start_sample = start_sample;
- kernel_split_params.tile.num_samples = num_samples;
+ kernel_split_params.tile.start_sample = start_sample;
+ kernel_split_params.tile.num_samples = num_samples;
- kernel_split_params.tile.offset = offset;
- kernel_split_params.tile.stride = stride;
+ kernel_split_params.tile.offset = offset;
+ kernel_split_params.tile.stride = stride;
- kernel_split_params.tile.buffer = buffer;
+ kernel_split_params.tile.buffer = buffer;
- kernel_split_params.total_work_size = sw * sh * num_samples;
+ kernel_split_params.total_work_size = sw * sh * num_samples;
- kernel_split_params.work_pools = work_pools;
+ kernel_split_params.work_pools = work_pools;
- kernel_split_params.queue_index = Queue_index;
- kernel_split_params.queue_size = queuesize;
- kernel_split_params.use_queues_flag = use_queues_flag;
+ kernel_split_params.queue_index = Queue_index;
+ kernel_split_params.queue_size = queuesize;
+ kernel_split_params.use_queues_flag = use_queues_flag;
- split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
+ split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
-#ifdef __KERNEL_OPENCL__
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_info(kg);
-#endif
+# ifdef __KERNEL_OPENCL__
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
+# endif
+
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+ /* Initialize queue data and queue index. */
+ if (thread_index < queuesize) {
+ for (int i = 0; i < NUM_QUEUES; i++) {
+ kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ }
+ }
+
+ if (thread_index == 0) {
+ for (int i = 0; i < NUM_QUEUES; i++) {
+ Queue_index[i] = 0;
+ }
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-
- /* Initialize queue data and queue index. */
- if(thread_index < queuesize) {
- for(int i = 0; i < NUM_QUEUES; i++) {
- kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- }
- }
-
- if(thread_index == 0) {
- for(int i = 0; i < NUM_QUEUES; i++) {
- Queue_index[i] = 0;
- }
-
- /* The scene-intersect kernel should not use the queues very first time.
- * since the queue would be empty.
- */
- *use_queues_flag = 0;
- }
-#endif /* KERENL_STUB */
+    /* The scene-intersect kernel should not use the queues the very first time,
+     * since the queues would be empty.
+ */
+ *use_queues_flag = 0;
+ }
+#endif /* KERNEL_STUB */
}
CCL_NAMESPACE_END
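
The initialization above relies on all NUM_QUEUES queues sharing one flat array, so slot thread_index of queue i lives at i * queuesize + thread_index; every thread clears its own slot in each queue while thread 0 zeroes the per-queue counters. A serialized CPU sketch of that layout (constants hypothetical, the GPU runs one call per thread in parallel):

#include <cstdio>

enum { NUM_QUEUES = 4, QUEUE_SIZE = 8, QUEUE_EMPTY_SLOT = -1 };

static int queue_data[NUM_QUEUES * QUEUE_SIZE];
static int queue_index[NUM_QUEUES]; /* element count per queue */

/* What each GPU thread does in kernel_data_init, serialized here. */
static void data_init_thread(int thread_index)
{
  if (thread_index < QUEUE_SIZE) {
    for (int i = 0; i < NUM_QUEUES; i++) {
      queue_data[i * QUEUE_SIZE + thread_index] = QUEUE_EMPTY_SLOT;
    }
  }
  if (thread_index == 0) {
    for (int i = 0; i < NUM_QUEUES; i++) {
      queue_index[i] = 0;
    }
  }
}

int main()
{
  for (int t = 0; t < QUEUE_SIZE; t++) {
    data_init_thread(t);
  }
  printf("queue 2, slot 5: %d (empty)\n", queue_data[2 * QUEUE_SIZE + 5]);
  return 0;
}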
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index ca79602c565..b2ca59d60cc 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -43,116 +43,111 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_direct_lighting(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- char enqueue_flag = 0;
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
-
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
-
- /* direct lighting */
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ char enqueue_flag = 0;
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
+
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+
+ /* direct lighting */
#ifdef __EMISSION__
- bool flag = (kernel_data.integrator.use_direct_light &&
- (sd->flag & SD_BSDF_HAS_EVAL));
+ bool flag = (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL));
# ifdef __BRANCHED_PATH__
- if(flag && kernel_data.integrator.branched) {
- flag = false;
- enqueue_flag = 1;
- }
-# endif /* __BRANCHED_PATH__ */
+ if (flag && kernel_data.integrator.branched) {
+ flag = false;
+ enqueue_flag = 1;
+ }
+# endif /* __BRANCHED_PATH__ */
# ifdef __SHADOW_TRICKS__
- if(flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
- flag = false;
- enqueue_flag = 1;
- }
-# endif /* __SHADOW_TRICKS__ */
-
- if(flag) {
- /* Sample illumination from lights to find path contribution. */
- float light_u, light_v;
- path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
- float terminate = path_state_rng_light_termination(kg, state);
-
- LightSample ls;
- if(light_sample(kg,
- light_u, light_v,
- sd->time,
- sd->P,
- state->bounce,
- &ls)) {
-
- Ray light_ray;
- light_ray.time = sd->time;
-
- BsdfEval L_light;
- bool is_lamp;
- if(direct_emission(kg,
- sd,
- AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
- &ls,
- state,
- &light_ray,
- &L_light,
- &is_lamp,
- terminate))
- {
- /* Write intermediate data to global memory to access from
- * the next kernel.
- */
- kernel_split_state.light_ray[ray_index] = light_ray;
- kernel_split_state.bsdf_eval[ray_index] = L_light;
- kernel_split_state.is_lamp[ray_index] = is_lamp;
- /* Mark ray state for next shadow kernel. */
- enqueue_flag = 1;
- }
- }
- }
-#endif /* __EMISSION__ */
- }
+ if (flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
+ flag = false;
+ enqueue_flag = 1;
+ }
+# endif /* __SHADOW_TRICKS__ */
+
+ if (flag) {
+ /* Sample illumination from lights to find path contribution. */
+ float light_u, light_v;
+ path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+ float terminate = path_state_rng_light_termination(kg, state);
+
+ LightSample ls;
+ if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+
+ Ray light_ray;
+ light_ray.time = sd->time;
+
+ BsdfEval L_light;
+ bool is_lamp;
+ if (direct_emission(kg,
+ sd,
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+ &ls,
+ state,
+ &light_ray,
+ &L_light,
+ &is_lamp,
+ terminate)) {
+ /* Write intermediate data to global memory to access from
+ * the next kernel.
+ */
+ kernel_split_state.light_ray[ray_index] = light_ray;
+ kernel_split_state.bsdf_eval[ray_index] = L_light;
+ kernel_split_state.is_lamp[ray_index] = is_lamp;
+ /* Mark ray state for next shadow kernel. */
+ enqueue_flag = 1;
+ }
+ }
+ }
+#endif /* __EMISSION__ */
+ }
#ifdef __EMISSION__
- /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_SHADOW_RAY_CAST_DL_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
#endif
#ifdef __BRANCHED_PATH__
- /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays
- * this is the last kernel before next_iteration_setup that uses local atomics so we do this here
- */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- enqueue_ray_index_local(ray_index,
- QUEUE_LIGHT_INDIRECT_ITER,
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-
-#endif /* __BRANCHED_PATH__ */
+  /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays.
+   * This is the last kernel before next_iteration_setup that uses local atomics, so we do it here.
+ */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(
+ ray_index,
+ QUEUE_LIGHT_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
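
Note how kernel_direct_lighting never traces the shadow ray itself: it stores light_ray, bsdf_eval and is_lamp in per-ray global arrays and only raises enqueue_flag, leaving the occlusion test to the later shadow-blocked kernel (kernel_shadow_blocked_dl.h in the diffstat). A minimal producer/consumer sketch of that handoff, with placeholder types and values rather than the Cycles API:

#include <cstdio>

struct Ray { float origin[3], dir[3], t; };
struct BsdfEval { float rgb[3]; };

enum { NUM_RAYS = 4 };

/* Per-ray "global memory" handoff, like kernel_split_state.light_ray etc. */
static Ray g_light_ray[NUM_RAYS];
static BsdfEval g_bsdf_eval[NUM_RAYS];
static bool g_is_lamp[NUM_RAYS];
static bool g_enqueued[NUM_RAYS];

/* Producer kernel: sample a light and stash the shadow ray for later. */
static void direct_lighting(int ray_index)
{
  Ray light_ray = {{0, 0, 0}, {0, 0, 1}, 10.0f}; /* stand-in light sample */
  BsdfEval eval = {{0.5f, 0.5f, 0.5f}};

  g_light_ray[ray_index] = light_ray;
  g_bsdf_eval[ray_index] = eval;
  g_is_lamp[ray_index] = true;
  g_enqueued[ray_index] = true; /* plays the role of enqueue_flag */
}

/* Consumer kernel: trace the stored shadow ray, accumulate if unblocked. */
static void shadow_blocked_dl(int ray_index, float *out_rgb)
{
  if (!g_enqueued[ray_index])
    return;
  bool blocked = false; /* a real kernel would intersect the scene here */
  if (!blocked) {
    for (int c = 0; c < 3; c++)
      out_rgb[c] += g_bsdf_eval[ray_index].rgb[c];
  }
}

int main()
{
  float rgb[3] = {0, 0, 0};
  direct_lighting(0);
  shadow_blocked_dl(0, rgb);
  printf("contribution: %.2f %.2f %.2f\n", rgb[0], rgb[1], rgb[2]);
  return 0;
}

Splitting the work this way lets all pending shadow rays be traced together in one coherent kernel launch instead of diverging inside the shading kernel.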
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index fb5bd3d48dd..45b839db05f 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -18,203 +18,210 @@ CCL_NAMESPACE_BEGIN
#if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
-ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg,
+ int ray_index)
{
- kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
}
-ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index)
+ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg,
+ int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- /* GPU: no decoupled ray marching, scatter probalistically */
- int num_samples = kernel_data.integrator.volume_samples;
- float num_samples_inv = 1.0f/num_samples;
+  /* GPU: no decoupled ray marching, scatter probabilistically */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f / num_samples;
- Ray volume_ray = branched_state->ray;
- volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX;
+ Ray volume_ray = branched_state->ray;
+ volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ?
+ branched_state->isect.t :
+ FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
- for(int j = branched_state->next_sample; j < num_samples; j++) {
- ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
- *ps = branched_state->path_state;
+ for (int j = branched_state->next_sample; j < num_samples; j++) {
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+ *ps = branched_state->path_state;
- ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
- *pray = branched_state->ray;
+ ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
+ *pray = branched_state->ray;
- ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
- *tp = branched_state->throughput * num_samples_inv;
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = branched_state->throughput * num_samples_inv;
- /* branch RNG state */
- path_state_branch(ps, j, num_samples);
+ /* branch RNG state */
+ path_state_branch(ps, j, num_samples);
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, ps, sd, &volume_ray, L, tp, heterogeneous);
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, ps, sd, &volume_ray, L, tp, heterogeneous);
# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
-
- /* indirect light bounce */
- if(!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
- continue;
- }
-
- /* start the indirect path */
- branched_state->next_closure = 0;
- branched_state->next_sample = j+1;
-
- /* Attempting to share too many samples is slow for volumes as it causes us to
- * loop here more and have many calls to kernel_volume_integrate which evaluates
- * shaders. The many expensive shader evaluations cause the work load to become
- * unbalanced and many threads to become idle in this kernel. Limiting the
- * number of shared samples here helps quite a lot.
- */
- if(branched_state->shared_sample_count < 2) {
- if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
- continue;
- }
- }
-
- return true;
- }
+ if (result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
+
+ /* indirect light bounce */
+ if (!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
+ continue;
+ }
+
+ /* start the indirect path */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = j + 1;
+
+ /* Attempting to share too many samples is slow for volumes as it causes us to
+       * loop here more and have many calls to kernel_volume_integrate, which evaluates
+       * shaders. The many expensive shader evaluations cause the workload to become
+ * unbalanced and many threads to become idle in this kernel. Limiting the
+ * number of shared samples here helps quite a lot.
+ */
+ if (branched_state->shared_sample_count < 2) {
+ if (kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+ }
+
+ return true;
+ }
# endif
- }
+ }
- branched_state->next_sample = num_samples;
+ branched_state->next_sample = num_samples;
- branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
- if(branched_state->waiting_on_shared_samples) {
- return true;
- }
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if (branched_state->waiting_on_shared_samples) {
+ return true;
+ }
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
- /* todo: avoid this calculation using decoupled ray marching */
- float3 throughput = kernel_split_state.throughput[ray_index];
- kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
- kernel_split_state.throughput[ray_index] = throughput;
+ /* todo: avoid this calculation using decoupled ray marching */
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ kernel_volume_shadow(
+ kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
+ kernel_split_state.throughput[ray_index] = throughput;
- return false;
+ return false;
}
-#endif /* __BRANCHED_PATH__ && __VOLUME__ */
+#endif /* __BRANCHED_PATH__ && __VOLUME__ */
ccl_device void kernel_do_volume(KernelGlobals *kg)
{
#ifdef __VOLUME__
- /* We will empty this queue in this kernel. */
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ /* We will empty this queue in this kernel. */
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
# ifdef __BRANCHED_PATH__
- kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
-# endif /* __BRANCHED_PATH__ */
- }
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-
- if(*kernel_split_params.use_queues_flag) {
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
- }
-
- ccl_global char *ray_state = kernel_split_state.ray_state;
-
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
- bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
-
- /* Sanitize volume stack. */
- if(!hit) {
- kernel_volume_clean_stack(kg, state->volume_stack);
- }
- /* volume attenuation, emission, scatter */
- if(state->volume_stack[0].shader != SHADER_NONE) {
- Ray volume_ray = *ray;
- volume_ray.t = (hit)? isect->t: FLT_MAX;
+ kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
+# endif /* __BRANCHED_PATH__ */
+ }
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+ if (*kernel_split_params.use_queues_flag) {
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ }
+
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+ bool hit = !IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
+
+ /* Sanitize volume stack. */
+ if (!hit) {
+ kernel_volume_clean_stack(kg, state->volume_stack);
+ }
+ /* volume attenuation, emission, scatter */
+ if (state->volume_stack[0].shader != SHADER_NONE) {
+ Ray volume_ray = *ray;
+ volume_ray.t = (hit) ? isect->t : FLT_MAX;
# ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
-# endif /* __BRANCHED_PATH__ */
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+ if (!kernel_data.integrator.branched ||
+ IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+# endif /* __BRANCHED_PATH__ */
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
- {
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+ {
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, state, sd, &volume_ray, L, throughput, heterogeneous);
# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
-
- /* indirect light bounce */
- if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- kernel_split_path_end(kg, ray_index);
- }
- }
-# endif /* __VOLUME_SCATTER__ */
- }
+ if (result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+ /* indirect light bounce */
+ if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+# endif /* __VOLUME_SCATTER__ */
+ }
# ifdef __BRANCHED_PATH__
- }
- else {
- kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
-
- if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-# endif /* __BRANCHED_PATH__ */
- }
- }
+ }
+ else {
+ kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
+
+ if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+ }
+ }
# ifdef __BRANCHED_PATH__
- /* iter loop */
- ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
- QUEUE_VOLUME_INDIRECT_ITER,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
-
- if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
- path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
-
- if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-# endif /* __BRANCHED_PATH__ */
-
-#endif /* __VOLUME__ */
+ /* iter loop */
+ ray_index = get_ray_index(kg,
+ ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_VOLUME_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if (IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+#endif /* __VOLUME__ */
}
-
CCL_NAMESPACE_END
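
The branched volume loop splits one path into num_samples sub-paths and scales each stored throughput by num_samples_inv = 1/num_samples, so the branches average the estimator rather than sum it (path_state_branch additionally decorrelates the RNG per branch, which the sketch below omits). A toy illustration of that weighting, with a deterministic stand-in for the actual volume sampling:

#include <cstdio>

/* Stand-in for one indirect volume sample; deterministic for the demo. */
static float sample_indirect_light(int j)
{
  return 1.0f + 0.1f * (float)j; /* pretend radiance of branch j */
}

int main()
{
  const int num_samples = 4; /* like integrator.volume_samples */
  const float num_samples_inv = 1.0f / num_samples;
  const float throughput = 0.8f; /* path throughput before branching */

  /* Each branch j restarts from the saved state with scaled throughput,
   * mirroring: *tp = branched_state->throughput * num_samples_inv; */
  float result = 0.0f;
  for (int j = 0; j < num_samples; j++) {
    float tp = throughput * num_samples_inv;
    result += tp * sample_indirect_light(j);
  }
  /* The branches average the estimator instead of summing it. */
  printf("estimate: %f\n", result); /* 0.8 * mean(1.0..1.3) = 0.92 */
  return 0;
}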
diff --git a/intern/cycles/kernel/split/kernel_enqueue_inactive.h b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
index 496355bbc3a..31d2daef616 100644
--- a/intern/cycles/kernel/split/kernel_enqueue_inactive.h
+++ b/intern/cycles/kernel/split/kernel_enqueue_inactive.h
@@ -20,27 +20,27 @@ ccl_device void kernel_enqueue_inactive(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
#ifdef __BRANCHED_PATH__
- /* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  /* Enqueue RAY_INACTIVE rays into the QUEUE_INACTIVE_RAYS queue. */
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- char enqueue_flag = 0;
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) {
- enqueue_flag = 1;
- }
+ char enqueue_flag = 0;
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) {
+ enqueue_flag = 1;
+ }
- enqueue_ray_index_local(ray_index,
- QUEUE_INACTIVE_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-#endif /* __BRANCHED_PATH__ */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_INACTIVE_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
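
enqueue_ray_index_local, used here and throughout these kernels, compacts flagged rays with two levels of atomics: each flagged thread takes a slot from a workgroup-local counter, one thread then reserves a contiguous block from the global queue counter, and every flagged thread writes its ray index at global_base + local_slot. A serial sketch of the scheme (a GPU version interleaves these phases with barriers; names are hypothetical):

#include <cstdio>

enum { GROUP_SIZE = 8, QUEUE_CAP = 64 };

static int queue_data[QUEUE_CAP];
static int queue_index = 0; /* global element count (atomic on a GPU) */

/* Enqueue every flagged ray of one workgroup; serialized for clarity. */
static void enqueue_group(const int *ray_index, const char *enqueue_flag)
{
  int local_slot[GROUP_SIZE];
  int local_count = 0; /* the local_queue_atomics counter in the kernels above */

  /* Phase 1: each flagged thread takes a local slot (local atomic inc). */
  for (int t = 0; t < GROUP_SIZE; t++) {
    local_slot[t] = enqueue_flag[t] ? local_count++ : -1;
  }

  /* Phase 2: one thread reserves a block from the global counter. */
  int global_base = queue_index;
  queue_index += local_count; /* atomic_fetch_add on a GPU */

  /* Phase 3: flagged threads scatter into their reserved slots. */
  for (int t = 0; t < GROUP_SIZE; t++) {
    if (enqueue_flag[t])
      queue_data[global_base + local_slot[t]] = ray_index[t];
  }
}

int main()
{
  int rays[GROUP_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7};
  char flags[GROUP_SIZE] = {0, 1, 0, 1, 1, 0, 0, 1};
  enqueue_group(rays, flags);
  printf("enqueued %d rays: ", queue_index);
  for (int i = 0; i < queue_index; i++)
    printf("%d ", queue_data[i]); /* 1 3 4 7 */
  printf("\n");
  return 0;
}

The point of the local counter is to pay for only one global atomic per workgroup instead of one per ray.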
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index f14eecec2f2..63bc5a8e0ce 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -54,120 +54,112 @@ CCL_NAMESPACE_BEGIN
*/
ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
- KernelGlobals *kg,
- ccl_local_param BackgroundAOLocals *locals)
+ KernelGlobals *kg, ccl_local_param BackgroundAOLocals *locals)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- locals->queue_atomics_bg = 0;
- locals->queue_atomics_ao = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ locals->queue_atomics_bg = 0;
+ locals->queue_atomics_ao = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
#ifdef __AO__
- char enqueue_flag = 0;
+ char enqueue_flag = 0;
#endif
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif /* __COMPUTE_DEVICE_GPU__ */
+ /* If we are executing on a GPU device, we exit all threads that are not
+ * required.
+ *
+ * If we are executing on a CPU device, then we need to keep all threads
+   * active since we have barrier() calls later in the kernel. CPU devices
+   * expect all threads to execute the barrier statement.
+ */
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+#endif /* __COMPUTE_DEVICE_GPU__ */
#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
+ if (ray_index != QUEUE_EMPTY_SLOT) {
#endif
- ccl_global PathState *state = 0x0;
- float3 throughput;
-
- ccl_global char *ray_state = kernel_split_state.ray_state;
- ShaderData *sd = kernel_split_sd(sd, ray_index);
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
- ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
-
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-
- throughput = kernel_split_state.throughput[ray_index];
- state = &kernel_split_state.path_state[ray_index];
-
- if(!kernel_path_shader_apply(kg,
- sd,
- state,
- ray,
- throughput,
- emission_sd,
- L,
- buffer))
- {
- kernel_split_path_end(kg, ray_index);
- }
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- /* Path termination. this is a strange place to put the termination, it's
- * mainly due to the mixed in MIS that we use. gives too many unneeded
- * shader evaluations, only need emission if we are going to terminate.
- */
- float probability = path_state_continuation_probability(kg, state, throughput);
-
- if(probability == 0.0f) {
- kernel_split_path_end(kg, ray_index);
- }
- else if(probability < 1.0f) {
- float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
- if(terminate >= probability) {
- kernel_split_path_end(kg, ray_index);
- }
- else {
- kernel_split_state.throughput[ray_index] = throughput/probability;
- }
- }
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- kernel_update_denoising_features(kg, sd, state, L);
- }
- }
+ ccl_global PathState *state = 0x0;
+ float3 throughput;
+
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+ throughput = kernel_split_state.throughput[ray_index];
+ state = &kernel_split_state.path_state[ray_index];
+
+ if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, buffer)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+    /* Path termination. This is a strange place to put the termination; it's
+     * mainly due to the mixed-in MIS that we use. It gives too many unneeded
+     * shader evaluations; we only need emission if we are going to terminate.
+ */
+ float probability = path_state_continuation_probability(kg, state, throughput);
+
+ if (probability == 0.0f) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else if (probability < 1.0f) {
+ float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
+ if (terminate >= probability) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else {
+ kernel_split_state.throughput[ray_index] = throughput / probability;
+ }
+ }
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ kernel_update_denoising_features(kg, sd, state, L);
+ }
+ }
#ifdef __AO__
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- /* ambient occlusion */
- if(kernel_data.integrator.use_ambient_occlusion) {
- enqueue_flag = 1;
- }
- }
-#endif /* __AO__ */
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ /* ambient occlusion */
+ if (kernel_data.integrator.use_ambient_occlusion) {
+ enqueue_flag = 1;
+ }
+ }
+#endif /* __AO__ */
#ifndef __COMPUTE_DEVICE_GPU__
- }
+ }
#endif
#ifdef __AO__
- /* Enqueue to-shadow-ray-cast rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_SHADOW_RAY_CAST_AO_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- &locals->queue_atomics_ao,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* Enqueue to-shadow-ray-cast rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ &locals->queue_atomics_ao,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
#endif
}
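The termination logic above is Russian roulette: a path with continuation probability p either dies or survives with its throughput boosted by 1/p, which keeps the estimator unbiased. A minimal host-side sketch in plain C; rand01() stands in for path_state_rng_1D, and all names are illustrative rather than the kernel API.

#include <stdlib.h>

typedef struct { float x, y, z; } float3_t;

static float rand01(void) { return (float)rand() / (float)RAND_MAX; }

/* Returns 0 when the path terminates, 1 when it survives. */
static int russian_roulette(float3_t *throughput, float probability)
{
  if (probability == 0.0f) {
    return 0; /* terminate unconditionally */
  }
  if (probability < 1.0f) {
    if (rand01() >= probability) {
      return 0; /* terminate stochastically */
    }
    /* Survivors are boosted by 1/p so the expected contribution is unchanged. */
    throughput->x /= probability;
    throughput->y /= probability;
    throughput->z /= probability;
  }
  return 1;
}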
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index 4cf88a02590..b1c65f61e2c 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -18,48 +18,50 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_indirect_background(KernelGlobals *kg)
{
- ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global char *ray_state = kernel_split_state.ray_state;
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- int ray_index;
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ int ray_index;
- if(kernel_data.integrator.ao_bounces != INT_MAX) {
- ray_index = get_ray_index(kg, thread_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ if (kernel_data.integrator.ao_bounces != INT_MAX) {
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index != QUEUE_EMPTY_SLOT) {
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- if(path_state_ao_bounce(kg, state)) {
- kernel_split_path_end(kg, ray_index);
- }
- }
- }
- }
+ if (ray_index != QUEUE_EMPTY_SLOT) {
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ if (path_state_ao_bounce(kg, state)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+ }
+ }
- ray_index = get_ray_index(kg, thread_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
+ if (IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
- kernel_path_background(kg, state, ray, throughput, sd, L);
- kernel_split_path_end(kg, ray_index);
- }
+ kernel_path_background(kg, state, ray, throughput, sd, L);
+ kernel_split_path_end(kg, ray_index);
+ }
}
CCL_NAMESPACE_END
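For context, path_state_ao_bounce consults the AO bounce limit that the first hunk checks against INT_MAX. A rough stand-in for the predicate, assuming the limit simply caps path depth; the real test lives in the path-state code and may differ in detail:

#include <limits.h>

typedef struct { int bounce; } path_state_lite;

/* Illustrative only: an AO-limited path past the configured depth is ended. */
static int exceeds_ao_bounces(const path_state_lite *state, int ao_bounces)
{
  return ao_bounces != INT_MAX && state->bounce > ao_bounces;
}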
diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
index 236c94e983c..3f48f8d6f56 100644
--- a/intern/cycles/kernel/split/kernel_indirect_subsurface.h
+++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
@@ -18,53 +18,50 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
{
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index == 0) {
- /* We will empty both queues in this kernel. */
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- }
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index == 0) {
+ /* We will empty both queues in this kernel. */
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
- int ray_index;
- get_ray_index(kg, thread_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
- ray_index = get_ray_index(kg, thread_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
+ int ray_index;
+ get_ray_index(kg,
+ thread_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
#ifdef __SUBSURFACE__
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- ccl_global char *ray_state = kernel_split_state.ray_state;
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- /* Trace indirect subsurface rays by restarting the loop. this uses less
- * stack memory than invoking kernel_path_indirect.
- */
- if(ss_indirect->num_rays) {
- kernel_path_subsurface_setup_indirect(kg,
- ss_indirect,
- state,
- ray,
- L,
- throughput);
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-#endif /* __SUBSURFACE__ */
+    /* Trace indirect subsurface rays by restarting the loop. This uses less
+     * stack memory than invoking kernel_path_indirect.
+     */
+ if (ss_indirect->num_rays) {
+ kernel_path_subsurface_setup_indirect(kg, ss_indirect, state, ray, L, throughput);
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+#endif /* __SUBSURFACE__ */
}
CCL_NAMESPACE_END
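The comment above describes a restart rather than a recursion: pending subsurface rays sit in a small per-path array, and the main loop is simply re-entered for each one. A host-side sketch of that pop-and-continue shape; the types and the four-entry capacity are invented for the example:

#define MAX_SS_RAYS 4

typedef struct { float P[3], D[3]; } ray_lite;

typedef struct {
  int num_rays;
  ray_lite rays[MAX_SS_RAYS];
} ss_indirect_lite;

/* Pops the next pending indirect ray; returns 0 when none remain. */
static int pop_indirect_ray(ss_indirect_lite *ss, ray_lite *out)
{
  if (ss->num_rays == 0) {
    return 0;
  }
  *out = ss->rays[--ss->num_rays];
  return 1;
}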
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index 5b2c554b922..7ecb099208d 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -23,45 +23,45 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_lamp_emission(KernelGlobals *kg)
{
#ifndef __VOLUME__
- /* We will empty this queue in this kernel. */
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- }
+ /* We will empty this queue in this kernel. */
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ }
#endif
- /* Fetch use_queues_flag. */
- char local_use_queues_flag = *kernel_split_params.use_queues_flag;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* Fetch use_queues_flag. */
+ char local_use_queues_flag = *kernel_split_params.use_queues_flag;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(local_use_queues_flag) {
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (local_use_queues_flag) {
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
#ifndef __VOLUME__
- 1
+ 1
#else
- 0
+ 0
#endif
- );
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
- }
+ );
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+ }
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND))
- {
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
- Ray ray = kernel_split_state.ray[ray_index];
- ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ Ray ray = kernel_split_state.ray[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
- kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
- }
+ kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index e388955f1af..781ce869374 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -48,217 +48,211 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__
ccl_device_inline void kernel_split_branched_indirect_light_init(KernelGlobals *kg, int ray_index)
{
- kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT);
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT);
}
ccl_device void kernel_split_branched_transparent_bounce(KernelGlobals *kg, int ray_index)
{
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
# ifdef __VOLUME__
- if(!(sd->flag & SD_HAS_ONLY_VOLUME)) {
+ if (!(sd->flag & SD_HAS_ONLY_VOLUME)) {
# endif
- /* continue in case of transparency */
- *throughput *= shader_bsdf_transparency(kg, sd);
+ /* continue in case of transparency */
+ *throughput *= shader_bsdf_transparency(kg, sd);
- if(is_zero(*throughput)) {
- kernel_split_path_end(kg, ray_index);
- return;
- }
+ if (is_zero(*throughput)) {
+ kernel_split_path_end(kg, ray_index);
+ return;
+ }
- /* Update Path State */
- path_state_next(kg, state, LABEL_TRANSPARENT);
+ /* Update Path State */
+ path_state_next(kg, state, LABEL_TRANSPARENT);
# ifdef __VOLUME__
- }
- else {
- if(!path_state_volume_next(kg, state)) {
- kernel_split_path_end(kg, ray_index);
- return;
- }
- }
+ }
+ else {
+ if (!path_state_volume_next(kg, state)) {
+ kernel_split_path_end(kg, ray_index);
+ return;
+ }
+ }
# endif
- ray->P = ray_offset(sd->P, -sd->Ng);
- ray->t -= sd->ray_length; /* clipping works through transparent */
+ ray->P = ray_offset(sd->P, -sd->Ng);
+ ray->t -= sd->ray_length; /* clipping works through transparent */
# ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
- ray->dD.dx = -sd->dI.dx;
- ray->dD.dy = -sd->dI.dy;
-# endif /* __RAY_DIFFERENTIALS__ */
+ ray->dP = sd->dP;
+ ray->dD.dx = -sd->dI.dx;
+ ray->dD.dy = -sd->dI.dy;
+# endif /* __RAY_DIFFERENTIALS__ */
# ifdef __VOLUME__
- /* enter/exit volume */
- kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
-# endif /* __VOLUME__ */
+ /* enter/exit volume */
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+# endif /* __VOLUME__ */
}
-#endif /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- /* If we are here, then it means that scene-intersect kernel
- * has already been executed atleast once. From the next time,
- * scene-intersect kernel may operate on queues to fetch ray index
- */
- *kernel_split_params.use_queues_flag = 1;
-
- /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
- * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the
- * previous kernel.
- */
- kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
- kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
- }
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
-
- ccl_global char *ray_state = kernel_split_state.ray_state;
-
-# ifdef __VOLUME__
- /* Reactivate only volume rays here, most surface work was skipped. */
- if(IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE);
- }
-# endif
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+    /* If we are here, the scene-intersect kernel has already been executed
+     * at least once. From now on, the scene-intersect kernel may operate on
+     * queues to fetch the ray index.
+     */
+ *kernel_split_params.use_queues_flag = 1;
+
+ /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
+ * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the
+ * previous kernel.
+ */
+ kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
+ }
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
+
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+#ifdef __VOLUME__
+ /* Reactivate only volume rays here, most surface work was skipped. */
+ if (IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE);
+ }
+#endif
- bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE);
- if(active) {
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE);
+ if (active) {
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ if (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
#endif
- /* Compute direct lighting and next bounce. */
- if(!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) {
- kernel_split_path_end(kg, ray_index);
- }
+ /* Compute direct lighting and next bounce. */
+ if (!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) {
+ kernel_split_path_end(kg, ray_index);
+ }
#ifdef __BRANCHED_PATH__
- }
- else if(sd->flag & SD_HAS_ONLY_VOLUME) {
- kernel_split_branched_transparent_bounce(kg, ray_index);
- }
- else {
- kernel_split_branched_indirect_light_init(kg, ray_index);
-
- if(kernel_split_branched_path_surface_indirect_light_iter(kg,
- ray_index,
- 1.0f,
- kernel_split_sd(branched_state_sd, ray_index),
- true,
- true))
- {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
- kernel_split_branched_transparent_bounce(kg, ray_index);
- }
- }
-#endif /* __BRANCHED_PATH__ */
- }
-
- /* Enqueue RAY_UPDATE_BUFFER rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ }
+ else if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ kernel_split_branched_transparent_bounce(kg, ray_index);
+ }
+ else {
+ kernel_split_branched_indirect_light_init(kg, ray_index);
+
+ if (kernel_split_branched_path_surface_indirect_light_iter(
+ kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+ kernel_split_branched_transparent_bounce(kg, ray_index);
+ }
+ }
+#endif /* __BRANCHED_PATH__ */
+ }
+
+ /* Enqueue RAY_UPDATE_BUFFER rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
#ifdef __BRANCHED_PATH__
- /* iter loop */
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0;
- }
-
- ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
- QUEUE_LIGHT_INDIRECT_ITER,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
-
- if(IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) {
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-
- path_radiance_sum_indirect(L);
- path_radiance_reset_indirect(L);
-
- if(kernel_split_branched_path_surface_indirect_light_iter(kg,
- ray_index,
- 1.0f,
- kernel_split_sd(branched_state_sd, ray_index),
- true,
- true))
- {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
- kernel_split_branched_transparent_bounce(kg, ray_index);
- }
- }
+ /* iter loop */
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0;
+ }
+
+ ray_index = get_ray_index(kg,
+ ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_LIGHT_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if (IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) {
+    /* For render passes, sum and reset the indirect light pass variables
+     * for the next samples. */
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+
+ if (kernel_split_branched_path_surface_indirect_light_iter(
+ kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+ kernel_split_branched_transparent_bounce(kg, ray_index);
+ }
+ }
# ifdef __VOLUME__
- /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- enqueue_ray_index_local(ray_index,
- QUEUE_VOLUME_INDIRECT_ITER,
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-
-# endif /* __VOLUME__ */
+ /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(
+ ray_index,
+ QUEUE_VOLUME_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+# endif /* __VOLUME__ */
# ifdef __SUBSURFACE__
- /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- enqueue_ray_index_local(ray_index,
- QUEUE_SUBSURFACE_INDIRECT_ITER,
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-# endif /* __SUBSURFACE__ */
-#endif /* __BRANCHED_PATH__ */
+ /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(
+ ray_index,
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+# endif /* __SUBSURFACE__ */
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
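The branched code path above follows an init/iter/end protocol: loop-init snapshots the main path state, each iter traces one more indirect sample (returning true while samples remain), and loop-end restores the snapshot so the main path can take its own bounce. A bare-bones sketch of that control shape, with invented names and no actual tracing:

typedef struct {
  int sample, num_samples;
  /* a saved copy of the main path state would live here */
} branched_loop_lite;

static void loop_init(branched_loop_lite *loop, int num_samples)
{
  loop->sample = 0; /* init would also snapshot ray/throughput/state */
  loop->num_samples = num_samples;
}

/* Returns 1 while another indirect sample remains to be traced. */
static int loop_iter(branched_loop_lite *loop)
{
  return loop->sample++ < loop->num_samples;
}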
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index fdd54225b07..3faa3208341 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -21,61 +21,59 @@ CCL_NAMESPACE_BEGIN
*
* Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
*/
-ccl_device void kernel_path_init(KernelGlobals *kg) {
- int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
+ccl_device void kernel_path_init(KernelGlobals *kg)
+{
+ int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
- /* This is the first assignment to ray_state;
- * So we dont use ASSIGN_RAY_STATE macro.
- */
- kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
+  /* This is the first assignment to ray_state,
+   * so we don't use the ASSIGN_RAY_STATE macro.
+   */
+ kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
- /* Get work. */
- ccl_global uint *work_pools = kernel_split_params.work_pools;
- uint total_work_size = kernel_split_params.total_work_size;
- uint work_index;
+ /* Get work. */
+ ccl_global uint *work_pools = kernel_split_params.work_pools;
+ uint total_work_size = kernel_split_params.total_work_size;
+ uint work_index;
- if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
- /* No more work, mark ray as inactive */
- kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
+ if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
+ /* No more work, mark ray as inactive */
+ kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
- return;
- }
+ return;
+ }
- ccl_global WorkTile *tile = &kernel_split_params.tile;
- uint x, y, sample;
- get_work_pixel(tile, work_index, &x, &y, &sample);
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
- /* Store buffer offset for writing to passes. */
- uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
- kernel_split_state.buffer_offset[ray_index] = buffer_offset;
+ /* Store buffer offset for writing to passes. */
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+ kernel_split_state.buffer_offset[ray_index] = buffer_offset;
- /* Initialize random numbers and ray. */
- uint rng_hash;
- kernel_path_trace_setup(kg,
- sample,
- x, y,
- &rng_hash,
- &kernel_split_state.ray[ray_index]);
+ /* Initialize random numbers and ray. */
+ uint rng_hash;
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &kernel_split_state.ray[ray_index]);
- if(kernel_split_state.ray[ray_index].t != 0.0f) {
- /* Initialize throughput, path radiance, Ray, PathState;
- * These rays proceed with path-iteration.
- */
- kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
- path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
- path_state_init(kg,
- AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
- &kernel_split_state.path_state[ray_index],
- rng_hash,
- sample,
- &kernel_split_state.ray[ray_index]);
+ if (kernel_split_state.ray[ray_index].t != 0.0f) {
+ /* Initialize throughput, path radiance, Ray, PathState;
+ * These rays proceed with path-iteration.
+ */
+ kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
+ path_radiance_init(&kernel_split_state.path_radiance[ray_index],
+ kernel_data.film.use_light_pass);
+ path_state_init(kg,
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+ &kernel_split_state.path_state[ray_index],
+ rng_hash,
+ sample,
+ &kernel_split_state.ray[ray_index]);
#ifdef __SUBSURFACE__
- kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
#endif
- }
- else {
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
- }
+ }
+ else {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
+ }
}
CCL_NAMESPACE_END
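The buffer offset computed above is plain row-major addressing scaled by the per-pixel pass stride. A tiny host-side check of the arithmetic; the values are made up:

#include <assert.h>

static unsigned buffer_offset(unsigned tile_offset, unsigned x, unsigned y,
                              unsigned stride, unsigned pass_stride)
{
  /* One pixel occupies pass_stride floats; rows are stride pixels apart. */
  return (tile_offset + x + y * stride) * pass_stride;
}

int main(void)
{
  /* A 4-pixel-wide tile with 8 float passes per pixel: pixel (1, 2) starts
   * (0 + 1 + 2 * 4) * 8 = 72 floats into the buffer. */
  assert(buffer_offset(0, 1, 2, 4, 8) == 72);
  return 0;
}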
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index df67fabab19..2db87f7a671 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -35,58 +35,53 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
* RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays.
*/
-ccl_device void kernel_queue_enqueue(KernelGlobals *kg,
- ccl_local_param QueueEnqueueLocals *locals)
+ccl_device void kernel_queue_enqueue(KernelGlobals *kg, ccl_local_param QueueEnqueueLocals *locals)
{
- /* We have only 2 cases (Hit/Not-Hit) */
- int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ /* We have only 2 cases (Hit/Not-Hit) */
+ int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(lidx == 0) {
- locals->queue_atomics[0] = 0;
- locals->queue_atomics[1] = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (lidx == 0) {
+ locals->queue_atomics[0] = 0;
+ locals->queue_atomics[1] = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int queue_number = -1;
+ int queue_number = -1;
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
- queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
- }
- else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
- queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
- }
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
+ queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
+ }
+ else if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) ||
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
+ queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+ }
- unsigned int my_lqidx;
- if(queue_number != -1) {
- my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics);
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ unsigned int my_lqidx;
+ if (queue_number != -1) {
+ my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics);
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(lidx == 0) {
- locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] =
- get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- locals->queue_atomics,
- kernel_split_params.queue_index);
- locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] =
- get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- locals->queue_atomics,
- kernel_split_params.queue_index);
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (lidx == 0) {
+ locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = get_global_per_queue_offset(
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS, locals->queue_atomics, kernel_split_params.queue_index);
+ locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = get_global_per_queue_offset(
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ locals->queue_atomics,
+ kernel_split_params.queue_index);
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- unsigned int my_gqidx;
- if(queue_number != -1) {
- my_gqidx = get_global_queue_index(queue_number,
- kernel_split_params.queue_size,
- my_lqidx,
- locals->queue_atomics);
- kernel_split_state.queue_data[my_gqidx] = ray_index;
- }
+ unsigned int my_gqidx;
+ if (queue_number != -1) {
+ my_gqidx = get_global_queue_index(
+ queue_number, kernel_split_params.queue_size, my_lqidx, locals->queue_atomics);
+ kernel_split_state.queue_data[my_gqidx] = ray_index;
+ }
}
CCL_NAMESPACE_END
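The enqueue above is a two-level compaction: each thread reserves a slot in a workgroup-local counter, one thread then reserves a contiguous block in the global queue with a single atomic, and every entry lands at block_base + local_slot. A serial C11 stand-in for one workgroup; the capacity numbers and names are invented:

#include <stdatomic.h>

#define QUEUE_CAPACITY 4096

typedef struct {
  atomic_uint global_index;
  int data[QUEUE_CAPACITY];
} queue_lite;

/* Serially mimics one workgroup of at most 64 threads. */
static void enqueue_group(queue_lite *q, const int *ray_indices, int n)
{
  unsigned local_slot[64];
  unsigned local_count = 0;
  for (int i = 0; i < n; i++) {
    local_slot[i] = local_count++; /* a local atomic in the real kernel */
  }
  /* One global atomic per workgroup instead of one per thread. */
  unsigned base = atomic_fetch_add(&q->global_index, local_count);
  for (int i = 0; i < n; i++) {
    q->data[base + local_slot[i]] = ray_indices[i];
  }
}

Batching the global atomic this way is what keeps queue building cheap when thousands of threads enqueue at once.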
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index f5378bc172b..5fef3e045f8 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -25,55 +25,56 @@ CCL_NAMESPACE_BEGIN
*/
ccl_device void kernel_scene_intersect(KernelGlobals *kg)
{
- /* Fetch use_queues_flag */
- char local_use_queues_flag = *kernel_split_params.use_queues_flag;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* Fetch use_queues_flag */
+ char local_use_queues_flag = *kernel_split_params.use_queues_flag;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(local_use_queues_flag) {
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (local_use_queues_flag) {
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+ }
- /* All regenerated rays become active here */
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
+ /* All regenerated rays become active here */
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
#ifdef __BRANCHED_PATH__
- if(kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) {
- kernel_split_path_end(kg, ray_index);
- }
- else
-#endif /* __BRANCHED_PATH__ */
- {
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
- }
- }
+ if (kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else
+#endif /* __BRANCHED_PATH__ */
+ {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ }
+ }
- if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
- return;
- }
+ if (!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ return;
+ }
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- Ray ray = kernel_split_state.ray[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ Ray ray = kernel_split_state.ray[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- Intersection isect;
- bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L);
- kernel_split_state.isect[ray_index] = isect;
+ Intersection isect;
+ bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L);
+ kernel_split_state.isect[ray_index] = isect;
- if(!hit) {
- /* Change the state of rays that hit the background;
- * These rays undergo special processing in the
- * background_bufferUpdate kernel.
- */
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
- }
+ if (!hit) {
+ /* Change the state of rays that hit the background;
+     * these rays undergo special processing in the
+ * background_bufferUpdate kernel.
+ */
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 2bc2d300699..8e39c9797e5 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -22,45 +22,46 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_shader_eval(KernelGlobals *kg)
{
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- /* Sorting on cuda split is not implemented */
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  /* Sorting on the CUDA split kernel is not implemented. */
#ifdef __KERNEL_CUDA__
- int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
#else
- int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
+ int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
#endif
- if(ray_index >= queue_index) {
- return;
- }
- ray_index = get_ray_index(kg, ray_index,
+ if (ray_index >= queue_index) {
+ return;
+ }
+ ray_index = get_ray_index(kg,
+ ray_index,
#ifdef __KERNEL_CUDA__
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
#else
- QUEUE_SHADER_SORTED_RAYS,
+ QUEUE_SHADER_SORTED_RAYS,
#endif
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- ccl_global char *ray_state = kernel_split_state.ray_state;
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag);
+ shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag);
#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched) {
- shader_merge_closures(kernel_split_sd(sd, ray_index));
- }
- else
+ if (kernel_data.integrator.branched) {
+ shader_merge_closures(kernel_split_sd(sd, ray_index));
+ }
+ else
#endif
- {
- shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
- }
- }
+ {
+ shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_setup.h b/intern/cycles/kernel/split/kernel_shader_setup.h
index ea3ec2ec83f..da332db2c98 100644
--- a/intern/cycles/kernel/split/kernel_shader_setup.h
+++ b/intern/cycles/kernel/split/kernel_shader_setup.h
@@ -25,54 +25,52 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_shader_setup(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  /* Enqueue RAY_TO_REGENERATE rays into the QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
- if(ray_index >= queue_index) {
- return;
- }
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ if (ray_index >= queue_index) {
+ return;
+ }
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0;
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 :
+ 0;
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
- /* Continue on with shader evaluation. */
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
- Intersection isect = kernel_split_state.isect[ray_index];
- Ray ray = kernel_split_state.ray[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
+ /* Continue on with shader evaluation. */
+ if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ Intersection isect = kernel_split_state.isect[ray_index];
+ Ray ray = kernel_split_state.ray[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
- shader_setup_from_ray(kg,
- sd,
- &isect,
- &ray);
+ shader_setup_from_ray(kg, sd, &isect, &ray);
#ifdef __VOLUME__
- if(sd->flag & SD_HAS_ONLY_VOLUME) {
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME);
- }
+ if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME);
+ }
#endif
- }
-
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h
index 666355de334..95d33a42014 100644
--- a/intern/cycles/kernel/split/kernel_shader_sort.h
+++ b/intern/cycles/kernel/split/kernel_shader_sort.h
@@ -16,82 +16,82 @@
CCL_NAMESPACE_BEGIN
-
-ccl_device void kernel_shader_sort(KernelGlobals *kg,
- ccl_local_param ShaderSortLocals *locals)
+ccl_device void kernel_shader_sort(KernelGlobals *kg, ccl_local_param ShaderSortLocals *locals)
{
#ifndef __KERNEL_CUDA__
- int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
- if(tid == 0) {
- kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize;
- }
+ int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ if (tid == 0) {
+ kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize;
+ }
- uint offset = (tid/SHADER_SORT_LOCAL_SIZE)*SHADER_SORT_BLOCK_SIZE;
- if(offset >= qsize) {
- return;
- }
+ uint offset = (tid / SHADER_SORT_LOCAL_SIZE) * SHADER_SORT_BLOCK_SIZE;
+ if (offset >= qsize) {
+ return;
+ }
- int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
- uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size);
- uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size);
- ccl_local uint *local_value = &locals->local_value[0];
- ccl_local ushort *local_index = &locals->local_index[0];
+ int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
+ uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size);
+ uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size);
+ ccl_local uint *local_value = &locals->local_value[0];
+ ccl_local ushort *local_index = &locals->local_index[0];
- /* copy to local memory */
- for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
- uint idx = offset + i + lid;
- uint add = input + idx;
- uint value = (~0);
- if(idx < qsize) {
- int ray_index = kernel_split_state.queue_data[add];
- bool valid = (ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
- if(valid) {
- value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK;
- }
- }
- local_value[i + lid] = value;
- local_index[i + lid] = i + lid;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* copy to local memory */
+ for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+ uint idx = offset + i + lid;
+ uint add = input + idx;
+ uint value = (~0);
+ if (idx < qsize) {
+ int ray_index = kernel_split_state.queue_data[add];
+ bool valid = (ray_index != QUEUE_EMPTY_SLOT) &&
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ if (valid) {
+ value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK;
+ }
+ }
+ local_value[i + lid] = value;
+ local_index[i + lid] = i + lid;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- /* skip sorting for cpu split kernel */
+ /* skip sorting for cpu split kernel */
# ifdef __KERNEL_OPENCL__
- /* bitonic sort */
- for(uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
- for(uint inc = length; inc > 0; inc >>= 1) {
- for(uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
- uint i = lid + ii;
- bool direction = ((i & (length << 1)) != 0);
- uint j = i ^ inc;
- ushort ioff = local_index[i];
- ushort joff = local_index[j];
- uint iKey = local_value[ioff];
- uint jKey = local_value[joff];
- bool smaller = (jKey < iKey) || (jKey == iKey && j < i);
- bool swap = smaller ^ (j < i) ^ direction;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- local_index[i] = (swap) ? joff : ioff;
- local_index[j] = (swap) ? ioff : joff;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- }
- }
- }
-# endif /* __KERNEL_OPENCL__ */
+ /* bitonic sort */
+ for (uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
+ for (uint inc = length; inc > 0; inc >>= 1) {
+ for (uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
+ uint i = lid + ii;
+ bool direction = ((i & (length << 1)) != 0);
+ uint j = i ^ inc;
+ ushort ioff = local_index[i];
+ ushort joff = local_index[j];
+ uint iKey = local_value[ioff];
+ uint jKey = local_value[joff];
+ bool smaller = (jKey < iKey) || (jKey == iKey && j < i);
+ bool swap = smaller ^ (j < i) ^ direction;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ local_index[i] = (swap) ? joff : ioff;
+ local_index[j] = (swap) ? ioff : joff;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ }
+ }
+ }
+# endif /* __KERNEL_OPENCL__ */
- /* copy to destination */
- for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
- uint idx = offset + i + lid;
- uint lidx = local_index[i + lid];
- uint outi = output + idx;
- uint ini = input + offset + lidx;
- uint value = local_value[lidx];
- if(idx < qsize) {
- kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT : kernel_split_state.queue_data[ini];
- }
- }
-#endif /* __KERNEL_CUDA__ */
+ /* copy to destination */
+ for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+ uint idx = offset + i + lid;
+ uint lidx = local_index[i + lid];
+ uint outi = output + idx;
+ uint ini = input + offset + lidx;
+ uint value = local_value[lidx];
+ if (idx < qsize) {
+ kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT :
+ kernel_split_state.queue_data[ini];
+ }
+ }
+#endif /* __KERNEL_CUDA__ */
}
CCL_NAMESPACE_END
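The sort above is a bitonic network over (key, index) pairs, permuting indices while the keys stay in place. A sequential C sketch of the same network for a power-of-two n; equal keys need no tie-break for the network to stay valid, though the kernel adds one for determinism:

/* Sorts index[] so that key[index[0]] <= ... <= key[index[n-1]]; n must be a
 * power of two, mirroring SHADER_SORT_BLOCK_SIZE. */
static void bitonic_sort_indices(const unsigned *key, unsigned short *index, unsigned n)
{
  for (unsigned length = 1; length < n; length <<= 1) {
    for (unsigned inc = length; inc > 0; inc >>= 1) {
      for (unsigned i = 0; i < n; i++) {
        unsigned j = i ^ inc;
        if (j <= i) {
          continue; /* handle each pair once */
        }
        int ascending = ((i & (length << 1)) == 0);
        unsigned ki = key[index[i]];
        unsigned kj = key[index[j]];
        int out_of_order = ascending ? (kj < ki) : (ki < kj);
        if (out_of_order) {
          unsigned short t = index[i];
          index[i] = index[j];
          index[j] = t;
        }
      }
    }
  }
}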
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index fb08112503a..5d772fc597b 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -19,35 +19,40 @@ CCL_NAMESPACE_BEGIN
/* Shadow ray cast for AO. */
ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
{
- unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = QUEUE_EMPTY_SLOT;
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index < ao_queue_length) {
- ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS,
- kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
- }
+ int ray_index = QUEUE_EMPTY_SLOT;
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index < ao_queue_length) {
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ }
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ if (!kernel_data.integrator.branched ||
+ IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
#endif
- kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd));
+ kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd));
#ifdef __BRANCHED_PATH__
- }
- else {
- kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput);
- }
+ }
+ else {
+ kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput);
+ }
#endif
}
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index da072fd5f1a..82990ce9fae 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -19,89 +19,80 @@ CCL_NAMESPACE_BEGIN
/* Shadow ray cast for direct visible light. */
ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
{
- unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- int ray_index = QUEUE_EMPTY_SLOT;
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index < dl_queue_length) {
- ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_DL_RAYS,
- kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
- }
+ int ray_index = QUEUE_EMPTY_SLOT;
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index < dl_queue_length) {
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ }
#ifdef __BRANCHED_PATH__
- /* TODO(mai): move this somewhere else? */
- if(thread_index == 0) {
- /* Clear QUEUE_INACTIVE_RAYS before next kernel. */
- kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0;
- }
-#endif /* __BRANCHED_PATH__ */
+ /* TODO(mai): move this somewhere else? */
+ if (thread_index == 0) {
+ /* Clear QUEUE_INACTIVE_RAYS before next kernel. */
+ kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0;
+ }
+#endif /* __BRANCHED_PATH__ */
- if(ray_index == QUEUE_EMPTY_SLOT)
- return;
+ if (ray_index == QUEUE_EMPTY_SLOT)
+ return;
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- Ray ray = kernel_split_state.light_ray[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- float3 throughput = kernel_split_state.throughput[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ Ray ray = kernel_split_state.light_ray[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ float3 throughput = kernel_split_state.throughput[ray_index];
- BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
- bool is_lamp = kernel_split_state.is_lamp[ray_index];
+ BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+ bool is_lamp = kernel_split_state.is_lamp[ray_index];
-# if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
- bool use_branched = false;
- int all = 0;
+#if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
+ bool use_branched = false;
+ int all = 0;
- if(state->flag & PATH_RAY_SHADOW_CATCHER) {
- use_branched = true;
- all = 1;
- }
-# if defined(__BRANCHED_PATH__)
- else if(kernel_data.integrator.branched) {
- use_branched = true;
+ if (state->flag & PATH_RAY_SHADOW_CATCHER) {
+ use_branched = true;
+ all = 1;
+ }
+# if defined(__BRANCHED_PATH__)
+ else if (kernel_data.integrator.branched) {
+ use_branched = true;
- if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
- all = (kernel_data.integrator.sample_all_lights_indirect);
- }
- else
- {
- all = (kernel_data.integrator.sample_all_lights_direct);
- }
- }
-# endif /* __BRANCHED_PATH__ */
+ if (IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ all = (kernel_data.integrator.sample_all_lights_indirect);
+ }
+ else {
+ all = (kernel_data.integrator.sample_all_lights_direct);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
- if(use_branched) {
- kernel_branched_path_surface_connect_light(kg,
- sd,
- emission_sd,
- state,
- throughput,
- 1.0f,
- L,
- all);
- }
- else
-# endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)*/
- {
- /* trace shadow ray */
- float3 shadow;
+ if (use_branched) {
+ kernel_branched_path_surface_connect_light(
+ kg, sd, emission_sd, state, throughput, 1.0f, L, all);
+ }
+ else
+#endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__) */
+ {
+ /* trace shadow ray */
+ float3 shadow;
- if(!shadow_blocked(kg,
- sd,
- emission_sd,
- state,
- &ray,
- &shadow))
- {
- /* accumulate */
- path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
- }
- else {
- path_radiance_accum_total_light(L, state, throughput, &L_light);
- }
- }
+ if (!shadow_blocked(kg, sd, emission_sd, state, &ray, &shadow)) {
+ /* accumulate */
+ path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+ }
+ else {
+ path_radiance_accum_total_light(L, state, throughput, &L_light);
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 4b86696691a..384bc952460 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __KERNEL_SPLIT_H__
-#define __KERNEL_SPLIT_H__
+#ifndef __KERNEL_SPLIT_H__
+#define __KERNEL_SPLIT_H__
#include "kernel/kernel_math.h"
#include "kernel/kernel_types.h"
@@ -57,47 +57,48 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
{
- ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global char *ray_state = kernel_split_state.ray_state;
#ifdef __BRANCHED_PATH__
# ifdef __SUBSURFACE__
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
-
- if(ss_indirect->num_rays) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- }
- else
-# endif /* __SUBSURFACE__ */
- if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
- int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
-
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
-
- path_radiance_sum_indirect(L);
- path_radiance_accum_sample(orig_ray_L, L);
-
- atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
-
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
- }
- else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
- }
- else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER);
- }
- else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER);
- }
- else {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- }
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+
+ if (ss_indirect->num_rays) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+ else
+# endif /* __SUBSURFACE__ */
+ if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
+ int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_accum_sample(orig_ray_L, L);
+
+ atomic_fetch_and_dec_uint32(
+ (ccl_global uint *)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+ }
+ else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
+ }
+ else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER);
+ }
+ else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER);
+ }
+ else {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
#else
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
#endif
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_SPLIT_H__ */
+#endif /* __KERNEL_SPLIT_H__ */
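
kernel_split_path_end() above is one big dispatch on the per-ray state byte: IS_FLAG tests sticky flags set earlier in the pipeline, ASSIGN_RAY_STATE moves the ray to its next stage. A plausible standalone re-creation of that bookkeeping, assuming (hypothetically) that the low bits hold an exclusive state and the high bits hold flags; the real macros may encode things differently:

#include <cassert>
#include <cstdint>

/* Hypothetical encoding: low nibble = exclusive state, high nibble = flags. */
enum : uint8_t {
  TOY_RAY_ACTIVE = 0,
  TOY_RAY_INACTIVE = 1,
  TOY_RAY_UPDATE_BUFFER = 2,
  TOY_RAY_BRANCHED_INDIRECT_SHARED = 1u << 4,
};

static inline void toy_assign_state(uint8_t *rs, int i, uint8_t state)
{
  rs[i] = (uint8_t)((rs[i] & 0xF0u) | state); /* keep flags, swap state */
}
static inline bool toy_is_state(const uint8_t *rs, int i, uint8_t state)
{
  return (rs[i] & 0x0Fu) == state;
}
static inline void toy_add_flag(uint8_t *rs, int i, uint8_t flag)
{
  rs[i] |= flag;
}
static inline bool toy_is_flag(const uint8_t *rs, int i, uint8_t flag)
{
  return (rs[i] & flag) != 0;
}

int main()
{
  uint8_t ray_state[4] = {0, 0, 0, 0};
  toy_add_flag(ray_state, 2, TOY_RAY_BRANCHED_INDIRECT_SHARED);
  toy_assign_state(ray_state, 2, TOY_RAY_INACTIVE);
  /* The flag survives the state transition, just like in the kernel. */
  assert(toy_is_state(ray_state, 2, TOY_RAY_INACTIVE));
  assert(toy_is_flag(ray_state, 2, TOY_RAY_BRANCHED_INDIRECT_SHARED));
  return 0;
}
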
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 3f6b3977d79..433b1221a37 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -24,22 +24,22 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
{
- (void) kg; /* Unused on CPU. */
+ (void)kg; /* Unused on CPU. */
- uint64_t size = 0;
-#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
- size = size SPLIT_DATA_ENTRIES;
+ uint64_t size = 0;
+#define SPLIT_DATA_ENTRY(type, name, num) +align_up(num_elements * num * sizeof(type), 16)
+ size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
- uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+ uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1);
#ifdef __BRANCHED_PATH__
- size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
#endif
- size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
- return size;
+ return size;
}
ccl_device_inline void split_data_init(KernelGlobals *kg,
@@ -48,28 +48,29 @@ ccl_device_inline void split_data_init(KernelGlobals *kg,
ccl_global void *data,
ccl_global char *ray_state)
{
- (void) kg; /* Unused on CPU. */
+ (void)kg; /* Unused on CPU. */
- ccl_global char *p = (ccl_global char*)data;
+ ccl_global char *p = (ccl_global char *)data;
#define SPLIT_DATA_ENTRY(type, name, num) \
- split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16);
- SPLIT_DATA_ENTRIES;
+ split_data->name = (type *)p; \
+ p += align_up(num_elements * num * sizeof(type), 16);
+ SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
- uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+ uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1);
#ifdef __BRANCHED_PATH__
- split_data->_branched_state_sd = (ShaderData*)p;
- p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ split_data->_branched_state_sd = (ShaderData *)p;
+ p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
#endif
- split_data->_sd = (ShaderData*)p;
- p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+ split_data->_sd = (ShaderData *)p;
+ p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
- split_data->ray_state = ray_state;
+ split_data->ray_state = ray_state;
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_SPLIT_DATA_H__ */
+#endif /* __KERNEL_SPLIT_DATA_H__ */
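
SPLIT_DATA_ENTRY is an X-macro: the single SPLIT_DATA_ENTRIES list is expanded once to sum the buffer size (each entry expands to "+ align_up(...)", building one long expression) and once to carve typed pointers out of a single allocation. A self-contained sketch of the same technique with a toy entry list (names are illustrative, not the real split state; the sketch uses += where the kernel builds one expression):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

static inline uint64_t toy_align_up(uint64_t n, uint64_t a)
{
  return (n + a - 1) & ~(a - 1);
}

/* One list, expanded several times with different TOY_ENTRY definitions. */
#define TOY_ENTRIES \
  TOY_ENTRY(float, throughput, 1) \
  TOY_ENTRY(int, is_lamp, 1) \
  TOY_ENTRY(int, queue_data, 2)

struct ToySplitData {
#define TOY_ENTRY(type, name, num) type *name;
  TOY_ENTRIES
#undef TOY_ENTRY
};

static uint64_t toy_buffer_size(uint64_t num_elements)
{
  uint64_t size = 0;
#define TOY_ENTRY(type, name, num) \
  size += toy_align_up(num_elements * (num) * sizeof(type), 16);
  TOY_ENTRIES
#undef TOY_ENTRY
  return size;
}

static void toy_init(ToySplitData *split, uint64_t num_elements, void *data)
{
  char *p = (char *)data;
#define TOY_ENTRY(type, name, num) \
  split->name = (type *)p; \
  p += toy_align_up(num_elements * (num) * sizeof(type), 16);
  TOY_ENTRIES
#undef TOY_ENTRY
}

int main()
{
  const uint64_t n = 1024;
  void *buffer = malloc(toy_buffer_size(n));
  ToySplitData split;
  toy_init(&split, n, buffer);
  split.throughput[0] = 1.0f; /* every entry now points into one buffer */
  printf("buffer size: %llu bytes\n", (unsigned long long)toy_buffer_size(n));
  free(buffer);
  return 0;
}

The payoff is that adding a per-ray field means touching only the entry list; sizing and initialization stay in sync automatically.
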
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 83df1e2a0a6..6ff3f5bdb55 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -22,17 +22,17 @@ CCL_NAMESPACE_BEGIN
/* Parameters used by the split kernels; we use a single struct to avoid passing these to each kernel. */
typedef struct SplitParams {
- WorkTile tile;
- uint total_work_size;
+ WorkTile tile;
+ uint total_work_size;
- ccl_global unsigned int *work_pools;
+ ccl_global unsigned int *work_pools;
- ccl_global int *queue_index;
- int queue_size;
- ccl_global char *use_queues_flag;
+ ccl_global int *queue_index;
+ int queue_size;
+ ccl_global char *use_queues_flag;
- /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
- int dummy_sd_flag;
+ /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
+ int dummy_sd_flag;
} SplitParams;
/* Global memory variables [porting]; this memory is used for
@@ -46,98 +46,98 @@ typedef struct SplitParams {
#ifdef __BRANCHED_PATH__
typedef ccl_global struct SplitBranchedState {
- /* various state that must be kept and restored after an indirect loop */
- PathState path_state;
- float3 throughput;
- Ray ray;
+ /* various state that must be kept and restored after an indirect loop */
+ PathState path_state;
+ float3 throughput;
+ Ray ray;
- Intersection isect;
+ Intersection isect;
- char ray_state;
+ char ray_state;
- /* indirect loop state */
- int next_closure;
- int next_sample;
+ /* indirect loop state */
+ int next_closure;
+ int next_sample;
-#ifdef __SUBSURFACE__
- int ss_next_closure;
- int ss_next_sample;
- int next_hit;
- int num_hits;
-
- uint lcg_state;
- LocalIntersection ss_isect;
-#endif /*__SUBSURFACE__ */
-
- int shared_sample_count; /* number of branched samples shared with other threads */
- int original_ray; /* index of original ray when sharing branched samples */
- bool waiting_on_shared_samples;
+# ifdef __SUBSURFACE__
+ int ss_next_closure;
+ int ss_next_sample;
+ int next_hit;
+ int num_hits;
+
+ uint lcg_state;
+ LocalIntersection ss_isect;
+# endif /* __SUBSURFACE__ */
+
+ int shared_sample_count; /* number of branched samples shared with other threads */
+ int original_ray; /* index of original ray when sharing branched samples */
+ bool waiting_on_shared_samples;
} SplitBranchedState;
-#define SPLIT_DATA_BRANCHED_ENTRIES \
- SPLIT_DATA_ENTRY( SplitBranchedState, branched_state, 1) \
- SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0)
+# define SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(SplitBranchedState, branched_state, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0)
#else
-#define SPLIT_DATA_BRANCHED_ENTRIES
-#endif /* __BRANCHED_PATH__ */
+# define SPLIT_DATA_BRANCHED_ENTRIES
+#endif /* __BRANCHED_PATH__ */
#ifdef __SUBSURFACE__
# define SPLIT_DATA_SUBSURFACE_ENTRIES \
- SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1)
+ SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1)
#else
# define SPLIT_DATA_SUBSURFACE_ENTRIES
-#endif /* __SUBSURFACE__ */
+#endif /* __SUBSURFACE__ */
#ifdef __VOLUME__
-# define SPLIT_DATA_VOLUME_ENTRIES \
- SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1)
+# define SPLIT_DATA_VOLUME_ENTRIES SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1)
#else
# define SPLIT_DATA_VOLUME_ENTRIES
-#endif /* __VOLUME__ */
+#endif /* __VOLUME__ */
#define SPLIT_DATA_ENTRIES \
- SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
- SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
- SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
- SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
- SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
- SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
- SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
- SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
- SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
- SPLIT_DATA_SUBSURFACE_ENTRIES \
- SPLIT_DATA_VOLUME_ENTRIES \
- SPLIT_DATA_BRANCHED_ENTRIES \
- SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
+ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
+ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
+ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
+ SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
+ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+ SPLIT_DATA_ENTRY( \
+ ccl_global int, queue_data, (NUM_QUEUES * 2)) /* TODO(mai): this is too large? */ \
+ SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
+ SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
+ SPLIT_DATA_SUBSURFACE_ENTRIES \
+ SPLIT_DATA_VOLUME_ENTRIES \
+ SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
- SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
- SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
- SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
- SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
- SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
- SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
- SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
- SPLIT_DATA_SUBSURFACE_ENTRIES \
- SPLIT_DATA_VOLUME_ENTRIES \
- SPLIT_DATA_BRANCHED_ENTRIES \
- SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
+ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
+ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
+ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
+ SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
+ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+ SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
+ SPLIT_DATA_SUBSURFACE_ENTRIES \
+ SPLIT_DATA_VOLUME_ENTRIES \
+ SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
/* struct that holds pointers to data in the shared state buffer */
typedef struct SplitData {
#define SPLIT_DATA_ENTRY(type, name, num) type *name;
- SPLIT_DATA_ENTRIES
+ SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
- /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
- * the host easily) but is still used the same as the other data so we have it here in this struct as well
- */
- ccl_global char *ray_state;
+ /* This is actually in a separate buffer from the rest of the split state
+ * data, so it can be read back by the host easily, but it is used the same
+ * way as the other data, so we keep it here in this struct as well. */
+ ccl_global char *ray_state;
} SplitData;
#ifndef __KERNEL_CUDA__
@@ -148,30 +148,30 @@ __device__ SplitData __split_data;
# define kernel_split_state (__split_data)
__device__ SplitParams __split_param_data;
# define kernel_split_params (__split_param_data)
-#endif /* __KERNEL_CUDA__ */
+#endif /* __KERNEL_CUDA__ */
-#define kernel_split_sd(sd, ray_index) ((ShaderData*) \
- ( \
- ((ccl_global char*)kernel_split_state._##sd) + \
- (sizeof(ShaderData) + sizeof(ShaderClosure)*(kernel_data.integrator.max_closures-1)) * (ray_index) \
- ))
+#define kernel_split_sd(sd, ray_index) \
+ ((ShaderData *)(((ccl_global char *)kernel_split_state._##sd) + \
+ (sizeof(ShaderData) + \
+ sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1)) * \
+ (ray_index)))
/* Local storage for queue_enqueue kernel. */
typedef struct QueueEnqueueLocals {
- uint queue_atomics[2];
+ uint queue_atomics[2];
} QueueEnqueueLocals;
/* Local storage for holdout_emission_blurring_pathtermination_ao kernel. */
typedef struct BackgroundAOLocals {
- uint queue_atomics_bg;
- uint queue_atomics_ao;
+ uint queue_atomics_bg;
+ uint queue_atomics_ao;
} BackgroundAOLocals;
typedef struct ShaderSortLocals {
- uint local_value[SHADER_SORT_BLOCK_SIZE];
- ushort local_index[SHADER_SORT_BLOCK_SIZE];
+ uint local_value[SHADER_SORT_BLOCK_SIZE];
+ ushort local_index[SHADER_SORT_BLOCK_SIZE];
} ShaderSortLocals;
CCL_NAMESPACE_END
-#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */
+#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */
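
The reflowed kernel_split_sd() macro hides some pointer arithmetic worth spelling out: ShaderData is a variable-size element, holding one ShaderClosure inline plus max_closures - 1 more immediately behind it, so the per-ray stride must be computed at run time. A toy version of the same indexing (stand-in types, hypothetical names):

#include <cstdlib>

/* Stand-ins for ShaderData/ShaderClosure. */
struct ToyClosure {
  float weight[4];
};
struct ToyShaderData {
  int num_closure;
  ToyClosure closure[1]; /* first closure inline, the rest overflow behind */
};

static inline size_t toy_sd_stride(int max_closures)
{
  return sizeof(ToyShaderData) + sizeof(ToyClosure) * (size_t)(max_closures - 1);
}

/* Index ray_index into a packed array of variable-size elements. */
static inline ToyShaderData *toy_split_sd(char *base, int max_closures, int ray_index)
{
  return (ToyShaderData *)(base + toy_sd_stride(max_closures) * ray_index);
}

int main()
{
  const int max_closures = 8, num_rays = 64;
  char *base = (char *)calloc(num_rays, toy_sd_stride(max_closures));
  ToyShaderData *sd = toy_split_sd(base, max_closures, 13);
  sd->num_closure = 0; /* each ray gets its own variable-size slot */
  free(base);
  return 0;
}
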
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 08769fe303b..ba06ae3bc53 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -18,276 +18,247 @@ CCL_NAMESPACE_BEGIN
#if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__)
-ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg,
+ int ray_index)
{
- kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- branched_state->ss_next_closure = 0;
- branched_state->ss_next_sample = 0;
+ branched_state->ss_next_closure = 0;
+ branched_state->ss_next_sample = 0;
- branched_state->num_hits = 0;
- branched_state->next_hit = 0;
+ branched_state->num_hits = 0;
+ branched_state->next_hit = 0;
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
}
-ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(KernelGlobals *kg, int ray_index)
+ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(
+ KernelGlobals *kg, int ray_index)
{
- SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
-
- ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index);
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
- for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if(!CLOSURE_IS_BSSRDF(sc->type))
- continue;
-
- /* Closure memory will be overwritten, so read required variables now. */
- Bssrdf *bssrdf = (Bssrdf *)sc;
- ClosureType bssrdf_type = sc->type;
- float bssrdf_roughness = bssrdf->roughness;
-
- /* set up random number generator */
- if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
- branched_state->next_closure == 0 && branched_state->next_sample == 0)
- {
- branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state,
- 0x68bc21eb);
- }
- int num_samples = kernel_data.integrator.subsurface_samples * 3;
- float num_samples_inv = 1.0f/num_samples;
- uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
-
- /* do subsurface scatter step with copy of shader data, this will
- * replace the BSSRDF with a diffuse BSDF closure */
- for(int j = branched_state->ss_next_sample; j < num_samples; j++) {
- ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
- *hit_state = branched_state->path_state;
- hit_state->rng_hash = bssrdf_rng_hash;
- path_state_branch(hit_state, j, num_samples);
-
- ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect;
- float bssrdf_u, bssrdf_v;
- path_branched_rng_2D(kg,
- bssrdf_rng_hash,
- hit_state,
- j,
- num_samples,
- PRNG_BSDF_U,
- &bssrdf_u,
- &bssrdf_v);
-
- /* intersection is expensive so avoid doing multiple times for the same input */
- if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) {
- uint lcg_state = branched_state->lcg_state;
- LocalIntersection ss_isect_private;
-
- branched_state->num_hits = subsurface_scatter_multi_intersect(kg,
- &ss_isect_private,
- sd,
- hit_state,
- sc,
- &lcg_state,
- bssrdf_u, bssrdf_v,
- true);
-
- branched_state->lcg_state = lcg_state;
- *ss_isect = ss_isect_private;
- }
-
- hit_state->rng_offset += PRNG_BOUNCE_NUM;
-
-#ifdef __VOLUME__
- Ray volume_ray = branched_state->ray;
- bool need_update_volume_stack =
- kernel_data.integrator.use_volumes &&
- sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
-#endif /* __VOLUME__ */
-
- /* compute lighting with the BSDF closure */
- for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
- ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index);
- *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
- * important as the indirect path will write into bssrdf_sd */
-
- LocalIntersection ss_isect_private = *ss_isect;
- subsurface_scatter_multi_setup(kg,
- &ss_isect_private,
- hit,
- bssrdf_sd,
- hit_state,
- bssrdf_type,
- bssrdf_roughness);
- *ss_isect = ss_isect_private;
-
-#ifdef __VOLUME__
- if(need_update_volume_stack) {
- /* Setup ray from previous surface point to the new one. */
- float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
- volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
-
- for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
- hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k];
- }
-
- kernel_volume_stack_update_for_subsurface(kg,
- emission_sd,
- &volume_ray,
- hit_state->volume_stack);
- }
-#endif /* __VOLUME__ */
-
-#ifdef __EMISSION__
- if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
- /* direct light */
- if(kernel_data.integrator.use_direct_light) {
- int all = (kernel_data.integrator.sample_all_lights_direct) ||
- (hit_state->flag & PATH_RAY_SHADOW_CATCHER);
- kernel_branched_path_surface_connect_light(kg,
- bssrdf_sd,
- emission_sd,
- hit_state,
- branched_state->throughput,
- num_samples_inv,
- L,
- all);
- }
- }
-#endif /* __EMISSION__ */
-
- /* indirect light */
- if(kernel_split_branched_path_surface_indirect_light_iter(kg,
- ray_index,
- num_samples_inv,
- bssrdf_sd,
- false,
- false))
- {
- branched_state->ss_next_closure = i;
- branched_state->ss_next_sample = j;
- branched_state->next_hit = hit;
-
- return true;
- }
-
- branched_state->next_closure = 0;
- }
-
- branched_state->next_hit = 0;
- }
-
- branched_state->ss_next_sample = 0;
- }
-
- branched_state->ss_next_closure = sd->num_closure;
-
- branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
- if(branched_state->waiting_on_shared_samples) {
- return true;
- }
-
- kernel_split_branched_path_indirect_loop_end(kg, ray_index);
-
- return false;
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+ for (int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+
+ if (!CLOSURE_IS_BSSRDF(sc->type))
+ continue;
+
+ /* Closure memory will be overwritten, so read required variables now. */
+ Bssrdf *bssrdf = (Bssrdf *)sc;
+ ClosureType bssrdf_type = sc->type;
+ float bssrdf_roughness = bssrdf->roughness;
+
+ /* set up random number generator */
+ if (branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
+ branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state,
+ 0x68bc21eb);
+ }
+ int num_samples = kernel_data.integrator.subsurface_samples * 3;
+ float num_samples_inv = 1.0f / num_samples;
+ uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
+
+ /* Do the subsurface scatter step with a copy of the shader data; this
+ * will replace the BSSRDF with a diffuse BSDF closure. */
+ for (int j = branched_state->ss_next_sample; j < num_samples; j++) {
+ ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
+ *hit_state = branched_state->path_state;
+ hit_state->rng_hash = bssrdf_rng_hash;
+ path_state_branch(hit_state, j, num_samples);
+
+ ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect;
+ float bssrdf_u, bssrdf_v;
+ path_branched_rng_2D(
+ kg, bssrdf_rng_hash, hit_state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+
+ /* Intersection is expensive, so avoid doing it multiple times for the same input. */
+ if (branched_state->next_hit == 0 && branched_state->next_closure == 0 &&
+ branched_state->next_sample == 0) {
+ uint lcg_state = branched_state->lcg_state;
+ LocalIntersection ss_isect_private;
+
+ branched_state->num_hits = subsurface_scatter_multi_intersect(
+ kg, &ss_isect_private, sd, hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+
+ branched_state->lcg_state = lcg_state;
+ *ss_isect = ss_isect_private;
+ }
+
+ hit_state->rng_offset += PRNG_BOUNCE_NUM;
+
+# ifdef __VOLUME__
+ Ray volume_ray = branched_state->ray;
+ bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+ (sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME);
+# endif /* __VOLUME__ */
+
+ /* compute lighting with the BSDF closure */
+ for (int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
+ ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index);
+ *bssrdf_sd = *sd; /* Note: the copy happens on each iteration of the inner
+ * loop; this matters because the indirect path will write into bssrdf_sd. */
+
+ LocalIntersection ss_isect_private = *ss_isect;
+ subsurface_scatter_multi_setup(
+ kg, &ss_isect_private, hit, bssrdf_sd, hit_state, bssrdf_type, bssrdf_roughness);
+ *ss_isect = ss_isect_private;
+
+# ifdef __VOLUME__
+ if (need_update_volume_stack) {
+ /* Set up the ray from the previous surface point to the new one. */
+ float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
+ volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
+
+ for (int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k];
+ }
+
+ kernel_volume_stack_update_for_subsurface(
+ kg, emission_sd, &volume_ray, hit_state->volume_stack);
+ }
+# endif /* __VOLUME__ */
+
+# ifdef __EMISSION__
+ if (branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ /* direct light */
+ if (kernel_data.integrator.use_direct_light) {
+ int all = (kernel_data.integrator.sample_all_lights_direct) ||
+ (hit_state->flag & PATH_RAY_SHADOW_CATCHER);
+ kernel_branched_path_surface_connect_light(kg,
+ bssrdf_sd,
+ emission_sd,
+ hit_state,
+ branched_state->throughput,
+ num_samples_inv,
+ L,
+ all);
+ }
+ }
+# endif /* __EMISSION__ */
+
+ /* indirect light */
+ if (kernel_split_branched_path_surface_indirect_light_iter(
+ kg, ray_index, num_samples_inv, bssrdf_sd, false, false)) {
+ branched_state->ss_next_closure = i;
+ branched_state->ss_next_sample = j;
+ branched_state->next_hit = hit;
+
+ return true;
+ }
+
+ branched_state->next_closure = 0;
+ }
+
+ branched_state->next_hit = 0;
+ }
+
+ branched_state->ss_next_sample = 0;
+ }
+
+ branched_state->ss_next_closure = sd->num_closure;
+
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if (branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ return false;
}
-#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
+#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
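
The *_iter function above is worth a second look: it is a manually resumable triple loop. When an indirect bounce has to be handed back to the other split kernels mid-iteration, it records the current closure/sample/hit indices in SplitBranchedState and returns true; the next call re-enters the loops at those saved indices, and each index is reset to zero once its loop completes. A toy re-creation of that control flow (the suspend condition is made up):

#include <cstdio>

struct LoopState {
  int i = 0, j = 0, k = 0; /* saved loop indices, like SplitBranchedState */
};

/* Returns true when it "suspends" (work handed off), false when done. */
static bool toy_iterate(LoopState *st, int ni, int nj, int nk)
{
  for (int i = st->i; i < ni; i++) {
    for (int j = st->j; j < nj; j++) {
      for (int k = st->k; k < nk; k++) {
        bool needs_handoff = ((i + j + k) % 5 == 0); /* stand-in condition */
        if (needs_handoff) {
          /* Save where we are; resume *after* this item next call. */
          st->i = i;
          st->j = j;
          st->k = k + 1;
          return true;
        }
      }
      st->k = 0; /* inner index resets once its loop completes */
    }
    st->j = 0;
  }
  return false;
}

int main()
{
  LoopState st;
  int suspensions = 0;
  while (toy_iterate(&st, 3, 3, 3))
    suspensions++;
  printf("suspended %d times\n", suspensions);
  return 0;
}
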
ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
{
- int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(thread_index == 0) {
- /* We will empty both queues in this kernel. */
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- }
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
- get_ray_index(kg, thread_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (thread_index == 0) {
+ /* We will empty both queues in this kernel. */
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ ray_index = get_ray_index(kg,
+ ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ get_ray_index(kg,
+ thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
#ifdef __SUBSURFACE__
- ccl_global char *ray_state = kernel_split_state.ray_state;
-
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- ShaderData *sd = kernel_split_sd(sd, ray_index);
- ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
- if(sd->flag & SD_BSSRDF) {
-
-#ifdef __BRANCHED_PATH__
- if(!kernel_data.integrator.branched ||
- IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))
- {
-#endif
- if(kernel_path_subsurface_scatter(kg,
- sd,
- emission_sd,
- L,
- state,
- ray,
- throughput,
- ss_indirect))
- {
- kernel_split_path_end(kg, ray_index);
- }
-#ifdef __BRANCHED_PATH__
- }
- else {
- kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
-
- if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-#endif
- }
- }
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+ if (sd->flag & SD_BSSRDF) {
# ifdef __BRANCHED_PATH__
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
- }
-
- /* iter loop */
- ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
- QUEUE_SUBSURFACE_INDIRECT_ITER,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
-
- if(IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
- path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
-
- if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-# endif /* __BRANCHED_PATH__ */
-
-#endif /* __SUBSURFACE__ */
+ if (!kernel_data.integrator.branched ||
+ IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+# endif
+ if (kernel_path_subsurface_scatter(
+ kg, sd, emission_sd, L, state, ray, throughput, ss_indirect)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+# ifdef __BRANCHED_PATH__
+ }
+ else {
+ kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
+
+ if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif
+ }
+ }
+# ifdef __BRANCHED_PATH__
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
+ }
+
+ /* iter loop */
+ ray_index = get_ray_index(kg,
+ ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if (IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+#endif /* __SUBSURFACE__ */
}
CCL_NAMESPACE_END
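
Finally, a note on how kernel_subsurface_scatter() (and every other split kernel) finds its work: threads never scan all rays; each one maps its flat global id through a queue of ray indices that the enqueue kernel compacted earlier, so inactive rays cost nothing. A toy CPU analogue of that lookup; the real get_ray_index also takes the queue number, and the details below (names, the slot-clearing flag) are assumptions for illustration:

#include <cstdio>
#include <vector>

static const int TOY_QUEUE_EMPTY_SLOT = -1;

/* Map a thread id to a ray index via a compacted work queue; threads
 * past the end of the queue get an invalid index and simply return. */
static int toy_get_ray_index(std::vector<int> &queue, int thread_index, bool empty_slot)
{
  if (thread_index >= (int)queue.size())
    return TOY_QUEUE_EMPTY_SLOT;
  int ray_index = queue[thread_index];
  if (empty_slot)
    queue[thread_index] = TOY_QUEUE_EMPTY_SLOT; /* consume the entry */
  return ray_index;
}

int main()
{
  std::vector<int> queue = {7, 3, 42}; /* ray indices with pending work */
  for (int tid = 0; tid < 5; tid++) {
    int ray = toy_get_ray_index(queue, tid, true);
    if (ray == TOY_QUEUE_EMPTY_SLOT)
      continue; /* nothing to do for this thread */
    printf("thread %d works on ray %d\n", tid, ray);
  }
  return 0;
}
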