author:    Campbell Barton <ideasman42@gmail.com>  2019-04-17 07:17:24 +0300
committer: Campbell Barton <ideasman42@gmail.com>  2019-04-17 07:21:24 +0300
commit:    e12c08e8d170b7ca40f204a5b0423c23a9fbc2c1 (patch)
tree:      8cf3453d12edb177a218ef8009357518ec6cab6a /intern/cycles/kernel/split
parent:    b3dabc200a4b0399ec6b81f2ff2730d07b44fcaa (diff)
ClangFormat: apply to source, most of intern
Apply clang format as proposed in T53211.
For details on usage and instructions for migrating branches
without conflicts, see:
https://wiki.blender.org/wiki/Tools/ClangFormat
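The change is purely mechanical; no logic is touched. As a rough illustration of the style change visible in the hunks below (a hedged sketch on a made-up function, not code from the commit): tab indentation becomes two-space indentation, control keywords gain a space before the parenthesis, and preprocessor directives nested inside conditional blocks are indented after the hash.

/* Before (old Cycles style): */
static int clamp_positive_old(int value)
{
	if(value < 0) {
		return 0;
	}
	return value;
}

/* After ClangFormat: */
static int clamp_positive(int value)
{
  if (value < 0) {
    return 0;
  }
  return value;
}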
Diffstat (limited to 'intern/cycles/kernel/split')
23 files changed, 1713 insertions(+), 1759 deletions(-)
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index ed0a82067f1..e08d87ab618 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -19,215 +19,213 @@ CCL_NAMESPACE_BEGIN
 #ifdef __BRANCHED_PATH__
 
 /* sets up the various state needed to do an indirect loop */
-ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg,
+                                                                     int ray_index)
 {
-	SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+  SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
 
-	/* save a copy of the state to restore later */
-#define BRANCHED_STORE(name) \
-	branched_state->name = kernel_split_state.name[ray_index];
+  /* save a copy of the state to restore later */
+# define BRANCHED_STORE(name) branched_state->name = kernel_split_state.name[ray_index];
 
-	BRANCHED_STORE(path_state);
-	BRANCHED_STORE(throughput);
-	BRANCHED_STORE(ray);
-	BRANCHED_STORE(isect);
-	BRANCHED_STORE(ray_state);
+  BRANCHED_STORE(path_state);
+  BRANCHED_STORE(throughput);
+  BRANCHED_STORE(ray);
+  BRANCHED_STORE(isect);
+  BRANCHED_STORE(ray_state);
 
-	*kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index);
-	for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
-		kernel_split_sd(branched_state_sd, ray_index)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
-	}
+  *kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index);
+  for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
+    kernel_split_sd(branched_state_sd, ray_index)->closure[i] =
+        kernel_split_sd(sd, ray_index)->closure[i];
+  }
 
-#undef BRANCHED_STORE
+# undef BRANCHED_STORE
 
-	/* set loop counters to intial position */
-	branched_state->next_closure = 0;
-	branched_state->next_sample = 0;
+  /* set loop counters to intial position */
+  branched_state->next_closure = 0;
+  branched_state->next_sample = 0;
 }
 
 /* ends an indirect loop and restores the previous state */
-ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg,
+                                                                    int ray_index)
 {
-	SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+  SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
 
-	/* restore state */
-#define BRANCHED_RESTORE(name) \
-	kernel_split_state.name[ray_index] = branched_state->name;
+  /* restore state */
+# define BRANCHED_RESTORE(name) kernel_split_state.name[ray_index] = branched_state->name;
 
-	BRANCHED_RESTORE(path_state);
-	BRANCHED_RESTORE(throughput);
-	BRANCHED_RESTORE(ray);
-	BRANCHED_RESTORE(isect);
-	BRANCHED_RESTORE(ray_state);
+  BRANCHED_RESTORE(path_state);
+  BRANCHED_RESTORE(throughput);
+  BRANCHED_RESTORE(ray);
+  BRANCHED_RESTORE(isect);
+  BRANCHED_RESTORE(ray_state);
 
-	*kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index);
-	for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
-		kernel_split_sd(sd, ray_index)->closure[i] = kernel_split_sd(branched_state_sd, ray_index)->closure[i];
-	}
+  *kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index);
+  for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) {
+    kernel_split_sd(sd, ray_index)->closure[i] =
+        kernel_split_sd(branched_state_sd, ray_index)->closure[i];
+  }
 
-#undef BRANCHED_RESTORE
+# undef BRANCHED_RESTORE
 
-	/* leave indirect loop */
-	REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
+  /* leave indirect loop */
+  REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
 }
 
-ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, int ray_index)
+ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg,
+                                                                   int ray_index)
 {
-	ccl_global char *ray_state = kernel_split_state.ray_state;
+  ccl_global char *ray_state = kernel_split_state.ray_state;
 
-	int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
-	                                     kernel_split_state.queue_data, kernel_split_params.queue_size, kernel_split_params.queue_index);
+  int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
+                                       kernel_split_state.queue_data,
+                                       kernel_split_params.queue_size,
+                                       kernel_split_params.queue_index);
 
-	if(!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
-		return false;
-	}
+  if (!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
+    return false;
+  }
 
-#define SPLIT_DATA_ENTRY(type, name, num) \
-	if(num) { \
-		kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \
-	}
-	SPLIT_DATA_ENTRIES_BRANCHED_SHARED
-#undef SPLIT_DATA_ENTRY
+# define SPLIT_DATA_ENTRY(type, name, num) \
+    if (num) { \
+      kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \
+    }
+  SPLIT_DATA_ENTRIES_BRANCHED_SHARED
+# undef SPLIT_DATA_ENTRY
 
-	*kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index);
-	for(int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) {
-		kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
-	}
+  *kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index);
+  for (int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) {
+    kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
+  }
 
-	kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
-	kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
-	kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
+  kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
+  kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
+  kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
 
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
 
-	path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
-	path_radiance_copy_indirect(inactive_L, L);
+  path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
+  path_radiance_copy_indirect(inactive_L, L);
 
-	ray_state[inactive_ray] = RAY_REGENERATED;
-	ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
-	ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
+  ray_state[inactive_ray] = RAY_REGENERATED;
+  ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
+  ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
 
-	atomic_fetch_and_inc_uint32((ccl_global uint*)&kernel_split_state.branched_state[ray_index].shared_sample_count);
+  atomic_fetch_and_inc_uint32(
+      (ccl_global uint *)&kernel_split_state.branched_state[ray_index].shared_sample_count);
 
-	return true;
+  return true;
 }
 
 /* bounce off surface and integrate indirect light */
-ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg,
-                                                                                int ray_index,
-                                                                                float num_samples_adjust,
-                                                                                ShaderData *saved_sd,
-                                                                                bool reset_path_state,
-                                                                                bool wait_for_shared)
+ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(
+    KernelGlobals *kg,
+    int ray_index,
+    float num_samples_adjust,
+    ShaderData *saved_sd,
+    bool reset_path_state,
+    bool wait_for_shared)
 {
-	SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
-
-	ShaderData *sd = saved_sd;
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	float3 throughput = branched_state->throughput;
-	ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
-
-	float sum_sample_weight = 0.0f;
-#ifdef __DENOISING_FEATURES__
-	if(ps->denoising_feature_weight > 0.0f) {
-		for(int i = 0; i < sd->num_closure; i++) {
-			const ShaderClosure *sc = &sd->closure[i];
-
-			/* transparency is not handled here, but in outer loop */
-			if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
-				continue;
-			}
-
-			sum_sample_weight += sc->sample_weight;
-		}
-	}
-	else {
-		sum_sample_weight = 1.0f;
-	}
-#endif  /* __DENOISING_FEATURES__ */
-
-	for(int i = branched_state->next_closure; i < sd->num_closure; i++) {
-		const ShaderClosure *sc = &sd->closure[i];
-
-		if(!CLOSURE_IS_BSDF(sc->type))
-			continue;
-		/* transparency is not handled here, but in outer loop */
-		if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
-			continue;
-
-		int num_samples;
-
-		if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
-			num_samples = kernel_data.integrator.diffuse_samples;
-		else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
-			num_samples = 1;
-		else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
-			num_samples = kernel_data.integrator.glossy_samples;
-		else
-			num_samples = kernel_data.integrator.transmission_samples;
-
-		num_samples = ceil_to_int(num_samples_adjust*num_samples);
-
-		float num_samples_inv = num_samples_adjust/num_samples;
-
-		for(int j = branched_state->next_sample; j < num_samples; j++) {
-			if(reset_path_state) {
-				*ps = branched_state->path_state;
-			}
-
-			ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
-
-			ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
-			*tp = throughput;
-
-			ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
-
-			if(!kernel_branched_path_surface_bounce(kg,
-			                                        sd,
-			                                        sc,
-			                                        j,
-			                                        num_samples,
-			                                        tp,
-			                                        ps,
-			                                        &L->state,
-			                                        bsdf_ray,
-			                                        sum_sample_weight))
-			{
-				continue;
-			}
-
-			ps->rng_hash = branched_state->path_state.rng_hash;
-
-			/* update state for next iteration */
-			branched_state->next_closure = i;
-			branched_state->next_sample = j+1;
-
-			/* start the indirect path */
-			*tp *= num_samples_inv;
-
-			if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
-				continue;
-			}
-
-			return true;
-		}
-
-		branched_state->next_sample = 0;
-	}
-
-	branched_state->next_closure = sd->num_closure;
-
-	if(wait_for_shared) {
-		branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
-		if(branched_state->waiting_on_shared_samples) {
-			return true;
-		}
-	}
-
-	return false;
+  SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+  ShaderData *sd = saved_sd;
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  float3 throughput = branched_state->throughput;
+  ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+
+  float sum_sample_weight = 0.0f;
+# ifdef __DENOISING_FEATURES__
+  if (ps->denoising_feature_weight > 0.0f) {
+    for (int i = 0; i < sd->num_closure; i++) {
+      const ShaderClosure *sc = &sd->closure[i];
+
+      /* transparency is not handled here, but in outer loop */
+      if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+        continue;
+      }
+
+      sum_sample_weight += sc->sample_weight;
+    }
+  }
+  else {
+    sum_sample_weight = 1.0f;
+  }
+# endif /* __DENOISING_FEATURES__ */
+
+  for (int i = branched_state->next_closure; i < sd->num_closure; i++) {
+    const ShaderClosure *sc = &sd->closure[i];
+
+    if (!CLOSURE_IS_BSDF(sc->type))
+      continue;
+    /* transparency is not handled here, but in outer loop */
+    if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
+      continue;
+
+    int num_samples;
+
+    if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+      num_samples = kernel_data.integrator.diffuse_samples;
+    else if (CLOSURE_IS_BSDF_BSSRDF(sc->type))
+      num_samples = 1;
+    else if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
+      num_samples = kernel_data.integrator.glossy_samples;
+    else
+      num_samples = kernel_data.integrator.transmission_samples;
+
+    num_samples = ceil_to_int(num_samples_adjust * num_samples);
+
+    float num_samples_inv = num_samples_adjust / num_samples;
+
+    for (int j = branched_state->next_sample; j < num_samples; j++) {
+      if (reset_path_state) {
+        *ps = branched_state->path_state;
+      }
+
+      ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i);
+
+      ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+      *tp = throughput;
+
+      ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
+
+      if (!kernel_branched_path_surface_bounce(
+              kg, sd, sc, j, num_samples, tp, ps, &L->state, bsdf_ray, sum_sample_weight)) {
+        continue;
+      }
+
+      ps->rng_hash = branched_state->path_state.rng_hash;
+
+      /* update state for next iteration */
+      branched_state->next_closure = i;
+      branched_state->next_sample = j + 1;
+
+      /* start the indirect path */
+      *tp *= num_samples_inv;
+
+      if (kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+        continue;
+      }
+
+      return true;
+    }
+
+    branched_state->next_sample = 0;
+  }
+
+  branched_state->next_closure = sd->num_closure;
+
+  if (wait_for_shared) {
+    branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+    if (branched_state->waiting_on_shared_samples) {
+      return true;
+    }
+  }
+
+  return false;
 }
 
-#endif  /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index 18eec6372f1..e77743350dc 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -41,132 +41,133 @@ CCL_NAMESPACE_BEGIN
 ccl_device void kernel_buffer_update(KernelGlobals *kg,
                                      ccl_local_param unsigned int *local_queue_atomics)
 {
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		*local_queue_atomics = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	if(ray_index == 0) {
-		/* We will empty this queue in this kernel. */
-		kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
-	}
-	char enqueue_flag = 0;
-	ray_index = get_ray_index(kg, ray_index,
-	                          QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          1);
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    *local_queue_atomics = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (ray_index == 0) {
+    /* We will empty this queue in this kernel. */
+    kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+  }
+  char enqueue_flag = 0;
+  ray_index = get_ray_index(kg,
+                            ray_index,
+                            QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            1);
 
 #ifdef __COMPUTE_DEVICE_GPU__
-	/* If we are executing on a GPU device, we exit all threads that are not
-	 * required.
-	 *
-	 * If we are executing on a CPU device, then we need to keep all threads
-	 * active since we have barrier() calls later in the kernel. CPU devices,
-	 * expect all threads to execute barrier statement.
-	 */
-	if(ray_index == QUEUE_EMPTY_SLOT) {
-		return;
-	}
+  /* If we are executing on a GPU device, we exit all threads that are not
+   * required.
+   *
+   * If we are executing on a CPU device, then we need to keep all threads
+   * active since we have barrier() calls later in the kernel. CPU devices,
+   * expect all threads to execute barrier statement.
+   */
+  if (ray_index == QUEUE_EMPTY_SLOT) {
+    return;
+  }
 #endif
 
 #ifndef __COMPUTE_DEVICE_GPU__
-	if(ray_index != QUEUE_EMPTY_SLOT) {
+  if (ray_index != QUEUE_EMPTY_SLOT) {
 #endif
-		ccl_global char *ray_state = kernel_split_state.ray_state;
-		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-		ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-		ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
-		bool ray_was_updated = false;
-
-		if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
-			ray_was_updated = true;
-			uint sample = state->sample;
-			uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
-			ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
-
-			/* accumulate result in output buffer */
-			kernel_write_result(kg, buffer, sample, L);
-
-			ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
-		}
-
-		if(kernel_data.film.cryptomatte_passes) {
-			/* Make sure no thread is writing to the buffers. */
-			ccl_barrier(CCL_LOCAL_MEM_FENCE);
-			if(ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) {
-				uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
-				ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
-				ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
-				kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
-			}
-		}
-
-		if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
-			/* We have completed current work; So get next work */
-			ccl_global uint *work_pools = kernel_split_params.work_pools;
-			uint total_work_size = kernel_split_params.total_work_size;
-			uint work_index;
-
-			if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
-				/* If work is invalid, this means no more work is available and the thread may exit */
-				ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
-			}
-
-			if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
-				ccl_global WorkTile *tile = &kernel_split_params.tile;
-				uint x, y, sample;
-				get_work_pixel(tile, work_index, &x, &y, &sample);
-
-				/* Store buffer offset for writing to passes. */
-				uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
-				kernel_split_state.buffer_offset[ray_index] = buffer_offset;
-
-				/* Initialize random numbers and ray. */
-				uint rng_hash;
-				kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
-
-				if(ray->t != 0.0f) {
-					/* Initialize throughput, path radiance, Ray, PathState;
-					 * These rays proceed with path-iteration.
-					 */
-					*throughput = make_float3(1.0f, 1.0f, 1.0f);
-					path_radiance_init(L, kernel_data.film.use_light_pass);
-					path_state_init(kg,
-					                AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
-					                state,
-					                rng_hash,
-					                sample,
-					                ray);
+    ccl_global char *ray_state = kernel_split_state.ray_state;
+    ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+    PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+    ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+    ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+    bool ray_was_updated = false;
+
+    if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+      ray_was_updated = true;
+      uint sample = state->sample;
+      uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+      ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+      /* accumulate result in output buffer */
+      kernel_write_result(kg, buffer, sample, L);
+
+      ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+    }
+
+    if (kernel_data.film.cryptomatte_passes) {
+      /* Make sure no thread is writing to the buffers. */
+      ccl_barrier(CCL_LOCAL_MEM_FENCE);
+      if (ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) {
+        uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
+        ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+        ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+        kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
+      }
+    }
+
+    if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+      /* We have completed current work; So get next work */
+      ccl_global uint *work_pools = kernel_split_params.work_pools;
+      uint total_work_size = kernel_split_params.total_work_size;
+      uint work_index;
+
+      if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
+        /* If work is invalid, this means no more work is available and the thread may exit */
+        ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+      }
+
+      if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+        ccl_global WorkTile *tile = &kernel_split_params.tile;
+        uint x, y, sample;
+        get_work_pixel(tile, work_index, &x, &y, &sample);
+
+        /* Store buffer offset for writing to passes. */
+        uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+        kernel_split_state.buffer_offset[ray_index] = buffer_offset;
+
+        /* Initialize random numbers and ray. */
+        uint rng_hash;
+        kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray);
+
+        if (ray->t != 0.0f) {
+          /* Initialize throughput, path radiance, Ray, PathState;
+           * These rays proceed with path-iteration.
+           */
+          *throughput = make_float3(1.0f, 1.0f, 1.0f);
+          path_radiance_init(L, kernel_data.film.use_light_pass);
+          path_state_init(kg,
+                          AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+                          state,
+                          rng_hash,
+                          sample,
+                          ray);
 #ifdef __SUBSURFACE__
-					kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+          kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
 #endif
-					ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-					enqueue_flag = 1;
-				}
-				else {
-					ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
-				}
-			}
-		}
+          ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+          enqueue_flag = 1;
+        }
+        else {
+          ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
+        }
+      }
+    }
 #ifndef __COMPUTE_DEVICE_GPU__
-	}
+  }
 #endif
 
-	/* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
-	 * These rays will be made active during next SceneIntersectkernel.
-	 */
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-	                        enqueue_flag,
-	                        kernel_split_params.queue_size,
-	                        local_queue_atomics,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
+  /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
+   * These rays will be made active during next SceneIntersectkernel.
+   */
+  enqueue_ray_index_local(ray_index,
+                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                          enqueue_flag,
+                          kernel_split_params.queue_size,
+                          local_queue_atomics,
+                          kernel_split_state.queue_data,
+                          kernel_split_params.queue_index);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 77fb61b80a8..52930843f56 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -28,82 +28,88 @@ ccl_device void kernel_data_init(
 #else
 void KERNEL_FUNCTION_FULL_NAME(data_init)(
 #endif
-	KernelGlobals *kg,
-	ccl_constant KernelData *data,
-	ccl_global void *split_data_buffer,
-	int num_elements,
-	ccl_global char *ray_state,
+    KernelGlobals *kg,
+    ccl_constant KernelData *data,
+    ccl_global void *split_data_buffer,
+    int num_elements,
+    ccl_global char *ray_state,
 #ifdef __KERNEL_OPENCL__
-	KERNEL_BUFFER_PARAMS,
+    KERNEL_BUFFER_PARAMS,
 #endif
-	int start_sample,
-	int end_sample,
-	int sx, int sy, int sw, int sh, int offset, int stride,
-	ccl_global int *Queue_index,  /* Tracks the number of elements in queues */
-	int queuesize,  /* size (capacity) of the queue */
-	ccl_global char *use_queues_flag,  /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
-	ccl_global unsigned int *work_pools,  /* Work pool for each work group */
-	unsigned int num_samples,
-	ccl_global float *buffer)
+    int start_sample,
+    int end_sample,
+    int sx,
+    int sy,
+    int sw,
+    int sh,
+    int offset,
+    int stride,
+    ccl_global int *Queue_index, /* Tracks the number of elements in queues */
+    int queuesize,               /* size (capacity) of the queue */
+    ccl_global char *
+        use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
+    ccl_global unsigned int *work_pools, /* Work pool for each work group */
+    unsigned int num_samples,
+    ccl_global float *buffer)
 {
 #ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, data_init);
+  STUB_ASSERT(KERNEL_ARCH, data_init);
 #else
-#ifdef __KERNEL_OPENCL__
-	kg->data = data;
-#endif
+# ifdef __KERNEL_OPENCL__
+  kg->data = data;
+# endif
 
-	kernel_split_params.tile.x = sx;
-	kernel_split_params.tile.y = sy;
-	kernel_split_params.tile.w = sw;
-	kernel_split_params.tile.h = sh;
+  kernel_split_params.tile.x = sx;
+  kernel_split_params.tile.y = sy;
+  kernel_split_params.tile.w = sw;
+  kernel_split_params.tile.h = sh;
 
-	kernel_split_params.tile.start_sample = start_sample;
-	kernel_split_params.tile.num_samples = num_samples;
+  kernel_split_params.tile.start_sample = start_sample;
+  kernel_split_params.tile.num_samples = num_samples;
 
-	kernel_split_params.tile.offset = offset;
-	kernel_split_params.tile.stride = stride;
+  kernel_split_params.tile.offset = offset;
+  kernel_split_params.tile.stride = stride;
 
-	kernel_split_params.tile.buffer = buffer;
+  kernel_split_params.tile.buffer = buffer;
 
-	kernel_split_params.total_work_size = sw * sh * num_samples;
+  kernel_split_params.total_work_size = sw * sh * num_samples;
 
-	kernel_split_params.work_pools = work_pools;
+  kernel_split_params.work_pools = work_pools;
 
-	kernel_split_params.queue_index = Queue_index;
-	kernel_split_params.queue_size = queuesize;
-	kernel_split_params.use_queues_flag = use_queues_flag;
+  kernel_split_params.queue_index = Queue_index;
+  kernel_split_params.queue_size = queuesize;
+  kernel_split_params.use_queues_flag = use_queues_flag;
 
-	split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
+  split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
 
-#ifdef __KERNEL_OPENCL__
-	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
-	kernel_set_buffer_info(kg);
-#endif
+# ifdef __KERNEL_OPENCL__
+  kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+  kernel_set_buffer_info(kg);
+# endif
 
-	int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-
-	/* Initialize queue data and queue index. */
-	if(thread_index < queuesize) {
-		for(int i = 0; i < NUM_QUEUES; i++) {
-			kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
-		}
-	}
-
-	if(thread_index == 0) {
-		for(int i = 0; i < NUM_QUEUES; i++) {
-			Queue_index[i] = 0;
-		}
-
-		/* The scene-intersect kernel should not use the queues very first time.
-		 * since the queue would be empty.
-		 */
-		*use_queues_flag = 0;
-	}
-#endif  /* KERENL_STUB */
+  int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+  /* Initialize queue data and queue index. */
+  if (thread_index < queuesize) {
+    for (int i = 0; i < NUM_QUEUES; i++) {
+      kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+    }
+  }
+
+  if (thread_index == 0) {
+    for (int i = 0; i < NUM_QUEUES; i++) {
+      Queue_index[i] = 0;
+    }
+
+    /* The scene-intersect kernel should not use the queues very first time.
+     * since the queue would be empty.
+     */
+    *use_queues_flag = 0;
+  }
+#endif /* KERENL_STUB */
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index ca79602c565..b2ca59d60cc 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -43,116 +43,111 @@ CCL_NAMESPACE_BEGIN
 ccl_device void kernel_direct_lighting(KernelGlobals *kg,
                                        ccl_local_param unsigned int *local_queue_atomics)
 {
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		*local_queue_atomics = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-	char enqueue_flag = 0;
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	ray_index = get_ray_index(kg, ray_index,
-	                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-	                          kernel_split_state.queue_data,
-	                          kernel_split_params.queue_size,
-	                          0);
-
-	if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
-		ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-		ShaderData *sd = kernel_split_sd(sd, ray_index);
-
-		/* direct lighting */
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    *local_queue_atomics = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+  char enqueue_flag = 0;
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  ray_index = get_ray_index(kg,
+                            ray_index,
+                            QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                            kernel_split_state.queue_data,
+                            kernel_split_params.queue_size,
+                            0);
+
+  if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+    ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
+
+    /* direct lighting */
 #ifdef __EMISSION__
-		bool flag = (kernel_data.integrator.use_direct_light &&
-		             (sd->flag & SD_BSDF_HAS_EVAL));
+    bool flag = (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL));
 
 # ifdef __BRANCHED_PATH__
-		if(flag && kernel_data.integrator.branched) {
-			flag = false;
-			enqueue_flag = 1;
-		}
-# endif  /* __BRANCHED_PATH__ */
+    if (flag && kernel_data.integrator.branched) {
+      flag = false;
+      enqueue_flag = 1;
+    }
+# endif /* __BRANCHED_PATH__ */
 
 # ifdef __SHADOW_TRICKS__
-		if(flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
-			flag = false;
-			enqueue_flag = 1;
-		}
-# endif  /* __SHADOW_TRICKS__ */
-
-		if(flag) {
-			/* Sample illumination from lights to find path contribution. */
-			float light_u, light_v;
-			path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-			float terminate = path_state_rng_light_termination(kg, state);
-
-			LightSample ls;
-			if(light_sample(kg,
-			                light_u, light_v,
-			                sd->time,
-			                sd->P,
-			                state->bounce,
-			                &ls)) {
-
-				Ray light_ray;
-				light_ray.time = sd->time;
-
-				BsdfEval L_light;
-				bool is_lamp;
-				if(direct_emission(kg,
-				                   sd,
-				                   AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
-				                   &ls,
-				                   state,
-				                   &light_ray,
-				                   &L_light,
-				                   &is_lamp,
-				                   terminate))
-				{
-					/* Write intermediate data to global memory to access from
-					 * the next kernel.
-					 */
-					kernel_split_state.light_ray[ray_index] = light_ray;
-					kernel_split_state.bsdf_eval[ray_index] = L_light;
-					kernel_split_state.is_lamp[ray_index] = is_lamp;
-					/* Mark ray state for next shadow kernel. */
-					enqueue_flag = 1;
-				}
-			}
-		}
-#endif  /* __EMISSION__ */
-	}
+    if (flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
+      flag = false;
+      enqueue_flag = 1;
+    }
+# endif /* __SHADOW_TRICKS__ */
+
+    if (flag) {
+      /* Sample illumination from lights to find path contribution. */
+      float light_u, light_v;
+      path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
+      float terminate = path_state_rng_light_termination(kg, state);
+
+      LightSample ls;
+      if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+
+        Ray light_ray;
+        light_ray.time = sd->time;
+
+        BsdfEval L_light;
+        bool is_lamp;
+        if (direct_emission(kg,
+                            sd,
+                            AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+                            &ls,
+                            state,
+                            &light_ray,
+                            &L_light,
+                            &is_lamp,
+                            terminate)) {
+          /* Write intermediate data to global memory to access from
+           * the next kernel.
+           */
+          kernel_split_state.light_ray[ray_index] = light_ray;
+          kernel_split_state.bsdf_eval[ray_index] = L_light;
+          kernel_split_state.is_lamp[ray_index] = is_lamp;
+          /* Mark ray state for next shadow kernel. */
+          enqueue_flag = 1;
+        }
+      }
+    }
+#endif /* __EMISSION__ */
+  }
 
 #ifdef __EMISSION__
-	/* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_SHADOW_RAY_CAST_DL_RAYS,
-	                        enqueue_flag,
-	                        kernel_split_params.queue_size,
-	                        local_queue_atomics,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
+  /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
+  enqueue_ray_index_local(ray_index,
+                          QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+                          enqueue_flag,
+                          kernel_split_params.queue_size,
+                          local_queue_atomics,
+                          kernel_split_state.queue_data,
+                          kernel_split_params.queue_index);
 #endif
 
 #ifdef __BRANCHED_PATH__
-	/* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays
-	 * this is the last kernel before next_iteration_setup that uses local atomics so we do this here
-	 */
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
-		*local_queue_atomics = 0;
-	}
-	ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
-	ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-	enqueue_ray_index_local(ray_index,
-	                        QUEUE_LIGHT_INDIRECT_ITER,
-	                        IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
-	                        kernel_split_params.queue_size,
-	                        local_queue_atomics,
-	                        kernel_split_state.queue_data,
-	                        kernel_split_params.queue_index);
-
-#endif  /* __BRANCHED_PATH__ */
+  /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays
+   * this is the last kernel before next_iteration_setup that uses local atomics so we do this here
+   */
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+  if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+    *local_queue_atomics = 0;
+  }
+  ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+  ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  enqueue_ray_index_local(
+      ray_index,
+      QUEUE_LIGHT_INDIRECT_ITER,
+      IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
+      kernel_split_params.queue_size,
+      local_queue_atomics,
+      kernel_split_state.queue_data,
+      kernel_split_params.queue_index);
+
+#endif /* __BRANCHED_PATH__ */
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index fb5bd3d48dd..45b839db05f 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -18,203 +18,210 @@ CCL_NAMESPACE_BEGIN
 #if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
 
-ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index)
+ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg,
+                                                                             int ray_index)
 {
-	kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+  kernel_split_branched_path_indirect_loop_init(kg, ray_index);
 
-	ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
+  ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
 }
 
-ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index)
+ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg,
+                                                                               int ray_index)
 {
-	SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+  SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
 
-	ShaderData *sd = kernel_split_sd(sd, ray_index);
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+  ShaderData *sd = kernel_split_sd(sd, ray_index);
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
 
-	/* GPU: no decoupled ray marching, scatter probalistically */
-	int num_samples = kernel_data.integrator.volume_samples;
-	float num_samples_inv = 1.0f/num_samples;
+  /* GPU: no decoupled ray marching, scatter probalistically */
+  int num_samples = kernel_data.integrator.volume_samples;
+  float num_samples_inv = 1.0f / num_samples;
 
-	Ray volume_ray = branched_state->ray;
-	volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX;
+  Ray volume_ray = branched_state->ray;
+  volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ?
+                     branched_state->isect.t :
+                     FLT_MAX;
 
-	bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
+  bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
 
-	for(int j = branched_state->next_sample; j < num_samples; j++) {
-		ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
-		*ps = branched_state->path_state;
+  for (int j = branched_state->next_sample; j < num_samples; j++) {
+    ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+    *ps = branched_state->path_state;
 
-		ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
-		*pray = branched_state->ray;
+    ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
+    *pray = branched_state->ray;
 
-		ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
-		*tp = branched_state->throughput * num_samples_inv;
+    ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+    *tp = branched_state->throughput * num_samples_inv;
 
-		/* branch RNG state */
-		path_state_branch(ps, j, num_samples);
+    /* branch RNG state */
+    path_state_branch(ps, j, num_samples);
 
-		/* integrate along volume segment with distance sampling */
-		VolumeIntegrateResult result = kernel_volume_integrate(
-			kg, ps, sd, &volume_ray, L, tp, heterogeneous);
+    /* integrate along volume segment with distance sampling */
+    VolumeIntegrateResult result = kernel_volume_integrate(
+        kg, ps, sd, &volume_ray, L, tp, heterogeneous);
 
 # ifdef __VOLUME_SCATTER__
-		if(result == VOLUME_PATH_SCATTERED) {
-			/* direct lighting */
-			kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
-
-			/* indirect light bounce */
-			if(!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
-				continue;
-			}
-
-			/* start the indirect path */
-			branched_state->next_closure = 0;
-			branched_state->next_sample = j+1;
-
-			/* Attempting to share too many samples is slow for volumes as it causes us to
-			 * loop here more and have many calls to kernel_volume_integrate which evaluates
-			 * shaders. The many expensive shader evaluations cause the work load to become
-			 * unbalanced and many threads to become idle in this kernel. Limiting the
-			 * number of shared samples here helps quite a lot.
-			 */
-			if(branched_state->shared_sample_count < 2) {
-				if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
-					continue;
-				}
-			}
-
-			return true;
-		}
+    if (result == VOLUME_PATH_SCATTERED) {
+      /* direct lighting */
+      kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
+
+      /* indirect light bounce */
+      if (!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
+        continue;
+      }
+
+      /* start the indirect path */
+      branched_state->next_closure = 0;
+      branched_state->next_sample = j + 1;
+
+      /* Attempting to share too many samples is slow for volumes as it causes us to
+       * loop here more and have many calls to kernel_volume_integrate which evaluates
+       * shaders. The many expensive shader evaluations cause the work load to become
+       * unbalanced and many threads to become idle in this kernel. Limiting the
+       * number of shared samples here helps quite a lot.
+       */
+      if (branched_state->shared_sample_count < 2) {
+        if (kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+          continue;
+        }
+      }
+
+      return true;
+    }
 # endif
-	}
+  }
 
-	branched_state->next_sample = num_samples;
+  branched_state->next_sample = num_samples;
 
-	branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
-	if(branched_state->waiting_on_shared_samples) {
-		return true;
-	}
+  branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+  if (branched_state->waiting_on_shared_samples) {
+    return true;
+  }
 
-	kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+  kernel_split_branched_path_indirect_loop_end(kg, ray_index);
 
-	/* todo: avoid this calculation using decoupled ray marching */
-	float3 throughput = kernel_split_state.throughput[ray_index];
-	kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
-	kernel_split_state.throughput[ray_index] = throughput;
+  /* todo: avoid this calculation using decoupled ray marching */
+  float3 throughput = kernel_split_state.throughput[ray_index];
+  kernel_volume_shadow(
+      kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
+  kernel_split_state.throughput[ray_index] = throughput;
 
-	return false;
+  return false;
 }
 
-#endif  /* __BRANCHED_PATH__ && __VOLUME__ */
+#endif /* __BRANCHED_PATH__ && __VOLUME__ */
 
 ccl_device void kernel_do_volume(KernelGlobals *kg)
 {
 #ifdef __VOLUME__
-	/* We will empty this queue in this kernel. */
-	if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
-		kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+  /* We will empty this queue in this kernel. */
+  if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+    kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
 # ifdef __BRANCHED_PATH__
-		kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
-# endif  /* __BRANCHED_PATH__ */
-	}
-
-	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
-
-	if(*kernel_split_params.use_queues_flag) {
-		ray_index = get_ray_index(kg, ray_index,
-		                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
-		                          kernel_split_state.queue_data,
-		                          kernel_split_params.queue_size,
-		                          1);
-	}
-
-	ccl_global char *ray_state = kernel_split_state.ray_state;
-
-	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
-	ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
-
-	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
-	   IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
-		ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
-		ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
-		ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
-		ShaderData *sd = kernel_split_sd(sd, ray_index);
-		ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
-
-		bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
-
-		/* Sanitize volume stack. */
-		if(!hit) {
-			kernel_volume_clean_stack(kg, state->volume_stack);
-		}
-		/* volume attenuation, emission, scatter */
-		if(state->volume_stack[0].shader != SHADER_NONE) {
-			Ray volume_ray = *ray;
-			volume_ray.t = (hit)? isect->t: FLT_MAX;
+    kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
+# endif /* __BRANCHED_PATH__ */
+  }
+
+  int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+
+  if (*kernel_split_params.use_queues_flag) {
+    ray_index = get_ray_index(kg,
+                              ray_index,
+                              QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                              kernel_split_state.queue_data,
+                              kernel_split_params.queue_size,
+                              1);
+  }
+
+  ccl_global char *ray_state = kernel_split_state.ray_state;
+
+  PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+  ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+
+  if (IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
+      IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+    ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+    ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+    ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+    ShaderData *sd = kernel_split_sd(sd, ray_index);
+    ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
+
+    bool hit = !IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
+
+    /* Sanitize volume stack. */
+    if (!hit) {
+      kernel_volume_clean_stack(kg, state->volume_stack);
+    }
+    /* volume attenuation, emission, scatter */
+    if (state->volume_stack[0].shader != SHADER_NONE) {
+      Ray volume_ray = *ray;
+      volume_ray.t = (hit) ? isect->t : FLT_MAX;
 
 # ifdef __BRANCHED_PATH__
-			if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
-# endif  /* __BRANCHED_PATH__ */
-				bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+      if (!kernel_data.integrator.branched ||
+          IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+# endif /* __BRANCHED_PATH__ */
+        bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
 
-				{
-					/* integrate along volume segment with distance sampling */
-					VolumeIntegrateResult result = kernel_volume_integrate(
-						kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+        {
+          /* integrate along volume segment with distance sampling */
+          VolumeIntegrateResult result = kernel_volume_integrate(
+              kg, state, sd, &volume_ray, L, throughput, heterogeneous);
 
 # ifdef __VOLUME_SCATTER__
-					if(result == VOLUME_PATH_SCATTERED) {
-						/* direct lighting */
-						kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
-
-						/* indirect light bounce */
-						if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
-							ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-						}
-						else {
-							kernel_split_path_end(kg, ray_index);
-						}
-					}
-# endif  /* __VOLUME_SCATTER__ */
-				}
+          if (result == VOLUME_PATH_SCATTERED) {
+            /* direct lighting */
+            kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+            /* indirect light bounce */
+            if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
+              ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+            }
+            else {
+              kernel_split_path_end(kg, ray_index);
+            }
+          }
+# endif /* __VOLUME_SCATTER__ */
+        }
 
 # ifdef __BRANCHED_PATH__
-			}
-			else {
-				kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
-
-				if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
-					ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
-				}
-			}
-# endif  /* __BRANCHED_PATH__ */
-		}
-	}
+      }
+      else {
+        kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
+
+        if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + } +# endif /* __BRANCHED_PATH__ */ + } + } # ifdef __BRANCHED_PATH__ - /* iter loop */ - ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), - QUEUE_VOLUME_INDIRECT_ITER, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); - - if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) { - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]); - path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]); - - if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - } -# endif /* __BRANCHED_PATH__ */ - -#endif /* __VOLUME__ */ + /* iter loop */ + ray_index = get_ray_index(kg, + ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), + QUEUE_VOLUME_INDIRECT_ITER, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + + if (IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) { + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]); + path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]); + + if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + } +# endif /* __BRANCHED_PATH__ */ + +#endif /* __VOLUME__ */ } - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_enqueue_inactive.h b/intern/cycles/kernel/split/kernel_enqueue_inactive.h index 496355bbc3a..31d2daef616 100644 --- a/intern/cycles/kernel/split/kernel_enqueue_inactive.h +++ b/intern/cycles/kernel/split/kernel_enqueue_inactive.h @@ -20,27 +20,27 @@ ccl_device void kernel_enqueue_inactive(KernelGlobals *kg, ccl_local_param unsigned int *local_queue_atomics) { #ifdef __BRANCHED_PATH__ - /* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */ - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - *local_queue_atomics = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + /* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. 
*/ + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - char enqueue_flag = 0; - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) { - enqueue_flag = 1; - } + char enqueue_flag = 0; + if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) { + enqueue_flag = 1; + } - enqueue_ray_index_local(ray_index, - QUEUE_INACTIVE_RAYS, - enqueue_flag, - kernel_split_params.queue_size, - local_queue_atomics, - kernel_split_state.queue_data, - kernel_split_params.queue_index); -#endif /* __BRANCHED_PATH__ */ + enqueue_ray_index_local(ray_index, + QUEUE_INACTIVE_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); +#endif /* __BRANCHED_PATH__ */ } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index f14eecec2f2..63bc5a8e0ce 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -54,120 +54,112 @@ CCL_NAMESPACE_BEGIN */ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( - KernelGlobals *kg, - ccl_local_param BackgroundAOLocals *locals) + KernelGlobals *kg, ccl_local_param BackgroundAOLocals *locals) { - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - locals->queue_atomics_bg = 0; - locals->queue_atomics_ao = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + locals->queue_atomics_bg = 0; + locals->queue_atomics_ao = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); #ifdef __AO__ - char enqueue_flag = 0; + char enqueue_flag = 0; #endif - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); #ifdef __COMPUTE_DEVICE_GPU__ - /* If we are executing on a GPU device, we exit all threads that are not - * required. - * - * If we are executing on a CPU device, then we need to keep all threads - * active since we have barrier() calls later in the kernel. CPU devices, - * expect all threads to execute barrier statement. - */ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } -#endif /* __COMPUTE_DEVICE_GPU__ */ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. 
+ */ + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif /* __COMPUTE_DEVICE_GPU__ */ #ifndef __COMPUTE_DEVICE_GPU__ - if(ray_index != QUEUE_EMPTY_SLOT) { + if (ray_index != QUEUE_EMPTY_SLOT) { #endif - ccl_global PathState *state = 0x0; - float3 throughput; - - ccl_global char *ray_state = kernel_split_state.ray_state; - ShaderData *sd = kernel_split_sd(sd, ray_index); - - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; - ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; - - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - - throughput = kernel_split_state.throughput[ray_index]; - state = &kernel_split_state.path_state[ray_index]; - - if(!kernel_path_shader_apply(kg, - sd, - state, - ray, - throughput, - emission_sd, - L, - buffer)) - { - kernel_split_path_end(kg, ray_index); - } - } - - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - /* Path termination. this is a strange place to put the termination, it's - * mainly due to the mixed in MIS that we use. gives too many unneeded - * shader evaluations, only need emission if we are going to terminate. - */ - float probability = path_state_continuation_probability(kg, state, throughput); - - if(probability == 0.0f) { - kernel_split_path_end(kg, ray_index); - } - else if(probability < 1.0f) { - float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); - if(terminate >= probability) { - kernel_split_path_end(kg, ray_index); - } - else { - kernel_split_state.throughput[ray_index] = throughput/probability; - } - } - - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - kernel_update_denoising_features(kg, sd, state, L); - } - } + ccl_global PathState *state = 0x0; + float3 throughput; + + ccl_global char *ray_state = kernel_split_state.ray_state; + ShaderData *sd = kernel_split_sd(sd, ray_index); + + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; + + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + + throughput = kernel_split_state.throughput[ray_index]; + state = &kernel_split_state.path_state[ray_index]; + + if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, buffer)) { + kernel_split_path_end(kg, ray_index); + } + } + + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + /* Path termination. this is a strange place to put the termination, it's + * mainly due to the mixed in MIS that we use. gives too many unneeded + * shader evaluations, only need emission if we are going to terminate. 
+ */ + float probability = path_state_continuation_probability(kg, state, throughput); + + if (probability == 0.0f) { + kernel_split_path_end(kg, ray_index); + } + else if (probability < 1.0f) { + float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); + if (terminate >= probability) { + kernel_split_path_end(kg, ray_index); + } + else { + kernel_split_state.throughput[ray_index] = throughput / probability; + } + } + + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + kernel_update_denoising_features(kg, sd, state, L); + } + } #ifdef __AO__ - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - /* ambient occlusion */ - if(kernel_data.integrator.use_ambient_occlusion) { - enqueue_flag = 1; - } - } -#endif /* __AO__ */ + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + /* ambient occlusion */ + if (kernel_data.integrator.use_ambient_occlusion) { + enqueue_flag = 1; + } + } +#endif /* __AO__ */ #ifndef __COMPUTE_DEVICE_GPU__ - } + } #endif #ifdef __AO__ - /* Enqueue to-shadow-ray-cast rays. */ - enqueue_ray_index_local(ray_index, - QUEUE_SHADOW_RAY_CAST_AO_RAYS, - enqueue_flag, - kernel_split_params.queue_size, - &locals->queue_atomics_ao, - kernel_split_state.queue_data, - kernel_split_params.queue_index); + /* Enqueue to-shadow-ray-cast rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_SHADOW_RAY_CAST_AO_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + &locals->queue_atomics_ao, + kernel_split_state.queue_data, + kernel_split_params.queue_index); #endif } diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h index 4cf88a02590..b1c65f61e2c 100644 --- a/intern/cycles/kernel/split/kernel_indirect_background.h +++ b/intern/cycles/kernel/split/kernel_indirect_background.h @@ -18,48 +18,50 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_indirect_background(KernelGlobals *kg) { - ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global char *ray_state = kernel_split_state.ray_state; - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - int ray_index; + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + int ray_index; - if(kernel_data.integrator.ao_bounces != INT_MAX) { - ray_index = get_ray_index(kg, thread_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); + if (kernel_data.integrator.ao_bounces != INT_MAX) { + ray_index = get_ray_index(kg, + thread_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); - if(ray_index != QUEUE_EMPTY_SLOT) { - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - if(path_state_ao_bounce(kg, state)) { - kernel_split_path_end(kg, ray_index); - } - } - } - } + if (ray_index != QUEUE_EMPTY_SLOT) { + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + if (path_state_ao_bounce(kg, state)) { + kernel_split_path_end(kg, ray_index); + } + } + } + } - ray_index = get_ray_index(kg, thread_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); + ray_index = get_ray_index(kg, + thread_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); - 
if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } - if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - float3 throughput = kernel_split_state.throughput[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); + if (IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + float3 throughput = kernel_split_state.throughput[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); - kernel_path_background(kg, state, ray, throughput, sd, L); - kernel_split_path_end(kg, ray_index); - } + kernel_path_background(kg, state, ray, throughput, sd, L); + kernel_split_path_end(kg, ray_index); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h index 236c94e983c..3f48f8d6f56 100644 --- a/intern/cycles/kernel/split/kernel_indirect_subsurface.h +++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h @@ -18,53 +18,50 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_indirect_subsurface(KernelGlobals *kg) { - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(thread_index == 0) { - /* We will empty both queues in this kernel. */ - kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; - kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; - } + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (thread_index == 0) { + /* We will empty both queues in this kernel. 
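+ * (Sketch of the effect: thread 0 zeroes both queue_index counters below;
+ * that zero count is what later kernels read as an empty queue before it
+ * is refilled.)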
*/
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+ }
- int ray_index;
- get_ray_index(kg, thread_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
- ray_index = get_ray_index(kg, thread_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 1);
+ int ray_index;
+ get_ray_index(kg,
+ thread_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+ ray_index = get_ray_index(kg,
+ thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
#ifdef __SUBSURFACE__
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- ccl_global char *ray_state = kernel_split_state.ray_state;
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- /* Trace indirect subsurface rays by restarting the loop. this uses less
- * stack memory than invoking kernel_path_indirect.
- */
- if(ss_indirect->num_rays) {
- kernel_path_subsurface_setup_indirect(kg,
- ss_indirect,
- state,
- ray,
- L,
- throughput);
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- }
-#endif /* __SUBSURFACE__ */
+ /* Trace indirect subsurface rays by restarting the loop. This uses less
+ * stack memory than invoking kernel_path_indirect.
+ */
+ if (ss_indirect->num_rays) {
+ kernel_path_subsurface_setup_indirect(kg, ss_indirect, state, ray, L, throughput);
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+#endif /* __SUBSURFACE__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index 5b2c554b922..7ecb099208d 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -23,45 +23,45 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_lamp_emission(KernelGlobals *kg)
{
#ifndef __VOLUME__
- /* We will empty this queue in this kernel. */
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- }
+ /* We will empty this queue in this kernel. */
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ }
#endif
- /* Fetch use_queues_flag.
*/ + char local_use_queues_flag = *kernel_split_params.use_queues_flag; + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(local_use_queues_flag) { - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (local_use_queues_flag) { + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, #ifndef __VOLUME__ - 1 + 1 #else - 0 + 0 #endif - ); - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } - } + ); + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) - { - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) { + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - float3 throughput = kernel_split_state.throughput[ray_index]; - Ray ray = kernel_split_state.ray[ray_index]; - ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); + float3 throughput = kernel_split_state.throughput[ray_index]; + Ray ray = kernel_split_state.ray[ray_index]; + ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); - kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L); - } + kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h index e388955f1af..781ce869374 100644 --- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -48,217 +48,211 @@ CCL_NAMESPACE_BEGIN #ifdef __BRANCHED_PATH__ ccl_device_inline void kernel_split_branched_indirect_light_init(KernelGlobals *kg, int ray_index) { - kernel_split_branched_path_indirect_loop_init(kg, ray_index); + kernel_split_branched_path_indirect_loop_init(kg, ray_index); - ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT); + ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT); } ccl_device void kernel_split_branched_transparent_bounce(KernelGlobals *kg, int ray_index) { - ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; # ifdef __VOLUME__ - if(!(sd->flag & SD_HAS_ONLY_VOLUME)) { + if (!(sd->flag & SD_HAS_ONLY_VOLUME)) { # endif - /* continue in case of transparency */ - *throughput *= 
shader_bsdf_transparency(kg, sd);
+ /* continue in case of transparency */
+ *throughput *= shader_bsdf_transparency(kg, sd);
- if(is_zero(*throughput)) {
- kernel_split_path_end(kg, ray_index);
- return;
- }
+ if (is_zero(*throughput)) {
+ kernel_split_path_end(kg, ray_index);
+ return;
+ }
- /* Update Path State */
- path_state_next(kg, state, LABEL_TRANSPARENT);
+ /* Update Path State */
+ path_state_next(kg, state, LABEL_TRANSPARENT);
# ifdef __VOLUME__
- }
- else {
- if(!path_state_volume_next(kg, state)) {
- kernel_split_path_end(kg, ray_index);
- return;
- }
- }
+ }
+ else {
+ if (!path_state_volume_next(kg, state)) {
+ kernel_split_path_end(kg, ray_index);
+ return;
+ }
+ }
# endif
- ray->P = ray_offset(sd->P, -sd->Ng);
- ray->t -= sd->ray_length; /* clipping works through transparent */
+ ray->P = ray_offset(sd->P, -sd->Ng);
+ ray->t -= sd->ray_length; /* clipping works through transparent */
# ifdef __RAY_DIFFERENTIALS__
- ray->dP = sd->dP;
- ray->dD.dx = -sd->dI.dx;
- ray->dD.dy = -sd->dI.dy;
-# endif /* __RAY_DIFFERENTIALS__ */
+ ray->dP = sd->dP;
+ ray->dD.dx = -sd->dI.dx;
+ ray->dD.dy = -sd->dI.dy;
+# endif /* __RAY_DIFFERENTIALS__ */
# ifdef __VOLUME__
- /* enter/exit volume */
- kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
-# endif /* __VOLUME__ */
+ /* enter/exit volume */
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+# endif /* __VOLUME__ */
}
-#endif /* __BRANCHED_PATH__ */
+#endif /* __BRANCHED_PATH__ */
ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
- /* If we are here, then it means that scene-intersect kernel
- * has already been executed atleast once. From the next time,
- * scene-intersect kernel may operate on queues to fetch ray index
- */
- *kernel_split_params.use_queues_flag = 1;
-
- /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
- * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the
- * previous kernel.
- */
- kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
- kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
- }
-
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- ray_index = get_ray_index(kg, ray_index,
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
-
- ccl_global char *ray_state = kernel_split_state.ray_state;
-
-# ifdef __VOLUME__
- /* Reactivate only volume rays here, most surface work was skipped. */
- if(IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE);
- }
-# endif
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ /* If we are here, it means that the scene-intersect kernel
+ * has already been executed at least once. From now on, the
+ * scene-intersect kernel may operate on queues to fetch ray indices.
+ */
+ *kernel_split_params.use_queues_flag = 1;
+
+ /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and
+ * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the
+ * previous kernel.
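+ * (Only the two counters are reset here; a queue_index of zero is what
+ * marks a queue as empty, and the enqueue helpers below refill the slots
+ * with atomic offsets.)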
+ */ + kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0; + kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0; + } + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + + ccl_global char *ray_state = kernel_split_state.ray_state; + +#ifdef __VOLUME__ + /* Reactivate only volume rays here, most surface work was skipped. */ + if (IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE); + } +#endif - bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE); - if(active) { - ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE); + if (active) { + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; #ifdef __BRANCHED_PATH__ - if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { + if (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { #endif - /* Compute direct lighting and next bounce. */ - if(!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) { - kernel_split_path_end(kg, ray_index); - } + /* Compute direct lighting and next bounce. */ + if (!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) { + kernel_split_path_end(kg, ray_index); + } #ifdef __BRANCHED_PATH__ - } - else if(sd->flag & SD_HAS_ONLY_VOLUME) { - kernel_split_branched_transparent_bounce(kg, ray_index); - } - else { - kernel_split_branched_indirect_light_init(kg, ray_index); - - if(kernel_split_branched_path_surface_indirect_light_iter(kg, - ray_index, - 1.0f, - kernel_split_sd(branched_state_sd, ray_index), - true, - true)) - { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - else { - kernel_split_branched_path_indirect_loop_end(kg, ray_index); - kernel_split_branched_transparent_bounce(kg, ray_index); - } - } -#endif /* __BRANCHED_PATH__ */ - } - - /* Enqueue RAY_UPDATE_BUFFER rays. 
*/ - enqueue_ray_index_local(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active, - kernel_split_params.queue_size, - local_queue_atomics, - kernel_split_state.queue_data, - kernel_split_params.queue_index); + } + else if (sd->flag & SD_HAS_ONLY_VOLUME) { + kernel_split_branched_transparent_bounce(kg, ray_index); + } + else { + kernel_split_branched_indirect_light_init(kg, ray_index); + + if (kernel_split_branched_path_surface_indirect_light_iter( + kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + else { + kernel_split_branched_path_indirect_loop_end(kg, ray_index); + kernel_split_branched_transparent_bounce(kg, ray_index); + } + } +#endif /* __BRANCHED_PATH__ */ + } + + /* Enqueue RAY_UPDATE_BUFFER rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active, + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); #ifdef __BRANCHED_PATH__ - /* iter loop */ - if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { - kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0; - } - - ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), - QUEUE_LIGHT_INDIRECT_ITER, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); - - if(IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) { - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - - path_radiance_sum_indirect(L); - path_radiance_reset_indirect(L); - - if(kernel_split_branched_path_surface_indirect_light_iter(kg, - ray_index, - 1.0f, - kernel_split_sd(branched_state_sd, ray_index), - true, - true)) - { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - else { - kernel_split_branched_path_indirect_loop_end(kg, ray_index); - kernel_split_branched_transparent_bounce(kg, ray_index); - } - } + /* iter loop */ + if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0; + } + + ray_index = get_ray_index(kg, + ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), + QUEUE_LIGHT_INDIRECT_ITER, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + + if (IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) { + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); + + if (kernel_split_branched_path_surface_indirect_light_iter( + kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + else { + kernel_split_branched_path_indirect_loop_end(kg, ray_index); + kernel_split_branched_transparent_bounce(kg, ray_index); + } + } # ifdef __VOLUME__ - /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */ - ccl_barrier(CCL_LOCAL_MEM_FENCE); - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - *local_queue_atomics = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - enqueue_ray_index_local(ray_index, - QUEUE_VOLUME_INDIRECT_ITER, - 
IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-
-# endif /* __VOLUME__ */
+ /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(
+ ray_index,
+ QUEUE_VOLUME_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+# endif /* __VOLUME__ */
# ifdef __SUBSURFACE__
- /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
-
- ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- enqueue_ray_index_local(ray_index,
- QUEUE_SUBSURFACE_INDIRECT_ITER,
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-# endif /* __SUBSURFACE__ */
-#endif /* __BRANCHED_PATH__ */
+ /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(
+ ray_index,
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+# endif /* __SUBSURFACE__ */
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index fdd54225b07..3faa3208341 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -21,61 +21,59 @@ CCL_NAMESPACE_BEGIN
*
* Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
*/
-ccl_device void kernel_path_init(KernelGlobals *kg) {
- int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
+ccl_device void kernel_path_init(KernelGlobals *kg)
+{
+ int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
- /* This is the first assignment to ray_state;
- * So we dont use ASSIGN_RAY_STATE macro.
- */
- kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
+ /* This is the first assignment to ray_state,
+ * so we don't use the ASSIGN_RAY_STATE macro.
+ */
+ kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
- /* Get work.
*/ + ccl_global uint *work_pools = kernel_split_params.work_pools; + uint total_work_size = kernel_split_params.total_work_size; + uint work_index; - if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) { - /* No more work, mark ray as inactive */ - kernel_split_state.ray_state[ray_index] = RAY_INACTIVE; + if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) { + /* No more work, mark ray as inactive */ + kernel_split_state.ray_state[ray_index] = RAY_INACTIVE; - return; - } + return; + } - ccl_global WorkTile *tile = &kernel_split_params.tile; - uint x, y, sample; - get_work_pixel(tile, work_index, &x, &y, &sample); + ccl_global WorkTile *tile = &kernel_split_params.tile; + uint x, y, sample; + get_work_pixel(tile, work_index, &x, &y, &sample); - /* Store buffer offset for writing to passes. */ - uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride; - kernel_split_state.buffer_offset[ray_index] = buffer_offset; + /* Store buffer offset for writing to passes. */ + uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride; + kernel_split_state.buffer_offset[ray_index] = buffer_offset; - /* Initialize random numbers and ray. */ - uint rng_hash; - kernel_path_trace_setup(kg, - sample, - x, y, - &rng_hash, - &kernel_split_state.ray[ray_index]); + /* Initialize random numbers and ray. */ + uint rng_hash; + kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &kernel_split_state.ray[ray_index]); - if(kernel_split_state.ray[ray_index].t != 0.0f) { - /* Initialize throughput, path radiance, Ray, PathState; - * These rays proceed with path-iteration. - */ - kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f); - path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass); - path_state_init(kg, - AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), - &kernel_split_state.path_state[ray_index], - rng_hash, - sample, - &kernel_split_state.ray[ray_index]); + if (kernel_split_state.ray[ray_index].t != 0.0f) { + /* Initialize throughput, path radiance, Ray, PathState; + * These rays proceed with path-iteration. + */ + kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f); + path_radiance_init(&kernel_split_state.path_radiance[ray_index], + kernel_data.film.use_light_pass); + path_state_init(kg, + AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), + &kernel_split_state.path_state[ray_index], + rng_hash, + sample, + &kernel_split_state.ray[ray_index]); #ifdef __SUBSURFACE__ - kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); + kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); #endif - } - else { - ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE); - } + } + else { + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h index df67fabab19..2db87f7a671 100644 --- a/intern/cycles/kernel/split/kernel_queue_enqueue.h +++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h @@ -35,58 +35,53 @@ CCL_NAMESPACE_BEGIN * - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with * RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays. 
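*
* In short, enqueuing is two-level: each thread first claims a slot within
* its work-group (get_local_queue_index), thread 0 then reserves a
* contiguous block in the global queue (get_global_per_queue_offset), and
* each thread finally writes its ray_index at its global slot
* (get_global_queue_index).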
*/ -ccl_device void kernel_queue_enqueue(KernelGlobals *kg, - ccl_local_param QueueEnqueueLocals *locals) +ccl_device void kernel_queue_enqueue(KernelGlobals *kg, ccl_local_param QueueEnqueueLocals *locals) { - /* We have only 2 cases (Hit/Not-Hit) */ - int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + /* We have only 2 cases (Hit/Not-Hit) */ + int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(lidx == 0) { - locals->queue_atomics[0] = 0; - locals->queue_atomics[1] = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + if (lidx == 0) { + locals->queue_atomics[0] = 0; + locals->queue_atomics[1] = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int queue_number = -1; + int queue_number = -1; - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) { - queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; - } - else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) { - queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS; - } + if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) { + queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; + } + else if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) { + queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS; + } - unsigned int my_lqidx; - if(queue_number != -1) { - my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics); - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + unsigned int my_lqidx; + if (queue_number != -1) { + my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics); + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - if(lidx == 0) { - locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = - get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS, - locals->queue_atomics, - kernel_split_params.queue_index); - locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = - get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - locals->queue_atomics, - kernel_split_params.queue_index); - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + if (lidx == 0) { + locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = get_global_per_queue_offset( + QUEUE_ACTIVE_AND_REGENERATED_RAYS, locals->queue_atomics, kernel_split_params.queue_index); + locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = get_global_per_queue_offset( + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + locals->queue_atomics, + kernel_split_params.queue_index); + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - unsigned int my_gqidx; - if(queue_number != -1) { - my_gqidx = get_global_queue_index(queue_number, - kernel_split_params.queue_size, - my_lqidx, - locals->queue_atomics); - kernel_split_state.queue_data[my_gqidx] = ray_index; - } + unsigned int my_gqidx; + if (queue_number != -1) { + my_gqidx = get_global_queue_index( + queue_number, 
kernel_split_params.queue_size, my_lqidx, locals->queue_atomics); + kernel_split_state.queue_data[my_gqidx] = ray_index; + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h index f5378bc172b..5fef3e045f8 100644 --- a/intern/cycles/kernel/split/kernel_scene_intersect.h +++ b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -25,55 +25,56 @@ CCL_NAMESPACE_BEGIN */ ccl_device void kernel_scene_intersect(KernelGlobals *kg) { - /* Fetch use_queues_flag */ - char local_use_queues_flag = *kernel_split_params.use_queues_flag; - ccl_barrier(CCL_LOCAL_MEM_FENCE); + /* Fetch use_queues_flag */ + char local_use_queues_flag = *kernel_split_params.use_queues_flag; + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(local_use_queues_flag) { - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (local_use_queues_flag) { + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } - } + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } - /* All regenerated rays become active here */ - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) { + /* All regenerated rays become active here */ + if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) { #ifdef __BRANCHED_PATH__ - if(kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) { - kernel_split_path_end(kg, ray_index); - } - else -#endif /* __BRANCHED_PATH__ */ - { - ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); - } - } + if (kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) { + kernel_split_path_end(kg, ray_index); + } + else +#endif /* __BRANCHED_PATH__ */ + { + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); + } + } - if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { - return; - } + if (!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { + return; + } - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - Ray ray = kernel_split_state.ray[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + Ray ray = kernel_split_state.ray[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - Intersection isect; - bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L); - kernel_split_state.isect[ray_index] = isect; + Intersection isect; + bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L); + kernel_split_state.isect[ray_index] = isect; - if(!hit) { - /* Change the state of rays that hit the background; - * These rays undergo special processing in the - * background_bufferUpdate kernel. - */ - ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND); - } + if (!hit) { + /* Change the state of rays that hit the background; + * These rays undergo special processing in the + * background_bufferUpdate kernel. 
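+ * (State sketch: RAY_REGENERATED was promoted to RAY_ACTIVE above, and a
+ * miss here demotes RAY_ACTIVE to RAY_HIT_BACKGROUND for that kernel.)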
+ */
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 2bc2d300699..8e39c9797e5 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -22,45 +22,46 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_shader_eval(KernelGlobals *kg)
{
- int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- /* Sorting on cuda split is not implemented */
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ /* Sorting on the CUDA split kernel is not implemented. */
#ifdef __KERNEL_CUDA__
- int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
#else
- int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
+ int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
#endif
- if(ray_index >= queue_index) {
- return;
- }
- ray_index = get_ray_index(kg, ray_index,
+ if (ray_index >= queue_index) {
+ return;
+ }
+ ray_index = get_ray_index(kg,
+ ray_index,
#ifdef __KERNEL_CUDA__
- QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
#else
- QUEUE_SHADER_SORTED_RAYS,
+ QUEUE_SHADER_SORTED_RAYS,
#endif
- kernel_split_state.queue_data,
- kernel_split_params.queue_size,
- 0);
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
+ if (ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
- ccl_global char *ray_state = kernel_split_state.ray_state;
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag);
+ shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag);
#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched) {
- shader_merge_closures(kernel_split_sd(sd, ray_index));
- }
- else
+ if (kernel_data.integrator.branched) {
+ shader_merge_closures(kernel_split_sd(sd, ray_index));
+ }
+ else
#endif
- {
- shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
- }
- }
+ {
+ shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
+ }
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_setup.h b/intern/cycles/kernel/split/kernel_shader_setup.h
index ea3ec2ec83f..da332db2c98 100644
--- a/intern/cycles/kernel/split/kernel_shader_setup.h
+++ b/intern/cycles/kernel/split/kernel_shader_setup.h
@@ -25,54 +25,52 @@ CCL_NAMESPACE_BEGIN
ccl_device void kernel_shader_setup(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
- /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ /* Enqueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
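+ * (The enqueue itself happens below via enqueue_ray_index_local();
+ * enqueue_flag is 1 only for rays in the RAY_TO_REGENERATE state.)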
*/ + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS]; - if(ray_index >= queue_index) { - return; - } - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS]; + if (ray_index >= queue_index) { + return; + } + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } - char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0; - enqueue_ray_index_local(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - enqueue_flag, - kernel_split_params.queue_size, - local_queue_atomics, - kernel_split_state.queue_data, - kernel_split_params.queue_index); + char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : + 0; + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); - /* Continue on with shader evaluation. */ - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { - Intersection isect = kernel_split_state.isect[ray_index]; - Ray ray = kernel_split_state.ray[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); + /* Continue on with shader evaluation. 
*/ + if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { + Intersection isect = kernel_split_state.isect[ray_index]; + Ray ray = kernel_split_state.ray[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); - shader_setup_from_ray(kg, - sd, - &isect, - &ray); + shader_setup_from_ray(kg, sd, &isect, &ray); #ifdef __VOLUME__ - if(sd->flag & SD_HAS_ONLY_VOLUME) { - ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME); - } + if (sd->flag & SD_HAS_ONLY_VOLUME) { + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME); + } #endif - } - + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h index 666355de334..95d33a42014 100644 --- a/intern/cycles/kernel/split/kernel_shader_sort.h +++ b/intern/cycles/kernel/split/kernel_shader_sort.h @@ -16,82 +16,82 @@ CCL_NAMESPACE_BEGIN - -ccl_device void kernel_shader_sort(KernelGlobals *kg, - ccl_local_param ShaderSortLocals *locals) +ccl_device void kernel_shader_sort(KernelGlobals *kg, ccl_local_param ShaderSortLocals *locals) { #ifndef __KERNEL_CUDA__ - int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS]; - if(tid == 0) { - kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize; - } + int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS]; + if (tid == 0) { + kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize; + } - uint offset = (tid/SHADER_SORT_LOCAL_SIZE)*SHADER_SORT_BLOCK_SIZE; - if(offset >= qsize) { - return; - } + uint offset = (tid / SHADER_SORT_LOCAL_SIZE) * SHADER_SORT_BLOCK_SIZE; + if (offset >= qsize) { + return; + } - int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); - uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size); - uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size); - ccl_local uint *local_value = &locals->local_value[0]; - ccl_local ushort *local_index = &locals->local_index[0]; + int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); + uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size); + uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size); + ccl_local uint *local_value = &locals->local_value[0]; + ccl_local ushort *local_index = &locals->local_index[0]; - /* copy to local memory */ - for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { - uint idx = offset + i + lid; - uint add = input + idx; - uint value = (~0); - if(idx < qsize) { - int ray_index = kernel_split_state.queue_data[add]; - bool valid = (ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); - if(valid) { - value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK; - } - } - local_value[i + lid] = value; - local_index[i + lid] = i + lid; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + /* copy to local memory */ + for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { + uint idx = offset + i + lid; + uint add = input + idx; + uint value = (~0); + if (idx < qsize) { + int ray_index = kernel_split_state.queue_data[add]; + bool valid = (ray_index != QUEUE_EMPTY_SLOT) && + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); + if (valid) { + value = kernel_split_sd(sd, ray_index)->shader 
& SHADER_MASK; + } + } + local_value[i + lid] = value; + local_index[i + lid] = i + lid; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - /* skip sorting for cpu split kernel */ + /* skip sorting for cpu split kernel */ # ifdef __KERNEL_OPENCL__ - /* bitonic sort */ - for(uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) { - for(uint inc = length; inc > 0; inc >>= 1) { - for(uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) { - uint i = lid + ii; - bool direction = ((i & (length << 1)) != 0); - uint j = i ^ inc; - ushort ioff = local_index[i]; - ushort joff = local_index[j]; - uint iKey = local_value[ioff]; - uint jKey = local_value[joff]; - bool smaller = (jKey < iKey) || (jKey == iKey && j < i); - bool swap = smaller ^ (j < i) ^ direction; - ccl_barrier(CCL_LOCAL_MEM_FENCE); - local_index[i] = (swap) ? joff : ioff; - local_index[j] = (swap) ? ioff : joff; - ccl_barrier(CCL_LOCAL_MEM_FENCE); - } - } - } -# endif /* __KERNEL_OPENCL__ */ + /* bitonic sort */ + for (uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) { + for (uint inc = length; inc > 0; inc >>= 1) { + for (uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) { + uint i = lid + ii; + bool direction = ((i & (length << 1)) != 0); + uint j = i ^ inc; + ushort ioff = local_index[i]; + ushort joff = local_index[j]; + uint iKey = local_value[ioff]; + uint jKey = local_value[joff]; + bool smaller = (jKey < iKey) || (jKey == iKey && j < i); + bool swap = smaller ^ (j < i) ^ direction; + ccl_barrier(CCL_LOCAL_MEM_FENCE); + local_index[i] = (swap) ? joff : ioff; + local_index[j] = (swap) ? ioff : joff; + ccl_barrier(CCL_LOCAL_MEM_FENCE); + } + } + } +# endif /* __KERNEL_OPENCL__ */ - /* copy to destination */ - for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { - uint idx = offset + i + lid; - uint lidx = local_index[i + lid]; - uint outi = output + idx; - uint ini = input + offset + lidx; - uint value = local_value[lidx]; - if(idx < qsize) { - kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT : kernel_split_state.queue_data[ini]; - } - } -#endif /* __KERNEL_CUDA__ */ + /* copy to destination */ + for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { + uint idx = offset + i + lid; + uint lidx = local_index[i + lid]; + uint outi = output + idx; + uint ini = input + offset + lidx; + uint value = local_value[lidx]; + if (idx < qsize) { + kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT : + kernel_split_state.queue_data[ini]; + } + } +#endif /* __KERNEL_CUDA__ */ } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h index fb08112503a..5d772fc597b 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h @@ -19,35 +19,40 @@ CCL_NAMESPACE_BEGIN /* Shadow ray cast for AO. 
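* (Each thread fetches its ray from QUEUE_SHADOW_RAY_CAST_AO_RAYS and
* accumulates occlusion through kernel_path_ao(), or through
* kernel_branched_path_ao() for the branched integrator.)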
*/ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg) { - unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; - ccl_barrier(CCL_LOCAL_MEM_FENCE); + unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = QUEUE_EMPTY_SLOT; - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(thread_index < ao_queue_length) { - ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, - kernel_split_state.queue_data, kernel_split_params.queue_size, 1); - } + int ray_index = QUEUE_EMPTY_SLOT; + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (thread_index < ao_queue_length) { + ray_index = get_ray_index(kg, + thread_index, + QUEUE_SHADOW_RAY_CAST_AO_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + } - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } - ShaderData *sd = kernel_split_sd(sd, ray_index); - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - float3 throughput = kernel_split_state.throughput[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + float3 throughput = kernel_split_state.throughput[ray_index]; #ifdef __BRANCHED_PATH__ - if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { + if (!kernel_data.integrator.branched || + IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { #endif - kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd)); + kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd)); #ifdef __BRANCHED_PATH__ - } - else { - kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput); - } + } + else { + kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput); + } #endif } diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h index da072fd5f1a..82990ce9fae 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h @@ -19,89 +19,80 @@ CCL_NAMESPACE_BEGIN /* Shadow ray cast for direct visible light. 
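* (A blocked shadow ray contributes only to the total-light pass via
* path_radiance_accum_total_light(); an unblocked one accumulates L_light
* scaled by the returned shadow attenuation in path_radiance_accum_light().)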
*/ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) { - unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; - ccl_barrier(CCL_LOCAL_MEM_FENCE); + unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = QUEUE_EMPTY_SLOT; - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(thread_index < dl_queue_length) { - ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_DL_RAYS, - kernel_split_state.queue_data, kernel_split_params.queue_size, 1); - } + int ray_index = QUEUE_EMPTY_SLOT; + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (thread_index < dl_queue_length) { + ray_index = get_ray_index(kg, + thread_index, + QUEUE_SHADOW_RAY_CAST_DL_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + } #ifdef __BRANCHED_PATH__ - /* TODO(mai): move this somewhere else? */ - if(thread_index == 0) { - /* Clear QUEUE_INACTIVE_RAYS before next kernel. */ - kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0; - } -#endif /* __BRANCHED_PATH__ */ + /* TODO(mai): move this somewhere else? */ + if (thread_index == 0) { + /* Clear QUEUE_INACTIVE_RAYS before next kernel. */ + kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0; + } +#endif /* __BRANCHED_PATH__ */ - if(ray_index == QUEUE_EMPTY_SLOT) - return; + if (ray_index == QUEUE_EMPTY_SLOT) + return; - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - Ray ray = kernel_split_state.light_ray[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); - float3 throughput = kernel_split_state.throughput[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + Ray ray = kernel_split_state.light_ray[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + float3 throughput = kernel_split_state.throughput[ray_index]; - BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index]; - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - bool is_lamp = kernel_split_state.is_lamp[ray_index]; + BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + bool is_lamp = kernel_split_state.is_lamp[ray_index]; -# if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__) - bool use_branched = false; - int all = 0; +#if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__) + bool use_branched = false; + int all = 0; - if(state->flag & PATH_RAY_SHADOW_CATCHER) { - use_branched = true; - all = 1; - } -# if defined(__BRANCHED_PATH__) - else if(kernel_data.integrator.branched) { - use_branched = true; + if (state->flag & PATH_RAY_SHADOW_CATCHER) { + use_branched = true; + all = 1; + } +# if defined(__BRANCHED_PATH__) + else if (kernel_data.integrator.branched) { + use_branched = true; - if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { - all = (kernel_data.integrator.sample_all_lights_indirect); - } - else - { - all = (kernel_data.integrator.sample_all_lights_direct); - } - } -# endif /* __BRANCHED_PATH__ */ + if (IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { + all = (kernel_data.integrator.sample_all_lights_indirect); + } + else { + all = 
(kernel_data.integrator.sample_all_lights_direct); + } + } +# endif /* __BRANCHED_PATH__ */ - if(use_branched) { - kernel_branched_path_surface_connect_light(kg, - sd, - emission_sd, - state, - throughput, - 1.0f, - L, - all); - } - else -# endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)*/ - { - /* trace shadow ray */ - float3 shadow; + if (use_branched) { + kernel_branched_path_surface_connect_light( + kg, sd, emission_sd, state, throughput, 1.0f, L, all); + } + else +#endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)*/ + { + /* trace shadow ray */ + float3 shadow; - if(!shadow_blocked(kg, - sd, - emission_sd, - state, - &ray, - &shadow)) - { - /* accumulate */ - path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); - } - else { - path_radiance_accum_total_light(L, state, throughput, &L_light); - } - } + if (!shadow_blocked(kg, sd, emission_sd, state, &ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); + } + else { + path_radiance_accum_total_light(L, state, throughput, &L_light); + } + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index 4b86696691a..384bc952460 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __KERNEL_SPLIT_H__ -#define __KERNEL_SPLIT_H__ +#ifndef __KERNEL_SPLIT_H__ +#define __KERNEL_SPLIT_H__ #include "kernel/kernel_math.h" #include "kernel/kernel_types.h" @@ -57,47 +57,48 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index) { - ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global char *ray_state = kernel_split_state.ray_state; #ifdef __BRANCHED_PATH__ # ifdef __SUBSURFACE__ - ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; - - if(ss_indirect->num_rays) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); - } - else -# endif /* __SUBSURFACE__ */ - if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) { - int orig_ray = kernel_split_state.branched_state[ray_index].original_ray; - - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray]; - - path_radiance_sum_indirect(L); - path_radiance_accum_sample(orig_ray_L, L); - - atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count); - - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); - } - else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER); - } - else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER); - } - else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER); - } - else { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); - } + ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; + + if (ss_indirect->num_rays) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } + else +# endif /* __SUBSURFACE__ */ + if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) { + int 
orig_ray = kernel_split_state.branched_state[ray_index].original_ray; + + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray]; + + path_radiance_sum_indirect(L); + path_radiance_accum_sample(orig_ray_L, L); + + atomic_fetch_and_dec_uint32( + (ccl_global uint *)&kernel_split_state.branched_state[orig_ray].shared_sample_count); + + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); + } + else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER); + } + else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER); + } + else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER); + } + else { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } #else - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); #endif } CCL_NAMESPACE_END -#endif /* __KERNEL_SPLIT_H__ */ +#endif /* __KERNEL_SPLIT_H__ */ diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h index 3f6b3977d79..433b1221a37 100644 --- a/intern/cycles/kernel/split/kernel_split_data.h +++ b/intern/cycles/kernel/split/kernel_split_data.h @@ -24,22 +24,22 @@ CCL_NAMESPACE_BEGIN ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements) { - (void) kg; /* Unused on CPU. */ + (void)kg; /* Unused on CPU. */ - uint64_t size = 0; -#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16) - size = size SPLIT_DATA_ENTRIES; + uint64_t size = 0; +#define SPLIT_DATA_ENTRY(type, name, num) +align_up(num_elements *num * sizeof(type), 16) + size = size SPLIT_DATA_ENTRIES; #undef SPLIT_DATA_ENTRY - uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1); + uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1); #ifdef __BRANCHED_PATH__ - size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); + size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); #endif - size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); + size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); - return size; + return size; } ccl_device_inline void split_data_init(KernelGlobals *kg, @@ -48,28 +48,29 @@ ccl_device_inline void split_data_init(KernelGlobals *kg, ccl_global void *data, ccl_global char *ray_state) { - (void) kg; /* Unused on CPU. */ + (void)kg; /* Unused on CPU. 
*/ - ccl_global char *p = (ccl_global char*)data; + ccl_global char *p = (ccl_global char *)data; #define SPLIT_DATA_ENTRY(type, name, num) \ - split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16); - SPLIT_DATA_ENTRIES; + split_data->name = (type *)p; \ + p += align_up(num_elements * num * sizeof(type), 16); + SPLIT_DATA_ENTRIES; #undef SPLIT_DATA_ENTRY - uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1); + uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1); #ifdef __BRANCHED_PATH__ - split_data->_branched_state_sd = (ShaderData*)p; - p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); + split_data->_branched_state_sd = (ShaderData *)p; + p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); #endif - split_data->_sd = (ShaderData*)p; - p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); + split_data->_sd = (ShaderData *)p; + p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); - split_data->ray_state = ray_state; + split_data->ray_state = ray_state; } CCL_NAMESPACE_END -#endif /* __KERNEL_SPLIT_DATA_H__ */ +#endif /* __KERNEL_SPLIT_DATA_H__ */ diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index 83df1e2a0a6..6ff3f5bdb55 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -22,17 +22,17 @@ CCL_NAMESPACE_BEGIN /* parameters used by the split kernels, we use a single struct to avoid passing these to each kernel */ typedef struct SplitParams { - WorkTile tile; - uint total_work_size; + WorkTile tile; + uint total_work_size; - ccl_global unsigned int *work_pools; + ccl_global unsigned int *work_pools; - ccl_global int *queue_index; - int queue_size; - ccl_global char *use_queues_flag; + ccl_global int *queue_index; + int queue_size; + ccl_global char *use_queues_flag; - /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */ - int dummy_sd_flag; + /* Place for storing sd->flag. 
AMD GPU OpenCL compiler workaround */ + int dummy_sd_flag; } SplitParams; /* Global memory variables [porting]; These memory is used for @@ -46,98 +46,98 @@ typedef struct SplitParams { #ifdef __BRANCHED_PATH__ typedef ccl_global struct SplitBranchedState { - /* various state that must be kept and restored after an indirect loop */ - PathState path_state; - float3 throughput; - Ray ray; + /* various state that must be kept and restored after an indirect loop */ + PathState path_state; + float3 throughput; + Ray ray; - Intersection isect; + Intersection isect; - char ray_state; + char ray_state; - /* indirect loop state */ - int next_closure; - int next_sample; + /* indirect loop state */ + int next_closure; + int next_sample; -#ifdef __SUBSURFACE__ - int ss_next_closure; - int ss_next_sample; - int next_hit; - int num_hits; - - uint lcg_state; - LocalIntersection ss_isect; -#endif /*__SUBSURFACE__ */ - - int shared_sample_count; /* number of branched samples shared with other threads */ - int original_ray; /* index of original ray when sharing branched samples */ - bool waiting_on_shared_samples; +# ifdef __SUBSURFACE__ + int ss_next_closure; + int ss_next_sample; + int next_hit; + int num_hits; + + uint lcg_state; + LocalIntersection ss_isect; +# endif /*__SUBSURFACE__ */ + + int shared_sample_count; /* number of branched samples shared with other threads */ + int original_ray; /* index of original ray when sharing branched samples */ + bool waiting_on_shared_samples; } SplitBranchedState; -#define SPLIT_DATA_BRANCHED_ENTRIES \ - SPLIT_DATA_ENTRY( SplitBranchedState, branched_state, 1) \ - SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0) +# define SPLIT_DATA_BRANCHED_ENTRIES \ + SPLIT_DATA_ENTRY(SplitBranchedState, branched_state, 1) \ + SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0) #else -#define SPLIT_DATA_BRANCHED_ENTRIES -#endif /* __BRANCHED_PATH__ */ +# define SPLIT_DATA_BRANCHED_ENTRIES +#endif /* __BRANCHED_PATH__ */ #ifdef __SUBSURFACE__ # define SPLIT_DATA_SUBSURFACE_ENTRIES \ - SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1) + SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1) #else # define SPLIT_DATA_SUBSURFACE_ENTRIES -#endif /* __SUBSURFACE__ */ +#endif /* __SUBSURFACE__ */ #ifdef __VOLUME__ -# define SPLIT_DATA_VOLUME_ENTRIES \ - SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1) +# define SPLIT_DATA_VOLUME_ENTRIES SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1) #else # define SPLIT_DATA_VOLUME_ENTRIES -#endif /* __VOLUME__ */ +#endif /* __VOLUME__ */ #define SPLIT_DATA_ENTRIES \ - SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ - SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ - SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ - SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ - SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ - SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ - SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ - SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ - SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? 
*/ \ - SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \ - SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ - SPLIT_DATA_SUBSURFACE_ENTRIES \ - SPLIT_DATA_VOLUME_ENTRIES \ - SPLIT_DATA_BRANCHED_ENTRIES \ - SPLIT_DATA_ENTRY(ShaderData, _sd, 0) + SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ + SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ + SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ + SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ + SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ + SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ + SPLIT_DATA_ENTRY( \ + ccl_global int, queue_data, (NUM_QUEUES * 2)) /* TODO(mai): this is too large? */ \ + SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \ + SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ + SPLIT_DATA_SUBSURFACE_ENTRIES \ + SPLIT_DATA_VOLUME_ENTRIES \ + SPLIT_DATA_BRANCHED_ENTRIES \ + SPLIT_DATA_ENTRY(ShaderData, _sd, 0) /* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */ #define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \ - SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ - SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ - SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ - SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ - SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ - SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ - SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ - SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ - SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ - SPLIT_DATA_SUBSURFACE_ENTRIES \ - SPLIT_DATA_VOLUME_ENTRIES \ - SPLIT_DATA_BRANCHED_ENTRIES \ - SPLIT_DATA_ENTRY(ShaderData, _sd, 0) + SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ + SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ + SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ + SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ + SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ + SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ + SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ + SPLIT_DATA_SUBSURFACE_ENTRIES \ + SPLIT_DATA_VOLUME_ENTRIES \ + SPLIT_DATA_BRANCHED_ENTRIES \ + SPLIT_DATA_ENTRY(ShaderData, _sd, 0) /* struct that holds pointers to data in the shared state buffer */ typedef struct SplitData { #define SPLIT_DATA_ENTRY(type, name, num) type *name; - SPLIT_DATA_ENTRIES + SPLIT_DATA_ENTRIES #undef SPLIT_DATA_ENTRY - /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from - * the host easily) but is still used the same as the other data so we have it here in this struct as well - */ - ccl_global char *ray_state; + /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from + * the host easily) but is still used the same as the other data so we have it here in this struct as well + */ + ccl_global char *ray_state; } SplitData; #ifndef __KERNEL_CUDA__ @@ -148,30 +148,30 @@ __device__ SplitData __split_data; # define kernel_split_state (__split_data) __device__ SplitParams __split_param_data; # define kernel_split_params (__split_param_data) -#endif /* __KERNEL_CUDA__ */ +#endif /* __KERNEL_CUDA__ */ -#define kernel_split_sd(sd, ray_index) ((ShaderData*) \ - ( \ - ((ccl_global char*)kernel_split_state._##sd) + 
\ - (sizeof(ShaderData) + sizeof(ShaderClosure)*(kernel_data.integrator.max_closures-1)) * (ray_index) \ - )) +#define kernel_split_sd(sd, ray_index) \ + ((ShaderData *)(((ccl_global char *)kernel_split_state._##sd) + \ + (sizeof(ShaderData) + \ + sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1)) * \ + (ray_index))) /* Local storage for queue_enqueue kernel. */ typedef struct QueueEnqueueLocals { - uint queue_atomics[2]; + uint queue_atomics[2]; } QueueEnqueueLocals; /* Local storage for holdout_emission_blurring_pathtermination_ao kernel. */ typedef struct BackgroundAOLocals { - uint queue_atomics_bg; - uint queue_atomics_ao; + uint queue_atomics_bg; + uint queue_atomics_ao; } BackgroundAOLocals; typedef struct ShaderSortLocals { - uint local_value[SHADER_SORT_BLOCK_SIZE]; - ushort local_index[SHADER_SORT_BLOCK_SIZE]; + uint local_value[SHADER_SORT_BLOCK_SIZE]; + ushort local_index[SHADER_SORT_BLOCK_SIZE]; } ShaderSortLocals; CCL_NAMESPACE_END -#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */ +#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */ diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h index 08769fe303b..ba06ae3bc53 100644 --- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -18,276 +18,247 @@ CCL_NAMESPACE_BEGIN #if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__) -ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, int ray_index) +ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, + int ray_index) { - kernel_split_branched_path_indirect_loop_init(kg, ray_index); + kernel_split_branched_path_indirect_loop_init(kg, ray_index); - SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; + SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; - branched_state->ss_next_closure = 0; - branched_state->ss_next_sample = 0; + branched_state->ss_next_closure = 0; + branched_state->ss_next_sample = 0; - branched_state->num_hits = 0; - branched_state->next_hit = 0; + branched_state->num_hits = 0; + branched_state->next_hit = 0; - ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT); + ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT); } -ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(KernelGlobals *kg, int ray_index) +ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter( + KernelGlobals *kg, int ray_index) { - SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; - - ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index); - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - - for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - - if(!CLOSURE_IS_BSSRDF(sc->type)) - continue; - - /* Closure memory will be overwritten, so read required variables now. 
*/ - Bssrdf *bssrdf = (Bssrdf *)sc; - ClosureType bssrdf_type = sc->type; - float bssrdf_roughness = bssrdf->roughness; - - /* set up random number generator */ - if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 && - branched_state->next_closure == 0 && branched_state->next_sample == 0) - { - branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state, - 0x68bc21eb); - } - int num_samples = kernel_data.integrator.subsurface_samples * 3; - float num_samples_inv = 1.0f/num_samples; - uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i); - - /* do subsurface scatter step with copy of shader data, this will - * replace the BSSRDF with a diffuse BSDF closure */ - for(int j = branched_state->ss_next_sample; j < num_samples; j++) { - ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index]; - *hit_state = branched_state->path_state; - hit_state->rng_hash = bssrdf_rng_hash; - path_state_branch(hit_state, j, num_samples); - - ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect; - float bssrdf_u, bssrdf_v; - path_branched_rng_2D(kg, - bssrdf_rng_hash, - hit_state, - j, - num_samples, - PRNG_BSDF_U, - &bssrdf_u, - &bssrdf_v); - - /* intersection is expensive so avoid doing multiple times for the same input */ - if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) { - uint lcg_state = branched_state->lcg_state; - LocalIntersection ss_isect_private; - - branched_state->num_hits = subsurface_scatter_multi_intersect(kg, - &ss_isect_private, - sd, - hit_state, - sc, - &lcg_state, - bssrdf_u, bssrdf_v, - true); - - branched_state->lcg_state = lcg_state; - *ss_isect = ss_isect_private; - } - - hit_state->rng_offset += PRNG_BOUNCE_NUM; - -#ifdef __VOLUME__ - Ray volume_ray = branched_state->ray; - bool need_update_volume_stack = - kernel_data.integrator.use_volumes && - sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; -#endif /* __VOLUME__ */ - - /* compute lighting with the BSDF closure */ - for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) { - ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index); - *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is - * important as the indirect path will write into bssrdf_sd */ - - LocalIntersection ss_isect_private = *ss_isect; - subsurface_scatter_multi_setup(kg, - &ss_isect_private, - hit, - bssrdf_sd, - hit_state, - bssrdf_type, - bssrdf_roughness); - *ss_isect = ss_isect_private; - -#ifdef __VOLUME__ - if(need_update_volume_stack) { - /* Setup ray from previous surface point to the new one. 
*/ - float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng); - volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t); - - for(int k = 0; k < VOLUME_STACK_SIZE; k++) { - hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k]; - } - - kernel_volume_stack_update_for_subsurface(kg, - emission_sd, - &volume_ray, - hit_state->volume_stack); - } -#endif /* __VOLUME__ */ - -#ifdef __EMISSION__ - if(branched_state->next_closure == 0 && branched_state->next_sample == 0) { - /* direct light */ - if(kernel_data.integrator.use_direct_light) { - int all = (kernel_data.integrator.sample_all_lights_direct) || - (hit_state->flag & PATH_RAY_SHADOW_CATCHER); - kernel_branched_path_surface_connect_light(kg, - bssrdf_sd, - emission_sd, - hit_state, - branched_state->throughput, - num_samples_inv, - L, - all); - } - } -#endif /* __EMISSION__ */ - - /* indirect light */ - if(kernel_split_branched_path_surface_indirect_light_iter(kg, - ray_index, - num_samples_inv, - bssrdf_sd, - false, - false)) - { - branched_state->ss_next_closure = i; - branched_state->ss_next_sample = j; - branched_state->next_hit = hit; - - return true; - } - - branched_state->next_closure = 0; - } - - branched_state->next_hit = 0; - } - - branched_state->ss_next_sample = 0; - } - - branched_state->ss_next_closure = sd->num_closure; - - branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); - if(branched_state->waiting_on_shared_samples) { - return true; - } - - kernel_split_branched_path_indirect_loop_end(kg, ray_index); - - return false; + SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; + + ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index); + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + + for (int i = branched_state->ss_next_closure; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if (!CLOSURE_IS_BSSRDF(sc->type)) + continue; + + /* Closure memory will be overwritten, so read required variables now. 
*/ + Bssrdf *bssrdf = (Bssrdf *)sc; + ClosureType bssrdf_type = sc->type; + float bssrdf_roughness = bssrdf->roughness; + + /* set up random number generator */ + if (branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 && + branched_state->next_closure == 0 && branched_state->next_sample == 0) { + branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state, + 0x68bc21eb); + } + int num_samples = kernel_data.integrator.subsurface_samples * 3; + float num_samples_inv = 1.0f / num_samples; + uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i); + + /* do subsurface scatter step with copy of shader data, this will + * replace the BSSRDF with a diffuse BSDF closure */ + for (int j = branched_state->ss_next_sample; j < num_samples; j++) { + ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index]; + *hit_state = branched_state->path_state; + hit_state->rng_hash = bssrdf_rng_hash; + path_state_branch(hit_state, j, num_samples); + + ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect; + float bssrdf_u, bssrdf_v; + path_branched_rng_2D( + kg, bssrdf_rng_hash, hit_state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + + /* intersection is expensive so avoid doing multiple times for the same input */ + if (branched_state->next_hit == 0 && branched_state->next_closure == 0 && + branched_state->next_sample == 0) { + uint lcg_state = branched_state->lcg_state; + LocalIntersection ss_isect_private; + + branched_state->num_hits = subsurface_scatter_multi_intersect( + kg, &ss_isect_private, sd, hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true); + + branched_state->lcg_state = lcg_state; + *ss_isect = ss_isect_private; + } + + hit_state->rng_offset += PRNG_BOUNCE_NUM; + +# ifdef __VOLUME__ + Ray volume_ray = branched_state->ray; + bool need_update_volume_stack = kernel_data.integrator.use_volumes && + sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; +# endif /* __VOLUME__ */ + + /* compute lighting with the BSDF closure */ + for (int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) { + ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index); + *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is + * important as the indirect path will write into bssrdf_sd */ + + LocalIntersection ss_isect_private = *ss_isect; + subsurface_scatter_multi_setup( + kg, &ss_isect_private, hit, bssrdf_sd, hit_state, bssrdf_type, bssrdf_roughness); + *ss_isect = ss_isect_private; + +# ifdef __VOLUME__ + if (need_update_volume_stack) { + /* Setup ray from previous surface point to the new one. 
*/ + float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng); + volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t); + + for (int k = 0; k < VOLUME_STACK_SIZE; k++) { + hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k]; + } + + kernel_volume_stack_update_for_subsurface( + kg, emission_sd, &volume_ray, hit_state->volume_stack); + } +# endif /* __VOLUME__ */ + +# ifdef __EMISSION__ + if (branched_state->next_closure == 0 && branched_state->next_sample == 0) { + /* direct light */ + if (kernel_data.integrator.use_direct_light) { + int all = (kernel_data.integrator.sample_all_lights_direct) || + (hit_state->flag & PATH_RAY_SHADOW_CATCHER); + kernel_branched_path_surface_connect_light(kg, + bssrdf_sd, + emission_sd, + hit_state, + branched_state->throughput, + num_samples_inv, + L, + all); + } + } +# endif /* __EMISSION__ */ + + /* indirect light */ + if (kernel_split_branched_path_surface_indirect_light_iter( + kg, ray_index, num_samples_inv, bssrdf_sd, false, false)) { + branched_state->ss_next_closure = i; + branched_state->ss_next_sample = j; + branched_state->next_hit = hit; + + return true; + } + + branched_state->next_closure = 0; + } + + branched_state->next_hit = 0; + } + + branched_state->ss_next_sample = 0; + } + + branched_state->ss_next_closure = sd->num_closure; + + branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); + if (branched_state->waiting_on_shared_samples) { + return true; + } + + kernel_split_branched_path_indirect_loop_end(kg, ray_index); + + return false; } -#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */ +#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) { - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(thread_index == 0) { - /* We will empty both queues in this kernel. */ - kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; - kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; - } - - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); - get_ray_index(kg, thread_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (thread_index == 0) { + /* We will empty both queues in this kernel. 
*/ + kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; + } + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + get_ray_index(kg, + thread_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); #ifdef __SUBSURFACE__ - ccl_global char *ray_state = kernel_split_state.ray_state; - - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - - if(sd->flag & SD_BSSRDF) { - -#ifdef __BRANCHED_PATH__ - if(!kernel_data.integrator.branched || - IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) - { -#endif - if(kernel_path_subsurface_scatter(kg, - sd, - emission_sd, - L, - state, - ray, - throughput, - ss_indirect)) - { - kernel_split_path_end(kg, ray_index); - } -#ifdef __BRANCHED_PATH__ - } - else { - kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index); - - if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - } -#endif - } - } + ccl_global char *ray_state = kernel_split_state.ray_state; + + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + + if (sd->flag & SD_BSSRDF) { # ifdef __BRANCHED_PATH__ - if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { - kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0; - } - - /* iter loop */ - ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), - QUEUE_SUBSURFACE_INDIRECT_ITER, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); - - if(IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) { - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]); - path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]); - - if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - } -# endif /* __BRANCHED_PATH__ */ - -#endif /* __SUBSURFACE__ */ + if (!kernel_data.integrator.branched || + IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { +# endif + if (kernel_path_subsurface_scatter( + kg, sd, emission_sd, L, state, ray, 
throughput, ss_indirect)) { + kernel_split_path_end(kg, ray_index); + } +# ifdef __BRANCHED_PATH__ + } + else { + kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index); + + if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + } +# endif + } + } # ifdef __BRANCHED_PATH__ + if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0; + } + + /* iter loop */ + ray_index = get_ray_index(kg, + ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), + QUEUE_SUBSURFACE_INDIRECT_ITER, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + + if (IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) { + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]); + path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]); + + if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + } +# endif /* __BRANCHED_PATH__ */ + +#endif /* __SUBSURFACE__ */ } CCL_NAMESPACE_END
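
A few of the patterns this commit reformats are easier to read in isolation than through the diff noise, so some hedged sketches follow; none of them are Cycles code. The first is the shared-sample bookkeeping near the top of the kernel_branched.h hunk: a thread that finishes an indirect sample it borrowed from another ray atomically decrements the owner ray's shared_sample_count, and the owner stays in waiting_on_shared_samples until the count drains to zero. Below is a host-side analogue using C11 atomics; the names (shared_sample_count, do_indirect_sample, helper) are invented for illustration.

/* Minimal sketch (assumed names, not Cycles code) of the shared-sample
 * countdown: helpers decrement an atomic counter owned by the original
 * ray; the owner waits until all borrowed samples are finished. */
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_uint shared_sample_count;

static void do_indirect_sample(int id)
{
  /* ... trace one borrowed indirect sample ... */
  printf("helper %d done\n", id);
}

static void *helper(void *arg)
{
  do_indirect_sample((int)(size_t)arg);
  /* Analogue of atomic_fetch_and_dec_uint32() on the owner's counter. */
  atomic_fetch_sub(&shared_sample_count, 1);
  return NULL;
}

int main(void)
{
  enum { NUM_HELPERS = 4 };
  pthread_t threads[NUM_HELPERS];
  atomic_store(&shared_sample_count, NUM_HELPERS);

  for (size_t i = 0; i < NUM_HELPERS; i++)
    pthread_create(&threads[i], NULL, helper, (void *)i);

  /* The "original ray": waiting_on_shared_samples stays true until the
   * counter drains to zero, then its saved loop state can be restored. */
  while (atomic_load(&shared_sample_count) > 0)
    ; /* in the kernel this is re-queueing, not a busy spin */

  for (int i = 0; i < NUM_HELPERS; i++)
    pthread_join(threads[i], NULL);
  printf("all shared samples accounted for\n");
  return 0;
}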
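
Second, kernel_split_data.h sizes and carves one shared state buffer with an X-macro: each SPLIT_DATA_ENTRY expands to "+ align_up(...)", so the entry list itself becomes one long sum in split_data_buffer_size(), and a second expansion of the same list hands out aligned pointers in split_data_init(). This is a minimal host-side sketch of that pattern under invented names (MY_ENTRIES, MyData, my_buffer_size, my_data_init); the real entry list and alignment rules are the ones in the diff above.

/* Host-side sketch (not Cycles code) of the SPLIT_DATA_ENTRY X-macro
 * pattern: one list drives the struct fields, the size computation,
 * and the pointer carving, so they can never drift apart. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint64_t align_up(uint64_t offset, uint64_t alignment)
{
  return (offset + alignment - 1) & ~(alignment - 1);
}

/* Each entry is (type, name, count-per-element), as in SPLIT_DATA_ENTRIES. */
#define MY_ENTRIES \
  MY_ENTRY(float, throughput, 1) \
  MY_ENTRY(int, is_lamp, 1) \
  MY_ENTRY(int, queue_data, 2)

typedef struct MyData {
#define MY_ENTRY(type, name, num) type *name;
  MY_ENTRIES
#undef MY_ENTRY
} MyData;

/* Sizing trick: every entry expands to "+ align_up(...)", so the list
 * becomes one long sum, exactly like split_data_buffer_size(). */
static uint64_t my_buffer_size(size_t num_elements)
{
  uint64_t size = 0;
#define MY_ENTRY(type, name, num) +align_up(num_elements * num * sizeof(type), 16)
  size = size MY_ENTRIES;
#undef MY_ENTRY
  return size;
}

/* Carving: walk the same list again, giving each field a 16-byte
 * aligned slice of the single allocation, like split_data_init(). */
static void my_data_init(MyData *data, size_t num_elements, void *buffer)
{
  char *p = (char *)buffer;
#define MY_ENTRY(type, name, num) \
  data->name = (type *)p; \
  p += align_up(num_elements * num * sizeof(type), 16);
  MY_ENTRIES
#undef MY_ENTRY
}

int main(void)
{
  const size_t num_elements = 1024; /* one slot per ray/thread */
  uint64_t size = my_buffer_size(num_elements);
  void *buffer = malloc(size);
  MyData data;
  my_data_init(&data, num_elements, buffer);
  printf("buffer: %llu bytes, queue_data at +%td\n",
         (unsigned long long)size, (char *)data.queue_data - (char *)buffer);
  free(buffer);
  return 0;
}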
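
Third, the kernel_split_sd() macro in kernel_split_data_types.h indexes an array of variable-size records: each ShaderData is followed in the same block by (max_closures - 1) extra ShaderClosure slots, so element i lives at byte offset stride * i with stride = sizeof(ShaderData) + sizeof(ShaderClosure) * (max_closures - 1). A rough standalone sketch of that addressing, with invented Shader/Closure types standing in for the real structs; like the kernel layout, it relies on the traditional one-element-array "struct hack" to grow past the declared closure array.

/* Sketch (assumed types, not Cycles code) of kernel_split_sd()-style
 * indexing: fixed-stride records, each a header plus a runtime-chosen
 * number of trailing closures in the same allocation. */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct Closure {
  int type;
  float weight;
} Closure;

typedef struct Shader {
  int num_closure;
  Closure closure[1]; /* first closure; the rest follow in the same block */
} Shader;

/* Same shape as the kernel_split_sd() macro: stride is the header plus
 * (max_closures - 1) extra trailing closures. */
static Shader *shader_at(char *base, int max_closures, int index)
{
  size_t stride = sizeof(Shader) + sizeof(Closure) * (size_t)(max_closures - 1);
  return (Shader *)(base + stride * (size_t)index);
}

int main(void)
{
  const int max_closures = 8;
  const int num_rays = 4;
  size_t stride = sizeof(Shader) + sizeof(Closure) * (max_closures - 1);
  char *base = calloc(num_rays, stride); /* one record per ray */

  Shader *sd = shader_at(base, max_closures, 2); /* record for ray 2 */
  sd->num_closure = 3;
  for (int i = 0; i < sd->num_closure; i++)
    sd->closure[i].type = i; /* writes land in this record's closure slots */

  printf("stride=%zu, ray 2 closure[2].type=%d\n",
         stride, shader_at(base, max_closures, 2)->closure[2].type);
  free(base);
  return 0;
}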
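
Finally, the next_closure/next_sample/ss_next_* counters in SplitBranchedState implement resumable loops: kernel_split_branched_path_subsurface_indirect_light_iter() can return mid-iteration (so the indirect path can run), and on the next call it restarts its nested loops from the saved indices instead of from zero. A simplified sketch of that suspend/resume shape follows; the hypothetical need_bounce() stands in for "spawned an indirect path", and unlike the real iterator (which resumes into the same sample to finish its remaining hits and closures) this toy resumes one past it.

/* Sketch (illustrative, not Cycles code) of the resumable-loop pattern
 * behind SplitBranchedState: loop counters live in persistent state,
 * the loops start from them, and the function can bail out mid-work
 * and be re-entered later without redoing finished iterations. */
#include <stdbool.h>
#include <stdio.h>

typedef struct LoopState {
  int next_closure; /* outer loop resume point */
  int next_sample;  /* inner loop resume point */
} LoopState;

/* Hypothetical stand-in for "this sample spawned an indirect path". */
static bool need_bounce(int closure, int sample)
{
  return (closure + sample) % 3 == 0; /* arbitrary demo condition */
}

/* Returns true when it suspends (more work pending), false when done. */
static bool branched_iter(LoopState *state, int num_closures, int num_samples)
{
  for (int i = state->next_closure; i < num_closures; i++) {
    for (int j = state->next_sample; j < num_samples; j++) {
      printf("work: closure %d, sample %d\n", i, j);
      if (need_bounce(i, j)) {
        /* Record where to pick up again, one past the current sample. */
        state->next_closure = i;
        state->next_sample = j + 1;
        return true;
      }
    }
    state->next_sample = 0; /* inner loop finished; restart it next time */
  }
  state->next_closure = num_closures; /* mark the outer loop complete */
  return false;
}

int main(void)
{
  LoopState state = {0, 0};
  int suspensions = 0;
  while (branched_iter(&state, 2, 3)) {
    suspensions++; /* here the real kernel would trace the indirect ray */
  }
  printf("finished after %d suspensions\n", suspensions);
  return 0;
}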