diff options
Diffstat (limited to 'intern/cycles/kernel/split/kernel_do_volume.h')
-rw-r--r-- | intern/cycles/kernel/split/kernel_do_volume.h | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h new file mode 100644 index 00000000000..491487f1230 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -0,0 +1,220 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +#if defined(__BRANCHED_PATH__) && defined(__VOLUME__) + +ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index) +{ + kernel_split_branched_path_indirect_loop_init(kg, ray_index); + + ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT); +} + +ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index) +{ + SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; + + ShaderData *sd = &kernel_split_state.sd[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + + /* GPU: no decoupled ray marching, scatter probalistically */ + int num_samples = kernel_data.integrator.volume_samples; + float num_samples_inv = 1.0f/num_samples; + + Ray volume_ray = branched_state->ray; + volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX; + + bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack); + + for(int j = branched_state->next_sample; j < num_samples; j++) { + ccl_global PathState *ps = &kernel_split_state.path_state[ray_index]; + *ps = branched_state->path_state; + + ccl_global Ray *pray = &kernel_split_state.ray[ray_index]; + *pray = branched_state->ray; + + ccl_global float3 *tp = &kernel_split_state.throughput[ray_index]; + *tp = branched_state->throughput * num_samples_inv; + + /* branch RNG state */ + path_state_branch(ps, j, num_samples); + + /* integrate along volume segment with distance sampling */ + VolumeIntegrateResult result = kernel_volume_integrate( + kg, ps, sd, &volume_ray, L, tp, heterogeneous); + +# ifdef __VOLUME_SCATTER__ + if(result == VOLUME_PATH_SCATTERED) { + /* direct lighting */ + kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L); + + /* indirect light bounce */ + if(!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) { + continue; + } + + /* start the indirect path */ + branched_state->next_closure = 0; + branched_state->next_sample = j+1; + + /* Attempting to share too many samples is slow for volumes as it causes us to + * loop here more and have many calls to kernel_volume_integrate which evaluates + * shaders. The many expensive shader evaluations cause the work load to become + * unbalanced and many threads to become idle in this kernel. Limiting the + * number of shared samples here helps quite a lot. + */ + if(branched_state->shared_sample_count < 2) { + if(kernel_split_branched_indirect_start_shared(kg, ray_index)) { + continue; + } + } + + return true; + } +# endif + } + + branched_state->next_sample = num_samples; + + branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); + if(branched_state->waiting_on_shared_samples) { + return true; + } + + kernel_split_branched_path_indirect_loop_end(kg, ray_index); + + /* todo: avoid this calculation using decoupled ray marching */ + float3 throughput = kernel_split_state.throughput[ray_index]; + kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput); + kernel_split_state.throughput[ray_index] = throughput; + + return false; +} + +#endif /* __BRANCHED_PATH__ && __VOLUME__ */ + +ccl_device void kernel_do_volume(KernelGlobals *kg) +{ +#ifdef __VOLUME__ + /* We will empty this queue in this kernel. */ + if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; +# ifdef __BRANCHED_PATH__ + kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0; +# endif /* __BRANCHED_PATH__ */ + } + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + + if(*kernel_split_params.use_queues_flag) { + ray_index = get_ray_index(kg, ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + } + + ccl_global char *ray_state = kernel_split_state.ray_state; + + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; + ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + + bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND); + + /* Sanitize volume stack. */ + if(!hit) { + kernel_volume_clean_stack(kg, state->volume_stack); + } + /* volume attenuation, emission, scatter */ + if(state->volume_stack[0].shader != SHADER_NONE) { + Ray volume_ray = *ray; + volume_ray.t = (hit)? isect->t: FLT_MAX; + +# ifdef __BRANCHED_PATH__ + if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { +# endif /* __BRANCHED_PATH__ */ + bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); + + { + /* integrate along volume segment with distance sampling */ + VolumeIntegrateResult result = kernel_volume_integrate( + kg, state, sd, &volume_ray, L, throughput, heterogeneous); + +# ifdef __VOLUME_SCATTER__ + if(result == VOLUME_PATH_SCATTERED) { + /* direct lighting */ + kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L); + + /* indirect light bounce */ + if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + else { + kernel_split_path_end(kg, ray_index); + } + } +# endif /* __VOLUME_SCATTER__ */ + } + +# ifdef __BRANCHED_PATH__ + } + else { + kernel_split_branched_path_volume_indirect_light_init(kg, ray_index); + + if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + } +# endif /* __BRANCHED_PATH__ */ + } + } + +# ifdef __BRANCHED_PATH__ + /* iter loop */ + ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), + QUEUE_VOLUME_INDIRECT_ITER, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + + if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) { + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]); + path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]); + + if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + } +# endif /* __BRANCHED_PATH__ */ + +#endif /* __VOLUME__ */ +} + + +CCL_NAMESPACE_END |