Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/split/kernel_do_volume.h')
-rw-r--r--intern/cycles/kernel/split/kernel_do_volume.h190
1 files changed, 160 insertions, 30 deletions
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 47d3c280831..9f8dd2392d9 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -16,6 +16,100 @@
CCL_NAMESPACE_BEGIN
+#if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
+
+ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index)
+{
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
+}
+
+ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+
+ /* GPU: no decoupled ray marching, scatter probalistically */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f/num_samples;
+
+ Ray volume_ray = branched_state->ray;
+ volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
+
+ for(int j = branched_state->next_sample; j < num_samples; j++) {
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+ *ps = branched_state->path_state;
+
+ ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
+ *pray = branched_state->ray;
+
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = branched_state->throughput * num_samples_inv;
+
+ /* branch RNG state */
+ path_state_branch(ps, j, num_samples);
+
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, ps, sd, &volume_ray, L, tp, &rng, heterogeneous);
+
+# ifdef __VOLUME_SCATTER__
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *tp, &branched_state->path_state, L);
+
+ /* indirect light bounce */
+ if(!kernel_path_volume_bounce(kg, &rng, sd, tp, ps, L, pray)) {
+ continue;
+ }
+
+ /* start the indirect path */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = j+1;
+ branched_state->num_samples = num_samples;
+
+ /* Attempting to share too many samples is slow for volumes as it causes us to
+ * loop here more and have many calls to kernel_volume_integrate which evaluates
+ * shaders. The many expensive shader evaluations cause the work load to become
+ * unbalanced and many threads to become idle in this kernel. Limiting the
+ * number of shared samples here helps quite a lot.
+ */
+ if(branched_state->shared_sample_count < 2) {
+ if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+ }
+
+ return true;
+ }
+# endif
+ }
+
+ branched_state->next_sample = num_samples;
+
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if(branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ /* todo: avoid this calculation using decoupled ray marching */
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
+ kernel_split_state.throughput[ray_index] = throughput;
+
+ return false;
+}
+
+#endif /* __BRANCHED_PATH__ && __VOLUME__ */
ccl_device void kernel_do_volume(KernelGlobals *kg)
{
@@ -23,37 +117,36 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
/* We will empty this queue in this kernel. */
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+# ifdef __BRANCHED_PATH__
+ kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
+# endif /* __BRANCHED_PATH__ */
}
- /* Fetch use_queues_flag. */
- char local_use_queues_flag = *kernel_split_params.use_queues_flag;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(local_use_queues_flag) {
+
+ if(*kernel_split_params.use_queues_flag) {
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
1);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
}
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ ccl_global char *ray_state = kernel_split_state.ray_state;
- bool hit = ! IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
-
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
RNG rng = kernel_split_state.rng[ray_index];
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
- ShaderData *sd_input = &kernel_split_state.sd_DL_shadow[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+
+ bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
/* Sanitize volume stack. */
if(!hit) {
@@ -64,31 +157,68 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
Ray volume_ray = *ray;
volume_ray.t = (hit)? isect->t: FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+# ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+# endif /* __BRANCHED_PATH__ */
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
- {
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, &rng, heterogeneous);
+ {
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, state, sd, &volume_ray, L, throughput, &rng, heterogeneous);
# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, &rng, sd, sd_input, *throughput, state, L);
-
- /* indirect light bounce */
- if(kernel_path_volume_bounce(kg, &rng, sd, throughput, state, L, ray))
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED);
- else
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER);
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *throughput, state, L);
+
+ /* indirect light bounce */
+ if(kernel_path_volume_bounce(kg, &rng, sd, throughput, state, L, ray)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+# endif /* __VOLUME_SCATTER__ */
}
-# endif
+
+# ifdef __BRANCHED_PATH__
}
+ else {
+ kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
+
+ if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
}
+
kernel_split_state.rng[ray_index] = rng;
}
-#endif
+# ifdef __BRANCHED_PATH__
+ /* iter loop */
+ ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_VOLUME_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+#endif /* __VOLUME__ */
}