Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/blender/addon/ui.py3
-rw-r--r--intern/cycles/device/device_split_kernel.cpp1
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/kernel_path.h6
-rw-r--r--intern/cycles/kernel/kernel_path_branched.h5
-rw-r--r--intern/cycles/kernel/kernel_path_surface.h2
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h4
-rw-r--r--intern/cycles/kernel/kernel_types.h59
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h2
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu2
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl3
-rw-r--r--intern/cycles/kernel/split/kernel_branched.h150
-rw-r--r--intern/cycles/kernel/split/kernel_data_init.h19
-rw-r--r--intern/cycles/kernel/split/kernel_direct_lighting.h66
-rw-r--r--intern/cycles/kernel/split/kernel_do_volume.h180
-rw-r--r--intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h105
-rw-r--r--intern/cycles/kernel/split/kernel_indirect_background.h6
-rw-r--r--intern/cycles/kernel/split/kernel_indirect_subsurface.h37
-rw-r--r--intern/cycles/kernel/split/kernel_next_iteration_setup.h220
-rw-r--r--intern/cycles/kernel/split/kernel_shader_eval.h25
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_ao.h42
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_dl.h79
-rw-r--r--intern/cycles/kernel/split/kernel_split_common.h61
-rw-r--r--intern/cycles/kernel/split/kernel_split_data_types.h45
-rw-r--r--intern/cycles/kernel/split/kernel_subsurface_scatter.h309
25 files changed, 1043 insertions, 389 deletions
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 19ab9053e9a..ab1fe8924bc 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -78,7 +78,7 @@ def use_cuda(context):
def use_branched_path(context):
cscene = context.scene.cycles
- return (cscene.progressive == 'BRANCHED_PATH' and not use_opencl(context))
+ return (cscene.progressive == 'BRANCHED_PATH')
def use_sample_all_lights(context):
@@ -156,7 +156,6 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
row = layout.row()
sub = row.row()
- sub.active = get_device_type(context) != 'OPENCL' or use_cpu(context)
sub.prop(cscene, "progressive", text="")
row.prop(cscene, "use_square_samples")
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 71d52bb8097..abd26f5be49 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -240,6 +240,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 3750225571d..a92e8bc4aee 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -235,6 +235,7 @@ set(SRC_UTIL_HEADERS
)
set(SRC_SPLIT_HEADERS
+ split/kernel_branched.h
split/kernel_buffer_update.h
split/kernel_data_init.h
split/kernel_direct_lighting.h
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index e7957042182..58da141aed3 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -58,7 +58,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
- PathState *state,
+ ccl_addr_space PathState *state,
RNG *rng,
float3 throughput,
float3 ao_alpha)
@@ -98,6 +98,8 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
}
}
+#ifndef __SPLIT_KERNEL__
+
ccl_device void kernel_path_indirect(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
@@ -818,5 +820,7 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
path_rng_end(kg, rng_state, rng);
}
+#endif /* __SPLIT_KERNEL__ */
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 085eb42325d..ddcb57161ea 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -22,7 +22,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
- PathState *state,
+ ccl_addr_space PathState *state,
RNG *rng,
float3 throughput)
{
@@ -65,6 +65,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
}
}
+#ifndef __SPLIT_KERNEL__
/* bounce off surface and integrate indirect light */
ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
@@ -648,6 +649,8 @@ ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
path_rng_end(kg, rng_state, rng);
}
+#endif /* __SPLIT_KERNEL__ */
+
#endif /* __BRANCHED_PATH__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 076c82f3853..bd4ba775b4d 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -155,7 +155,7 @@ ccl_device bool kernel_branched_path_surface_bounce(
ccl_addr_space float3 *throughput,
ccl_addr_space PathState *state,
PathRadiance *L,
- Ray *ray)
+ ccl_addr_space Ray *ray)
{
/* sample BSDF */
float bsdf_pdf;
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index baf629342b9..274713addc2 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -417,9 +417,8 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
subsurface_scatter_setup_diffuse_bsdf(sd, sc, weight, true, N);
}
-#ifndef __SPLIT_KERNEL__
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
-ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathState *state,
+ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_global PathState *state,
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
@@ -507,7 +506,6 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathS
/* setup diffuse bsdf */
subsurface_scatter_setup_diffuse_bsdf(sd, sc, eval, (ss_isect.num_hits > 0), N);
}
-#endif /* ! __SPLIT_KERNEL__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index b1269cdb6b4..6417f621c8f 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -71,22 +71,18 @@ CCL_NAMESPACE_BEGIN
# endif
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
-# ifndef __SPLIT_KERNEL__
-# define __BRANCHED_PATH__
-# endif
+# define __BRANCHED_PATH__
# ifdef WITH_OSL
# define __OSL__
# endif
-# define __SUBSURFACE__
# define __PRINCIPLED__
+# define __SUBSURFACE__
# define __CMJ__
# define __VOLUME__
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
-# ifndef __SPLIT_KERNEL__
-# define __VOLUME_DECOUPLED__
-# define __VOLUME_RECORD_ALL__
-# endif
+# define __VOLUME_DECOUPLED__
+# define __VOLUME_RECORD_ALL__
#endif /* __KERNEL_CPU__ */
#ifdef __KERNEL_CUDA__
@@ -138,6 +134,7 @@ CCL_NAMESPACE_BEGIN
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
# define __CMJ__
+# define __BRANCHED_PATH__
# endif /* __KERNEL_OPENCL_AMD__ */
# ifdef __KERNEL_OPENCL_INTEL_CPU__
@@ -1300,7 +1297,6 @@ typedef ccl_addr_space struct DebugData {
* Queue 3 - Shadow ray cast kernel - AO
* Queeu 4 - Shadow ray cast kernel - direct lighting
*/
-#define NUM_QUEUES 4
/* Queue names */
enum QueueNumber {
@@ -1313,22 +1309,37 @@ enum QueueNumber {
* 3. Rays to be regenerated
* are enqueued here.
*/
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS = 1,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
/* All rays for which a shadow ray should be cast to determine radiance
* contribution for AO are enqueued here.
*/
- QUEUE_SHADOW_RAY_CAST_AO_RAYS = 2,
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
/* All rays for which a shadow ray should be cast to determine radiance
* contributing for direct lighting are enqueued here.
*/
- QUEUE_SHADOW_RAY_CAST_DL_RAYS = 3,
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+
+#ifdef __BRANCHED_PATH__
+ /* All rays moving to next iteration of the indirect loop for light */
+ QUEUE_LIGHT_INDIRECT_ITER,
+# ifdef __VOLUME__
+ /* All rays moving to next iteration of the indirect loop for volumes */
+ QUEUE_VOLUME_INDIRECT_ITER,
+# endif
+# ifdef __SUBSURFACE__
+ /* All rays moving to next iteration of the indirect loop for subsurface */
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+# endif
+#endif /* __BRANCHED_PATH__ */
+
+ NUM_QUEUES
};
-/* We use RAY_STATE_MASK to get ray_state (enums 0 to 5) */
-#define RAY_STATE_MASK 0x007
-#define RAY_FLAG_MASK 0x0F8
+/* We use RAY_STATE_MASK to get ray_state */
+#define RAY_STATE_MASK 0x0F
+#define RAY_FLAG_MASK 0xF0
enum RayState {
RAY_INVALID = 0,
/* Denotes ray is actively involved in path-iteration. */
@@ -1343,14 +1354,22 @@ enum RayState {
RAY_TO_REGENERATE,
/* Denotes ray has been regenerated */
RAY_REGENERATED,
- /* Flag's ray has to execute shadow blocked function in AO part */
- RAY_SHADOW_RAY_CAST_AO = 16,
- /* Flag's ray has to execute shadow blocked function in direct lighting part. */
- RAY_SHADOW_RAY_CAST_DL = 32,
+ /* Denotes ray is moving to next iteration of the branched indirect loop */
+ RAY_LIGHT_INDIRECT_NEXT_ITER,
+ RAY_VOLUME_INDIRECT_NEXT_ITER,
+ RAY_SUBSURFACE_INDIRECT_NEXT_ITER,
+
+ /* Ray flags */
+
+ /* Flags to denote that the ray is currently evaluating the branched indirect loop */
+ RAY_BRANCHED_LIGHT_INDIRECT = (1 << 4),
+ RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5),
+ RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6),
+ RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT),
};
#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
-#define IS_STATE(ray_state, ray_index, state) ((ray_state[ray_index] & RAY_STATE_MASK) == state)
+#define IS_STATE(ray_state, ray_index, state) ((ray_index) != QUEUE_EMPTY_SLOT && ((ray_state)[(ray_index)] & RAY_STATE_MASK) == (state))
#define ADD_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] | flag))
#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
#define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 148b2eef568..96f54bb427e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -183,7 +183,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index a679eff8409..585b91876a9 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -110,7 +110,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
index 7a1838e485f..99b74a1802b 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
@@ -22,6 +22,5 @@ __kernel void kernel_ocl_path_trace_subsurface_scatter(
ccl_global char *kg,
ccl_constant KernelData *data)
{
- ccl_local unsigned int local_queue_atomics;
- kernel_subsurface_scatter((KernelGlobals*)kg, &local_queue_atomics);
+ kernel_subsurface_scatter((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
new file mode 100644
index 00000000000..c7bc1b4df0a
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __BRANCHED_PATH__
+
+/* sets up the various state needed to do an indirect loop */
+ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ /* save a copy of the state to restore later */
+#define BRANCHED_STORE(name) \
+ branched_state->name = kernel_split_state.name[ray_index];
+
+ BRANCHED_STORE(path_state);
+ BRANCHED_STORE(throughput);
+ BRANCHED_STORE(ray);
+ BRANCHED_STORE(sd);
+ BRANCHED_STORE(isect);
+ BRANCHED_STORE(ray_state);
+
+#undef BRANCHED_STORE
+
+ /* set loop counters to initial position */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = 0;
+}
+
+/* ends an indirect loop and restores the previous state */
+ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ /* restore state */
+#define BRANCHED_RESTORE(name) \
+ kernel_split_state.name[ray_index] = branched_state->name;
+
+ BRANCHED_RESTORE(path_state);
+ BRANCHED_RESTORE(throughput);
+ BRANCHED_RESTORE(ray);
+ BRANCHED_RESTORE(sd);
+ BRANCHED_RESTORE(isect);
+ BRANCHED_RESTORE(ray_state);
+
+#undef BRANCHED_RESTORE
+
+ /* leave indirect loop */
+ REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
+}
+
+/* bounce off surface and integrate indirect light */
+ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg,
+ int ray_index,
+ float num_samples_adjust,
+ ShaderData *saved_sd,
+ bool reset_path_state)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = saved_sd;
+ RNG rng = kernel_split_state.rng[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ float3 throughput = branched_state->throughput;
+
+ for(int i = branched_state->next_closure; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ if(!CLOSURE_IS_BSDF(sc->type))
+ continue;
+ /* transparency is not handled here, but in outer loop */
+ if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
+ continue;
+
+ int num_samples;
+
+ if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+ num_samples = kernel_data.integrator.diffuse_samples;
+ else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
+ num_samples = 1;
+ else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
+ num_samples = kernel_data.integrator.glossy_samples;
+ else
+ num_samples = kernel_data.integrator.transmission_samples;
+
+ num_samples = ceil_to_int(num_samples_adjust*num_samples);
+
+ float num_samples_inv = num_samples_adjust/num_samples;
+ RNG bsdf_rng = cmj_hash(rng, i);
+
+ for(int j = branched_state->next_sample; j < num_samples; j++) {
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+ if(reset_path_state) {
+ *ps = branched_state->path_state;
+ }
+
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = throughput;
+
+ ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
+
+ if(!kernel_branched_path_surface_bounce(kg,
+ &bsdf_rng,
+ sd,
+ sc,
+ j,
+ num_samples,
+ tp,
+ ps,
+ L,
+ bsdf_ray))
+ {
+ continue;
+ }
+
+ /* update state for next iteration */
+ branched_state->next_closure = i;
+ branched_state->next_sample = j+1;
+ branched_state->num_samples = num_samples;
+
+ /* start the indirect path */
+ *tp *= num_samples_inv;
+
+ return true;
+ }
+
+ branched_state->next_sample = 0;
+ }
+
+ return false;
+}
+
+#endif /* __BRANCHED_PATH__ */
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 9d3d01fff75..642ccac8239 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -105,21 +105,16 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
/* Initialize queue data and queue index. */
if(thread_index < queuesize) {
- /* Initialize active ray queue. */
- kernel_split_state.queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize background and buffer update queue. */
- kernel_split_state.queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize shadow ray cast of AO queue. */
- kernel_split_state.queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize shadow ray cast of direct lighting queue. */
- kernel_split_state.queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ for(int i = 0; i < NUM_QUEUES; i++) {
+ kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ }
}
if(thread_index == 0) {
- Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
- Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
+ for(int i = 0; i < NUM_QUEUES; i++) {
+ Queue_index[i] = 0;
+ }
+
/* The scene-intersect kernel should not use the queues very first time.
* since the queue would be empty.
*/
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index bdbf7387b95..3336c968a44 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -56,23 +56,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
kernel_split_params.queue_size,
0);
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
-
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
@@ -80,25 +63,24 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
/* direct lighting */
#ifdef __EMISSION__
RNG rng = kernel_split_state.rng[ray_index];
+
bool flag = (kernel_data.integrator.use_direct_light &&
(sd->flag & SD_BSDF_HAS_EVAL));
+
+# ifdef __BRANCHED_PATH__
+ if(flag && kernel_data.integrator.branched) {
+ flag = false;
+ enqueue_flag = 1;
+ }
+# endif /* __BRANCHED_PATH__ */
+
# ifdef __SHADOW_TRICKS__
if(flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
flag = false;
- ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- kernel_branched_path_surface_connect_light(kg,
- &rng,
- sd,
- emission_sd,
- state,
- throughput,
- 1.0f,
- L,
- 1);
+ enqueue_flag = 1;
}
# endif /* __SHADOW_TRICKS__ */
+
if(flag) {
/* Sample illumination from lights to find path contribution. */
float light_t = path_state_rng_1D(kg, &rng, state, PRNG_LIGHT);
@@ -129,7 +111,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
kernel_split_state.bsdf_eval[ray_index] = L_light;
kernel_split_state.is_lamp[ray_index] = is_lamp;
/* Mark ray state for next shadow kernel. */
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
enqueue_flag = 1;
}
}
@@ -138,10 +119,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
#endif /* __EMISSION__ */
}
-#ifndef __COMPUTE_DEVICE_GPU__
- }
-#endif
-
#ifdef __EMISSION__
/* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
enqueue_ray_index_local(ray_index,
@@ -152,6 +129,27 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
#endif
+
+#ifdef __BRANCHED_PATH__
+ /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays
+ * this is the last kernel before next_iteration_setup that uses local atomics so we do this here
+ */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(ray_index,
+ QUEUE_LIGHT_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 47d3c280831..182e6c6e4fa 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -16,6 +16,81 @@
CCL_NAMESPACE_BEGIN
+#if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
+
+ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index)
+{
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
+}
+
+ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+
+ /* GPU: no decoupled ray marching, scatter probabilistically */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f/num_samples;
+
+ Ray volume_ray = branched_state->ray;
+ volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
+
+ for(int j = branched_state->next_sample; j < num_samples; j++) {
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+ *ps = branched_state->path_state;
+
+ ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
+ *pray = branched_state->ray;
+
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = branched_state->throughput * num_samples_inv;
+
+ /* branch RNG state */
+ path_state_branch(ps, j, num_samples);
+
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, ps, sd, &volume_ray, L, tp, &rng, heterogeneous);
+
+# ifdef __VOLUME_SCATTER__
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *tp, &branched_state->path_state, L);
+
+ /* indirect light bounce */
+ if(!kernel_path_volume_bounce(kg, &rng, sd, tp, ps, L, pray)) {
+ continue;
+ }
+
+ /* start the indirect path */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = j+1;
+ branched_state->num_samples = num_samples;
+
+ return true;
+ }
+# endif
+ }
+
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ /* todo: avoid this calculation using decoupled ray marching */
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
+ kernel_split_state.throughput[ray_index] = throughput;
+
+ return false;
+}
+
+#endif /* __BRANCHED_PATH__ && __VOLUME__ */
ccl_device void kernel_do_volume(KernelGlobals *kg)
{
@@ -23,37 +98,37 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
/* We will empty this queue in this kernel. */
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+# ifdef __BRANCHED_PATH__
+ kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
+# endif /* __BRANCHED_PATH__ */
}
- /* Fetch use_queues_flag. */
- char local_use_queues_flag = *kernel_split_params.use_queues_flag;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(local_use_queues_flag) {
+
+ if(*kernel_split_params.use_queues_flag) {
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
1);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
}
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ ccl_global char *ray_state = kernel_split_state.ray_state;
- bool hit = ! IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
- ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
- ShaderData *sd_input = &kernel_split_state.sd_DL_shadow[ray_index];
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+
+ bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
/* Sanitize volume stack. */
if(!hit) {
@@ -64,31 +139,68 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
Ray volume_ray = *ray;
volume_ray.t = (hit)? isect->t: FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+# ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+# endif /* __BRANCHED_PATH__ */
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
- {
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, &rng, heterogeneous);
+ {
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, state, sd, &volume_ray, L, throughput, &rng, heterogeneous);
# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, &rng, sd, sd_input, *throughput, state, L);
-
- /* indirect light bounce */
- if(kernel_path_volume_bounce(kg, &rng, sd, throughput, state, L, ray))
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED);
- else
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER);
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *throughput, state, L);
+
+ /* indirect light bounce */
+ if(kernel_path_volume_bounce(kg, &rng, sd, throughput, state, L, ray)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+# endif /* __VOLUME_SCATTER__ */
+ }
+
+# ifdef __BRANCHED_PATH__
+ }
+ else {
+ kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
+
+ if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
-# endif
}
+# endif /* __BRANCHED_PATH__ */
}
+
kernel_split_state.rng[ray_index] = rng;
}
-#endif
+# ifdef __BRANCHED_PATH__
+ /* iter loop */
+ ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_VOLUME_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+#endif /* __VOLUME__ */
}
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index 89adeb64c8a..87498910d38 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -52,6 +52,7 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with
* flag RAY_SHADOW_RAY_CAST_AO
*/
+
ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
KernelGlobals *kg,
ccl_local_param BackgroundAOLocals *locals)
@@ -62,8 +63,9 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
+#ifdef __AO__
char enqueue_flag = 0;
- char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
+#endif
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
@@ -155,8 +157,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
kernel_split_state.L_transparent[ray_index] += average(holdout_weight*throughput);
}
if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ kernel_split_path_end(kg, ray_index);
}
}
#endif /* __HOLDOUT__ */
@@ -164,18 +165,31 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- /* Holdout mask objects do not write data passes. */
- kernel_write_data_passes(kg,
- buffer,
- L,
- sd,
- sample,
- state,
- throughput);
+
+#ifdef __BRANCHED_PATH__
+ if(!IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))
+#endif /* __BRANCHED_PATH__ */
+ {
+ /* Holdout mask objects do not write data passes. */
+ kernel_write_data_passes(kg,
+ buffer,
+ L,
+ sd,
+ sample,
+ state,
+ throughput);
+ }
+
/* Blurring of bsdf after bounces, for rays that have a small likelihood
* of following this particular path (diffuse, rough glossy.
*/
- if(kernel_data.integrator.filter_glossy != FLT_MAX) {
+#ifndef __BRANCHED_PATH__
+ if(kernel_data.integrator.filter_glossy != FLT_MAX)
+#else
+ if(kernel_data.integrator.filter_glossy != FLT_MAX &&
+ (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)))
+#endif /* __BRANCHED_PATH__ */
+ {
float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
if(blur_pdf < 1.0f) {
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
@@ -201,19 +215,32 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
* mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate.
*/
+#ifndef __BRANCHED_PATH__
float probability = path_state_terminate_probability(kg, state, throughput);
+#else
+ float probability = 1.0f;
+
+ if(!kernel_data.integrator.branched) {
+ probability = path_state_terminate_probability(kg, state, throughput);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ int num_samples = kernel_split_state.branched_state[ray_index].num_samples;
+ probability = path_state_terminate_probability(kg, state, throughput*num_samples);
+ }
+ else if(state->flag & PATH_RAY_TRANSPARENT) {
+ probability = path_state_terminate_probability(kg, state, throughput);
+ }
+#endif
if(probability == 0.0f) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ kernel_split_path_end(kg, ray_index);
}
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
if(probability != 1.0f) {
float terminate = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_TERMINATE);
if(terminate >= probability) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ kernel_split_path_end(kg, ray_index);
}
else {
kernel_split_state.throughput[ray_index] = throughput/probability;
@@ -225,61 +252,23 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
#ifdef __AO__
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
/* ambient occlusion */
- if(kernel_data.integrator.use_ambient_occlusion ||
- (sd->flag & SD_AO))
- {
- /* todo: solve correlation */
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, &rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
- float ao_factor = kernel_data.background.ao_factor;
- float3 ao_N;
- kernel_split_state.ao_bsdf[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
- kernel_split_state.ao_alpha[ray_index] = shader_bsdf_alpha(kg, sd);
-
- float3 ao_D;
- float ao_pdf;
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
- if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
- Ray _ray;
- _ray.P = ray_offset(sd->P, sd->Ng);
- _ray.D = ao_D;
- _ray.t = kernel_data.background.ao_distance;
-#ifdef __OBJECT_MOTION__
- _ray.time = sd->time;
-#endif
- _ray.dP = sd->dP;
- _ray.dD = differential3_zero();
- kernel_split_state.ao_light_ray[ray_index] = _ray;
-
- ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
- enqueue_flag_AO_SHADOW_RAY_CAST = 1;
- }
+ if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
+ enqueue_flag = 1;
}
}
#endif /* __AO__ */
- kernel_split_state.rng[ray_index] = rng;
+ kernel_split_state.rng[ray_index] = rng;
#ifndef __COMPUTE_DEVICE_GPU__
}
#endif
- /* Enqueue RAY_UPDATE_BUFFER rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- &locals->queue_atomics_bg,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-
#ifdef __AO__
/* Enqueue to-shadow-ray-cast rays. */
enqueue_ray_index_local(ray_index,
QUEUE_SHADOW_RAY_CAST_AO_RAYS,
- enqueue_flag_AO_SHADOW_RAY_CAST,
+ enqueue_flag,
kernel_split_params.queue_size,
&locals->queue_atomics_ao,
kernel_split_state.queue_data,
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index 8192528622e..6fbc888e358 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -34,7 +34,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
if(state->bounce > kernel_data.integrator.ao_bounces) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ kernel_split_path_end(kg, ray_index);
}
}
}
@@ -63,7 +63,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
#ifdef __PASSES__
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
#endif
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ kernel_split_path_end(kg, ray_index);
}
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
@@ -72,7 +72,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
path_radiance_accum_background(L, state, (*throughput), L_background);
#endif
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ kernel_split_path_end(kg, ray_index);
}
}
diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
index a56e85abeb9..82bc2f01fd7 100644
--- a/intern/cycles/kernel/split/kernel_indirect_subsurface.h
+++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
@@ -49,26 +49,29 @@ ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- kernel_path_subsurface_accum_indirect(ss_indirect, L);
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched) {
+#endif
+ if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ kernel_path_subsurface_accum_indirect(ss_indirect, L);
- /* Trace indirect subsurface rays by restarting the loop. this uses less
- * stack memory than invoking kernel_path_indirect.
- */
- if(ss_indirect->num_rays) {
- kernel_path_subsurface_setup_indirect(kg,
- ss_indirect,
- state,
- ray,
- L,
- throughput);
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ /* Trace indirect subsurface rays by restarting the loop. this uses less
+ * stack memory than invoking kernel_path_indirect.
+ */
+ if(ss_indirect->num_rays) {
+ kernel_path_subsurface_setup_indirect(kg,
+ ss_indirect,
+ state,
+ ray,
+ L,
+ throughput);
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
}
+#ifdef __BRANCHED_PATH__
}
+#endif
#endif /* __SUBSURFACE__ */
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index 1bebc16e25b..71017fed19e 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -44,6 +44,52 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
* RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays.
*/
+
+#ifdef __BRANCHED_PATH__
+ccl_device_inline void kernel_split_branched_indirect_light_init(KernelGlobals *kg, int ray_index)
+{
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT);
+}
+
+ccl_device void kernel_split_branched_indirect_light_end(KernelGlobals *kg, int ray_index)
+{
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+
+ /* continue in case of transparency */
+ *throughput *= shader_bsdf_transparency(kg, sd);
+
+ if(is_zero(*throughput)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else {
+ /* Update Path State */
+ state->flag |= PATH_RAY_TRANSPARENT;
+ state->transparent_bounce++;
+
+ ray->P = ray_offset(sd->P, -sd->Ng);
+ ray->t -= sd->ray_length; /* clipping works through transparent */
+
+# ifdef __RAY_DIFFERENTIALS__
+ ray->dP = sd->dP;
+ ray->dD.dx = -sd->dI.dx;
+ ray->dD.dy = -sd->dI.dy;
+# endif /* __RAY_DIFFERENTIALS__ */
+
+# ifdef __VOLUME__
+ /* enter/exit volume */
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+# endif /* __VOLUME__ */
+ }
+}
+#endif /* __BRANCHED_PATH__ */
+
ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
@@ -67,7 +113,6 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
}
- char enqueue_flag = 0;
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
@@ -75,102 +120,125 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
kernel_split_params.queue_size,
0);
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
-
- /* Load ShaderData structure. */
- PathRadiance *L = NULL;
- ccl_global PathState *state = NULL;
ccl_global char *ray_state = kernel_split_state.ray_state;
- /* Path radiance update for AO/Direct_lighting's shadow blocked. */
- if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
- IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
- {
- state = &kernel_split_state.path_state[ray_index];
- L = &kernel_split_state.path_radiance[ray_index];
- float3 _throughput = kernel_split_state.throughput[ray_index];
-
- if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) {
- float3 shadow = kernel_split_state.ao_light_ray[ray_index].P;
- // TODO(mai): investigate correctness here
- char update_path_radiance = (char)kernel_split_state.ao_light_ray[ray_index].t;
- if(update_path_radiance) {
- path_radiance_accum_ao(L,
- _throughput,
- kernel_split_state.ao_alpha[ray_index],
- kernel_split_state.ao_bsdf[ray_index],
- shadow,
- state->bounce);
- }
- else {
- path_radiance_accum_total_ao(L, _throughput, kernel_split_state.ao_bsdf[ray_index]);
+ bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE);
+ if(active) {
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+#endif
+ /* Compute direct lighting and next bounce. */
+ if(!kernel_path_surface_bounce(kg, &rng, sd, throughput, state, L, ray)) {
+ kernel_split_path_end(kg, ray_index);
}
- REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
+#ifdef __BRANCHED_PATH__
}
-
- if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL)) {
- float3 shadow = kernel_split_state.light_ray[ray_index].P;
- // TODO(mai): investigate correctness here
- char update_path_radiance = (char)kernel_split_state.light_ray[ray_index].t;
- BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
- if(update_path_radiance) {
- path_radiance_accum_light(L,
- _throughput,
- &L_light,
- shadow,
- 1.0f,
- state->bounce,
- kernel_split_state.is_lamp[ray_index]);
+ else {
+ kernel_split_branched_indirect_light_init(kg, ray_index);
+
+ if(kernel_split_branched_path_surface_indirect_light_iter(kg,
+ ray_index,
+ 1.0f,
+ &kernel_split_state.branched_state[ray_index].sd,
+ true))
+ {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
else {
- path_radiance_accum_total_light(L, _throughput, &L_light);
+ kernel_split_branched_indirect_light_end(kg, ray_index);
}
- REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
}
+#endif /* __BRANCHED_PATH__ */
+
+ kernel_split_state.rng[ray_index] = rng;
}
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
- state = &kernel_split_state.path_state[ray_index];
- L = &kernel_split_state.path_radiance[ray_index];
+ /* Enqueue RAY_UPDATE_BUFFER rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+#ifdef __BRANCHED_PATH__
+ /* iter loop */
+ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0;
+ }
- /* Compute direct lighting and next bounce. */
- if(!kernel_path_surface_bounce(kg, &rng, &kernel_split_state.sd[ray_index], throughput, state, L, ray)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_LIGHT_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+
+ if(kernel_split_branched_path_surface_indirect_light_iter(kg,
+ ray_index,
+ 1.0f,
+ &kernel_split_state.branched_state[ray_index].sd,
+ true))
+ {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_branched_indirect_light_end(kg, ray_index);
}
- kernel_split_state.rng[ray_index] = rng;
}
-#ifndef __COMPUTE_DEVICE_GPU__
+# ifdef __VOLUME__
+ /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
}
-#endif
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- /* Enqueue RAY_UPDATE_BUFFER rays. */
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
+ QUEUE_VOLUME_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+# endif /* __VOLUME__ */
+
+# ifdef __SUBSURFACE__
+ /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
kernel_split_params.queue_size,
local_queue_atomics,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
+# endif /* __SUBSURFACE__ */
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 0f1696e34a0..957d70958d9 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -38,8 +38,10 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg,
kernel_split_params.queue_size,
0);
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
char enqueue_flag = 0;
- if((ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
+ if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
enqueue_flag = 1;
}
@@ -52,7 +54,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg,
kernel_split_params.queue_index);
/* Continue on with shader evaluation. */
- if((ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
Intersection isect = kernel_split_state.isect[ray_index];
RNG rng = kernel_split_state.rng[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
@@ -62,8 +64,27 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg,
&kernel_split_state.sd[ray_index],
&isect,
&ray);
+
+#ifndef __BRANCHED_PATH__
float rbsdf = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_BSDF);
shader_eval_surface(kg, &kernel_split_state.sd[ray_index], &rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
+#else
+ ShaderContext ctx = SHADER_CONTEXT_MAIN;
+ float rbsdf = 0.0f;
+
+ if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ rbsdf = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_BSDF);
+
+ }
+
+ if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ ctx = SHADER_CONTEXT_INDIRECT;
+ }
+
+ shader_eval_surface(kg, &kernel_split_state.sd[ray_index], &rng, state, rbsdf, state->flag, ctx);
+ shader_merge_closures(&kernel_split_state.sd[ray_index]);
+#endif /* __BRANCHED_PATH__ */
+
kernel_split_state.rng[ray_index] = rng;
}
}
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index 4243e18de72..474286285a9 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -29,31 +29,29 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
}
- if(ray_index == QUEUE_EMPTY_SLOT)
+ if(ray_index == QUEUE_EMPTY_SLOT) {
return;
+ }
- /* Flag determining if we need to update L. */
- char update_path_radiance = 0;
-
- if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ccl_global Ray *light_ray_global = &kernel_split_state.ao_light_ray[ray_index];
-
- float3 shadow;
- Ray ray = *light_ray_global;
- update_path_radiance = !(shadow_blocked(kg,
- &kernel_split_state.sd_DL_shadow[ray_index],
- state,
- &ray,
- &shadow));
-
- *light_ray_global = ray;
- /* We use light_ray_global's P and t to store shadow and
- * update_path_radiance.
- */
- light_ray_global->P = shadow;
- light_ray_global->t = update_path_radiance;
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
+
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+#endif
+ kernel_path_ao(kg, sd, emission_sd, L, state, &rng, throughput, shader_bsdf_alpha(kg, sd));
+#ifdef __BRANCHED_PATH__
+ }
+ else {
+ kernel_branched_path_ao(kg, sd, emission_sd, L, state, &rng, throughput);
}
+#endif
+
+ kernel_split_state.rng[ray_index] = rng;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index bb8f0157965..452b6e45a36 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -32,28 +32,71 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
if(ray_index == QUEUE_EMPTY_SLOT)
return;
- /* Flag determining if we need to update L. */
- char update_path_radiance = 0;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ Ray ray = kernel_split_state.light_ray[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
- if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ccl_global Ray *light_ray_global = &kernel_split_state.light_ray[ray_index];
+ BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ bool is_lamp = kernel_split_state.is_lamp[ray_index];
+# if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
+ bool use_branched = false;
+ int all = 0;
+
+ if(state->flag & PATH_RAY_SHADOW_CATCHER) {
+ use_branched = true;
+ all = 1;
+ }
+# if defined(__BRANCHED_PATH__)
+ else if(kernel_data.integrator.branched) {
+ use_branched = true;
+
+ if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ all = (kernel_data.integrator.sample_all_lights_indirect);
+ }
+ else
+ {
+ all = (kernel_data.integrator.sample_all_lights_direct);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+ if(use_branched) {
+ kernel_branched_path_surface_connect_light(kg,
+ &rng,
+ sd,
+ emission_sd,
+ state,
+ throughput,
+ 1.0f,
+ L,
+ all);
+ }
+ else
+# endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)*/
+ {
+ /* trace shadow ray */
float3 shadow;
- Ray ray = *light_ray_global;
- update_path_radiance = !(shadow_blocked(kg,
- &kernel_split_state.sd_DL_shadow[ray_index],
- state,
- &ray,
- &shadow));
-
- *light_ray_global = ray;
- /* We use light_ray_global's P and t to store shadow and
- * update_path_radiance.
- */
- light_ray_global->P = shadow;
- light_ray_global->t = update_path_radiance;
+
+ if(!shadow_blocked(kg,
+ emission_sd,
+ state,
+ &ray,
+ &shadow))
+ {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ }
+ else {
+ path_radiance_accum_total_light(L, throughput, &L_light);
+ }
}
+
+ kernel_split_state.rng[ray_index] = rng;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 4303ba0a905..57f070d51e0 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -37,41 +37,42 @@
#include "util/util_atomic.h"
-#include "kernel/kernel_random.h"
-#include "kernel/kernel_projection.h"
-#include "kernel/kernel_montecarlo.h"
-#include "kernel/kernel_differential.h"
-#include "kernel/kernel_camera.h"
-
-#include "kernel/geom/geom.h"
-#include "kernel/bvh/bvh.h"
-
-#include "kernel/kernel_accumulate.h"
-#include "kernel/kernel_shader.h"
-#include "kernel/kernel_light.h"
-#include "kernel/kernel_passes.h"
-
-#ifdef __SUBSURFACE__
-# include "kernel/kernel_subsurface.h"
+#include "kernel/kernel_path.h"
+#ifdef __BRANCHED_PATH__
+# include "kernel/kernel_path_branched.h"
#endif
-#ifdef __VOLUME__
-# include "kernel/kernel_volume.h"
-#endif
+#include "kernel/kernel_queues.h"
+#include "kernel/kernel_work_stealing.h"
-#include "kernel/kernel_path_state.h"
-#include "kernel/kernel_shadow.h"
-#include "kernel/kernel_emission.h"
-#include "kernel/kernel_path_common.h"
-#include "kernel/kernel_path_surface.h"
-#include "kernel/kernel_path_volume.h"
-#include "kernel/kernel_path_subsurface.h"
+#ifdef __BRANCHED_PATH__
+# include "kernel/split/kernel_branched.h"
+#endif
-#ifdef __KERNEL_DEBUG__
-# include "kernel/kernel_debug.h"
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
+{
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+#ifdef __BRANCHED_PATH__
+ if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER);
+ }
+ else {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+#else
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
#endif
+}
-#include "kernel/kernel_queues.h"
-#include "kernel/kernel_work_stealing.h"
+CCL_NAMESPACE_END
#endif /* __KERNEL_SPLIT_H__ */
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 0af8bfc89d5..a2cb4f6ae98 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -62,7 +62,46 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(DebugData, debug_data, 1)
#else
# define SPLIT_DATA_DEBUG_ENTRIES
-#endif
+#endif /* DEBUG */
+
+#ifdef __BRANCHED_PATH__
+
+typedef ccl_global struct SplitBranchedState {
+ /* various state that must be kept and restored after an indirect loop */
+ PathState path_state;
+ float3 throughput;
+ Ray ray;
+
+ struct ShaderData sd;
+ Intersection isect;
+
+ char ray_state;
+
+ /* indirect loop state */
+ int next_closure;
+ int next_sample;
+ int num_samples;
+
+#ifdef __SUBSURFACE__
+ int ss_next_closure;
+ int ss_next_sample;
+ int next_hit;
+ int num_hits;
+
+ uint lcg_state;
+ SubsurfaceIntersection ss_isect;
+
+# ifdef __VOLUME__
+ VolumeStack volume_stack[VOLUME_STACK_SIZE];
+# endif /* __VOLUME__ */
+#endif /*__SUBSURFACE__ */
+} SplitBranchedState;
+
+#define SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY( SplitBranchedState, branched_state, 1)
+#else
+#define SPLIT_DATA_BRANCHED_ENTRIES
+#endif /* __BRANCHED_PATH__ */
#define SPLIT_DATA_ENTRIES \
SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \
@@ -72,9 +111,6 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
- SPLIT_DATA_ENTRY(ccl_global float3, ao_alpha, 1) \
- SPLIT_DATA_ENTRY(ccl_global float3, ao_bsdf, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, ao_light_ray, 1) \
SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
@@ -82,6 +118,7 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
+ SPLIT_DATA_BRANCHED_ENTRIES \
SPLIT_DATA_DEBUG_ENTRIES \
/* struct that holds pointers to data in the shared state buffer */
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 0b4d50c70ee..8364f185d75 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -16,42 +16,206 @@
CCL_NAMESPACE_BEGIN
+#if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__)
-ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
- ccl_local_param unsigned int* local_queue_atomics)
+ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, int ray_index)
{
-#ifdef __SUBSURFACE__
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ branched_state->ss_next_closure = 0;
+ branched_state->ss_next_sample = 0;
+
+ branched_state->num_hits = 0;
+ branched_state->next_hit = 0;
+
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
+}
+
+ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = &branched_state->sd;
+ RNG rng = kernel_split_state.rng[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+
+ for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+
+ if(!CLOSURE_IS_BSSRDF(sc->type))
+ continue;
+
+ /* set up random number generator */
+ if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
+ branched_state->next_closure == 0 && branched_state->next_sample == 0)
+ {
+ branched_state->lcg_state = lcg_state_init(&rng,
+ branched_state->path_state.rng_offset,
+ branched_state->path_state.sample,
+ 0x68bc21eb);
+ }
+ int num_samples = kernel_data.integrator.subsurface_samples;
+ float num_samples_inv = 1.0f/num_samples;
+ RNG bssrdf_rng = cmj_hash(rng, i);
+
+ /* do subsurface scatter step with copy of shader data, this will
+ * replace the BSSRDF with a diffuse BSDF closure */
+ for(int j = branched_state->ss_next_sample; j < num_samples; j++) {
+ ccl_global SubsurfaceIntersection *ss_isect = &branched_state->ss_isect;
+ float bssrdf_u, bssrdf_v;
+ path_branched_rng_2D(kg,
+ &bssrdf_rng,
+ &branched_state->path_state,
+ j,
+ num_samples,
+ PRNG_BSDF_U,
+ &bssrdf_u,
+ &bssrdf_v);
+
+ /* intersection is expensive so avoid doing multiple times for the same input */
+ if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ RNG lcg_state = branched_state->lcg_state;
+ SubsurfaceIntersection ss_isect_private;
+
+ branched_state->num_hits = subsurface_scatter_multi_intersect(kg,
+ &ss_isect_private,
+ sd,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ true);
+
+ branched_state->lcg_state = lcg_state;
+ *ss_isect = ss_isect_private;
+ }
+
+#ifdef __VOLUME__
+ Ray volume_ray = branched_state->ray;
+ bool need_update_volume_stack =
+ kernel_data.integrator.use_volumes &&
+ sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
+#endif /* __VOLUME__ */
+
+ /* compute lighting with the BSDF closure */
+ for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
+ ShaderData *bssrdf_sd = &kernel_split_state.sd[ray_index];
+ *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
+ * important as the indirect path will write into bssrdf_sd */
+
+ SubsurfaceIntersection ss_isect_private = *ss_isect;
+ subsurface_scatter_multi_setup(kg,
+ &ss_isect_private,
+ hit,
+ bssrdf_sd,
+ &branched_state->path_state,
+ branched_state->path_state.flag,
+ sc,
+ true);
+ *ss_isect = ss_isect_private;
+
+ ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
+ *hit_state = branched_state->path_state;
+
+ path_state_branch(hit_state, j, num_samples);
+
+#ifdef __VOLUME__
+ if(need_update_volume_stack) {
+ /* Setup ray from previous surface point to the new one. */
+ float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
+ volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
+
+ /* this next part is expensive as it does scene intersection so only do once */
+ if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ branched_state->volume_stack[k] = hit_state->volume_stack[k];
+ }
+
+ kernel_volume_stack_update_for_subsurface(kg,
+ emission_sd,
+ &volume_ray,
+ branched_state->volume_stack);
+ }
+
+ for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ hit_state->volume_stack[k] = branched_state->volume_stack[k];
+ }
+ }
+#endif /* __VOLUME__ */
+
+#ifdef __EMISSION__
+ if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ /* direct light */
+ if(kernel_data.integrator.use_direct_light) {
+ int all = (kernel_data.integrator.sample_all_lights_direct) ||
+ (branched_state->path_state.flag & PATH_RAY_SHADOW_CATCHER);
+ kernel_branched_path_surface_connect_light(kg,
+ &rng,
+ bssrdf_sd,
+ emission_sd,
+ hit_state,
+ branched_state->throughput,
+ num_samples_inv,
+ L,
+ all);
+ }
+ }
+#endif /* __EMISSION__ */
+
+ /* indirect light */
+ if(kernel_split_branched_path_surface_indirect_light_iter(kg,
+ ray_index,
+ num_samples_inv,
+ bssrdf_sd,
+ false))
+ {
+ branched_state->ss_next_closure = i;
+ branched_state->ss_next_sample = j;
+ branched_state->next_hit = hit;
+
+ return true;
+ }
+
+ branched_state->next_closure = 0;
+ }
+
+ branched_state->next_hit = 0;
+ }
+
+ branched_state->ss_next_sample = 0;
+ }
+
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ return false;
+}
+
+#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
+
+ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
+{
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if(thread_index == 0) {
+ /* We will empty both queues in this kernel. */
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
}
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
- 0);
-
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
- char enqueue_flag = 0;
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
+ 1);
+ get_ray_index(kg, thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+#ifdef __SUBSURFACE__
ccl_global char *ray_state = kernel_split_state.ray_state;
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
@@ -64,34 +228,85 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
if(sd->flag & SD_BSSRDF) {
- if(kernel_path_subsurface_scatter(kg,
- sd,
- emission_sd,
- L,
- state,
- &rng,
- ray,
- throughput,
- ss_indirect)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched) {
+#endif
+ if(kernel_path_subsurface_scatter(kg,
+ sd,
+ emission_sd,
+ L,
+ state,
+ &rng,
+ ray,
+ throughput,
+ ss_indirect)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+#ifdef __BRANCHED_PATH__
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ float bssrdf_probability;
+ ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
+
+ /* modify throughput for picking bssrdf or bsdf */
+ *throughput *= bssrdf_probability;
+
+ /* do bssrdf scatter step if we picked a bssrdf closure */
+ if(sc) {
+ uint lcg_state = lcg_state_init(&rng, state->rng_offset, state->sample, 0x68bc21eb);
+
+ float bssrdf_u, bssrdf_v;
+ path_state_rng_2D(kg,
+ &rng,
+ state,
+ PRNG_BSDF_U,
+ &bssrdf_u, &bssrdf_v);
+ subsurface_scatter_step(kg,
+ sd,
+ state,
+ state->flag,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ false);
+ }
+ }
+ else {
+ kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
+
+ if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
}
+#endif
}
kernel_split_state.rng[ray_index] = rng;
}
-#ifndef __COMPUTE_DEVICE_GPU__
+# ifdef __BRANCHED_PATH__
+ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
}
-#endif
- /* Enqueue RAY_UPDATE_BUFFER rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* iter loop */
+ ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
#endif /* __SUBSURFACE__ */