Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/device/device_split_kernel.cpp1
-rw-r--r--intern/cycles/kernel/kernel_types.h3
-rw-r--r--intern/cycles/kernel/split/kernel_branched.h52
-rw-r--r--intern/cycles/kernel/split/kernel_do_volume.h19
-rw-r--r--intern/cycles/kernel/split/kernel_next_iteration_setup.h2
-rw-r--r--intern/cycles/kernel/split/kernel_scene_intersect.h16
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_dl.h8
-rw-r--r--intern/cycles/kernel/split/kernel_split_common.h15
-rw-r--r--intern/cycles/kernel/split/kernel_split_data_types.h23
-rw-r--r--intern/cycles/kernel/split/kernel_subsurface_scatter.h8
10 files changed, 142 insertions, 5 deletions
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index bb289a51912..d2b3a89fa98 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -259,6 +259,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index c9860e8d181..2c3859bf7c1 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1431,6 +1431,9 @@ enum RayState {
RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5),
RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6),
RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT),
+
+ /* Ray is evaluating an iteration of an indirect loop for another thread */
+ RAY_BRANCHED_INDIRECT_SHARED = (1 << 7),
};
#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index dc74a2ada53..e2762a85fc8 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -63,12 +63,49 @@ ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobal
REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
}
+ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, int ray_index)
+{
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+ int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS,
+ kernel_split_state.queue_data, kernel_split_params.queue_size, kernel_split_params.queue_index);
+
+ if(!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) {
+ return false;
+ }
+
+#define SPLIT_DATA_ENTRY(type, name, num) \
+ kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index];
+ SPLIT_DATA_ENTRIES_BRANCHED_SHARED
+#undef SPLIT_DATA_ENTRY
+
+ kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0;
+ kernel_split_state.branched_state[inactive_ray].original_ray = ray_index;
+ kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray];
+
+ path_radiance_init(inactive_L, kernel_data.film.use_light_pass);
+ inactive_L->direct_throughput = L->direct_throughput;
+ path_radiance_copy_indirect(inactive_L, L);
+
+ ray_state[inactive_ray] = RAY_REGENERATED;
+ ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED);
+ ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT));
+
+ atomic_fetch_and_inc_uint32((ccl_global uint*)&kernel_split_state.branched_state[ray_index].shared_sample_count);
+
+ return true;
+}
+
/* bounce off surface and integrate indirect light */
ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg,
int ray_index,
float num_samples_adjust,
ShaderData *saved_sd,
- bool reset_path_state)
+ bool reset_path_state,
+ bool wait_for_shared)
{
SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
@@ -155,12 +192,25 @@ ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(
/* start the indirect path */
*tp *= num_samples_inv;
+ if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+
return true;
}
branched_state->next_sample = 0;
}
+ branched_state->next_closure = sd->num_closure;
+
+ if(wait_for_shared) {
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if(branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+ }
+
return false;
}
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 694b777f429..9f8dd2392d9 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -75,11 +75,30 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
branched_state->next_sample = j+1;
branched_state->num_samples = num_samples;
+ /* Attempting to share too many samples is slow for volumes as it causes us to
+ * loop here more and have many calls to kernel_volume_integrate which evaluates
+ * shaders. The many expensive shader evaluations cause the work load to become
+ * unbalanced and many threads to become idle in this kernel. Limiting the
+ * number of shared samples here helps quite a lot.
+ */
+ if(branched_state->shared_sample_count < 2) {
+ if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
+ continue;
+ }
+ }
+
return true;
}
# endif
}
+ branched_state->next_sample = num_samples;
+
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if(branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+
kernel_split_branched_path_indirect_loop_end(kg, ray_index);
/* todo: avoid this calculation using decoupled ray marching */
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index 71017fed19e..7758e35fd32 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -147,6 +147,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ray_index,
1.0f,
&kernel_split_state.branched_state[ray_index].sd,
+ true,
true))
{
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
@@ -193,6 +194,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ray_index,
1.0f,
&kernel_split_state.branched_state[ray_index].sd,
+ true,
true))
{
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index 5dc94caec85..45984ca509b 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -43,11 +43,21 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
}
/* All regenerated rays become active here */
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED))
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
+#ifdef __BRANCHED_PATH__
+ if(kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else
+#endif /* __BRANCHED_PATH__ */
+ {
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ }
+ }
- if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE))
+ if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
return;
+ }
#ifdef __KERNEL_DEBUG__
DebugData *debug_data = &kernel_split_state.debug_data[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index 386fbbc4d09..78e61709b01 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -29,6 +29,14 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
}
+#ifdef __BRANCHED_PATH__
+ /* TODO(mai): move this somewhere else? */
+ if(thread_index == 0) {
+ /* Clear QUEUE_INACTIVE_RAYS before next kernel. */
+ kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0;
+ }
+#endif /* __BRANCHED_PATH__ */
+
if(ray_index == QUEUE_EMPTY_SLOT)
return;
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 57f070d51e0..08f0124b529 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -56,7 +56,20 @@ ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
ccl_global char *ray_state = kernel_split_state.ray_state;
#ifdef __BRANCHED_PATH__
- if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
+ if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
+ int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
+
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_accum_sample(orig_ray_L, L, 1);
+
+ atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count);
+
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
}
else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index bb1aca2acbf..4bb2f0d3d80 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -95,6 +95,10 @@ typedef ccl_global struct SplitBranchedState {
VolumeStack volume_stack[VOLUME_STACK_SIZE];
# endif /* __VOLUME__ */
#endif /*__SUBSURFACE__ */
+
+ int shared_sample_count; /* number of branched samples shared with other threads */
+ int original_ray; /* index of original ray when sharing branched samples */
+ bool waiting_on_shared_samples;
} SplitBranchedState;
#define SPLIT_DATA_BRANCHED_ENTRIES \
@@ -137,6 +141,25 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_BRANCHED_ENTRIES \
SPLIT_DATA_DEBUG_ENTRIES \
+/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
+#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
+ SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \
+ SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \
+ SPLIT_DATA_ENTRY(ccl_global float, L_transparent, 1) \
+ SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
+ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
+ SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
+ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
+ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
+ SPLIT_DATA_SUBSURFACE_ENTRIES \
+ SPLIT_DATA_VOLUME_ENTRIES \
+ SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_DEBUG_ENTRIES \
+
/* struct that holds pointers to data in the shared state buffer */
typedef struct SplitData {
#define SPLIT_DATA_ENTRY(type, name, num) type *name;
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 1dffe1b179e..4998714f28c 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -169,6 +169,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
ray_index,
num_samples_inv,
bssrdf_sd,
+ false,
false))
{
branched_state->ss_next_closure = i;
@@ -187,6 +188,13 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
branched_state->ss_next_sample = 0;
}
+ branched_state->ss_next_closure = sd->num_closure;
+
+ branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
+ if(branched_state->waiting_on_shared_samples) {
+ return true;
+ }
+
kernel_split_branched_path_indirect_loop_end(kg, ray_index);
return false;