Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMai Lavelle <mai.lavelle@gmail.com>2017-11-09 08:49:15 +0300
committerMai Lavelle <mai.lavelle@gmail.com>2017-11-09 09:04:06 +0300
commit087331c495b04ebd37903c0dc0e46262354cf026 (patch)
treeef63fbab4859021585d002f4782840d6e91365a2 /intern/cycles/kernel/split
parent6febe6e725381456f39966e0f685da67cfe52bce (diff)
Cycles: Replace __MAX_CLOSURE__ build option with runtime integrator variable
Goal is to reduce OpenCL kernel recompilations. Currently viewport renders are still set to use 64 closures as this seems to be faster and we don't want to cause a performance regression there. Needs to be investigated. Reviewed By: brecht Differential Revision: https://developer.blender.org/D2775
Diffstat (limited to 'intern/cycles/kernel/split')
-rw-r--r--intern/cycles/kernel/split/kernel_branched.h12
-rw-r--r--intern/cycles/kernel/split/kernel_direct_lighting.h2
-rw-r--r--intern/cycles/kernel/split/kernel_do_volume.h4
-rw-r--r--intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h2
-rw-r--r--intern/cycles/kernel/split/kernel_indirect_background.h2
-rw-r--r--intern/cycles/kernel/split/kernel_lamp_emission.h2
-rw-r--r--intern/cycles/kernel/split/kernel_next_iteration_setup.h4
-rw-r--r--intern/cycles/kernel/split/kernel_shader_eval.h6
-rw-r--r--intern/cycles/kernel/split/kernel_shader_setup.h2
-rw-r--r--intern/cycles/kernel/split/kernel_shader_sort.h2
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_ao.h2
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_dl.h2
-rw-r--r--intern/cycles/kernel/split/kernel_split_data.h17
-rw-r--r--intern/cycles/kernel/split/kernel_split_data_types.h14
-rw-r--r--intern/cycles/kernel/split/kernel_subsurface_scatter.h4
15 files changed, 55 insertions, 22 deletions
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index 2313feac089..6456636caaa 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -30,10 +30,14 @@ ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGloba
BRANCHED_STORE(path_state);
BRANCHED_STORE(throughput);
BRANCHED_STORE(ray);
- BRANCHED_STORE(sd);
BRANCHED_STORE(isect);
BRANCHED_STORE(ray_state);
+ branched_state->sd = *kernel_split_sd(sd, ray_index);
+ for(int i = 0; i < branched_state->sd.num_closure; i++) {
+ branched_state->sd.closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
+ }
+
#undef BRANCHED_STORE
/* set loop counters to intial position */
@@ -53,10 +57,14 @@ ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobal
BRANCHED_RESTORE(path_state);
BRANCHED_RESTORE(throughput);
BRANCHED_RESTORE(ray);
- BRANCHED_RESTORE(sd);
BRANCHED_RESTORE(isect);
BRANCHED_RESTORE(ray_state);
+ *kernel_split_sd(sd, ray_index) = branched_state->sd;
+ for(int i = 0; i < branched_state->sd.num_closure; i++) {
+ kernel_split_sd(sd, ray_index)->closure[i] = branched_state->sd.closure[i];
+ }
+
#undef BRANCHED_RESTORE
/* leave indirect loop */
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index 832b0e5b265..ca79602c565 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -58,7 +58,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
/* direct lighting */
#ifdef __EMISSION__
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 02881da6c04..fb5bd3d48dd 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -29,7 +29,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
{
SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
@@ -140,7 +140,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index bc8ca3aa3ca..88919f47c7a 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -94,7 +94,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
float3 throughput;
ccl_global char *ray_state = kernel_split_state.ray_state;
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index 0c894bd3d71..4cf88a02590 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -55,7 +55,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
float3 throughput = kernel_split_state.throughput[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
kernel_path_background(kg, state, ray, throughput, sd, L);
kernel_split_path_end(kg, ray_index);
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index d5099ac66e6..c14f66f664f 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -58,7 +58,7 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
float3 throughput = kernel_split_state.throughput[ray_index];
Ray ray = kernel_split_state.ray[ray_index];
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
}
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index c3373174582..bb6bf1cc7e6 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -58,7 +58,7 @@ ccl_device void kernel_split_branched_indirect_light_end(KernelGlobals *kg, int
kernel_split_branched_path_indirect_loop_end(kg, ray_index);
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
@@ -126,7 +126,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
if(active) {
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 22602537524..2409d1ba28b 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -50,15 +50,15 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag, MAX_CLOSURE);
+ shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag, kernel_data.integrator.max_closures);
#ifdef __BRANCHED_PATH__
if(kernel_data.integrator.branched) {
- shader_merge_closures(&kernel_split_state.sd[ray_index]);
+ shader_merge_closures(kernel_split_sd(sd, ray_index));
}
else
#endif
{
- shader_prepare_closures(&kernel_split_state.sd[ray_index], state);
+ shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
}
}
}
diff --git a/intern/cycles/kernel/split/kernel_shader_setup.h b/intern/cycles/kernel/split/kernel_shader_setup.h
index 0432689d9fa..9d428ee8139 100644
--- a/intern/cycles/kernel/split/kernel_shader_setup.h
+++ b/intern/cycles/kernel/split/kernel_shader_setup.h
@@ -61,7 +61,7 @@ ccl_device void kernel_shader_setup(KernelGlobals *kg,
Ray ray = kernel_split_state.ray[ray_index];
shader_setup_from_ray(kg,
- &kernel_split_state.sd[ray_index],
+ kernel_split_sd(sd, ray_index),
&isect,
&ray);
}
diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h
index 5a55b680695..2132c42220f 100644
--- a/intern/cycles/kernel/split/kernel_shader_sort.h
+++ b/intern/cycles/kernel/split/kernel_shader_sort.h
@@ -47,7 +47,7 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg,
int ray_index = kernel_split_state.queue_data[add];
bool valid = (ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
if(valid) {
- value = kernel_split_state.sd[ray_index].shader & SHADER_MASK;
+ value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK;
}
}
local_value[i + lid] = value;
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index b50de615fc8..a4cffd77eff 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -33,7 +33,7 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
return;
}
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index 9a6bdfbdffe..da072fd5f1a 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -43,7 +43,7 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
Ray ray = kernel_split_state.light_ray[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
float3 throughput = kernel_split_state.throughput[ray_index];
BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index eac22050a38..fa2f0b20a83 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -31,6 +31,14 @@ ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_
size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
+ uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+
+#ifdef __BRANCHED_PATH__
+ size += align_up(closure_size * num_elements, 16);
+#endif
+
+ size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+
return size;
}
@@ -49,6 +57,15 @@ ccl_device_inline void split_data_init(KernelGlobals *kg,
SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
+ uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+
+#ifdef __BRANCHED_PATH__
+ p += align_up(closure_size * num_elements, 16);
+#endif
+
+ split_data->_sd = (ShaderData*)p;
+ p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+
split_data->ray_state = ray_state;
}
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 5c2aadcf4ec..9ac3f904819 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -51,7 +51,6 @@ typedef ccl_global struct SplitBranchedState {
float3 throughput;
Ray ray;
- struct ShaderData sd;
Intersection isect;
char ray_state;
@@ -77,6 +76,9 @@ typedef ccl_global struct SplitBranchedState {
int shared_sample_count; /* number of branched samples shared with other threads */
int original_ray; /* index of original ray when sharing branched samples */
bool waiting_on_shared_samples;
+
+ /* Must be last in to allow for dynamic size of closures */
+ struct ShaderData sd;
} SplitBranchedState;
#define SPLIT_DATA_BRANCHED_ENTRIES \
@@ -110,11 +112,11 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
- SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
SPLIT_DATA_SUBSURFACE_ENTRIES \
SPLIT_DATA_VOLUME_ENTRIES \
SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
@@ -126,11 +128,11 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
- SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
SPLIT_DATA_SUBSURFACE_ENTRIES \
SPLIT_DATA_VOLUME_ENTRIES \
SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
/* struct that holds pointers to data in the shared state buffer */
typedef struct SplitData {
@@ -154,6 +156,12 @@ __device__ SplitParams __split_param_data;
# define kernel_split_params (__split_param_data)
#endif /* __KERNEL_CUDA__ */
+#define kernel_split_sd(sd, ray_index) ((ShaderData*) \
+ ( \
+ ((ccl_global char*)kernel_split_state._##sd) + \
+ (sizeof(ShaderData) + sizeof(ShaderClosure)*(kernel_data.integrator.max_closures-1)) * (ray_index) \
+ ))
+
/* Local storage for queue_enqueue kernel. */
typedef struct QueueEnqueueLocals {
uint queue_atomics[2];
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index c5504d0a89b..887c3e313d1 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -98,7 +98,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
/* compute lighting with the BSDF closure */
for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
- ShaderData *bssrdf_sd = &kernel_split_state.sd[ray_index];
+ ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index);
*bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
* important as the indirect path will write into bssrdf_sd */
@@ -228,7 +228,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
if(sd->flag & SD_BSSRDF) {