Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/device/device.cpp1
-rw-r--r--intern/cycles/device/device.h6
-rw-r--r--intern/cycles/device/device_cpu.cpp1
-rw-r--r--intern/cycles/device/device_cuda.cpp4
-rw-r--r--intern/cycles/device/device_split_kernel.cpp3
-rw-r--r--intern/cycles/device/device_split_kernel.h3
-rw-r--r--intern/cycles/kernel/kernel_bake.h8
-rw-r--r--intern/cycles/kernel/kernel_path.h4
-rw-r--r--intern/cycles/kernel/kernel_path_branched.h2
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h10
-rw-r--r--intern/cycles/kernel/kernel_types.h13
-rw-r--r--intern/cycles/kernel/kernel_volume.h2
-rw-r--r--intern/cycles/kernel/split/kernel_branched.h12
-rw-r--r--intern/cycles/kernel/split/kernel_direct_lighting.h2
-rw-r--r--intern/cycles/kernel/split/kernel_do_volume.h4
-rw-r--r--intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h2
-rw-r--r--intern/cycles/kernel/split/kernel_indirect_background.h2
-rw-r--r--intern/cycles/kernel/split/kernel_lamp_emission.h2
-rw-r--r--intern/cycles/kernel/split/kernel_next_iteration_setup.h4
-rw-r--r--intern/cycles/kernel/split/kernel_shader_eval.h6
-rw-r--r--intern/cycles/kernel/split/kernel_shader_setup.h2
-rw-r--r--intern/cycles/kernel/split/kernel_shader_sort.h2
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_ao.h2
-rw-r--r--intern/cycles/kernel/split/kernel_shadow_blocked_dl.h2
-rw-r--r--intern/cycles/kernel/split/kernel_split_data.h17
-rw-r--r--intern/cycles/kernel/split/kernel_split_data_types.h14
-rw-r--r--intern/cycles/kernel/split/kernel_subsurface_scatter.h4
-rw-r--r--intern/cycles/render/session.cpp12
28 files changed, 87 insertions, 59 deletions
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 641e3fde140..a3ff5481cef 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -47,7 +47,6 @@ std::ostream& operator <<(std::ostream &os,
{
os << "Experimental features: "
<< (requested_features.experimental ? "On" : "Off") << std::endl;
- os << "Max closure count: " << requested_features.max_closure << std::endl;
os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
/* TODO(sergey): Decode bitflag into list of names. */
os << "Nodes features: " << requested_features.nodes_features << std::endl;
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 6bf3bbe6c25..35b545388f2 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -91,9 +91,6 @@ public:
/* Use experimental feature set. */
bool experimental;
- /* Maximum number of closures in shader trees. */
- int max_closure;
-
/* Selective nodes compilation. */
/* Identifier of a node group up to which all the nodes needs to be
@@ -146,7 +143,6 @@ public:
{
/* TODO(sergey): Find more meaningful defaults. */
experimental = false;
- max_closure = 0;
max_nodes_group = 0;
nodes_features = 0;
use_hair = false;
@@ -167,7 +163,6 @@ public:
bool modified(const DeviceRequestedFeatures& requested_features)
{
return !(experimental == requested_features.experimental &&
- max_closure == requested_features.max_closure &&
max_nodes_group == requested_features.max_nodes_group &&
nodes_features == requested_features.nodes_features &&
use_hair == requested_features.use_hair &&
@@ -198,7 +193,6 @@ public:
string_printf("%d", max_nodes_group);
build_options += " -D__NODES_FEATURES__=" +
string_printf("%d", nodes_features);
- build_options += string_printf(" -D__MAX_CLOSURE__=%d", max_closure);
if(!use_hair) {
build_options += " -D__NO_HAIR__";
}
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 1a54c3380ee..0f4001ab1a6 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -760,7 +760,6 @@ public:
CPUSplitKernel *split_kernel = NULL;
if(use_split_kernel) {
split_kernel = new CPUSplitKernel(this);
- requested_features.max_closure = MAX_CLOSURE;
if(!split_kernel->load_kernels(requested_features)) {
thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
kgbuffer.free();
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 14e3ddc8c7b..d230a0c565d 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1861,10 +1861,6 @@ public:
DeviceRequestedFeatures requested_features;
if(use_split_kernel()) {
- if(!use_adaptive_compilation()) {
- requested_features.max_closure = 64;
- }
-
if(split_kernel == NULL) {
split_kernel = new CUDASplitKernel(this);
split_kernel->load_kernels(requested_features);
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 115273d9f0a..566d4020b33 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -34,7 +34,6 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device)
use_queues_flag(device, "use_queues_flag"),
work_pool_wgs(device, "work_pool_wgs")
{
- current_max_closure = -1;
first_tile = true;
avg_time_per_sample = 0.0;
@@ -116,8 +115,6 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
#undef LOAD_KERNEL
- current_max_closure = requested_features.max_closure;
-
return true;
}
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 0647c664447..2ec0261e847 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -92,9 +92,6 @@ private:
/* Work pool with respect to each work group. */
device_only_memory<unsigned int> work_pool_wgs;
- /* clos_max value for which the kernels have been loaded currently. */
- int current_max_closure;
-
/* Marked True in constructor and marked false at the end of path_trace(). */
bool first_tile;
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 73cddeb27f7..8788e89c40e 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -51,7 +51,7 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg,
path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
/* evaluate surface shader */
- shader_eval_surface(kg, sd, &state, state.flag, MAX_CLOSURE);
+ shader_eval_surface(kg, sd, &state, state.flag, kernel_data.integrator.max_closures);
/* TODO, disable more closures we don't need besides transparent */
shader_bsdf_disable_transparency(kg, sd);
@@ -228,12 +228,12 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
}
else {
/* surface color of the pass only */
- shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE);
+ shader_eval_surface(kg, sd, state, 0, kernel_data.integrator.max_closures);
return kernel_bake_shader_bsdf(kg, sd, type);
}
}
else {
- shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE);
+ shader_eval_surface(kg, sd, state, 0, kernel_data.integrator.max_closures);
color = kernel_bake_shader_bsdf(kg, sd, type);
}
@@ -333,7 +333,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
{
float3 N = sd.N;
if((sd.flag & SD_HAS_BUMP)) {
- shader_eval_surface(kg, &sd, &state, 0, MAX_CLOSURE);
+ shader_eval_surface(kg, &sd, &state, 0, kernel_data.integrator.max_closures);
N = shader_bsdf_average_normal(kg, &sd);
}
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 8519e0682e1..207ba741e6f 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -443,7 +443,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
sd,
&isect,
ray);
- shader_eval_surface(kg, sd, state, state->flag, MAX_CLOSURE);
+ shader_eval_surface(kg, sd, state, state->flag, kernel_data.integrator.max_closures);
shader_prepare_closures(sd, state);
/* Apply shadow catcher, holdout, emission. */
@@ -594,7 +594,7 @@ ccl_device_forceinline void kernel_path_integrate(
/* Setup and evaluate shader. */
shader_setup_from_ray(kg, &sd, &isect, ray);
- shader_eval_surface(kg, &sd, state, state->flag, MAX_CLOSURE);
+ shader_eval_surface(kg, &sd, state, state->flag, kernel_data.integrator.max_closures);
shader_prepare_closures(&sd, state);
/* Apply shadow catcher, holdout, emission. */
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index b37bc65f4df..9996f52f9a4 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -474,7 +474,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
/* Setup and evaluate shader. */
shader_setup_from_ray(kg, &sd, &isect, &ray);
- shader_eval_surface(kg, &sd, &state, state.flag, MAX_CLOSURE);
+ shader_eval_surface(kg, &sd, &state, state.flag, kernel_data.integrator.max_closures);
shader_merge_closures(&sd);
/* Apply shadow catcher, holdout, emission. */
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 87e7d7ff398..616ad71af3c 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -76,11 +76,11 @@ ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
}
/* replace closures with a single diffuse bsdf closure after scatter step */
-ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, const ShaderClosure *sc, float3 weight, bool hit, float3 N)
+ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float3 weight, bool hit, float3 N)
{
sd->flag &= ~SD_CLOSURE_FLAGS;
sd->num_closure = 0;
- sd->num_closure_left = MAX_CLOSURE;
+ sd->num_closure_left = kernel_data.integrator.max_closures;
if(hit) {
Bssrdf *bssrdf = (Bssrdf *)sc;
@@ -154,7 +154,7 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
if(bump || texture_blur > 0.0f) {
/* average color and normal at incoming point */
- shader_eval_surface(kg, sd, state, state_flag, MAX_CLOSURE);
+ shader_eval_surface(kg, sd, state, state_flag, kernel_data.integrator.max_closures);
float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
/* we simply divide out the average color and multiply with the average
@@ -342,7 +342,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
subsurface_color_bump_blur(kg, sd, state, state_flag, &weight, &N);
/* Setup diffuse BSDF. */
- subsurface_scatter_setup_diffuse_bsdf(sd, sc, weight, true, N);
+ subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, weight, true, N);
}
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
@@ -439,7 +439,7 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_a
subsurface_color_bump_blur(kg, sd, state, state_flag, &eval, &N);
/* setup diffuse bsdf */
- subsurface_scatter_setup_diffuse_bsdf(sd, sc, eval, (ss_isect.num_hits > 0), N);
+ subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, eval, (ss_isect.num_hits > 0), N);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index fc3e7b3da98..919dafbc780 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -782,10 +782,14 @@ typedef struct AttributeDescriptor {
/* Closure data */
#ifdef __MULTI_CLOSURE__
-# ifndef __MAX_CLOSURE__
-# define MAX_CLOSURE 64
+# ifdef __SPLIT_KERNEL__
+# define MAX_CLOSURE 1
# else
-# define MAX_CLOSURE __MAX_CLOSURE__
+# ifndef __MAX_CLOSURE__
+# define MAX_CLOSURE 64
+# else
+# define MAX_CLOSURE __MAX_CLOSURE__
+# endif
# endif
#else
# define MAX_CLOSURE 1
@@ -1313,7 +1317,8 @@ typedef struct KernelIntegrator {
int volume_samples;
int start_sample;
- int pad;
+
+ int max_closures;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index fb3c5437275..5604d8e5163 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -62,7 +62,7 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
VolumeShaderCoefficients *coeff)
{
sd->P = P;
- shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, MAX_CLOSURE);
+ shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, kernel_data.integrator.max_closures);
if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION)))
return false;
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
index 2313feac089..6456636caaa 100644
--- a/intern/cycles/kernel/split/kernel_branched.h
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -30,10 +30,14 @@ ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGloba
BRANCHED_STORE(path_state);
BRANCHED_STORE(throughput);
BRANCHED_STORE(ray);
- BRANCHED_STORE(sd);
BRANCHED_STORE(isect);
BRANCHED_STORE(ray_state);
+ branched_state->sd = *kernel_split_sd(sd, ray_index);
+ for(int i = 0; i < branched_state->sd.num_closure; i++) {
+ branched_state->sd.closure[i] = kernel_split_sd(sd, ray_index)->closure[i];
+ }
+
#undef BRANCHED_STORE
/* set loop counters to intial position */
@@ -53,10 +57,14 @@ ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobal
BRANCHED_RESTORE(path_state);
BRANCHED_RESTORE(throughput);
BRANCHED_RESTORE(ray);
- BRANCHED_RESTORE(sd);
BRANCHED_RESTORE(isect);
BRANCHED_RESTORE(ray_state);
+ *kernel_split_sd(sd, ray_index) = branched_state->sd;
+ for(int i = 0; i < branched_state->sd.num_closure; i++) {
+ kernel_split_sd(sd, ray_index)->closure[i] = branched_state->sd.closure[i];
+ }
+
#undef BRANCHED_RESTORE
/* leave indirect loop */
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index 832b0e5b265..ca79602c565 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -58,7 +58,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
/* direct lighting */
#ifdef __EMISSION__
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 02881da6c04..fb5bd3d48dd 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -29,7 +29,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
{
SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
@@ -140,7 +140,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index bc8ca3aa3ca..88919f47c7a 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -94,7 +94,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
float3 throughput;
ccl_global char *ray_state = kernel_split_state.ray_state;
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index 0c894bd3d71..4cf88a02590 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -55,7 +55,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
float3 throughput = kernel_split_state.throughput[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
kernel_path_background(kg, state, ray, throughput, sd, L);
kernel_split_path_end(kg, ray_index);
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index d5099ac66e6..c14f66f664f 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -58,7 +58,7 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
float3 throughput = kernel_split_state.throughput[ray_index];
Ray ray = kernel_split_state.ray[ray_index];
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L);
}
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index c3373174582..bb6bf1cc7e6 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -58,7 +58,7 @@ ccl_device void kernel_split_branched_indirect_light_end(KernelGlobals *kg, int
kernel_split_branched_path_indirect_loop_end(kg, ray_index);
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
@@ -126,7 +126,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
if(active) {
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 22602537524..2409d1ba28b 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -50,15 +50,15 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag, MAX_CLOSURE);
+ shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag, kernel_data.integrator.max_closures);
#ifdef __BRANCHED_PATH__
if(kernel_data.integrator.branched) {
- shader_merge_closures(&kernel_split_state.sd[ray_index]);
+ shader_merge_closures(kernel_split_sd(sd, ray_index));
}
else
#endif
{
- shader_prepare_closures(&kernel_split_state.sd[ray_index], state);
+ shader_prepare_closures(kernel_split_sd(sd, ray_index), state);
}
}
}
diff --git a/intern/cycles/kernel/split/kernel_shader_setup.h b/intern/cycles/kernel/split/kernel_shader_setup.h
index 0432689d9fa..9d428ee8139 100644
--- a/intern/cycles/kernel/split/kernel_shader_setup.h
+++ b/intern/cycles/kernel/split/kernel_shader_setup.h
@@ -61,7 +61,7 @@ ccl_device void kernel_shader_setup(KernelGlobals *kg,
Ray ray = kernel_split_state.ray[ray_index];
shader_setup_from_ray(kg,
- &kernel_split_state.sd[ray_index],
+ kernel_split_sd(sd, ray_index),
&isect,
&ray);
}
diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h
index 5a55b680695..2132c42220f 100644
--- a/intern/cycles/kernel/split/kernel_shader_sort.h
+++ b/intern/cycles/kernel/split/kernel_shader_sort.h
@@ -47,7 +47,7 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg,
int ray_index = kernel_split_state.queue_data[add];
bool valid = (ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
if(valid) {
- value = kernel_split_state.sd[ray_index].shader & SHADER_MASK;
+ value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK;
}
}
local_value[i + lid] = value;
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index b50de615fc8..a4cffd77eff 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -33,7 +33,7 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
return;
}
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index 9a6bdfbdffe..da072fd5f1a 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -43,7 +43,7 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
Ray ray = kernel_split_state.light_ray[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
float3 throughput = kernel_split_state.throughput[ray_index];
BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index eac22050a38..fa2f0b20a83 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -31,6 +31,14 @@ ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_
size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
+ uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+
+#ifdef __BRANCHED_PATH__
+ size += align_up(closure_size * num_elements, 16);
+#endif
+
+ size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+
return size;
}
@@ -49,6 +57,15 @@ ccl_device_inline void split_data_init(KernelGlobals *kg,
SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
+ uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1);
+
+#ifdef __BRANCHED_PATH__
+ p += align_up(closure_size * num_elements, 16);
+#endif
+
+ split_data->_sd = (ShaderData*)p;
+ p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16);
+
split_data->ray_state = ray_state;
}
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 5c2aadcf4ec..9ac3f904819 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -51,7 +51,6 @@ typedef ccl_global struct SplitBranchedState {
float3 throughput;
Ray ray;
- struct ShaderData sd;
Intersection isect;
char ray_state;
@@ -77,6 +76,9 @@ typedef ccl_global struct SplitBranchedState {
int shared_sample_count; /* number of branched samples shared with other threads */
int original_ray; /* index of original ray when sharing branched samples */
bool waiting_on_shared_samples;
+
+ /* Must be last in to allow for dynamic size of closures */
+ struct ShaderData sd;
} SplitBranchedState;
#define SPLIT_DATA_BRANCHED_ENTRIES \
@@ -110,11 +112,11 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
- SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
SPLIT_DATA_SUBSURFACE_ENTRIES \
SPLIT_DATA_VOLUME_ENTRIES \
SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
/* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */
#define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \
@@ -126,11 +128,11 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
- SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
SPLIT_DATA_SUBSURFACE_ENTRIES \
SPLIT_DATA_VOLUME_ENTRIES \
SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(ShaderData, _sd, 0)
/* struct that holds pointers to data in the shared state buffer */
typedef struct SplitData {
@@ -154,6 +156,12 @@ __device__ SplitParams __split_param_data;
# define kernel_split_params (__split_param_data)
#endif /* __KERNEL_CUDA__ */
+#define kernel_split_sd(sd, ray_index) ((ShaderData*) \
+ ( \
+ ((ccl_global char*)kernel_split_state._##sd) + \
+ (sizeof(ShaderData) + sizeof(ShaderClosure)*(kernel_data.integrator.max_closures-1)) * (ray_index) \
+ ))
+
/* Local storage for queue_enqueue kernel. */
typedef struct QueueEnqueueLocals {
uint queue_atomics[2];
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index c5504d0a89b..887c3e313d1 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -98,7 +98,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
/* compute lighting with the BSDF closure */
for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
- ShaderData *bssrdf_sd = &kernel_split_state.sd[ray_index];
+ ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index);
*bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
* important as the indirect path will write into bssrdf_sd */
@@ -228,7 +228,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *sd = kernel_split_sd(sd, ray_index);
ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
if(sd->flag & SD_BSSRDF) {
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 74cfd02e1a4..e8d9558c38d 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -642,13 +642,11 @@ DeviceRequestedFeatures Session::get_requested_device_features()
DeviceRequestedFeatures requested_features;
requested_features.experimental = params.experimental;
- requested_features.max_closure = get_max_closure_count();
scene->shader_manager->get_requested_features(
scene,
&requested_features);
if(!params.background) {
/* Avoid too much re-compilations for viewport render. */
- requested_features.max_closure = 64;
requested_features.max_nodes_group = NODE_GROUP_LEVEL_MAX;
requested_features.nodes_features = NODE_FEATURE_ALL;
}
@@ -858,6 +856,16 @@ void Session::update_scene()
if(scene->need_update()) {
load_kernels(false);
+ /* Update max_closures. */
+ KernelIntegrator *kintegrator = &scene->dscene.data.integrator;
+ if(params.background) {
+ kintegrator->max_closures = get_max_closure_count();
+ }
+ else {
+ /* Currently viewport render is faster with higher max_closures, needs investigating. */
+ kintegrator->max_closures = 64;
+ }
+
progress.set_status("Updating Scene");
MEM_GUARDED_CALL(&progress, scene->device_update, device, progress);
}