Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2017-11-01 23:02:28 +0300
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2017-11-05 22:48:33 +0300
commit: 8a72be7697f8fbfc8cb6cc9f3df049104e41d4a6 (patch)
tree: c8997adcc23053eb6b4cfb7f499581644b23f61e /intern/cycles/kernel/split
parent: c571be4e05788b8d3447a0bfe59942ebb4464750 (diff)
Cycles: reduce closure memory usage for emission/shadow shader data.
With a Titan Xp, reduces path trace local memory from 1092MB to 840MB. Benchmark performance was within 1% with both RX 480 and Titan Xp. Original patch was implemented by Sergey. Differential Revision: https://developer.blender.org/D2249
Diffstat (limited to 'intern/cycles/kernel/split')
-rw-r--r-- intern/cycles/kernel/split/kernel_buffer_update.h 7
-rw-r--r-- intern/cycles/kernel/split/kernel_direct_lighting.h 11
-rw-r--r-- intern/cycles/kernel/split/kernel_do_volume.h 4
-rw-r--r-- intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h 2
-rw-r--r-- intern/cycles/kernel/split/kernel_path_init.h 2
-rw-r--r-- intern/cycles/kernel/split/kernel_shader_eval.h 2
-rw-r--r-- intern/cycles/kernel/split/kernel_shadow_blocked_ao.h 2
-rw-r--r-- intern/cycles/kernel/split/kernel_shadow_blocked_dl.h 2
-rw-r--r-- intern/cycles/kernel/split/kernel_split_data_types.h 4
-rw-r--r-- intern/cycles/kernel/split/kernel_subsurface_scatter.h 4
10 files changed, 27 insertions, 13 deletions
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index 511334e0550..180c0b57077 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -122,7 +122,12 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
*/
*throughput = make_float3(1.0f, 1.0f, 1.0f);
path_radiance_init(L, kernel_data.film.use_light_pass);
- path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng_hash, sample, ray);
+ path_state_init(kg,
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+ state,
+ rng_hash,
+ sample,
+ ray);
#ifdef __SUBSURFACE__
kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
#endif
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index 2aac66ecb84..832b0e5b265 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -98,7 +98,16 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
BsdfEval L_light;
bool is_lamp;
- if(direct_emission(kg, sd, &kernel_split_state.sd_DL_shadow[ray_index], &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
+ if(direct_emission(kg,
+ sd,
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
+ &ls,
+ state,
+ &light_ray,
+ &L_light,
+ &is_lamp,
+ terminate))
+ {
/* Write intermediate data to global memory to access from
* the next kernel.
*/
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 491487f1230..02881da6c04 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -31,7 +31,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K
ShaderData *sd = &kernel_split_state.sd[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
/* GPU: no decoupled ray marching, scatter probalistically */
int num_samples = kernel_data.integrator.volume_samples;
@@ -141,7 +141,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
- ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index 906bad8ceb6..bc8ca3aa3ca 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -101,7 +101,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
throughput = kernel_split_state.throughput[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index 5ad62b585fe..fdd54225b07 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -64,7 +64,7 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
path_state_init(kg,
- &kernel_split_state.sd_DL_shadow[ray_index],
+ AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]),
&kernel_split_state.path_state[ray_index],
rng_hash,
sample,
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 7032461b04a..22602537524 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -50,7 +50,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag);
+ shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag, MAX_CLOSURE);
#ifdef __BRANCHED_PATH__
if(kernel_data.integrator.branched) {
shader_merge_closures(&kernel_split_state.sd[ray_index]);
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index 79aa2c9435b..b50de615fc8 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -34,7 +34,7 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
}
ShaderData *sd = &kernel_split_state.sd[ray_index];
- ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
float3 throughput = kernel_split_state.throughput[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index b52f9a5eb81..9a6bdfbdffe 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -47,7 +47,7 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
float3 throughput = kernel_split_state.throughput[ray_index];
BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
- ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
bool is_lamp = kernel_split_state.is_lamp[ray_index];
# if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index b0e6e5f5250..d3464fede41 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -111,7 +111,7 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
- SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
+ SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
SPLIT_DATA_SUBSURFACE_ENTRIES \
SPLIT_DATA_VOLUME_ENTRIES \
SPLIT_DATA_BRANCHED_ENTRIES \
@@ -127,7 +127,7 @@ typedef ccl_global struct SplitBranchedState {
SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
- SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
+ SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \
SPLIT_DATA_SUBSURFACE_ENTRIES \
SPLIT_DATA_VOLUME_ENTRIES \
SPLIT_DATA_BRANCHED_ENTRIES \
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 3b957856aea..8d774c020ee 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -39,7 +39,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it
ShaderData *sd = &branched_state->sd;
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
ShaderClosure *sc = &sd->closure[i];
@@ -229,7 +229,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
- ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
if(sd->flag & SD_BSSRDF) {