diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2017-11-01 23:02:28 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2017-11-05 22:48:33 +0300 |
commit | 8a72be7697f8fbfc8cb6cc9f3df049104e41d4a6 (patch) | |
tree | c8997adcc23053eb6b4cfb7f499581644b23f61e /intern/cycles/kernel/split | |
parent | c571be4e05788b8d3447a0bfe59942ebb4464750 (diff) |
Cycles: reduce closure memory usage for emission/shadow shader data.
With a Titan Xp, reduces path trace local memory from 1092MB to 840MB.
Benchmark performance was within 1% with both RX 480 and Titan Xp.
Original patch was implemented by Sergey.
Differential Revision: https://developer.blender.org/D2249
Diffstat (limited to 'intern/cycles/kernel/split')
10 files changed, 27 insertions, 13 deletions
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index 511334e0550..180c0b57077 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -122,7 +122,12 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, */ *throughput = make_float3(1.0f, 1.0f, 1.0f); path_radiance_init(L, kernel_data.film.use_light_pass); - path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng_hash, sample, ray); + path_state_init(kg, + AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), + state, + rng_hash, + sample, + ray); #ifdef __SUBSURFACE__ kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); #endif diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h index 2aac66ecb84..832b0e5b265 100644 --- a/intern/cycles/kernel/split/kernel_direct_lighting.h +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -98,7 +98,16 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg, BsdfEval L_light; bool is_lamp; - if(direct_emission(kg, sd, &kernel_split_state.sd_DL_shadow[ray_index], &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { + if(direct_emission(kg, + sd, + AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), + &ls, + state, + &light_ray, + &L_light, + &is_lamp, + terminate)) + { /* Write intermediate data to global memory to access from * the next kernel. */ diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h index 491487f1230..02881da6c04 100644 --- a/intern/cycles/kernel/split/kernel_do_volume.h +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -31,7 +31,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K ShaderData *sd = &kernel_split_state.sd[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); /* GPU: no decoupled ray marching, scatter probalistically */ int num_samples = kernel_data.integrator.volume_samples; @@ -141,7 +141,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg) ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; ShaderData *sd = &kernel_split_state.sd[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND); diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index 906bad8ceb6..bc8ca3aa3ca 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -101,7 +101,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; throughput = kernel_split_state.throughput[ray_index]; diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h index 5ad62b585fe..fdd54225b07 100644 --- a/intern/cycles/kernel/split/kernel_path_init.h +++ b/intern/cycles/kernel/split/kernel_path_init.h @@ -64,7 +64,7 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f); path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass); path_state_init(kg, - &kernel_split_state.sd_DL_shadow[ray_index], + AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), &kernel_split_state.path_state[ray_index], rng_hash, sample, diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h index 7032461b04a..22602537524 100644 --- a/intern/cycles/kernel/split/kernel_shader_eval.h +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -50,7 +50,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg) if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag); + shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag, MAX_CLOSURE); #ifdef __BRANCHED_PATH__ if(kernel_data.integrator.branched) { shader_merge_closures(&kernel_split_state.sd[ray_index]); diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h index 79aa2c9435b..b50de615fc8 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h @@ -34,7 +34,7 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg) } ShaderData *sd = &kernel_split_state.sd[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; float3 throughput = kernel_split_state.throughput[ray_index]; diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h index b52f9a5eb81..9a6bdfbdffe 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h @@ -47,7 +47,7 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) float3 throughput = kernel_split_state.throughput[ray_index]; BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); bool is_lamp = kernel_split_state.is_lamp[ray_index]; # if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__) diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index b0e6e5f5250..d3464fede41 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -111,7 +111,7 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \ SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ - SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \ + SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ SPLIT_DATA_SUBSURFACE_ENTRIES \ SPLIT_DATA_VOLUME_ENTRIES \ SPLIT_DATA_BRANCHED_ENTRIES \ @@ -127,7 +127,7 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ - SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \ + SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ SPLIT_DATA_SUBSURFACE_ENTRIES \ SPLIT_DATA_VOLUME_ENTRIES \ SPLIT_DATA_BRANCHED_ENTRIES \ diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h index 3b957856aea..8d774c020ee 100644 --- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -39,7 +39,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it ShaderData *sd = &branched_state->sd; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) { ShaderClosure *sc = &sd->closure[i]; @@ -229,7 +229,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; ShaderData *sd = &kernel_split_state.sd[ray_index]; - ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); if(sd->flag & SD_BSSRDF) { |