From 915766f42df56a0fee47e8eb21f8b024f01afe18 Mon Sep 17 00:00:00 2001 From: Mai Lavelle Date: Mon, 20 Mar 2017 22:31:54 -0400 Subject: Cycles: Branched path tracing for the split kernel This implements branched path tracing for the split kernel. General approach is to store the ray state at a branch point, trace the branched ray as normal, then restore the state as necessary before iterating to the next part of the path. A state machine is used to advance the indirect loop state, which avoids the need to add any new kernels. Each iteration the state machine recreates as much state as possible from the stored ray to keep overall storage down. Its kind of hard to keep all the different integration loops in sync, so this needs lots of testing to make sure everything is working correctly. We should probably start trying to deduplicate the integration loops more now. Nonbranched BMW is ~2% slower, while classroom is ~2% faster, other scenes could use more testing still. Reviewers: sergey, nirved Reviewed By: nirved Subscribers: Blendify, bliblubli Differential Revision: https://developer.blender.org/D2611 --- intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'intern/cycles/kernel/kernels/cpu') diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 148b2eef568..96f54bb427e 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -183,7 +183,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint) +DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) -- cgit v1.2.3 From 6bf4115c13962c99d1cdc97f2be92c4922f3fd33 Mon Sep 17 00:00:00 2001 From: Hristo Gueorguiev Date: Wed, 3 May 2017 15:30:45 +0200 Subject: Cycles: Split kernel - sort shaders Reduce thread divergence in kernel_shader_eval. Rays are sorted in blocks of 2048 according to shader->id. On R9 290 Classroom is ~30% faster, and Pabellon Barcelone is ~8% faster. No sorting for CUDA split kernel. Reviewers: sergey, maiself Reviewed By: maiself Differential Revision: https://developer.blender.org/D2598 --- intern/cycles/kernel/kernels/cpu/kernel_cpu.h | 2 ++ intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'intern/cycles/kernel/kernels/cpu') diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 896b80d783e..39c9a9cf33c 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -77,6 +77,8 @@ DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission) DECLARE_SPLIT_KERNEL_FUNCTION(do_volume) DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue) DECLARE_SPLIT_KERNEL_FUNCTION(indirect_background) +DECLARE_SPLIT_KERNEL_FUNCTION(shader_setup) +DECLARE_SPLIT_KERNEL_FUNCTION(shader_sort) DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval) DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 96f54bb427e..8c05dd1d9ef 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -44,6 +44,8 @@ # include "kernel/split/kernel_do_volume.h" # include "kernel/split/kernel_queue_enqueue.h" # include "kernel/split/kernel_indirect_background.h" +# include "kernel/split/kernel_shader_setup.h" +# include "kernel/split/kernel_shader_sort.h" # include "kernel/split/kernel_shader_eval.h" # include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" # include "kernel/split/kernel_subsurface_scatter.h" @@ -181,7 +183,9 @@ DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals) +DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) @@ -209,6 +213,8 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, REGISTER(do_volume); REGISTER(queue_enqueue); REGISTER(indirect_background); + REGISTER(shader_setup); + REGISTER(shader_sort); REGISTER(shader_eval); REGISTER(holdout_emission_blurring_pathtermination_ao); REGISTER(subsurface_scatter); -- cgit v1.2.3