diff options
author | Mai Lavelle <mai.lavelle@gmail.com> | 2017-11-27 05:48:17 +0300 |
---|---|---|
committer | Mai Lavelle <mai.lavelle@gmail.com> | 2017-11-27 05:48:17 +0300 |
commit | 8ef6f7e80ffc32ea37b47d3ba829f10ebea83c7f (patch) | |
tree | 9862749527c55a29a95d3b99c4835618c587ffb7 | |
parent | 999b5554a66ebfe2ded5832ded95a3970b6830e8 (diff) |
Cycles: Merge shadow_blocked_ao and _dl kernels to improve build times
Gives another 4 seconds of improvement in build time.
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/device/device_split_kernel.cpp | 12 | ||||
-rw-r--r-- | intern/cycles/device/device_split_kernel.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel_split.cu | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl (renamed from intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl) | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl | 24 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_split.cl | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_shadow_blocked.h (renamed from intern/cycles/kernel/split/kernel_shadow_blocked_dl.h) | 43 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_shadow_blocked_ao.h | 55 |
12 files changed, 57 insertions, 111 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index ce02a5a932e..7557f584ebe 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -256,8 +256,7 @@ public: REGISTER_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao); REGISTER_SPLIT_KERNEL(subsurface_scatter); REGISTER_SPLIT_KERNEL(direct_lighting); - REGISTER_SPLIT_KERNEL(shadow_blocked_ao); - REGISTER_SPLIT_KERNEL(shadow_blocked_dl); + REGISTER_SPLIT_KERNEL(shadow_blocked); REGISTER_SPLIT_KERNEL(enqueue_inactive); REGISTER_SPLIT_KERNEL(next_iteration_setup); REGISTER_SPLIT_KERNEL(indirect_subsurface); diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index 7135169f238..9697411a23e 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -50,8 +50,7 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) kernel_holdout_emission_blurring_pathtermination_ao = NULL; kernel_subsurface_scatter = NULL; kernel_direct_lighting = NULL; - kernel_shadow_blocked_ao = NULL; - kernel_shadow_blocked_dl = NULL; + kernel_shadow_blocked = NULL; kernel_enqueue_inactive = NULL; kernel_next_iteration_setup = NULL; kernel_indirect_subsurface = NULL; @@ -78,8 +77,7 @@ DeviceSplitKernel::~DeviceSplitKernel() delete kernel_holdout_emission_blurring_pathtermination_ao; delete kernel_subsurface_scatter; delete kernel_direct_lighting; - delete kernel_shadow_blocked_ao; - delete kernel_shadow_blocked_dl; + delete kernel_shadow_blocked; delete kernel_enqueue_inactive; delete kernel_next_iteration_setup; delete kernel_indirect_subsurface; @@ -107,8 +105,7 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao); LOAD_KERNEL(subsurface_scatter); LOAD_KERNEL(direct_lighting); - LOAD_KERNEL(shadow_blocked_ao); - LOAD_KERNEL(shadow_blocked_dl); + 
LOAD_KERNEL(shadow_blocked); LOAD_KERNEL(enqueue_inactive); LOAD_KERNEL(next_iteration_setup); LOAD_KERNEL(indirect_subsurface); @@ -254,8 +251,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size); ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size); ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size); - ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size); - ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size, local_size); ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size); ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size); ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size); diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h index 2ec0261e847..816f29f89e0 100644 --- a/intern/cycles/device/device_split_kernel.h +++ b/intern/cycles/device/device_split_kernel.h @@ -67,8 +67,7 @@ private: SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao; SplitKernelFunction *kernel_subsurface_scatter; SplitKernelFunction *kernel_direct_lighting; - SplitKernelFunction *kernel_shadow_blocked_ao; - SplitKernelFunction *kernel_shadow_blocked_dl; + SplitKernelFunction *kernel_shadow_blocked; SplitKernelFunction *kernel_enqueue_inactive; SplitKernelFunction *kernel_next_iteration_setup; SplitKernelFunction *kernel_indirect_subsurface; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index de056ce97f0..3f8fc333d1e 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -52,8 +52,7 @@ set(SRC_OPENCL_KERNELS kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl kernels/opencl/kernel_subsurface_scatter.cl kernels/opencl/kernel_direct_lighting.cl - kernels/opencl/kernel_shadow_blocked_ao.cl - 
kernels/opencl/kernel_shadow_blocked_dl.cl + kernels/opencl/kernel_shadow_blocked.cl kernels/opencl/kernel_enqueue_inactive.cl kernels/opencl/kernel_next_iteration_setup.cl kernels/opencl/kernel_indirect_subsurface.cl @@ -304,8 +303,7 @@ set(SRC_SPLIT_HEADERS split/kernel_shader_setup.h split/kernel_shader_sort.h split/kernel_shader_eval.h - split/kernel_shadow_blocked_ao.h - split/kernel_shadow_blocked_dl.h + split/kernel_shadow_blocked.h split/kernel_split_common.h split/kernel_split_data.h split/kernel_split_data_types.h diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 6bdb8546a24..1c934320c89 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -80,8 +80,7 @@ DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval) DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting) -DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) -DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked) DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive) DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index ecc2bb13eba..6bcefe39ae5 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -51,8 +51,7 @@ # include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" # include "kernel/split/kernel_subsurface_scatter.h" # include "kernel/split/kernel_direct_lighting.h" -# include "kernel/split/kernel_shadow_blocked_ao.h" -# include "kernel/split/kernel_shadow_blocked_dl.h" +# include "kernel/split/kernel_shadow_blocked.h" # include "kernel/split/kernel_enqueue_inactive.h" # include 
"kernel/split/kernel_next_iteration_setup.h" # include "kernel/split/kernel_indirect_subsurface.h" @@ -222,8 +221,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) -DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) -DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index 41044b65347..d64c8c66458 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -37,8 +37,7 @@ #include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" #include "kernel/split/kernel_subsurface_scatter.h" #include "kernel/split/kernel_direct_lighting.h" -#include "kernel/split/kernel_shadow_blocked_ao.h" -#include "kernel/split/kernel_shadow_blocked_dl.h" +#include "kernel/split/kernel_shadow_blocked.h" #include "kernel/split/kernel_enqueue_inactive.h" #include "kernel/split/kernel_next_iteration_setup.h" #include "kernel/split/kernel_indirect_subsurface.h" @@ -115,8 +114,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) -DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) -DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, 
uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl index 8de250a375c..472e2503eb2 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl @@ -16,9 +16,9 @@ #include "kernel/kernel_compat_opencl.h" #include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_shadow_blocked_ao.h" +#include "kernel/split/kernel_shadow_blocked.h" -#define KERNEL_NAME shadow_blocked_ao +#define KERNEL_NAME shadow_blocked #include "kernel/kernels/opencl/kernel_split_function.h" #undef KERNEL_NAME diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl deleted file mode 100644 index 29da77022ed..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_shadow_blocked_dl.h" - -#define KERNEL_NAME shadow_blocked_dl -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl index 4cbda1bc2e7..3bbadd287e9 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl @@ -32,8 +32,7 @@ #include "kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" #include "kernel/kernels/opencl/kernel_subsurface_scatter.cl" #include "kernel/kernels/opencl/kernel_direct_lighting.cl" -#include "kernel/kernels/opencl/kernel_shadow_blocked_ao.cl" -#include "kernel/kernels/opencl/kernel_shadow_blocked_dl.cl" +#include "kernel/kernels/opencl/kernel_shadow_blocked.cl" #include "kernel/kernels/opencl/kernel_enqueue_inactive.cl" #include "kernel/kernels/opencl/kernel_next_iteration_setup.cl" #include "kernel/kernels/opencl/kernel_indirect_subsurface.cl" diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked.h index 024e63e88f8..c350c3b242e 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h @@ -16,11 +16,44 @@ CCL_NAMESPACE_BEGIN +/* Shadow ray cast for AO. 
*/ +ccl_device void shadow_blocked_ao(KernelGlobals *kg) +{ + unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; + + int ray_index = QUEUE_EMPTY_SLOT; + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if(thread_index < ao_queue_length) { + ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, + kernel_split_state.queue_data, kernel_split_params.queue_size, 1); + } + + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + + ShaderData *sd = kernel_split_sd(sd, ray_index); + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + float3 throughput = kernel_split_state.throughput[ray_index]; + +#ifdef __BRANCHED_PATH__ + if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { +#endif + kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd)); +#ifdef __BRANCHED_PATH__ + } + else { + kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput); + } +#endif +} + /* Shadow ray cast for direct visible light. 
*/ -ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) +ccl_device void shadow_blocked_dl(KernelGlobals *kg) { unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; - ccl_barrier(CCL_LOCAL_MEM_FENCE); int ray_index = QUEUE_EMPTY_SLOT; int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); @@ -112,4 +145,10 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) } } +ccl_device void kernel_shadow_blocked(KernelGlobals *kg) +{ + shadow_blocked_ao(kg); + shadow_blocked_dl(kg); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h deleted file mode 100644 index a4cffd77eff..00000000000 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Shadow ray cast for AO. 
*/ -ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg) -{ - unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - int ray_index = QUEUE_EMPTY_SLOT; - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(thread_index < ao_queue_length) { - ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, - kernel_split_state.queue_data, kernel_split_params.queue_size, 1); - } - - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } - - ShaderData *sd = kernel_split_sd(sd, ray_index); - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - float3 throughput = kernel_split_state.throughput[ray_index]; - -#ifdef __BRANCHED_PATH__ - if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { -#endif - kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd)); -#ifdef __BRANCHED_PATH__ - } - else { - kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput); - } -#endif -} - -CCL_NAMESPACE_END - |