diff options
author | Mai Lavelle <mai.lavelle@gmail.com> | 2017-02-14 14:20:48 +0300 |
---|---|---|
committer | Mai Lavelle <mai.lavelle@gmail.com> | 2017-03-08 08:52:41 +0300 |
commit | 0892352bfe6d5a9aa6ec4c088e67f8bbbbfae610 (patch) | |
tree | 83e4169e46d1fc7c2de5884169902841f6c75db9 /intern/cycles/kernel/kernels/cpu | |
parent | 352ee7c3ef2b84568dd419edd208eb6619498f45 (diff) |
Cycles: CPU implementation of split kernel
Diffstat (limited to 'intern/cycles/kernel/kernels/cpu')
8 files changed, 364 insertions, 8 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 1a07c705f1c..1d710157817 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -49,4 +49,39 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, int offset, int sample); +/* Split kernels */ + +void KERNEL_FUNCTION_FULL_NAME(data_init)( + KernelGlobals *kg, + ccl_constant KernelData *data, + ccl_global void *split_data_buffer, + int num_elements, + ccl_global char *ray_state, + ccl_global uint *rng_state, + int start_sample, + int end_sample, + int sx, int sy, int sw, int sh, int offset, int stride, + ccl_global int *Queue_index, + int queuesize, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, + ccl_global float *buffer); + +#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data); + +DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect) +DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission) +DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue) +DECLARE_SPLIT_KERNEL_FUNCTION(background_buffer_update) +DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval) +DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) +DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting) +DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked) +DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) +DECLARE_SPLIT_KERNEL_FUNCTION(sum_all_radiance) + +void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)); + #undef KERNEL_ARCH diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index ec82d4b4c22..c59f4892546 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -21,17 +21,39 @@ */ #include "kernel_compat_cpu.h" -#include "kernel_math.h" -#include 
"kernel_types.h" -#include "kernel_globals.h" -#include "kernel_cpu_image.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_path_branched.h" -#include "kernel_bake.h" + +#ifndef __SPLIT_KERNEL__ +# include "kernel_math.h" +# include "kernel_types.h" + +# include "split/kernel_split_data.h" +# include "kernel_globals.h" + +# include "kernel_cpu_image.h" +# include "kernel_film.h" +# include "kernel_path.h" +# include "kernel_path_branched.h" +# include "kernel_bake.h" +#else +# include "split/kernel_split_common.h" + +# include "split/kernel_data_init.h" +# include "split/kernel_scene_intersect.h" +# include "split/kernel_lamp_emission.h" +# include "split/kernel_queue_enqueue.h" +# include "split/kernel_background_buffer_update.h" +# include "split/kernel_shader_eval.h" +# include "split/kernel_holdout_emission_blurring_pathtermination_ao.h" +# include "split/kernel_direct_lighting.h" +# include "split/kernel_shadow_blocked.h" +# include "split/kernel_next_iteration_setup.h" +# include "split/kernel_sum_all_radiance.h" +#endif CCL_NAMESPACE_BEGIN +#ifndef __SPLIT_KERNEL__ + /* Path Tracing */ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, @@ -131,4 +153,55 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, } } +#else /* __SPLIT_KERNEL__ */ + +/* Split Kernel Path Tracing */ + +#define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ + { \ + kernel_##name(kg); \ + } + +DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) +DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) +DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) +DEFINE_SPLIT_KERNEL_FUNCTION(background_buffer_update) +DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) +DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) +DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting) +DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked) +DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) 
+DEFINE_SPLIT_KERNEL_FUNCTION(sum_all_radiance) + +void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)) +{ +#define REGISTER_NAME_STRING(name) #name +#define REGISTER_EVAL_NAME(name) REGISTER_NAME_STRING(name) +#define REGISTER(name) reg(REGISTER_EVAL_NAME(KERNEL_FUNCTION_FULL_NAME(name)), (void*)KERNEL_FUNCTION_FULL_NAME(name)); + + REGISTER(path_trace); + REGISTER(convert_to_byte); + REGISTER(convert_to_half_float); + REGISTER(shader); + + REGISTER(data_init); + REGISTER(scene_intersect); + REGISTER(lamp_emission); + REGISTER(queue_enqueue); + REGISTER(background_buffer_update); + REGISTER(shader_eval); + REGISTER(holdout_emission_blurring_pathtermination_ao); + REGISTER(direct_lighting); + REGISTER(shadow_blocked); + REGISTER(next_iteration_setup); + REGISTER(sum_all_radiance); + +#undef REGISTER +#undef REGISTER_EVAL_NAME +#undef REGISTER_NAME_STRING +} + +#endif /* __SPLIT_KERNEL__ */ + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp new file mode 100644 index 00000000000..30519dae53e --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp @@ -0,0 +1,63 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CPU kernel entry points */ + +/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this + * one with SSE2 intrinsics. 
+ */ +#if defined(__x86_64__) || defined(_M_X64) +# define __KERNEL_SSE2__ +#endif + +#define __SPLIT_KERNEL__ + +/* When building kernel for native machine detect kernel features from the flags + * set by compiler. + */ +#ifdef WITH_KERNEL_NATIVE +# ifdef __SSE2__ +# ifndef __KERNEL_SSE2__ +# define __KERNEL_SSE2__ +# endif +# endif +# ifdef __SSE3__ +# define __KERNEL_SSE3__ +# endif +# ifdef __SSSE3__ +# define __KERNEL_SSSE3__ +# endif +# ifdef __SSE4_1__ +# define __KERNEL_SSE41__ +# endif +# ifdef __AVX__ +# define __KERNEL_AVX__ +# endif +# ifdef __AVX2__ +# define __KERNEL_SSE__ +# define __KERNEL_AVX2__ +# endif +#endif + +/* quiet unused define warnings */ +#if defined(__KERNEL_SSE2__) + /* do nothing */ +#endif + +#include "kernel.h" +#define KERNEL_ARCH cpu +#include "kernel_cpu_impl.h" + diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp new file mode 100644 index 00000000000..335ad24bdc5 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp @@ -0,0 +1,38 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with AVX + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX +# include "kernel.h" +# define KERNEL_ARCH cpu_avx +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp new file mode 100644 index 00000000000..765ba96aba3 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp @@ -0,0 +1,40 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with AVX2 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +# define __KERNEL_AVX2__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 +# include "kernel.h" +# define KERNEL_ARCH cpu_avx2 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp new file mode 100644 index 00000000000..af244c03929 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp @@ -0,0 +1,34 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE2 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +# include "kernel.h" +# define KERNEL_ARCH cpu_sse2 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp new file mode 100644 index 00000000000..d1b579eeac5 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp @@ -0,0 +1,36 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# include "kernel.h" +# define KERNEL_ARCH cpu_sse3 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp new file mode 100644 index 00000000000..83d62de5aa5 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp @@ -0,0 +1,37 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE4.1 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 +# include "kernel.h" +# define KERNEL_ARCH cpu_sse41 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ |