From 0892352bfe6d5a9aa6ec4c088e67f8bbbbfae610 Mon Sep 17 00:00:00 2001 From: Mai Lavelle Date: Tue, 14 Feb 2017 06:20:48 -0500 Subject: Cycles: CPU implementation of split kernel --- intern/cycles/kernel/CMakeLists.txt | 11 +++ intern/cycles/kernel/kernel.h | 2 + intern/cycles/kernel/kernel_compat_cpu.h | 9 +++ intern/cycles/kernel/kernel_globals.h | 7 ++ intern/cycles/kernel/kernel_types.h | 25 ++++-- intern/cycles/kernel/kernels/cpu/kernel_cpu.h | 35 +++++++++ intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 89 ++++++++++++++++++++-- intern/cycles/kernel/kernels/cpu/kernel_split.cpp | 63 +++++++++++++++ .../cycles/kernel/kernels/cpu/kernel_split_avx.cpp | 38 +++++++++ .../kernel/kernels/cpu/kernel_split_avx2.cpp | 40 ++++++++++ .../kernel/kernels/cpu/kernel_split_sse2.cpp | 34 +++++++++ .../kernel/kernels/cpu/kernel_split_sse3.cpp | 36 +++++++++ .../kernel/kernels/cpu/kernel_split_sse41.cpp | 37 +++++++++ intern/cycles/kernel/osl/osl_closures.cpp | 1 + intern/cycles/kernel/osl/osl_services.cpp | 1 + intern/cycles/kernel/osl/osl_shader.cpp | 1 + intern/cycles/kernel/split/kernel_data_init.h | 4 + intern/cycles/kernel/split/kernel_split_common.h | 12 ++- 18 files changed, 429 insertions(+), 16 deletions(-) create mode 100644 intern/cycles/kernel/kernels/cpu/kernel_split.cpp create mode 100644 intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp create mode 100644 intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp create mode 100644 intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp create mode 100644 intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp create mode 100644 intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp (limited to 'intern/cycles/kernel') diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index cc1ad5ce48c..d844ba007aa 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -13,6 +13,7 @@ set(INC_SYS set(SRC kernels/cpu/kernel.cpp + kernels/cpu/kernel_split.cpp kernels/opencl/kernel.cl kernels/opencl/kernel_data_init.cl kernels/opencl/kernel_queue_enqueue.cl @@ -316,25 +317,35 @@ if(CXX_HAS_SSE) kernels/cpu/kernel_sse2.cpp kernels/cpu/kernel_sse3.cpp kernels/cpu/kernel_sse41.cpp + kernels/cpu/kernel_split_sse2.cpp + kernels/cpu/kernel_split_sse3.cpp + kernels/cpu/kernel_split_sse41.cpp ) set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") endif() if(CXX_HAS_AVX) list(APPEND SRC kernels/cpu/kernel_avx.cpp + kernels/cpu/kernel_split_avx.cpp ) set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") endif() if(CXX_HAS_AVX2) list(APPEND SRC kernels/cpu/kernel_avx2.cpp + kernels/cpu/kernel_split_avx2.cpp ) set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") endif() add_library(cycles_kernel diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 9279a94c13a..cd339e6237e 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -20,6 +20,7 @@ /* CPU Kernel Interface */ #include "util_types.h" +#include "kernel_types.h" CCL_NAMESPACE_BEGIN @@ -28,6 +29,7 @@ CCL_NAMESPACE_BEGIN #define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name) struct KernelGlobals; +struct KernelData; KernelGlobals *kernel_globals_create(); void kernel_globals_free(KernelGlobals *kg); diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 9d1f3bdc918..e347a1eca18 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -44,6 +44,15 @@ #define ccl_addr_space +#define ccl_local_id(d) 0 +#define ccl_global_id(d) (kg->global_id[d]) + +#define ccl_local_size(d) 1 +#define ccl_global_size(d) (kg->global_size[d]) + +#define ccl_group_id(d) ccl_global_id(d) +#define ccl_num_groups(d) ccl_global_size(d) + /* On x86_64, versions of glibc < 2.16 have an issue where expf is * much slower than the double version. This was fixed in glibc 2.16. */ diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index e994836f6a2..5dbea4481a3 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -64,6 +64,13 @@ typedef struct KernelGlobals { /* Storage for decoupled volume steps. */ VolumeStep *decoupled_volume_steps[2]; int decoupled_volume_steps_index; + + /* split kernel */ + SplitData split_data; + SplitParams split_param_data; + + int2 global_size; + int2 global_id; } KernelGlobals; #endif /* __KERNEL_CPU__ */ diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 279cd6dedab..a016e5293ca 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -32,6 +32,11 @@ # define ccl_addr_space #endif +#if defined(__SPLIT_KERNEL__) && !defined(__COMPUTE_DEVICE_GPU__) +/* TODO(mai): need to investigate how this effects the kernel, as cpu kernel crashes without this right now */ +#define __COMPUTE_DEVICE_GPU__ +#endif + CCL_NAMESPACE_BEGIN /* constants */ @@ -65,17 +70,23 @@ CCL_NAMESPACE_BEGIN # endif # define __KERNEL_SHADING__ # define __KERNEL_ADV_SHADING__ -# define __BRANCHED_PATH__ +# ifndef __SPLIT_KERNEL__ +# define __BRANCHED_PATH__ +# endif # ifdef WITH_OSL # define __OSL__ # endif -# define __SUBSURFACE__ +# ifndef __SPLIT_KERNEL__ +# define __SUBSURFACE__ +# endif # define __CMJ__ -# define __VOLUME__ -# define __VOLUME_DECOUPLED__ -# define __VOLUME_SCATTER__ -# define __SHADOW_RECORD_ALL__ -# define __VOLUME_RECORD_ALL__ +# ifndef __SPLIT_KERNEL__ +# define __VOLUME__ +# define __VOLUME_DECOUPLED__ +# define __VOLUME_SCATTER__ +# define __SHADOW_RECORD_ALL__ +# define __VOLUME_RECORD_ALL__ +# endif #endif /* __KERNEL_CPU__ */ #ifdef __KERNEL_CUDA__ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 1a07c705f1c..1d710157817 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -49,4 +49,39 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, int offset, int sample); +/* Split kernels */ + +void KERNEL_FUNCTION_FULL_NAME(data_init)( + KernelGlobals *kg, + ccl_constant KernelData *data, + ccl_global void *split_data_buffer, + int num_elements, + ccl_global char *ray_state, + ccl_global uint *rng_state, + int start_sample, + int end_sample, + int sx, int sy, int sw, int sh, int offset, int stride, + ccl_global int *Queue_index, + int queuesize, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, + ccl_global float *buffer); + +#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data); + +DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect) +DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission) +DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue) +DECLARE_SPLIT_KERNEL_FUNCTION(background_buffer_update) +DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval) +DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) +DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting) +DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked) +DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) +DECLARE_SPLIT_KERNEL_FUNCTION(sum_all_radiance) + +void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)); + #undef KERNEL_ARCH diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index ec82d4b4c22..c59f4892546 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -21,17 +21,39 @@ */ #include "kernel_compat_cpu.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_cpu_image.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_path_branched.h" -#include "kernel_bake.h" + +#ifndef __SPLIT_KERNEL__ +# include "kernel_math.h" +# include "kernel_types.h" + +# include "split/kernel_split_data.h" +# include "kernel_globals.h" + +# include "kernel_cpu_image.h" +# include "kernel_film.h" +# include "kernel_path.h" +# include "kernel_path_branched.h" +# include "kernel_bake.h" +#else +# include "split/kernel_split_common.h" + +# include "split/kernel_data_init.h" +# include "split/kernel_scene_intersect.h" +# include "split/kernel_lamp_emission.h" +# include "split/kernel_queue_enqueue.h" +# include "split/kernel_background_buffer_update.h" +# include "split/kernel_shader_eval.h" +# include "split/kernel_holdout_emission_blurring_pathtermination_ao.h" +# include "split/kernel_direct_lighting.h" +# include "split/kernel_shadow_blocked.h" +# include "split/kernel_next_iteration_setup.h" +# include "split/kernel_sum_all_radiance.h" +#endif CCL_NAMESPACE_BEGIN +#ifndef __SPLIT_KERNEL__ + /* Path Tracing */ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, @@ -131,4 +153,55 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, } } +#else /* __SPLIT_KERNEL__ */ + +/* Split Kernel Path Tracing */ + +#define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ + { \ + kernel_##name(kg); \ + } + +DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) +DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) +DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) +DEFINE_SPLIT_KERNEL_FUNCTION(background_buffer_update) +DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) +DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) +DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting) +DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked) +DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) +DEFINE_SPLIT_KERNEL_FUNCTION(sum_all_radiance) + +void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)) +{ +#define REGISTER_NAME_STRING(name) #name +#define REGISTER_EVAL_NAME(name) REGISTER_NAME_STRING(name) +#define REGISTER(name) reg(REGISTER_EVAL_NAME(KERNEL_FUNCTION_FULL_NAME(name)), (void*)KERNEL_FUNCTION_FULL_NAME(name)); + + REGISTER(path_trace); + REGISTER(convert_to_byte); + REGISTER(convert_to_half_float); + REGISTER(shader); + + REGISTER(data_init); + REGISTER(scene_intersect); + REGISTER(lamp_emission); + REGISTER(queue_enqueue); + REGISTER(background_buffer_update); + REGISTER(shader_eval); + REGISTER(holdout_emission_blurring_pathtermination_ao); + REGISTER(direct_lighting); + REGISTER(shadow_blocked); + REGISTER(next_iteration_setup); + REGISTER(sum_all_radiance); + +#undef REGISTER +#undef REGISTER_EVAL_NAME +#undef REGISTER_NAME_STRING +} + +#endif /* __SPLIT_KERNEL__ */ + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp new file mode 100644 index 00000000000..30519dae53e --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp @@ -0,0 +1,63 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CPU kernel entry points */ + +/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this + * one with SSE2 intrinsics. + */ +#if defined(__x86_64__) || defined(_M_X64) +# define __KERNEL_SSE2__ +#endif + +#define __SPLIT_KERNEL__ + +/* When building kernel for native machine detect kernel features from the flags + * set by compiler. + */ +#ifdef WITH_KERNEL_NATIVE +# ifdef __SSE2__ +# ifndef __KERNEL_SSE2__ +# define __KERNEL_SSE2__ +# endif +# endif +# ifdef __SSE3__ +# define __KERNEL_SSE3__ +# endif +# ifdef __SSSE3__ +# define __KERNEL_SSSE3__ +# endif +# ifdef __SSE4_1__ +# define __KERNEL_SSE41__ +# endif +# ifdef __AVX__ +# define __KERNEL_AVX__ +# endif +# ifdef __AVX2__ +# define __KERNEL_SSE__ +# define __KERNEL_AVX2__ +# endif +#endif + +/* quiet unused define warnings */ +#if defined(__KERNEL_SSE2__) + /* do nothing */ +#endif + +#include "kernel.h" +#define KERNEL_ARCH cpu +#include "kernel_cpu_impl.h" + diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp new file mode 100644 index 00000000000..335ad24bdc5 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp @@ -0,0 +1,38 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with AVX + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX +# include "kernel.h" +# define KERNEL_ARCH cpu_avx +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp new file mode 100644 index 00000000000..765ba96aba3 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp @@ -0,0 +1,40 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with AVX2 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +# define __KERNEL_AVX2__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 +# include "kernel.h" +# define KERNEL_ARCH cpu_avx2 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp new file mode 100644 index 00000000000..af244c03929 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp @@ -0,0 +1,34 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE2 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +# include "kernel.h" +# define KERNEL_ARCH cpu_sse2 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp new file mode 100644 index 00000000000..d1b579eeac5 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp @@ -0,0 +1,36 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# include "kernel.h" +# define KERNEL_ARCH cpu_sse3 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp new file mode 100644 index 00000000000..83d62de5aa5 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp @@ -0,0 +1,37 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +#endif + +#define __SPLIT_KERNEL__ + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 +# include "kernel.h" +# define KERNEL_ARCH cpu_sse41 +# include "kernel_cpu_impl.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index 94de782dca0..4cb46254bc7 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -42,6 +42,7 @@ #include "kernel_types.h" #include "kernel_compat_cpu.h" +#include "split/kernel_split_data.h" #include "kernel_globals.h" #include "kernel_montecarlo.h" #include "kernel_random.h" diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 58bbdc33920..2d5b9c1c1cc 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -39,6 +39,7 @@ #include "util_string.h" #include "kernel_compat_cpu.h" +#include "split/kernel_split_data.h" #include "kernel_globals.h" #include "kernel_random.h" #include "kernel_projection.h" diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index 0d762bbdb38..78848d7dfc9 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -19,6 +19,7 @@ #include "kernel_compat_cpu.h" #include "kernel_montecarlo.h" #include "kernel_types.h" +#include "split/kernel_split_data.h" #include "kernel_globals.h" #include "geom/geom_object.h" diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h index 3251c091468..5604363dcd9 100644 --- a/intern/cycles/kernel/split/kernel_data_init.h +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -51,7 +51,11 @@ CCL_NAMESPACE_BEGIN * The number of elements in the queues is initialized to 0; */ +#ifndef __KERNEL_CPU__ ccl_device void kernel_data_init( +#else +void KERNEL_FUNCTION_FULL_NAME(data_init)( +#endif KernelGlobals *kg, ccl_constant KernelData *data, ccl_global void *split_data_buffer, diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index c3963667aea..dd0c3f9c941 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -23,7 +23,17 @@ #include "kernel_split_data.h" #include "kernel_globals.h" -#include "kernel_image_opencl.h" + +#ifdef __OSL__ +# include "osl_shader.h" +#endif + +#ifdef __KERNEL_OPENCL__ +# include "kernel_image_opencl.h" +#endif +#ifdef __KERNEL_CPU__ +# include "../kernels/cpu/kernel_cpu_image.h" +#endif #include "util_atomic.h" -- cgit v1.2.3