diff options
Diffstat (limited to 'intern/cycles/kernel/kernels/cpu')
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel.cpp | 65 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_avx.cpp | 55 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp | 59 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu.h | 50 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 126 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp | 56 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp | 55 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp | 55 |
8 files changed, 215 insertions, 306 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index 2c8d3503c1a..5c6dc31b949 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -16,15 +16,19 @@ /* CPU kernel entry points */ -#include "kernel_compat_cpu.h" +/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */ +#if defined(__x86_64__) || defined(_M_X64) +#define __KERNEL_SSE2__ +#endif + +/* quiet unused define warnings */ +#if defined(__KERNEL_SSE2__) + /* do nothing */ +#endif + #include "kernel.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_path_branched.h" -#include "kernel_bake.h" +#define KERNEL_ARCH cpu +#include "kernel_cpu_impl.h" CCL_NAMESPACE_BEGIN @@ -94,49 +98,4 @@ void kernel_tex_copy(KernelGlobals *kg, assert(0); } -/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */ -#if defined(__x86_64__) || defined(_M_X64) -#define __KERNEL_SSE2__ -#endif - -/* quiet unused define warnings */ -#if defined(__KERNEL_SSE2__) - /* do nothing */ -#endif - -/* Path Tracing */ - -void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) -{ -#ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) - kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); - else -#endif - kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); -} - -/* Film */ - -void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float 
sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -/* Shader Evaluation */ - -void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) -{ - if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); - else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); -} - CCL_NAMESPACE_END - diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp index df77bedc729..bc754f6832c 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp @@ -30,58 +30,13 @@ #include "util_optimization.h" #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX - -#include "kernel_compat_cpu.h" -#include "kernel.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_path_branched.h" -#include "kernel_bake.h" - -CCL_NAMESPACE_BEGIN - -/* Path Tracing */ - -void kernel_cpu_avx_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) -{ -#ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) - kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); - else -#endif - kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); -} - -/* Film */ - -void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_half_float(kg, rgba, 
buffer, sample_scale, x, y, offset, stride); -} - -/* Shader Evaluate */ - -void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) -{ - if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); - else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); -} - -CCL_NAMESPACE_END -#else +# include "kernel.h" +# define KERNEL_ARCH cpu_avx +# include "kernel_cpu_impl.h" +#else /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ /* needed for some linkers in combination with scons making empty compilation unit in a library */ void __dummy_function_cycles_avx(void); void __dummy_function_cycles_avx(void) {} -#endif +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp index b3192369794..ce4a0441f6e 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp @@ -27,62 +27,17 @@ #define __KERNEL_AVX__ #define __KERNEL_AVX2__ #endif - -#include "util_optimization.h" - -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 - -#include "kernel_compat_cpu.h" -#include "kernel.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_path_branched.h" -#include "kernel_bake.h" - -CCL_NAMESPACE_BEGIN - -/* Path Tracing */ - -void kernel_cpu_avx2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) -{ -#ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) - kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); - else -#endif - kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); -} - -/* Film */ - -void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float 
sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} -/* Shader Evaluate */ - -void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) -{ - if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); - else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); -} +#include "util_optimization.h" -CCL_NAMESPACE_END -#else +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 +# include "kernel.h" +# define KERNEL_ARCH cpu_avx2 +# include "kernel_cpu_impl.h" +#else /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ /* needed for some linkers in combination with scons making empty compilation unit in a library */ void __dummy_function_cycles_avx2(void); void __dummy_function_cycles_avx2(void) {} -#endif +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h new file mode 100644 index 00000000000..2560c6d8dee --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Templated common declaration part of all CPU kernels. */ + +void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, + float *buffer, + unsigned int *rng_state, + int sample, + int x, int y, + int offset, + int stride); + +void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg, + uchar4 *rgba, + float *buffer, + float sample_scale, + int x, int y, + int offset, int stride); + +void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, + uchar4 *rgba, + float *buffer, + float sample_scale, + int x, int y, + int offset, + int stride); + +void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, + uint4 *input, + float4 *output, + int type, + int i, + int offset, + int sample); + +#undef KERNEL_ARCH diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h new file mode 100644 index 00000000000..693285ec3a8 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -0,0 +1,126 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Templated common implementation part of all CPU kernels. + * + * The idea is that each particular .cpp file sets the needed optimization flags and + * simply includes this file, without having to copy the actual implementation over. 
+ */ + +#include "kernel_compat_cpu.h" +#include "kernel_math.h" +#include "kernel_types.h" +#include "kernel_globals.h" +#include "kernel_film.h" +#include "kernel_path.h" +#include "kernel_path_branched.h" +#include "kernel_bake.h" + +CCL_NAMESPACE_BEGIN + +/* Path Tracing */ + +void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, + float *buffer, + unsigned int *rng_state, + int sample, + int x, int y, + int offset, + int stride) +{ +#ifdef __BRANCHED_PATH__ + if(kernel_data.integrator.branched) { + kernel_branched_path_trace(kg, + buffer, + rng_state, + sample, + x, y, + offset, + stride); + } + else +#endif + { + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); + } +} + +/* Film */ + +void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg, + uchar4 *rgba, + float *buffer, + float sample_scale, + int x, int y, + int offset, + int stride) +{ + kernel_film_convert_to_byte(kg, + rgba, + buffer, + sample_scale, + x, y, + offset, + stride); +} + +void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, + uchar4 *rgba, + float *buffer, + float sample_scale, + int x, int y, + int offset, + int stride) +{ + kernel_film_convert_to_half_float(kg, + rgba, + buffer, + sample_scale, + x, y, + offset, + stride); +} + +/* Shader Evaluate */ + +void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, + uint4 *input, + float4 *output, + int type, + int i, + int offset, + int sample) +{ + if(type >= SHADER_EVAL_BAKE) { + kernel_bake_evaluate(kg, + input, + output, + (ShaderEvalType)type, + i, + offset, + sample); + } + else { + kernel_shader_evaluate(kg, + input, + output, + (ShaderEvalType)type, + i, + sample); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp index f9c5134e442..3142f1d7e8b 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp @@ -26,59 +26,13 @@ 
#include "util_optimization.h" #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 - -#include "kernel_compat_cpu.h" -#include "kernel.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_path_branched.h" -#include "kernel_bake.h" - -CCL_NAMESPACE_BEGIN - -/* Path Tracing */ - -void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) -{ -#ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) - kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); - else -#endif - kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); -} - -/* Film */ - -void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -/* Shader Evaluate */ - -void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) -{ - if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); - else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); -} - -CCL_NAMESPACE_END - -#else +# include "kernel.h" +# define KERNEL_ARCH cpu_sse2 +# include "kernel_cpu_impl.h" +#else /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ /* needed for some linkers in combination with scons making empty compilation unit in a library */ void __dummy_function_cycles_sse2(void); void __dummy_function_cycles_sse2(void) {} -#endif +#endif /* 
WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp index 2dbe4b81821..93ee7d1a8ef 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp @@ -28,58 +28,13 @@ #include "util_optimization.h" #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 - -#include "kernel_compat_cpu.h" -#include "kernel.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_path_branched.h" -#include "kernel_bake.h" - -CCL_NAMESPACE_BEGIN - -/* Path Tracing */ - -void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) -{ -#ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) - kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); - else -#endif - kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); -} - -/* Film */ - -void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -/* Shader Evaluate */ - -void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) -{ - if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); - else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); -} - -CCL_NAMESPACE_END -#else +# include "kernel.h" +# define KERNEL_ARCH 
cpu_sse3 +# include "kernel_cpu_impl.h" +#else /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ /* needed for some linkers in combination with scons making empty compilation unit in a library */ void __dummy_function_cycles_sse3(void); void __dummy_function_cycles_sse3(void) {} -#endif +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp index 5c57ad01181..c3ace9e8c07 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp @@ -29,58 +29,13 @@ #include "util_optimization.h" #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 - -#include "kernel_compat_cpu.h" -#include "kernel.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_path_branched.h" -#include "kernel_bake.h" - -CCL_NAMESPACE_BEGIN - -/* Path Tracing */ - -void kernel_cpu_sse41_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) -{ -#ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) - kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); - else -#endif - kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); -} - -/* Film */ - -void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -/* Shader Evaluate */ - -void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int 
offset, int sample) -{ - if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); - else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); -} - -CCL_NAMESPACE_END -#else +# include "kernel.h" +# define KERNEL_ARCH cpu_sse41 +# include "kernel_cpu_impl.h" +#else /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ /* needed for some linkers in combination with scons making empty compilation unit in a library */ void __dummy_function_cycles_sse41(void); void __dummy_function_cycles_sse41(void) {} -#endif +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ |