Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
Diffstat (limited to 'intern')
-rw-r--r-- intern/cycles/kernel/CMakeLists.txt 3
-rw-r--r-- intern/cycles/kernel/kernel.h 74
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel.cpp 65
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_avx.cpp 55
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp 59
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_cpu.h 50
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h 126
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp 56
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp 55
-rw-r--r-- intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp 55
10 files changed, 239 insertions, 359 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 5a7d2450b46..20f2878da87 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -63,6 +63,9 @@ set(SRC_HEADERS
kernel_types.h
kernel_volume.h
kernel_work_stealing.h
+
+ kernels/cpu/kernel_cpu.h
+ kernels/cpu/kernel_cpu_impl.h
)
set(SRC_CLOSURE_HEADERS
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index b2596d10ee7..9279a94c13a 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -23,6 +23,10 @@
CCL_NAMESPACE_BEGIN
+#define KERNEL_NAME_JOIN(x, y, z) x ## _ ## y ## _ ## z
+#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name)
+#define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name)
+
struct KernelGlobals;
KernelGlobals *kernel_globals_create();
@@ -41,69 +45,33 @@ void kernel_tex_copy(KernelGlobals *kg,
InterpolationType interpolation=INTERPOLATION_LINEAR,
ExtensionType extension = EXTENSION_REPEAT);
-void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
- int sample, int x, int y, int offset, int stride);
-void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int offset, int sample);
+#define KERNEL_ARCH cpu
+#include "kernels/cpu/kernel_cpu.h"
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
- int sample, int x, int y, int offset, int stride);
-void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int offset, int sample);
-#endif
+# define KERNEL_ARCH cpu_sse2
+# include "kernels/cpu/kernel_cpu.h"
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
- int sample, int x, int y, int offset, int stride);
-void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int offset, int sample);
-#endif
+# define KERNEL_ARCH cpu_sse3
+# include "kernels/cpu/kernel_cpu.h"
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
-void kernel_cpu_sse41_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
- int sample, int x, int y, int offset, int stride);
-void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int offset, int sample);
-#endif
+# define KERNEL_ARCH cpu_sse41
+# include "kernels/cpu/kernel_cpu.h"
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-void kernel_cpu_avx_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
- int sample, int x, int y, int offset, int stride);
-void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int offset, int sample);
-#endif
+# define KERNEL_ARCH cpu_avx
+# include "kernels/cpu/kernel_cpu.h"
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-void kernel_cpu_avx2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
- int sample, int x, int y, int offset, int stride);
-void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
- float sample_scale, int x, int y, int offset, int stride);
-void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
- int type, int i, int offset, int sample);
-#endif
+# define KERNEL_ARCH cpu_avx2
+# include "kernels/cpu/kernel_cpu.h"
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index 2c8d3503c1a..5c6dc31b949 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -16,15 +16,19 @@
/* CPU kernel entry points */
-#include "kernel_compat_cpu.h"
+/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */
+#if defined(__x86_64__) || defined(_M_X64)
+#define __KERNEL_SSE2__
+#endif
+
+/* quiet unused define warnings */
+#if defined(__KERNEL_SSE2__)
+ /* do nothing */
+#endif
+
#include "kernel.h"
-#include "kernel_math.h"
-#include "kernel_types.h"
-#include "kernel_globals.h"
-#include "kernel_film.h"
-#include "kernel_path.h"
-#include "kernel_path_branched.h"
-#include "kernel_bake.h"
+#define KERNEL_ARCH cpu
+#include "kernel_cpu_impl.h"
CCL_NAMESPACE_BEGIN
@@ -94,49 +98,4 @@ void kernel_tex_copy(KernelGlobals *kg,
assert(0);
}
-/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */
-#if defined(__x86_64__) || defined(_M_X64)
-#define __KERNEL_SSE2__
-#endif
-
-/* quiet unused define warnings */
-#if defined(__KERNEL_SSE2__)
- /* do nothing */
-#endif
-
-/* Path Tracing */
-
-void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
-{
-#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched)
- kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
- else
-#endif
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
-}
-
-/* Film */
-
-void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-/* Shader Evaluation */
-
-void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
-{
- if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
- else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
-}
-
CCL_NAMESPACE_END
-
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
index df77bedc729..bc754f6832c 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
@@ -30,58 +30,13 @@
#include "util_optimization.h"
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-
-#include "kernel_compat_cpu.h"
-#include "kernel.h"
-#include "kernel_math.h"
-#include "kernel_types.h"
-#include "kernel_globals.h"
-#include "kernel_film.h"
-#include "kernel_path.h"
-#include "kernel_path_branched.h"
-#include "kernel_bake.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Path Tracing */
-
-void kernel_cpu_avx_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
-{
-#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched)
- kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
- else
-#endif
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
-}
-
-/* Film */
-
-void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-/* Shader Evaluate */
-
-void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
-{
- if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
- else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
-}
-
-CCL_NAMESPACE_END
-#else
+# include "kernel.h"
+# define KERNEL_ARCH cpu_avx
+# include "kernel_cpu_impl.h"
+#else /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
/* needed for some linkers in combination with scons making empty compilation unit in a library */
void __dummy_function_cycles_avx(void);
void __dummy_function_cycles_avx(void) {}
-#endif
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
index b3192369794..ce4a0441f6e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
@@ -27,62 +27,17 @@
#define __KERNEL_AVX__
#define __KERNEL_AVX2__
#endif
-
-#include "util_optimization.h"
-
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-
-#include "kernel_compat_cpu.h"
-#include "kernel.h"
-#include "kernel_math.h"
-#include "kernel_types.h"
-#include "kernel_globals.h"
-#include "kernel_film.h"
-#include "kernel_path.h"
-#include "kernel_path_branched.h"
-#include "kernel_bake.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Path Tracing */
-
-void kernel_cpu_avx2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
-{
-#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched)
- kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
- else
-#endif
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
-}
-
-/* Film */
-
-void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-/* Shader Evaluate */
-
-void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
-{
- if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
- else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
-}
+#include "util_optimization.h"
-CCL_NAMESPACE_END
-#else
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
+# include "kernel.h"
+# define KERNEL_ARCH cpu_avx2
+# include "kernel_cpu_impl.h"
+#else /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
/* needed for some linkers in combination with scons making empty compilation unit in a library */
void __dummy_function_cycles_avx2(void);
void __dummy_function_cycles_avx2(void) {}
-#endif
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
new file mode 100644
index 00000000000..2560c6d8dee
--- /dev/null
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Templated common declaration part of all CPU kernels. */
+
+void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
+ float *buffer,
+ unsigned int *rng_state,
+ int sample,
+ int x, int y,
+ int offset,
+ int stride);
+
+void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
+ uchar4 *rgba,
+ float *buffer,
+ float sample_scale,
+ int x, int y,
+ int offset, int stride);
+
+void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
+ uchar4 *rgba,
+ float *buffer,
+ float sample_scale,
+ int x, int y,
+ int offset,
+ int stride);
+
+void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
+ uint4 *input,
+ float4 *output,
+ int type,
+ int i,
+ int offset,
+ int sample);
+
+#undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
new file mode 100644
index 00000000000..693285ec3a8
--- /dev/null
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Templated common implementation part of all CPU kernels.
+ *
+ * The idea is that each particular .cpp file sets the needed optimization flags
+ * and simply includes this file, without having to copy the actual implementation over.
+ */
+
+#include "kernel_compat_cpu.h"
+#include "kernel_math.h"
+#include "kernel_types.h"
+#include "kernel_globals.h"
+#include "kernel_film.h"
+#include "kernel_path.h"
+#include "kernel_path_branched.h"
+#include "kernel_bake.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Path Tracing */
+
+void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
+ float *buffer,
+ unsigned int *rng_state,
+ int sample,
+ int x, int y,
+ int offset,
+ int stride)
+{
+#ifdef __BRANCHED_PATH__
+ if(kernel_data.integrator.branched) {
+ kernel_branched_path_trace(kg,
+ buffer,
+ rng_state,
+ sample,
+ x, y,
+ offset,
+ stride);
+ }
+ else
+#endif
+ {
+ kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+ }
+}
+
+/* Film */
+
+void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
+ uchar4 *rgba,
+ float *buffer,
+ float sample_scale,
+ int x, int y,
+ int offset,
+ int stride)
+{
+ kernel_film_convert_to_byte(kg,
+ rgba,
+ buffer,
+ sample_scale,
+ x, y,
+ offset,
+ stride);
+}
+
+void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
+ uchar4 *rgba,
+ float *buffer,
+ float sample_scale,
+ int x, int y,
+ int offset,
+ int stride)
+{
+ kernel_film_convert_to_half_float(kg,
+ rgba,
+ buffer,
+ sample_scale,
+ x, y,
+ offset,
+ stride);
+}
+
+/* Shader Evaluate */
+
+void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
+ uint4 *input,
+ float4 *output,
+ int type,
+ int i,
+ int offset,
+ int sample)
+{
+ if(type >= SHADER_EVAL_BAKE) {
+ kernel_bake_evaluate(kg,
+ input,
+ output,
+ (ShaderEvalType)type,
+ i,
+ offset,
+ sample);
+ }
+ else {
+ kernel_shader_evaluate(kg,
+ input,
+ output,
+ (ShaderEvalType)type,
+ i,
+ sample);
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
index f9c5134e442..3142f1d7e8b 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
@@ -26,59 +26,13 @@
#include "util_optimization.h"
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-
-#include "kernel_compat_cpu.h"
-#include "kernel.h"
-#include "kernel_math.h"
-#include "kernel_types.h"
-#include "kernel_globals.h"
-#include "kernel_film.h"
-#include "kernel_path.h"
-#include "kernel_path_branched.h"
-#include "kernel_bake.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Path Tracing */
-
-void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
-{
-#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched)
- kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
- else
-#endif
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
-}
-
-/* Film */
-
-void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-/* Shader Evaluate */
-
-void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
-{
- if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
- else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
-}
-
-CCL_NAMESPACE_END
-
-#else
+# include "kernel.h"
+# define KERNEL_ARCH cpu_sse2
+# include "kernel_cpu_impl.h"
+#else /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
/* needed for some linkers in combination with scons making empty compilation unit in a library */
void __dummy_function_cycles_sse2(void);
void __dummy_function_cycles_sse2(void) {}
-#endif
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
index 2dbe4b81821..93ee7d1a8ef 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
@@ -28,58 +28,13 @@
#include "util_optimization.h"
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-
-#include "kernel_compat_cpu.h"
-#include "kernel.h"
-#include "kernel_math.h"
-#include "kernel_types.h"
-#include "kernel_globals.h"
-#include "kernel_film.h"
-#include "kernel_path.h"
-#include "kernel_path_branched.h"
-#include "kernel_bake.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Path Tracing */
-
-void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
-{
-#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched)
- kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
- else
-#endif
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
-}
-
-/* Film */
-
-void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-/* Shader Evaluate */
-
-void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
-{
- if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
- else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
-}
-
-CCL_NAMESPACE_END
-#else
+# include "kernel.h"
+# define KERNEL_ARCH cpu_sse3
+# include "kernel_cpu_impl.h"
+#else /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
/* needed for some linkers in combination with scons making empty compilation unit in a library */
void __dummy_function_cycles_sse3(void);
void __dummy_function_cycles_sse3(void) {}
-#endif
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
index 5c57ad01181..c3ace9e8c07 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
@@ -29,58 +29,13 @@
#include "util_optimization.h"
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
-
-#include "kernel_compat_cpu.h"
-#include "kernel.h"
-#include "kernel_math.h"
-#include "kernel_types.h"
-#include "kernel_globals.h"
-#include "kernel_film.h"
-#include "kernel_path.h"
-#include "kernel_path_branched.h"
-#include "kernel_bake.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Path Tracing */
-
-void kernel_cpu_sse41_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
-{
-#ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched)
- kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
- else
-#endif
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
-}
-
-/* Film */
-
-void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride)
-{
- kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-/* Shader Evaluate */
-
-void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
-{
- if(type >= SHADER_EVAL_BAKE)
- kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
- else
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
-}
-
-CCL_NAMESPACE_END
-#else
+# include "kernel.h"
+# define KERNEL_ARCH cpu_sse41
+# include "kernel_cpu_impl.h"
+#else /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
/* needed for some linkers in combination with scons making empty compilation unit in a library */
void __dummy_function_cycles_sse41(void);
void __dummy_function_cycles_sse41(void) {}
-#endif
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */