diff options
author | Lukas Stockner <lukas.stockner@freenet.de> | 2017-05-07 15:40:58 +0300 |
---|---|---|
committer | Lukas Stockner <lukas.stockner@freenet.de> | 2017-05-07 15:40:58 +0300 |
commit | 43b374e8c5430488a302298b1026faa1c3a231e9 (patch) | |
tree | 42e619a9fa08d02cef515b6315ce34dd7fd062b2 /intern/cycles/kernel/kernels/cpu | |
parent | bca697834728fd12c84941aa2a428abfe2090b27 (diff) |
Cycles: Implement denoising option for reducing noise in the rendered image
This commit contains the first part of the new Cycles denoising option,
which filters the resulting image using information gathered during rendering
to get rid of noise while preserving visual features as well as possible.
To use the option, enable it in the render layer options. The default settings
fit a wide range of scenes, but the user can tweak individual settings to
control the tradeoff between a noise-free image, image details, and calculation
time.
Note that the denoiser may still change in the future and that some features
are not implemented yet. The most important missing feature is animation
denoising, which uses information from multiple frames at once to produce a
flicker-free and smoother result. These features will be added in the future.
Finally, thanks to all the people who supported this project:
- Google (through the GSoC) and Theory Studios for sponsoring the development
- The authors of the papers I used for implementing the denoiser (more details
on them will be included in the technical docs)
- The other Cycles devs for feedback on the code, especially Sergey for
mentoring the GSoC project and Brecht for the code review!
- And of course the users who helped with testing, reported bugs, and pointed out
things that could or should work better!
Diffstat (limited to 'intern/cycles/kernel/kernels/cpu')
20 files changed, 818 insertions, 164 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter.cpp b/intern/cycles/kernel/kernels/cpu/filter.cpp new file mode 100644 index 00000000000..2ff1a392dc3 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/filter.cpp @@ -0,0 +1,61 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CPU kernel entry points */ + +/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this + * one with SSE2 intrinsics. + */ +#if defined(__x86_64__) || defined(_M_X64) +# define __KERNEL_SSE2__ +#endif + +/* When building kernel for native machine detect kernel features from the flags + * set by compiler. 
+ */ +#ifdef WITH_KERNEL_NATIVE +# ifdef __SSE2__ +# ifndef __KERNEL_SSE2__ +# define __KERNEL_SSE2__ +# endif +# endif +# ifdef __SSE3__ +# define __KERNEL_SSE3__ +# endif +# ifdef __SSSE3__ +# define __KERNEL_SSSE3__ +# endif +# ifdef __SSE4_1__ +# define __KERNEL_SSE41__ +# endif +# ifdef __AVX__ +# define __KERNEL_SSE__ +# define __KERNEL_AVX__ +# endif +# ifdef __AVX2__ +# define __KERNEL_SSE__ +# define __KERNEL_AVX2__ +# endif +#endif + +/* quiet unused define warnings */ +#if defined(__KERNEL_SSE2__) + /* do nothing */ +#endif + +#include "kernel/filter/filter.h" +#define KERNEL_ARCH cpu +#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp new file mode 100644 index 00000000000..4a9e6047ecf --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with AVX + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +#include "util/util_optimization.h" + +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +# endif +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ + +#include "kernel/filter/filter.h" +#define KERNEL_ARCH cpu_avx +#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp new file mode 100644 index 00000000000..c22ec576254 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp @@ -0,0 +1,40 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with AVX2 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +#include "util/util_optimization.h" + +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +# define __KERNEL_AVX2__ +# endif +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ + +#include "kernel/filter/filter.h" +#define KERNEL_ARCH cpu_avx2 +#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h new file mode 100644 index 00000000000..10007ee2635 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -0,0 +1,132 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Templated common declaration part of all CPU kernels. 
*/ + +void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, + TilesInfo *tiles, + int x, + int y, + float *unfilteredA, + float *unfilteredB, + float *sampleV, + float *sampleVV, + float *bufferV, + int* prefilter_rect, + int buffer_pass_stride, + int buffer_denoising_offset, + bool use_split_variance); + +void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, + TilesInfo *tiles, + int m_offset, + int v_offset, + int x, + int y, + float *mean, + float *variance, + int* prefilter_rect, + int buffer_pass_stride, + int buffer_denoising_offset, + bool use_split_variance); + +void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y, + float *mean, + float *variance, + float *a, + float *b, + int* prefilter_rect, + int r); + +void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, + int x, + int y, + int storage_ofs, + float *transform, + int *rank, + int* rect, + int pass_stride, + int radius, + float pca_threshold); + +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, + int dy, + float *weightImage, + float *variance, + float *differenceImage, + int* rect, + int w, + int channel_offset, + float a, + float k_2); + +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *differenceImage, + float *outImage, + int* rect, + int w, + int f); + +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *differenceImage, + float *outImage, + int* rect, + int w, + int f); + +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, + int dy, + float *differenceImage, + float *image, + float *outImage, + float *accumImage, + int* rect, + int w, + int f); + +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, + int dy, + float *differenceImage, + float *buffer, + float *color_pass, + float *variance_pass, + float *transform, + int *rank, + float *XtWX, + float3 *XtWY, + int *rect, + int *filter_rect, + int w, + int h, + int f, + int pass_stride); + +void 
KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *outImage, + float *accumImage, + int* rect, + int w); + +void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x, + int y, + int storage_ofs, + int w, + int h, + float *buffer, + int *rank, + float *XtWX, + float3 *XtWY, + int *buffer_params, + int sample); + +#undef KERNEL_ARCH diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h new file mode 100644 index 00000000000..3b71e50ca3b --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -0,0 +1,259 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Templated common implementation part of all CPU kernels. + * + * The idea is that particular .cpp files sets needed optimization flags and + * simply includes this file without worry of copying actual implementation over. 
+ */ + +#include "kernel/kernel_compat_cpu.h" + +#include "kernel/filter/filter_kernel.h" + +#ifdef KERNEL_STUB +# include "util/util_debug.h" +# define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!")) +#endif + +CCL_NAMESPACE_BEGIN + + +/* Denoise filter */ + +void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, + TilesInfo *tiles, + int x, + int y, + float *unfilteredA, + float *unfilteredB, + float *sampleVariance, + float *sampleVarianceV, + float *bufferVariance, + int* prefilter_rect, + int buffer_pass_stride, + int buffer_denoising_offset, + bool use_split_variance) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow); +#else + kernel_filter_divide_shadow(sample, tiles, + x, y, + unfilteredA, + unfilteredB, + sampleVariance, + sampleVarianceV, + bufferVariance, + load_int4(prefilter_rect), + buffer_pass_stride, + buffer_denoising_offset, + use_split_variance); +#endif +} + +void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, + TilesInfo *tiles, + int m_offset, + int v_offset, + int x, + int y, + float *mean, float *variance, + int* prefilter_rect, + int buffer_pass_stride, + int buffer_denoising_offset, + bool use_split_variance) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_get_feature); +#else + kernel_filter_get_feature(sample, tiles, + m_offset, v_offset, + x, y, + mean, variance, + load_int4(prefilter_rect), + buffer_pass_stride, + buffer_denoising_offset, + use_split_variance); +#endif +} + +void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y, + float *mean, + float *variance, + float *a, + float *b, + int* prefilter_rect, + int r) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_combine_halves); +#else + kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r); +#endif +} + +void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, + int x, + int y, + int storage_ofs, + float 
*transform, + int *rank, + int* prefilter_rect, + int pass_stride, + int radius, + float pca_threshold) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_construct_transform); +#else + rank += storage_ofs; + transform += storage_ofs*TRANSFORM_SIZE; + kernel_filter_construct_transform(buffer, + x, y, + load_int4(prefilter_rect), + pass_stride, + transform, + rank, + radius, + pca_threshold); +#endif +} + +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, + int dy, + float *weightImage, + float *variance, + float *differenceImage, + int *rect, + int w, + int channel_offset, + float a, + float k_2) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference); +#else + kernel_filter_nlm_calc_difference(dx, dy, weightImage, variance, differenceImage, load_int4(rect), w, channel_offset, a, k_2); +#endif +} + +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *differenceImage, + float *outImage, + int *rect, + int w, + int f) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur); +#else + kernel_filter_nlm_blur(differenceImage, outImage, load_int4(rect), w, f); +#endif +} + +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *differenceImage, + float *outImage, + int *rect, + int w, + int f) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight); +#else + kernel_filter_nlm_calc_weight(differenceImage, outImage, load_int4(rect), w, f); +#endif +} + +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, + int dy, + float *differenceImage, + float *image, + float *outImage, + float *accumImage, + int *rect, + int w, + int f) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output); +#else + kernel_filter_nlm_update_output(dx, dy, differenceImage, image, outImage, accumImage, load_int4(rect), w, f); +#endif +} + +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, + int dy, + float *differenceImage, + float *buffer, + float *color_pass, + float 
*variance_pass, + float *transform, + int *rank, + float *XtWX, + float3 *XtWY, + int *rect, + int *filter_rect, + int w, + int h, + int f, + int pass_stride) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); +#else + kernel_filter_nlm_construct_gramian(dx, dy, differenceImage, buffer, color_pass, variance_pass, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride); +#endif +} + +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *outImage, + float *accumImage, + int *rect, + int w) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize); +#else + kernel_filter_nlm_normalize(outImage, accumImage, load_int4(rect), w); +#endif +} + +void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x, + int y, + int storage_ofs, + int w, + int h, + float *buffer, + int *rank, + float *XtWX, + float3 *XtWY, + int *buffer_params, + int sample) +{ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, filter_finalize); +#else + XtWX += storage_ofs*XTWX_SIZE; + XtWY += storage_ofs*XTWY_SIZE; + rank += storage_ofs; + kernel_filter_finalize(x, y, w, h, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample); +#endif +} + +#undef KERNEL_STUB +#undef STUB_ASSERT +#undef KERNEL_ARCH + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp new file mode 100644 index 00000000000..f7c9935f1d0 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp @@ -0,0 +1,34 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE2 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +#include "util/util_optimization.h" + +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# endif +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ + +#include "kernel/filter/filter.h" +#define KERNEL_ARCH cpu_sse2 +#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp new file mode 100644 index 00000000000..070b95a3505 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp @@ -0,0 +1,36 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. 
This file is compiled with SSE3/SSSE3 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. */ + +#include "util/util_optimization.h" + +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# endif +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ + +#include "kernel/filter/filter.h" +#define KERNEL_ARCH cpu_sse3 +#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp new file mode 100644 index 00000000000..1a7b2040da1 --- /dev/null +++ b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp @@ -0,0 +1,37 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 + * optimization flags and nearly all functions inlined, while kernel.cpp + * is compiled without for other CPU's. 
*/ + +#include "util/util_optimization.h" + +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# endif +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ + +#include "kernel/filter/filter.h" +#define KERNEL_ARCH cpu_sse41 +#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp index 2600d977972..a645fb4d8dd 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp @@ -17,21 +17,23 @@ /* Optimized CPU kernel entry points. This file is compiled with AVX * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ - -/* SSE optimization disabled for now on 32 bit, see bug #36316 */ -#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# define __KERNEL_AVX__ -#endif #include "util/util_optimization.h" -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX -# include "kernel/kernel.h" -# define KERNEL_ARCH cpu_avx -# include "kernel/kernels/cpu/kernel_cpu_impl.h" +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +# endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ + +#include "kernel/kernel.h" +#define KERNEL_ARCH cpu_avx +#include "kernel/kernels/cpu/kernel_cpu_impl.h" 
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp index dba15d037ac..6bbb87727b9 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp @@ -18,21 +18,23 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ -/* SSE optimization disabled for now on 32 bit, see bug #36316 */ -#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# define __KERNEL_AVX__ -# define __KERNEL_AVX2__ -#endif - #include "util/util_optimization.h" -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 -# include "kernel/kernel.h" -# define KERNEL_ARCH cpu_avx2 -# include "kernel/kernels/cpu/kernel_cpu_impl.h" +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +# define __KERNEL_AVX2__ +# endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ + +#include "kernel/kernel.h" +#define KERNEL_ARCH cpu_avx2 +#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 39c9a9cf33c..9895080d328 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -89,6 +89,4 @@ DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update) -void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)); - #undef KERNEL_ARCH diff 
--git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 8c05dd1d9ef..b9d82781840 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -57,6 +57,11 @@ # include "kernel/split/kernel_buffer_update.h" #endif +#ifdef KERNEL_STUB +# include "util/util_debug.h" +# define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!")) +#endif + CCL_NAMESPACE_BEGIN #ifndef __SPLIT_KERNEL__ @@ -71,7 +76,10 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, int offset, int stride) { -#ifdef __BRANCHED_PATH__ +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, path_trace); +#else +# ifdef __BRANCHED_PATH__ if(kernel_data.integrator.branched) { kernel_branched_path_trace(kg, buffer, @@ -82,10 +90,11 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, stride); } else -#endif +# endif { kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } +#endif /* KERNEL_STUB */ } /* Film */ @@ -98,6 +107,9 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg, int offset, int stride) { +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, convert_to_byte); +#else kernel_film_convert_to_byte(kg, rgba, buffer, @@ -105,6 +117,7 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg, x, y, offset, stride); +#endif /* KERNEL_STUB */ } void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, @@ -115,6 +128,9 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, int offset, int stride) { +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, convert_to_half_float); +#else kernel_film_convert_to_half_float(kg, rgba, buffer, @@ -122,6 +138,7 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, x, y, offset, stride); +#endif /* KERNEL_STUB */ } /* Shader Evaluate */ @@ -136,9 +153,12 @@ void 
KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, int offset, int sample) { +#ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, shader); +#else if(type >= SHADER_EVAL_BAKE) { kernel_assert(output_luma == NULL); -#ifdef __BAKING__ +# ifdef __BAKING__ kernel_bake_evaluate(kg, input, output, @@ -147,7 +167,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, i, offset, sample); -#endif +# endif } else { kernel_shader_evaluate(kg, @@ -158,17 +178,26 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, i, sample); } +#endif /* KERNEL_STUB */ } #else /* __SPLIT_KERNEL__ */ /* Split Kernel Path Tracing */ -#define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ +#ifdef KERNEL_STUB +# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ + { \ + STUB_ASSERT(KERNEL_ARCH, name); \ + } +#else +# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ { \ kernel_##name(kg); \ } +#endif /* KERNEL_STUB */ #define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ @@ -194,42 +223,10 @@ DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) - -void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)) -{ -#define REGISTER_NAME_STRING(name) #name -#define REGISTER_EVAL_NAME(name) REGISTER_NAME_STRING(name) -#define REGISTER(name) reg(REGISTER_EVAL_NAME(KERNEL_FUNCTION_FULL_NAME(name)), (void*)KERNEL_FUNCTION_FULL_NAME(name)); - - REGISTER(path_trace); - REGISTER(convert_to_byte); - REGISTER(convert_to_half_float); - REGISTER(shader); - - REGISTER(data_init); - REGISTER(path_init); - REGISTER(scene_intersect); - REGISTER(lamp_emission); - REGISTER(do_volume); - 
REGISTER(queue_enqueue); - REGISTER(indirect_background); - REGISTER(shader_setup); - REGISTER(shader_sort); - REGISTER(shader_eval); - REGISTER(holdout_emission_blurring_pathtermination_ao); - REGISTER(subsurface_scatter); - REGISTER(direct_lighting); - REGISTER(shadow_blocked_ao); - REGISTER(shadow_blocked_dl); - REGISTER(next_iteration_setup); - REGISTER(indirect_subsurface); - REGISTER(buffer_update); - -#undef REGISTER -#undef REGISTER_EVAL_NAME -#undef REGISTER_NAME_STRING -} - #endif /* __SPLIT_KERNEL__ */ +#undef KERNEL_STUB +#undef STUB_ASSERT +#undef KERNEL_ARCH + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp index 27a746a0799..6ba3425a343 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp @@ -17,22 +17,25 @@ /* Optimized CPU kernel entry points. This file is compiled with AVX * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. 
*/ - -/* SSE optimization disabled for now on 32 bit, see bug #36316 */ -#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# define __KERNEL_AVX__ -#endif #define __SPLIT_KERNEL__ #include "util/util_optimization.h" -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX -# include "kernel/kernel.h" -# define KERNEL_ARCH cpu_avx -# include "kernel/kernels/cpu/kernel_cpu_impl.h" +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +# endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ + +#include "kernel/kernel.h" +#define KERNEL_ARCH cpu_avx +#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp index 364d279a189..76b2d77ebb8 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp @@ -18,23 +18,25 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. 
*/ -/* SSE optimization disabled for now on 32 bit, see bug #36316 */ -#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# define __KERNEL_AVX__ -# define __KERNEL_AVX2__ -#endif - #define __SPLIT_KERNEL__ #include "util/util_optimization.h" -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 -# include "kernel/kernel.h" -# define KERNEL_ARCH cpu_avx2 -# include "kernel/kernels/cpu/kernel_cpu_impl.h" +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# define __KERNEL_AVX__ +# define __KERNEL_AVX2__ +# endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ + +#include "kernel/kernel.h" +#define KERNEL_ARCH cpu_avx2 +#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp index 0afb481296f..b468b6f44c8 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp @@ -18,17 +18,19 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. 
*/ -/* SSE optimization disabled for now on 32 bit, see bug #36316 */ -#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -#endif - #define __SPLIT_KERNEL__ #include "util/util_optimization.h" -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 -# include "kernel/kernel.h" -# define KERNEL_ARCH cpu_sse2 -# include "kernel/kernels/cpu/kernel_cpu_impl.h" +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ + +#include "kernel/kernel.h" +#define KERNEL_ARCH cpu_sse2 +#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp index 13d00813591..3e5792d0b17 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp @@ -18,19 +18,21 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. 
*/ -/* SSE optimization disabled for now on 32 bit, see bug #36316 */ -#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -#endif - #define __SPLIT_KERNEL__ #include "util/util_optimization.h" -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -# include "kernel/kernel.h" -# define KERNEL_ARCH cpu_sse3 -# include "kernel/kernels/cpu/kernel_cpu_impl.h" +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ + +#include "kernel/kernel.h" +#define KERNEL_ARCH cpu_sse3 +#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp index a4312071edc..3629f21cd29 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp @@ -18,20 +18,22 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. 
*/ -/* SSE optimization disabled for now on 32 bit, see bug #36316 */ -#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -#endif - #define __SPLIT_KERNEL__ #include "util/util_optimization.h" -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 -# include "kernel/kernel.h" -# define KERNEL_ARCH cpu_sse41 -# include "kernel/kernels/cpu/kernel_cpu_impl.h" +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ + +#include "kernel/kernel.h" +#define KERNEL_ARCH cpu_sse41 +#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp index 1acfaa91ac9..57530c88710 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp @@ -18,15 +18,17 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. 
*/ -/* SSE optimization disabled for now on 32 bit, see bug #36316 */ -#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -#endif - #include "util/util_optimization.h" -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 -# include "kernel/kernel.h" -# define KERNEL_ARCH cpu_sse2 -# include "kernel/kernels/cpu/kernel_cpu_impl.h" +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ + +#include "kernel/kernel.h" +#define KERNEL_ARCH cpu_sse2 +#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp index f7b6a2e21fe..c607753bc4b 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp @@ -18,17 +18,19 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. 
*/ -/* SSE optimization disabled for now on 32 bit, see bug #36316 */ -#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -#endif - #include "util/util_optimization.h" -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -# include "kernel/kernel.h" -# define KERNEL_ARCH cpu_sse3 -# include "kernel/kernels/cpu/kernel_cpu_impl.h" +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ + +#include "kernel/kernel.h" +#define KERNEL_ARCH cpu_sse3 +#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp index 1900c6e3012..a278554731c 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp @@ -18,18 +18,20 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. 
*/ -/* SSE optimization disabled for now on 32 bit, see bug #36316 */ -#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -#endif - #include "util/util_optimization.h" -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 -# include "kernel/kernel.h" -# define KERNEL_ARCH cpu_sse41 -# include "kernel/kernels/cpu//kernel_cpu_impl.h" +#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 +# define KERNEL_STUB +#else +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ +# endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ + +#include "kernel/kernel.h" +#define KERNEL_ARCH cpu_sse41 +#include "kernel/kernels/cpu/kernel_cpu_impl.h" |