Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht@blender.org>2021-09-20 18:59:20 +0300
committerBrecht Van Lommel <brecht@blender.org>2021-09-21 15:55:54 +0300
commit08031197250aeecbaca3803254e6f25b8c7b7b37 (patch)
tree6fe7ab045f0dc0a423d6557c4073f34309ef4740 /intern/cycles/kernel/kernels/cpu
parentfa6b1007bad065440950cd67deb16a04f368856f (diff)
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity, new shadow catcher, revamped sampling settings, subsurface scattering anisotropy, new GPU volume sampling, improved PMJ sampling pattern, and more. Some features have also been removed or changed, breaking backwards compatibility. Including the removal of the OpenCL backend, for which alternatives are under development. Release notes and code docs: https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles https://wiki.blender.org/wiki/Source/Render/Cycles Credits: * Sergey Sharybin * Brecht Van Lommel * Patrick Mours (OptiX backend) * Christophe Hery (subsurface scattering anisotropy) * William Leeson (PMJ sampling pattern) * Alaska (various fixes and tweaks) * Thomas Dinges (various fixes) For the full commit history, see the cycles-x branch. This squashes together all the changes since intermediate changes would often fail building or tests. Ref T87839, T87837, T87836 Fixes T90734, T89353, T80267, T80267, T77185, T69800
Diffstat (limited to 'intern/cycles/kernel/kernels/cpu')
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter.cpp61
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_avx.cpp39
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_avx2.cpp40
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h143
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h331
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_sse2.cpp34
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_sse3.cpp36
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_sse41.cpp38
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel.cpp94
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_avx.cpp39
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp40
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h100
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h660
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h232
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split.cpp62
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp41
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp42
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp36
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp38
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp39
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp34
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp36
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp37
23 files changed, 0 insertions, 2252 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter.cpp b/intern/cycles/kernel/kernels/cpu/filter.cpp
deleted file mode 100644
index 145a6b6ac40..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* CPU kernel entry points */
-
-/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this
- * one with SSE2 intrinsics.
- */
-#if defined(__x86_64__) || defined(_M_X64)
-# define __KERNEL_SSE2__
-#endif
-
-/* When building kernel for native machine detect kernel features from the flags
- * set by compiler.
- */
-#ifdef WITH_KERNEL_NATIVE
-# ifdef __SSE2__
-# ifndef __KERNEL_SSE2__
-# define __KERNEL_SSE2__
-# endif
-# endif
-# ifdef __SSE3__
-# define __KERNEL_SSE3__
-# endif
-# ifdef __SSSE3__
-# define __KERNEL_SSSE3__
-# endif
-# ifdef __SSE4_1__
-# define __KERNEL_SSE41__
-# endif
-# ifdef __AVX__
-# define __KERNEL_SSE__
-# define __KERNEL_AVX__
-# endif
-# ifdef __AVX2__
-# define __KERNEL_SSE__
-# define __KERNEL_AVX2__
-# endif
-#endif
-
-/* quiet unused define warnings */
-#if defined(__KERNEL_SSE2__)
-/* do nothing */
-#endif
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
deleted file mode 100644
index 012daba62d8..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu_avx
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
deleted file mode 100644
index 16351a7f949..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# define __KERNEL_AVX2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu_avx2
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
deleted file mode 100644
index 1423b182ab8..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Templated common declaration part of all CPU kernels. */
-
-void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
- TileInfo *tile_info,
- int x,
- int y,
- float *unfilteredA,
- float *unfilteredB,
- float *sampleV,
- float *sampleVV,
- float *bufferV,
- int *prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
- TileInfo *tile_info,
- int m_offset,
- int v_offset,
- int x,
- int y,
- float *mean,
- float *variance,
- float scale,
- int *prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample,
- int x,
- int y,
- int *buffer_params,
- float *from,
- float *buffer,
- int out_offset,
- int *prefilter_rect);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x,
- int y,
- ccl_global float *image,
- ccl_global float *variance,
- ccl_global float *depth,
- ccl_global float *output,
- int *rect,
- int pass_stride);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(
- int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer,
- TileInfo *tiles,
- int x,
- int y,
- int storage_ofs,
- float *transform,
- int *rank,
- int *rect,
- int pass_stride,
- int frame_stride,
- bool use_time,
- int radius,
- float pca_threshold);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
- int dy,
- float *weight_image,
- float *variance_image,
- float *scale_image,
- float *difference_image,
- int *rect,
- int stride,
- int channel_offset,
- int frame_offset,
- float a,
- float k_2);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(
- float *difference_image, float *out_image, int *rect, int stride, int f);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(
- float *difference_image, float *out_image, int *rect, int stride, int f);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
- int dy,
- float *difference_image,
- float *image,
- float *temp_image,
- float *out_image,
- float *accum_image,
- int *rect,
- int channel_offset,
- int stride,
- int f);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
- int dy,
- int t,
- float *difference_image,
- float *buffer,
- float *transform,
- int *rank,
- float *XtWX,
- float3 *XtWY,
- int *rect,
- int *filter_window,
- int stride,
- int f,
- int pass_stride,
- int frame_offset,
- bool use_time);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
- float *accum_image,
- int *rect,
- int stride);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
- int y,
- int storage_ofs,
- float *buffer,
- int *rank,
- float *XtWX,
- float3 *XtWY,
- int *buffer_params,
- int sample);
-
-#undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
deleted file mode 100644
index 3d4cb87e104..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Templated common implementation part of all CPU kernels.
- *
- * The idea is that particular .cpp files sets needed optimization flags and
- * simply includes this file without worry of copying actual implementation over.
- */
-
-#include "kernel/kernel_compat_cpu.h"
-
-#include "kernel/filter/filter_kernel.h"
-
-#ifdef KERNEL_STUB
-# define STUB_ASSERT(arch, name) \
- assert(!(#name " kernel stub for architecture " #arch " was called!"))
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-/* Denoise filter */
-
-void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
- TileInfo *tile_info,
- int x,
- int y,
- float *unfilteredA,
- float *unfilteredB,
- float *sampleVariance,
- float *sampleVarianceV,
- float *bufferVariance,
- int *prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
-#else
- kernel_filter_divide_shadow(sample,
- tile_info,
- x,
- y,
- unfilteredA,
- unfilteredB,
- sampleVariance,
- sampleVarianceV,
- bufferVariance,
- load_int4(prefilter_rect),
- buffer_pass_stride,
- buffer_denoising_offset);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
- TileInfo *tile_info,
- int m_offset,
- int v_offset,
- int x,
- int y,
- float *mean,
- float *variance,
- float scale,
- int *prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
-#else
- kernel_filter_get_feature(sample,
- tile_info,
- m_offset,
- v_offset,
- x,
- y,
- mean,
- variance,
- scale,
- load_int4(prefilter_rect),
- buffer_pass_stride,
- buffer_denoising_offset);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample,
- int x,
- int y,
- int *buffer_params,
- float *from,
- float *buffer,
- int out_offset,
- int *prefilter_rect)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_write_feature);
-#else
- kernel_filter_write_feature(
- sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect));
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x,
- int y,
- ccl_global float *image,
- ccl_global float *variance,
- ccl_global float *depth,
- ccl_global float *output,
- int *rect,
- int pass_stride)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
-#else
- kernel_filter_detect_outliers(
- x, y, image, variance, depth, output, load_int4(rect), pass_stride);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(
- int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_combine_halves);
-#else
- kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer,
- TileInfo *tile_info,
- int x,
- int y,
- int storage_ofs,
- float *transform,
- int *rank,
- int *prefilter_rect,
- int pass_stride,
- int frame_stride,
- bool use_time,
- int radius,
- float pca_threshold)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
-#else
- rank += storage_ofs;
- transform += storage_ofs * TRANSFORM_SIZE;
- kernel_filter_construct_transform(buffer,
- tile_info,
- x,
- y,
- load_int4(prefilter_rect),
- pass_stride,
- frame_stride,
- use_time,
- transform,
- rank,
- radius,
- pca_threshold);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
- int dy,
- float *weight_image,
- float *variance_image,
- float *scale_image,
- float *difference_image,
- int *rect,
- int stride,
- int channel_offset,
- int frame_offset,
- float a,
- float k_2)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference);
-#else
- kernel_filter_nlm_calc_difference(dx,
- dy,
- weight_image,
- variance_image,
- scale_image,
- difference_image,
- load_int4(rect),
- stride,
- channel_offset,
- frame_offset,
- a,
- k_2);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(
- float *difference_image, float *out_image, int *rect, int stride, int f)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur);
-#else
- kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(
- float *difference_image, float *out_image, int *rect, int stride, int f)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight);
-#else
- kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
- int dy,
- float *difference_image,
- float *image,
- float *temp_image,
- float *out_image,
- float *accum_image,
- int *rect,
- int channel_offset,
- int stride,
- int f)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
-#else
- kernel_filter_nlm_update_output(dx,
- dy,
- difference_image,
- image,
- temp_image,
- out_image,
- accum_image,
- load_int4(rect),
- channel_offset,
- stride,
- f);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
- int dy,
- int t,
- float *difference_image,
- float *buffer,
- float *transform,
- int *rank,
- float *XtWX,
- float3 *XtWY,
- int *rect,
- int *filter_window,
- int stride,
- int f,
- int pass_stride,
- int frame_offset,
- bool use_time)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
-#else
- kernel_filter_nlm_construct_gramian(dx,
- dy,
- t,
- difference_image,
- buffer,
- transform,
- rank,
- XtWX,
- XtWY,
- load_int4(rect),
- load_int4(filter_window),
- stride,
- f,
- pass_stride,
- frame_offset,
- use_time);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
- float *accum_image,
- int *rect,
- int stride)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize);
-#else
- kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
- int y,
- int storage_ofs,
- float *buffer,
- int *rank,
- float *XtWX,
- float3 *XtWY,
- int *buffer_params,
- int sample)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_finalize);
-#else
- XtWX += storage_ofs * XTWX_SIZE;
- XtWY += storage_ofs * XTWY_SIZE;
- rank += storage_ofs;
- kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample);
-#endif
-}
-
-#undef KERNEL_STUB
-#undef STUB_ASSERT
-#undef KERNEL_ARCH
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
deleted file mode 100644
index 75833d83648..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu_sse2
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
deleted file mode 100644
index c998cd54d3a..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu_sse3
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
deleted file mode 100644
index fc4ef1fca5b..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu_sse41
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
deleted file mode 100644
index 8040bfb7b33..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* CPU kernel entry points */
-
-/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this
- * one with SSE2 intrinsics.
- */
-#if defined(__x86_64__) || defined(_M_X64)
-# define __KERNEL_SSE2__
-#endif
-
-/* When building kernel for native machine detect kernel features from the flags
- * set by compiler.
- */
-#ifdef WITH_KERNEL_NATIVE
-# ifdef __SSE2__
-# ifndef __KERNEL_SSE2__
-# define __KERNEL_SSE2__
-# endif
-# endif
-# ifdef __SSE3__
-# define __KERNEL_SSE3__
-# endif
-# ifdef __SSSE3__
-# define __KERNEL_SSSE3__
-# endif
-# ifdef __SSE4_1__
-# define __KERNEL_SSE41__
-# endif
-# ifdef __AVX__
-# define __KERNEL_SSE__
-# define __KERNEL_AVX__
-# endif
-# ifdef __AVX2__
-# define __KERNEL_SSE__
-# define __KERNEL_AVX2__
-# endif
-#endif
-
-/* quiet unused define warnings */
-#if defined(__KERNEL_SSE2__)
-/* do nothing */
-#endif
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Memory Copy */
-
-void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t)
-{
- if (strcmp(name, "__data") == 0) {
- kg->__data = *(KernelData *)host;
- }
- else {
- assert(0);
- }
-}
-
-void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
-{
- if (0) {
- }
-
-#define KERNEL_TEX(type, tname) \
- else if (strcmp(name, #tname) == 0) \
- { \
- kg->tname.data = (type *)mem; \
- kg->tname.width = size; \
- }
-#include "kernel/kernel_textures.h"
- else {
- assert(0);
- }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
deleted file mode 100644
index 5f6b6800363..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_avx
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
deleted file mode 100644
index 97e8fc25140..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2011-2014 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# define __KERNEL_AVX2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_avx2
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
deleted file mode 100644
index ea3103f12c3..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Templated common declaration part of all CPU kernels. */
-
-void KERNEL_FUNCTION_FULL_NAME(path_trace)(
- KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
-
-void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
- uchar4 *rgba,
- float *buffer,
- float sample_scale,
- int x,
- int y,
- int offset,
- int stride);
-
-void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
- uchar4 *rgba,
- float *buffer,
- float sample_scale,
- int x,
- int y,
- int offset,
- int stride);
-
-void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
- uint4 *input,
- float4 *output,
- int type,
- int filter,
- int i,
- int offset,
- int sample);
-
-void KERNEL_FUNCTION_FULL_NAME(bake)(
- KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
-
-/* Split kernels */
-
-void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg,
- ccl_constant KernelData *data,
- ccl_global void *split_data_buffer,
- int num_elements,
- ccl_global char *ray_state,
- int start_sample,
- int end_sample,
- int sx,
- int sy,
- int sw,
- int sh,
- int offset,
- int stride,
- ccl_global int *Queue_index,
- int queuesize,
- ccl_global char *use_queues_flag,
- ccl_global unsigned int *work_pool_wgs,
- unsigned int num_samples,
- ccl_global float *buffer);
-
-#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * data);
-
-DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
-DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
-DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
-DECLARE_SPLIT_KERNEL_FUNCTION(do_volume)
-DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
-DECLARE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DECLARE_SPLIT_KERNEL_FUNCTION(shader_setup)
-DECLARE_SPLIT_KERNEL_FUNCTION(shader_sort)
-DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
-DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
-DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
-DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
-DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
-DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive)
-DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
-DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
-DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
-DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
-DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
-DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
-DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
-
-#undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
deleted file mode 100644
index 59b96c86c50..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ /dev/null
@@ -1,660 +0,0 @@
-/*
- * Copyright 2011-2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __KERNEL_CPU_IMAGE_H__
-#define __KERNEL_CPU_IMAGE_H__
-
-#ifdef WITH_NANOVDB
-# define NANOVDB_USE_INTRINSICS
-# include <nanovdb/NanoVDB.h>
-# include <nanovdb/util/SampleFromVoxels.h>
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-/* Make template functions private so symbols don't conflict between kernels with different
- * instruction sets. */
-namespace {
-
-#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
- { \
- u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
- u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
- u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
- u[3] = (1.0f / 6.0f) * t * t * t; \
- } \
- (void)0
-
-ccl_device_inline float frac(float x, int *ix)
-{
- int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
- *ix = i;
- return x - (float)i;
-}
-
-template<typename T> struct TextureInterpolator {
-
- static ccl_always_inline float4 read(float4 r)
- {
- return r;
- }
-
- static ccl_always_inline float4 read(uchar4 r)
- {
- float f = 1.0f / 255.0f;
- return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
- }
-
- static ccl_always_inline float4 read(uchar r)
- {
- float f = r * (1.0f / 255.0f);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(float r)
- {
- /* TODO(dingto): Optimize this, so interpolation
- * happens on float instead of float4 */
- return make_float4(r, r, r, 1.0f);
- }
-
- static ccl_always_inline float4 read(half4 r)
- {
- return half4_to_float4(r);
- }
-
- static ccl_always_inline float4 read(half r)
- {
- float f = half_to_float(r);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(uint16_t r)
- {
- float f = r * (1.0f / 65535.0f);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(ushort4 r)
- {
- float f = 1.0f / 65535.0f;
- return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
- }
-
- static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height)
- {
- if (x < 0 || y < 0 || x >= width || y >= height) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return read(data[y * width + x]);
- }
-
- static ccl_always_inline int wrap_periodic(int x, int width)
- {
- x %= width;
- if (x < 0)
- x += width;
- return x;
- }
-
- static ccl_always_inline int wrap_clamp(int x, int width)
- {
- return clamp(x, 0, width - 1);
- }
-
- /* ******** 2D interpolation ******** */
-
- static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y)
- {
- const T *data = (const T *)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy;
- frac(x * (float)width, &ix);
- frac(y * (float)height, &iy);
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- break;
- case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return read(data[ix + iy * width]);
- }
-
- static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y)
- {
- const T *data = (const T *)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy, nix, niy;
- const float tx = frac(x * (float)width - 0.5f, &ix);
- const float ty = frac(y * (float)height - 0.5f, &iy);
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- nix = wrap_periodic(ix + 1, width);
- niy = wrap_periodic(iy + 1, height);
- break;
- case EXTENSION_CLIP:
- nix = ix + 1;
- niy = iy + 1;
- break;
- case EXTENSION_EXTEND:
- nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) +
- (1.0f - ty) * tx * read(data, nix, iy, width, height) +
- ty * (1.0f - tx) * read(data, ix, niy, width, height) +
- ty * tx * read(data, nix, niy, width, height);
- }
-
- static ccl_always_inline float4 interp_cubic(const TextureInfo &info, float x, float y)
- {
- const T *data = (const T *)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy, nix, niy;
- const float tx = frac(x * (float)width - 0.5f, &ix);
- const float ty = frac(y * (float)height - 0.5f, &iy);
- int pix, piy, nnix, nniy;
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- pix = wrap_periodic(ix - 1, width);
- piy = wrap_periodic(iy - 1, height);
- nix = wrap_periodic(ix + 1, width);
- niy = wrap_periodic(iy + 1, height);
- nnix = wrap_periodic(ix + 2, width);
- nniy = wrap_periodic(iy + 2, height);
- break;
- case EXTENSION_CLIP:
- pix = ix - 1;
- piy = iy - 1;
- nix = ix + 1;
- niy = iy + 1;
- nnix = ix + 2;
- nniy = iy + 2;
- break;
- case EXTENSION_EXTEND:
- pix = wrap_clamp(ix - 1, width);
- piy = wrap_clamp(iy - 1, height);
- nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
- nnix = wrap_clamp(ix + 2, width);
- nniy = wrap_clamp(iy + 2, height);
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- const int xc[4] = {pix, ix, nix, nnix};
- const int yc[4] = {piy, iy, niy, nniy};
- float u[4], v[4];
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
- */
-#define DATA(x, y) (read(data, xc[x], yc[y], width, height))
-#define TERM(col) \
- (v[col] * \
- (u[0] * DATA(0, col) + u[1] * DATA(1, col) + u[2] * DATA(2, col) + u[3] * DATA(3, col)))
-
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-
- /* Actual interpolation. */
- return TERM(0) + TERM(1) + TERM(2) + TERM(3);
-#undef TERM
-#undef DATA
- }
-
- static ccl_always_inline float4 interp(const TextureInfo &info, float x, float y)
- {
- if (UNLIKELY(!info.data)) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- switch (info.interpolation) {
- case INTERPOLATION_CLOSEST:
- return interp_closest(info, x, y);
- case INTERPOLATION_LINEAR:
- return interp_linear(info, x, y);
- default:
- return interp_cubic(info, x, y);
- }
- }
-
- /* ******** 3D interpolation ******** */
-
- static ccl_always_inline float4 interp_3d_closest(const TextureInfo &info,
- float x,
- float y,
- float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
-
- frac(x * (float)width, &ix);
- frac(y * (float)height, &iy);
- frac(z * (float)depth, &iz);
-
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
- break;
- case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const T *data = (const T *)info.data;
- return read(data[ix + iy * width + iz * width * height]);
- }
-
- static ccl_always_inline float4 interp_3d_linear(const TextureInfo &info,
- float x,
- float y,
- float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
- int nix, niy, niz;
-
- float tx = frac(x * (float)width - 0.5f, &ix);
- float ty = frac(y * (float)height - 0.5f, &iy);
- float tz = frac(z * (float)depth - 0.5f, &iz);
-
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
-
- nix = wrap_periodic(ix + 1, width);
- niy = wrap_periodic(iy + 1, height);
- niz = wrap_periodic(iz + 1, depth);
- break;
- case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
- niz = wrap_clamp(iz + 1, depth);
-
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const T *data = (const T *)info.data;
- float4 r;
-
- r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
- read(data[ix + iy * width + iz * width * height]);
- r += (1.0f - tz) * (1.0f - ty) * tx * read(data[nix + iy * width + iz * width * height]);
- r += (1.0f - tz) * ty * (1.0f - tx) * read(data[ix + niy * width + iz * width * height]);
- r += (1.0f - tz) * ty * tx * read(data[nix + niy * width + iz * width * height]);
-
- r += tz * (1.0f - ty) * (1.0f - tx) * read(data[ix + iy * width + niz * width * height]);
- r += tz * (1.0f - ty) * tx * read(data[nix + iy * width + niz * width * height]);
- r += tz * ty * (1.0f - tx) * read(data[ix + niy * width + niz * width * height]);
- r += tz * ty * tx * read(data[nix + niy * width + niz * width * height]);
-
- return r;
- }
-
- /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
- * causing stack overflow issue in this function unless it is inlined.
- *
- * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
- * enabled.
- */
-#if defined(__GNUC__) || defined(__clang__)
- static ccl_always_inline
-#else
- static ccl_never_inline
-#endif
- float4
- interp_3d_cubic(const TextureInfo &info, float x, float y, float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
- int nix, niy, niz;
- /* Tricubic b-spline interpolation. */
- const float tx = frac(x * (float)width - 0.5f, &ix);
- const float ty = frac(y * (float)height - 0.5f, &iy);
- const float tz = frac(z * (float)depth - 0.5f, &iz);
- int pix, piy, piz, nnix, nniy, nniz;
-
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
-
- pix = wrap_periodic(ix - 1, width);
- piy = wrap_periodic(iy - 1, height);
- piz = wrap_periodic(iz - 1, depth);
-
- nix = wrap_periodic(ix + 1, width);
- niy = wrap_periodic(iy + 1, height);
- niz = wrap_periodic(iz + 1, depth);
-
- nnix = wrap_periodic(ix + 2, width);
- nniy = wrap_periodic(iy + 2, height);
- nniz = wrap_periodic(iz + 2, depth);
- break;
- case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- pix = wrap_clamp(ix - 1, width);
- piy = wrap_clamp(iy - 1, height);
- piz = wrap_clamp(iz - 1, depth);
-
- nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
- niz = wrap_clamp(iz + 1, depth);
-
- nnix = wrap_clamp(ix + 2, width);
- nniy = wrap_clamp(iy + 2, height);
- nniz = wrap_clamp(iz + 2, depth);
-
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const int xc[4] = {pix, ix, nix, nnix};
- const int yc[4] = {width * piy, width * iy, width * niy, width * nniy};
- const int zc[4] = {
- width * height * piz, width * height * iz, width * height * niz, width * height * nniz};
- float u[4], v[4], w[4];
-
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
- */
-#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
-#define COL_TERM(col, row) \
- (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
- u[3] * DATA(3, col, row)))
-#define ROW_TERM(row) \
- (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
-
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- SET_CUBIC_SPLINE_WEIGHTS(w, tz);
-
- /* Actual interpolation. */
- const T *data = (const T *)info.data;
- return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
-
-#undef COL_TERM
-#undef ROW_TERM
-#undef DATA
- }
-
- static ccl_always_inline float4
- interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
- {
- if (UNLIKELY(!info.data))
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
- switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
- case INTERPOLATION_CLOSEST:
- return interp_3d_closest(info, x, y, z);
- case INTERPOLATION_LINEAR:
- return interp_3d_linear(info, x, y, z);
- default:
- return interp_3d_cubic(info, x, y, z);
- }
- }
-};
-
-#ifdef WITH_NANOVDB
-template<typename T> struct NanoVDBInterpolator {
-
- typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType;
-
- static ccl_always_inline float4 read(float r)
- {
- return make_float4(r, r, r, 1.0f);
- }
-
- static ccl_always_inline float4 read(nanovdb::Vec3f r)
- {
- return make_float4(r[0], r[1], r[2], 1.0f);
- }
-
- static ccl_always_inline float4 interp_3d_closest(const AccessorType &acc,
- float x,
- float y,
- float z)
- {
- const nanovdb::Vec3f xyz(x, y, z);
- return read(nanovdb::SampleFromVoxels<AccessorType, 0, false>(acc)(xyz));
- }
-
- static ccl_always_inline float4 interp_3d_linear(const AccessorType &acc,
- float x,
- float y,
- float z)
- {
- const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f);
- return read(nanovdb::SampleFromVoxels<AccessorType, 1, false>(acc)(xyz));
- }
-
-# if defined(__GNUC__) || defined(__clang__)
- static ccl_always_inline
-# else
- static ccl_never_inline
-# endif
- float4
- interp_3d_cubic(const AccessorType &acc, float x, float y, float z)
- {
- int ix, iy, iz;
- int nix, niy, niz;
- int pix, piy, piz;
- int nnix, nniy, nniz;
- /* Tricubic b-spline interpolation. */
- const float tx = frac(x - 0.5f, &ix);
- const float ty = frac(y - 0.5f, &iy);
- const float tz = frac(z - 0.5f, &iz);
- pix = ix - 1;
- piy = iy - 1;
- piz = iz - 1;
- nix = ix + 1;
- niy = iy + 1;
- niz = iz + 1;
- nnix = ix + 2;
- nniy = iy + 2;
- nniz = iz + 2;
-
- const int xc[4] = {pix, ix, nix, nnix};
- const int yc[4] = {piy, iy, niy, nniy};
- const int zc[4] = {piz, iz, niz, nniz};
- float u[4], v[4], w[4];
-
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
- */
-# define DATA(x, y, z) (read(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z]))))
-# define COL_TERM(col, row) \
- (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
- u[3] * DATA(3, col, row)))
-# define ROW_TERM(row) \
- (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
-
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- SET_CUBIC_SPLINE_WEIGHTS(w, tz);
-
- /* Actual interpolation. */
- return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
-
-# undef COL_TERM
-# undef ROW_TERM
-# undef DATA
- }
-
- static ccl_always_inline float4
- interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
- {
- using namespace nanovdb;
-
- NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
- AccessorType acc = grid->getAccessor();
-
- switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
- case INTERPOLATION_CLOSEST:
- return interp_3d_closest(acc, x, y, z);
- case INTERPOLATION_LINEAR:
- return interp_3d_linear(acc, x, y, z);
- default:
- return interp_3d_cubic(acc, x, y, z);
- }
- }
-};
-#endif
-
-#undef SET_CUBIC_SPLINE_WEIGHTS
-
-ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
-{
- const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
-
- switch (info.data_type) {
- case IMAGE_DATA_TYPE_HALF:
- return TextureInterpolator<half>::interp(info, x, y);
- case IMAGE_DATA_TYPE_BYTE:
- return TextureInterpolator<uchar>::interp(info, x, y);
- case IMAGE_DATA_TYPE_USHORT:
- return TextureInterpolator<uint16_t>::interp(info, x, y);
- case IMAGE_DATA_TYPE_FLOAT:
- return TextureInterpolator<float>::interp(info, x, y);
- case IMAGE_DATA_TYPE_HALF4:
- return TextureInterpolator<half4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_BYTE4:
- return TextureInterpolator<uchar4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_USHORT4:
- return TextureInterpolator<ushort4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_FLOAT4:
- return TextureInterpolator<float4>::interp(info, x, y);
- default:
- assert(0);
- return make_float4(
- TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
- }
-}
-
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg,
- int id,
- float3 P,
- InterpolationType interp)
-{
- const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
-
- if (info.use_transform_3d) {
- P = transform_point(&info.transform_3d, P);
- }
-
- switch (info.data_type) {
- case IMAGE_DATA_TYPE_HALF:
- return TextureInterpolator<half>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_BYTE:
- return TextureInterpolator<uchar>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_USHORT:
- return TextureInterpolator<uint16_t>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_FLOAT:
- return TextureInterpolator<float>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_HALF4:
- return TextureInterpolator<half4>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_BYTE4:
- return TextureInterpolator<uchar4>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_USHORT4:
- return TextureInterpolator<ushort4>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_FLOAT4:
- return TextureInterpolator<float4>::interp_3d(info, P.x, P.y, P.z, interp);
-#ifdef WITH_NANOVDB
- case IMAGE_DATA_TYPE_NANOVDB_FLOAT:
- return NanoVDBInterpolator<float>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
- return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp);
-#endif
- default:
- assert(0);
- return make_float4(
- TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
- }
-}
-
-} /* Namespace. */
-
-CCL_NAMESPACE_END
-
-#endif // __KERNEL_CPU_IMAGE_H__
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
deleted file mode 100644
index 51d6c23f72f..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Templated common implementation part of all CPU kernels.
- *
- * The idea is that particular .cpp files sets needed optimization flags and
- * simply includes this file without worry of copying actual implementation over.
- */
-
-// clang-format off
-#include "kernel/kernel_compat_cpu.h"
-
-#ifndef KERNEL_STUB
-# ifndef __SPLIT_KERNEL__
-# include "kernel/kernel_math.h"
-# include "kernel/kernel_types.h"
-
-# include "kernel/split/kernel_split_data.h"
-# include "kernel/kernel_globals.h"
-
-# include "kernel/kernel_color.h"
-# include "kernel/kernels/cpu/kernel_cpu_image.h"
-# include "kernel/kernel_film.h"
-# include "kernel/kernel_path.h"
-# include "kernel/kernel_path_branched.h"
-# include "kernel/kernel_bake.h"
-# else
-# include "kernel/split/kernel_split_common.h"
-
-# include "kernel/split/kernel_data_init.h"
-# include "kernel/split/kernel_path_init.h"
-# include "kernel/split/kernel_scene_intersect.h"
-# include "kernel/split/kernel_lamp_emission.h"
-# include "kernel/split/kernel_do_volume.h"
-# include "kernel/split/kernel_queue_enqueue.h"
-# include "kernel/split/kernel_indirect_background.h"
-# include "kernel/split/kernel_shader_setup.h"
-# include "kernel/split/kernel_shader_sort.h"
-# include "kernel/split/kernel_shader_eval.h"
-# include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
-# include "kernel/split/kernel_subsurface_scatter.h"
-# include "kernel/split/kernel_direct_lighting.h"
-# include "kernel/split/kernel_shadow_blocked_ao.h"
-# include "kernel/split/kernel_shadow_blocked_dl.h"
-# include "kernel/split/kernel_enqueue_inactive.h"
-# include "kernel/split/kernel_next_iteration_setup.h"
-# include "kernel/split/kernel_indirect_subsurface.h"
-# include "kernel/split/kernel_buffer_update.h"
-# include "kernel/split/kernel_adaptive_stopping.h"
-# include "kernel/split/kernel_adaptive_filter_x.h"
-# include "kernel/split/kernel_adaptive_filter_y.h"
-# include "kernel/split/kernel_adaptive_adjust_samples.h"
-# endif /* __SPLIT_KERNEL__ */
-#else
-# define STUB_ASSERT(arch, name) \
- assert(!(#name " kernel stub for architecture " #arch " was called!"))
-
-# ifdef __SPLIT_KERNEL__
-# include "kernel/split/kernel_data_init.h"
-# endif /* __SPLIT_KERNEL__ */
-#endif /* KERNEL_STUB */
-// clang-format on
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __SPLIT_KERNEL__
-
-/* Path Tracing */
-
-void KERNEL_FUNCTION_FULL_NAME(path_trace)(
- KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
-{
-# ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, path_trace);
-# else
-# ifdef __BRANCHED_PATH__
- if (kernel_data.integrator.branched) {
- kernel_branched_path_trace(kg, buffer, sample, x, y, offset, stride);
- }
- else
-# endif
- {
- kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
- }
-# endif /* KERNEL_STUB */
-}
-
-/* Film */
-
-void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
- uchar4 *rgba,
- float *buffer,
- float sample_scale,
- int x,
- int y,
- int offset,
- int stride)
-{
-# ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, convert_to_byte);
-# else
- kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-# endif /* KERNEL_STUB */
-}
-
-void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
- uchar4 *rgba,
- float *buffer,
- float sample_scale,
- int x,
- int y,
- int offset,
- int stride)
-{
-# ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, convert_to_half_float);
-# else
- kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-# endif /* KERNEL_STUB */
-}
-
-/* Bake */
-
-void KERNEL_FUNCTION_FULL_NAME(bake)(
- KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
-{
-# ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, bake);
-# else
-# ifdef __BAKING__
- kernel_bake_evaluate(kg, buffer, sample, x, y, offset, stride);
-# endif
-# endif /* KERNEL_STUB */
-}
-
-/* Shader Evaluate */
-
-void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
- uint4 *input,
- float4 *output,
- int type,
- int filter,
- int i,
- int offset,
- int sample)
-{
-# ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, shader);
-# else
- if (type == SHADER_EVAL_DISPLACE) {
- kernel_displace_evaluate(kg, input, output, i);
- }
- else {
- kernel_background_evaluate(kg, input, output, i);
- }
-# endif /* KERNEL_STUB */
-}
-
-#else /* __SPLIT_KERNEL__ */
-
-/* Split Kernel Path Tracing */
-
-# ifdef KERNEL_STUB
-# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
- { \
- STUB_ASSERT(KERNEL_ARCH, name); \
- }
-
-# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
- { \
- STUB_ASSERT(KERNEL_ARCH, name); \
- }
-# else
-# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
- { \
- kernel_##name(kg); \
- }
-
-# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
- { \
- ccl_local type locals; \
- kernel_##name(kg, &locals); \
- }
-# endif /* KERNEL_STUB */
-
-DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
-DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
-DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
-DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao,
- BackgroundAOLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
-DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
-#endif /* __SPLIT_KERNEL__ */
-
-#undef KERNEL_STUB
-#undef STUB_ASSERT
-#undef KERNEL_ARCH
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
deleted file mode 100644
index 989f5e5aaa8..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* CPU kernel entry points */
-
-/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this
- * one with SSE2 intrinsics.
- */
-#if defined(__x86_64__) || defined(_M_X64)
-# define __KERNEL_SSE2__
-#endif
-
-#define __SPLIT_KERNEL__
-
-/* When building kernel for native machine detect kernel features from the flags
- * set by compiler.
- */
-#ifdef WITH_KERNEL_NATIVE
-# ifdef __SSE2__
-# ifndef __KERNEL_SSE2__
-# define __KERNEL_SSE2__
-# endif
-# endif
-# ifdef __SSE3__
-# define __KERNEL_SSE3__
-# endif
-# ifdef __SSSE3__
-# define __KERNEL_SSSE3__
-# endif
-# ifdef __SSE4_1__
-# define __KERNEL_SSE41__
-# endif
-# ifdef __AVX__
-# define __KERNEL_AVX__
-# endif
-# ifdef __AVX2__
-# define __KERNEL_SSE__
-# define __KERNEL_AVX2__
-# endif
-#endif
-
-/* quiet unused define warnings */
-#if defined(__KERNEL_SSE2__)
-/* do nothing */
-#endif
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
deleted file mode 100644
index 40e485d27c0..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#define __SPLIT_KERNEL__
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_avx
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
deleted file mode 100644
index 8c44238470e..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright 2011-2014 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#define __SPLIT_KERNEL__
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# define __KERNEL_AVX2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_avx2
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
deleted file mode 100644
index 7a3f218d5fc..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#define __SPLIT_KERNEL__
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse2
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
deleted file mode 100644
index 1cab59e0ea0..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#define __SPLIT_KERNEL__
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse3
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
deleted file mode 100644
index 637126d9d4c..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#define __SPLIT_KERNEL__
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse41
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
deleted file mode 100644
index 26d7fd4de48..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse2
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
deleted file mode 100644
index 3f259aa4480..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse3
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
deleted file mode 100644
index 68bae8c07c6..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse41
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"