Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/kernels')
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter.cpp61
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_avx.cpp39
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_avx2.cpp40
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h143
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h331
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_sse2.cpp34
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_sse3.cpp36
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_sse41.cpp38
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel.cpp94
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_avx.cpp39
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp40
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h100
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h660
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h232
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split.cpp62
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp41
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp42
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp36
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp38
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp39
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp34
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp36
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp37
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu413
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel.cu232
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_config.h121
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h265
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu156
-rw-r--r--intern/cycles/kernel/kernels/opencl/filter.cl321
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl23
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_background.cl35
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_bake.cl36
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_base.cl88
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl26
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_data_init.cl53
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl26
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_displace.cl36
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl24
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl26
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl26
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl24
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl24
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl24
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl26
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h358
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_path_init.cl24
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl26
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl24
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl24
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl26
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl27
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl24
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl24
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl34
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split_function.h67
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl29
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl24
-rw-r--r--intern/cycles/kernel/kernels/optix/kernel_optix.cu327
61 files changed, 0 insertions, 5364 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter.cpp b/intern/cycles/kernel/kernels/cpu/filter.cpp
deleted file mode 100644
index 145a6b6ac40..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* CPU kernel entry points */
-
-/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this
- * one with SSE2 intrinsics.
- */
-#if defined(__x86_64__) || defined(_M_X64)
-# define __KERNEL_SSE2__
-#endif
-
-/* When building kernel for native machine detect kernel features from the flags
- * set by compiler.
- */
-#ifdef WITH_KERNEL_NATIVE
-# ifdef __SSE2__
-# ifndef __KERNEL_SSE2__
-# define __KERNEL_SSE2__
-# endif
-# endif
-# ifdef __SSE3__
-# define __KERNEL_SSE3__
-# endif
-# ifdef __SSSE3__
-# define __KERNEL_SSSE3__
-# endif
-# ifdef __SSE4_1__
-# define __KERNEL_SSE41__
-# endif
-# ifdef __AVX__
-# define __KERNEL_SSE__
-# define __KERNEL_AVX__
-# endif
-# ifdef __AVX2__
-# define __KERNEL_SSE__
-# define __KERNEL_AVX2__
-# endif
-#endif
-
-/* quiet unused define warnings */
-#if defined(__KERNEL_SSE2__)
-/* do nothing */
-#endif
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
deleted file mode 100644
index 012daba62d8..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu_avx
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
deleted file mode 100644
index 16351a7f949..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# define __KERNEL_AVX2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu_avx2
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
deleted file mode 100644
index 1423b182ab8..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Templated common declaration part of all CPU kernels. */
-
-void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
- TileInfo *tile_info,
- int x,
- int y,
- float *unfilteredA,
- float *unfilteredB,
- float *sampleV,
- float *sampleVV,
- float *bufferV,
- int *prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
- TileInfo *tile_info,
- int m_offset,
- int v_offset,
- int x,
- int y,
- float *mean,
- float *variance,
- float scale,
- int *prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample,
- int x,
- int y,
- int *buffer_params,
- float *from,
- float *buffer,
- int out_offset,
- int *prefilter_rect);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x,
- int y,
- ccl_global float *image,
- ccl_global float *variance,
- ccl_global float *depth,
- ccl_global float *output,
- int *rect,
- int pass_stride);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(
- int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer,
- TileInfo *tiles,
- int x,
- int y,
- int storage_ofs,
- float *transform,
- int *rank,
- int *rect,
- int pass_stride,
- int frame_stride,
- bool use_time,
- int radius,
- float pca_threshold);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
- int dy,
- float *weight_image,
- float *variance_image,
- float *scale_image,
- float *difference_image,
- int *rect,
- int stride,
- int channel_offset,
- int frame_offset,
- float a,
- float k_2);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(
- float *difference_image, float *out_image, int *rect, int stride, int f);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(
- float *difference_image, float *out_image, int *rect, int stride, int f);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
- int dy,
- float *difference_image,
- float *image,
- float *temp_image,
- float *out_image,
- float *accum_image,
- int *rect,
- int channel_offset,
- int stride,
- int f);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
- int dy,
- int t,
- float *difference_image,
- float *buffer,
- float *transform,
- int *rank,
- float *XtWX,
- float3 *XtWY,
- int *rect,
- int *filter_window,
- int stride,
- int f,
- int pass_stride,
- int frame_offset,
- bool use_time);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
- float *accum_image,
- int *rect,
- int stride);
-
-void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
- int y,
- int storage_ofs,
- float *buffer,
- int *rank,
- float *XtWX,
- float3 *XtWY,
- int *buffer_params,
- int sample);
-
-#undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
deleted file mode 100644
index 3d4cb87e104..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Templated common implementation part of all CPU kernels.
- *
- * The idea is that particular .cpp files sets needed optimization flags and
- * simply includes this file without worry of copying actual implementation over.
- */
-
-#include "kernel/kernel_compat_cpu.h"
-
-#include "kernel/filter/filter_kernel.h"
-
-#ifdef KERNEL_STUB
-# define STUB_ASSERT(arch, name) \
- assert(!(#name " kernel stub for architecture " #arch " was called!"))
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-/* Denoise filter */
-
-void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
- TileInfo *tile_info,
- int x,
- int y,
- float *unfilteredA,
- float *unfilteredB,
- float *sampleVariance,
- float *sampleVarianceV,
- float *bufferVariance,
- int *prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
-#else
- kernel_filter_divide_shadow(sample,
- tile_info,
- x,
- y,
- unfilteredA,
- unfilteredB,
- sampleVariance,
- sampleVarianceV,
- bufferVariance,
- load_int4(prefilter_rect),
- buffer_pass_stride,
- buffer_denoising_offset);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
- TileInfo *tile_info,
- int m_offset,
- int v_offset,
- int x,
- int y,
- float *mean,
- float *variance,
- float scale,
- int *prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
-#else
- kernel_filter_get_feature(sample,
- tile_info,
- m_offset,
- v_offset,
- x,
- y,
- mean,
- variance,
- scale,
- load_int4(prefilter_rect),
- buffer_pass_stride,
- buffer_denoising_offset);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample,
- int x,
- int y,
- int *buffer_params,
- float *from,
- float *buffer,
- int out_offset,
- int *prefilter_rect)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_write_feature);
-#else
- kernel_filter_write_feature(
- sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect));
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x,
- int y,
- ccl_global float *image,
- ccl_global float *variance,
- ccl_global float *depth,
- ccl_global float *output,
- int *rect,
- int pass_stride)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
-#else
- kernel_filter_detect_outliers(
- x, y, image, variance, depth, output, load_int4(rect), pass_stride);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(
- int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_combine_halves);
-#else
- kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer,
- TileInfo *tile_info,
- int x,
- int y,
- int storage_ofs,
- float *transform,
- int *rank,
- int *prefilter_rect,
- int pass_stride,
- int frame_stride,
- bool use_time,
- int radius,
- float pca_threshold)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
-#else
- rank += storage_ofs;
- transform += storage_ofs * TRANSFORM_SIZE;
- kernel_filter_construct_transform(buffer,
- tile_info,
- x,
- y,
- load_int4(prefilter_rect),
- pass_stride,
- frame_stride,
- use_time,
- transform,
- rank,
- radius,
- pca_threshold);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
- int dy,
- float *weight_image,
- float *variance_image,
- float *scale_image,
- float *difference_image,
- int *rect,
- int stride,
- int channel_offset,
- int frame_offset,
- float a,
- float k_2)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference);
-#else
- kernel_filter_nlm_calc_difference(dx,
- dy,
- weight_image,
- variance_image,
- scale_image,
- difference_image,
- load_int4(rect),
- stride,
- channel_offset,
- frame_offset,
- a,
- k_2);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(
- float *difference_image, float *out_image, int *rect, int stride, int f)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur);
-#else
- kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(
- float *difference_image, float *out_image, int *rect, int stride, int f)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight);
-#else
- kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
- int dy,
- float *difference_image,
- float *image,
- float *temp_image,
- float *out_image,
- float *accum_image,
- int *rect,
- int channel_offset,
- int stride,
- int f)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
-#else
- kernel_filter_nlm_update_output(dx,
- dy,
- difference_image,
- image,
- temp_image,
- out_image,
- accum_image,
- load_int4(rect),
- channel_offset,
- stride,
- f);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
- int dy,
- int t,
- float *difference_image,
- float *buffer,
- float *transform,
- int *rank,
- float *XtWX,
- float3 *XtWY,
- int *rect,
- int *filter_window,
- int stride,
- int f,
- int pass_stride,
- int frame_offset,
- bool use_time)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
-#else
- kernel_filter_nlm_construct_gramian(dx,
- dy,
- t,
- difference_image,
- buffer,
- transform,
- rank,
- XtWX,
- XtWY,
- load_int4(rect),
- load_int4(filter_window),
- stride,
- f,
- pass_stride,
- frame_offset,
- use_time);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
- float *accum_image,
- int *rect,
- int stride)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize);
-#else
- kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride);
-#endif
-}
-
-void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
- int y,
- int storage_ofs,
- float *buffer,
- int *rank,
- float *XtWX,
- float3 *XtWY,
- int *buffer_params,
- int sample)
-{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_finalize);
-#else
- XtWX += storage_ofs * XTWX_SIZE;
- XtWY += storage_ofs * XTWY_SIZE;
- rank += storage_ofs;
- kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample);
-#endif
-}
-
-#undef KERNEL_STUB
-#undef STUB_ASSERT
-#undef KERNEL_ARCH
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
deleted file mode 100644
index 75833d83648..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu_sse2
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
deleted file mode 100644
index c998cd54d3a..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu_sse3
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
deleted file mode 100644
index fc4ef1fca5b..00000000000
--- a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
-
-#include "kernel/filter/filter.h"
-#define KERNEL_ARCH cpu_sse41
-#include "kernel/kernels/cpu/filter_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
deleted file mode 100644
index 8040bfb7b33..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* CPU kernel entry points */
-
-/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this
- * one with SSE2 intrinsics.
- */
-#if defined(__x86_64__) || defined(_M_X64)
-# define __KERNEL_SSE2__
-#endif
-
-/* When building kernel for native machine detect kernel features from the flags
- * set by compiler.
- */
-#ifdef WITH_KERNEL_NATIVE
-# ifdef __SSE2__
-# ifndef __KERNEL_SSE2__
-# define __KERNEL_SSE2__
-# endif
-# endif
-# ifdef __SSE3__
-# define __KERNEL_SSE3__
-# endif
-# ifdef __SSSE3__
-# define __KERNEL_SSSE3__
-# endif
-# ifdef __SSE4_1__
-# define __KERNEL_SSE41__
-# endif
-# ifdef __AVX__
-# define __KERNEL_SSE__
-# define __KERNEL_AVX__
-# endif
-# ifdef __AVX2__
-# define __KERNEL_SSE__
-# define __KERNEL_AVX2__
-# endif
-#endif
-
-/* quiet unused define warnings */
-#if defined(__KERNEL_SSE2__)
-/* do nothing */
-#endif
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Memory Copy */
-
-void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t)
-{
- if (strcmp(name, "__data") == 0) {
- kg->__data = *(KernelData *)host;
- }
- else {
- assert(0);
- }
-}
-
-void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
-{
- if (0) {
- }
-
-#define KERNEL_TEX(type, tname) \
- else if (strcmp(name, #tname) == 0) \
- { \
- kg->tname.data = (type *)mem; \
- kg->tname.width = size; \
- }
-#include "kernel/kernel_textures.h"
- else {
- assert(0);
- }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
deleted file mode 100644
index 5f6b6800363..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_avx
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
deleted file mode 100644
index 97e8fc25140..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2011-2014 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# define __KERNEL_AVX2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_avx2
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
deleted file mode 100644
index ea3103f12c3..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Templated common declaration part of all CPU kernels. */
-
-void KERNEL_FUNCTION_FULL_NAME(path_trace)(
- KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
-
-void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
- uchar4 *rgba,
- float *buffer,
- float sample_scale,
- int x,
- int y,
- int offset,
- int stride);
-
-void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
- uchar4 *rgba,
- float *buffer,
- float sample_scale,
- int x,
- int y,
- int offset,
- int stride);
-
-void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
- uint4 *input,
- float4 *output,
- int type,
- int filter,
- int i,
- int offset,
- int sample);
-
-void KERNEL_FUNCTION_FULL_NAME(bake)(
- KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
-
-/* Split kernels */
-
-void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg,
- ccl_constant KernelData *data,
- ccl_global void *split_data_buffer,
- int num_elements,
- ccl_global char *ray_state,
- int start_sample,
- int end_sample,
- int sx,
- int sy,
- int sw,
- int sh,
- int offset,
- int stride,
- ccl_global int *Queue_index,
- int queuesize,
- ccl_global char *use_queues_flag,
- ccl_global unsigned int *work_pool_wgs,
- unsigned int num_samples,
- ccl_global float *buffer);
-
-#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * data);
-
-DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
-DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
-DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
-DECLARE_SPLIT_KERNEL_FUNCTION(do_volume)
-DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
-DECLARE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DECLARE_SPLIT_KERNEL_FUNCTION(shader_setup)
-DECLARE_SPLIT_KERNEL_FUNCTION(shader_sort)
-DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
-DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
-DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
-DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
-DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
-DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive)
-DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
-DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
-DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
-DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
-DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
-DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
-DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
-
-#undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
deleted file mode 100644
index 59b96c86c50..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ /dev/null
@@ -1,660 +0,0 @@
-/*
- * Copyright 2011-2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __KERNEL_CPU_IMAGE_H__
-#define __KERNEL_CPU_IMAGE_H__
-
-#ifdef WITH_NANOVDB
-# define NANOVDB_USE_INTRINSICS
-# include <nanovdb/NanoVDB.h>
-# include <nanovdb/util/SampleFromVoxels.h>
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-/* Make template functions private so symbols don't conflict between kernels with different
- * instruction sets. */
-namespace {
-
-#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
- { \
- u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
- u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
- u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
- u[3] = (1.0f / 6.0f) * t * t * t; \
- } \
- (void)0
-
-ccl_device_inline float frac(float x, int *ix)
-{
- int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
- *ix = i;
- return x - (float)i;
-}
-
-template<typename T> struct TextureInterpolator {
-
- static ccl_always_inline float4 read(float4 r)
- {
- return r;
- }
-
- static ccl_always_inline float4 read(uchar4 r)
- {
- float f = 1.0f / 255.0f;
- return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
- }
-
- static ccl_always_inline float4 read(uchar r)
- {
- float f = r * (1.0f / 255.0f);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(float r)
- {
- /* TODO(dingto): Optimize this, so interpolation
- * happens on float instead of float4 */
- return make_float4(r, r, r, 1.0f);
- }
-
- static ccl_always_inline float4 read(half4 r)
- {
- return half4_to_float4(r);
- }
-
- static ccl_always_inline float4 read(half r)
- {
- float f = half_to_float(r);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(uint16_t r)
- {
- float f = r * (1.0f / 65535.0f);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(ushort4 r)
- {
- float f = 1.0f / 65535.0f;
- return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
- }
-
- static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height)
- {
- if (x < 0 || y < 0 || x >= width || y >= height) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return read(data[y * width + x]);
- }
-
- static ccl_always_inline int wrap_periodic(int x, int width)
- {
- x %= width;
- if (x < 0)
- x += width;
- return x;
- }
-
- static ccl_always_inline int wrap_clamp(int x, int width)
- {
- return clamp(x, 0, width - 1);
- }
-
- /* ******** 2D interpolation ******** */
-
- static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y)
- {
- const T *data = (const T *)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy;
- frac(x * (float)width, &ix);
- frac(y * (float)height, &iy);
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- break;
- case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return read(data[ix + iy * width]);
- }
-
- static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y)
- {
- const T *data = (const T *)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy, nix, niy;
- const float tx = frac(x * (float)width - 0.5f, &ix);
- const float ty = frac(y * (float)height - 0.5f, &iy);
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- nix = wrap_periodic(ix + 1, width);
- niy = wrap_periodic(iy + 1, height);
- break;
- case EXTENSION_CLIP:
- nix = ix + 1;
- niy = iy + 1;
- break;
- case EXTENSION_EXTEND:
- nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) +
- (1.0f - ty) * tx * read(data, nix, iy, width, height) +
- ty * (1.0f - tx) * read(data, ix, niy, width, height) +
- ty * tx * read(data, nix, niy, width, height);
- }
-
- static ccl_always_inline float4 interp_cubic(const TextureInfo &info, float x, float y)
- {
- const T *data = (const T *)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy, nix, niy;
- const float tx = frac(x * (float)width - 0.5f, &ix);
- const float ty = frac(y * (float)height - 0.5f, &iy);
- int pix, piy, nnix, nniy;
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- pix = wrap_periodic(ix - 1, width);
- piy = wrap_periodic(iy - 1, height);
- nix = wrap_periodic(ix + 1, width);
- niy = wrap_periodic(iy + 1, height);
- nnix = wrap_periodic(ix + 2, width);
- nniy = wrap_periodic(iy + 2, height);
- break;
- case EXTENSION_CLIP:
- pix = ix - 1;
- piy = iy - 1;
- nix = ix + 1;
- niy = iy + 1;
- nnix = ix + 2;
- nniy = iy + 2;
- break;
- case EXTENSION_EXTEND:
- pix = wrap_clamp(ix - 1, width);
- piy = wrap_clamp(iy - 1, height);
- nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
- nnix = wrap_clamp(ix + 2, width);
- nniy = wrap_clamp(iy + 2, height);
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- const int xc[4] = {pix, ix, nix, nnix};
- const int yc[4] = {piy, iy, niy, nniy};
- float u[4], v[4];
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
- */
-#define DATA(x, y) (read(data, xc[x], yc[y], width, height))
-#define TERM(col) \
- (v[col] * \
- (u[0] * DATA(0, col) + u[1] * DATA(1, col) + u[2] * DATA(2, col) + u[3] * DATA(3, col)))
-
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-
- /* Actual interpolation. */
- return TERM(0) + TERM(1) + TERM(2) + TERM(3);
-#undef TERM
-#undef DATA
- }
-
- static ccl_always_inline float4 interp(const TextureInfo &info, float x, float y)
- {
- if (UNLIKELY(!info.data)) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- switch (info.interpolation) {
- case INTERPOLATION_CLOSEST:
- return interp_closest(info, x, y);
- case INTERPOLATION_LINEAR:
- return interp_linear(info, x, y);
- default:
- return interp_cubic(info, x, y);
- }
- }
-
- /* ******** 3D interpolation ******** */
-
- static ccl_always_inline float4 interp_3d_closest(const TextureInfo &info,
- float x,
- float y,
- float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
-
- frac(x * (float)width, &ix);
- frac(y * (float)height, &iy);
- frac(z * (float)depth, &iz);
-
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
- break;
- case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const T *data = (const T *)info.data;
- return read(data[ix + iy * width + iz * width * height]);
- }
-
- static ccl_always_inline float4 interp_3d_linear(const TextureInfo &info,
- float x,
- float y,
- float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
- int nix, niy, niz;
-
- float tx = frac(x * (float)width - 0.5f, &ix);
- float ty = frac(y * (float)height - 0.5f, &iy);
- float tz = frac(z * (float)depth - 0.5f, &iz);
-
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
-
- nix = wrap_periodic(ix + 1, width);
- niy = wrap_periodic(iy + 1, height);
- niz = wrap_periodic(iz + 1, depth);
- break;
- case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
- niz = wrap_clamp(iz + 1, depth);
-
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const T *data = (const T *)info.data;
- float4 r;
-
- r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
- read(data[ix + iy * width + iz * width * height]);
- r += (1.0f - tz) * (1.0f - ty) * tx * read(data[nix + iy * width + iz * width * height]);
- r += (1.0f - tz) * ty * (1.0f - tx) * read(data[ix + niy * width + iz * width * height]);
- r += (1.0f - tz) * ty * tx * read(data[nix + niy * width + iz * width * height]);
-
- r += tz * (1.0f - ty) * (1.0f - tx) * read(data[ix + iy * width + niz * width * height]);
- r += tz * (1.0f - ty) * tx * read(data[nix + iy * width + niz * width * height]);
- r += tz * ty * (1.0f - tx) * read(data[ix + niy * width + niz * width * height]);
- r += tz * ty * tx * read(data[nix + niy * width + niz * width * height]);
-
- return r;
- }
-
- /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
- * causing stack overflow issue in this function unless it is inlined.
- *
- * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
- * enabled.
- */
-#if defined(__GNUC__) || defined(__clang__)
- static ccl_always_inline
-#else
- static ccl_never_inline
-#endif
- float4
- interp_3d_cubic(const TextureInfo &info, float x, float y, float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
- int nix, niy, niz;
- /* Tricubic b-spline interpolation. */
- const float tx = frac(x * (float)width - 0.5f, &ix);
- const float ty = frac(y * (float)height - 0.5f, &iy);
- const float tz = frac(z * (float)depth - 0.5f, &iz);
- int pix, piy, piz, nnix, nniy, nniz;
-
- switch (info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
-
- pix = wrap_periodic(ix - 1, width);
- piy = wrap_periodic(iy - 1, height);
- piz = wrap_periodic(iz - 1, depth);
-
- nix = wrap_periodic(ix + 1, width);
- niy = wrap_periodic(iy + 1, height);
- niz = wrap_periodic(iz + 1, depth);
-
- nnix = wrap_periodic(ix + 2, width);
- nniy = wrap_periodic(iy + 2, height);
- nniz = wrap_periodic(iz + 2, depth);
- break;
- case EXTENSION_CLIP:
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- pix = wrap_clamp(ix - 1, width);
- piy = wrap_clamp(iy - 1, height);
- piz = wrap_clamp(iz - 1, depth);
-
- nix = wrap_clamp(ix + 1, width);
- niy = wrap_clamp(iy + 1, height);
- niz = wrap_clamp(iz + 1, depth);
-
- nnix = wrap_clamp(ix + 2, width);
- nniy = wrap_clamp(iy + 2, height);
- nniz = wrap_clamp(iz + 2, depth);
-
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const int xc[4] = {pix, ix, nix, nnix};
- const int yc[4] = {width * piy, width * iy, width * niy, width * nniy};
- const int zc[4] = {
- width * height * piz, width * height * iz, width * height * niz, width * height * nniz};
- float u[4], v[4], w[4];
-
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
- */
-#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
-#define COL_TERM(col, row) \
- (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
- u[3] * DATA(3, col, row)))
-#define ROW_TERM(row) \
- (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
-
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- SET_CUBIC_SPLINE_WEIGHTS(w, tz);
-
- /* Actual interpolation. */
- const T *data = (const T *)info.data;
- return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
-
-#undef COL_TERM
-#undef ROW_TERM
-#undef DATA
- }
-
- static ccl_always_inline float4
- interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
- {
- if (UNLIKELY(!info.data))
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
- switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
- case INTERPOLATION_CLOSEST:
- return interp_3d_closest(info, x, y, z);
- case INTERPOLATION_LINEAR:
- return interp_3d_linear(info, x, y, z);
- default:
- return interp_3d_cubic(info, x, y, z);
- }
- }
-};
-
-#ifdef WITH_NANOVDB
-template<typename T> struct NanoVDBInterpolator {
-
- typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType;
-
- static ccl_always_inline float4 read(float r)
- {
- return make_float4(r, r, r, 1.0f);
- }
-
- static ccl_always_inline float4 read(nanovdb::Vec3f r)
- {
- return make_float4(r[0], r[1], r[2], 1.0f);
- }
-
- static ccl_always_inline float4 interp_3d_closest(const AccessorType &acc,
- float x,
- float y,
- float z)
- {
- const nanovdb::Vec3f xyz(x, y, z);
- return read(nanovdb::SampleFromVoxels<AccessorType, 0, false>(acc)(xyz));
- }
-
- static ccl_always_inline float4 interp_3d_linear(const AccessorType &acc,
- float x,
- float y,
- float z)
- {
- const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f);
- return read(nanovdb::SampleFromVoxels<AccessorType, 1, false>(acc)(xyz));
- }
-
-# if defined(__GNUC__) || defined(__clang__)
- static ccl_always_inline
-# else
- static ccl_never_inline
-# endif
- float4
- interp_3d_cubic(const AccessorType &acc, float x, float y, float z)
- {
- int ix, iy, iz;
- int nix, niy, niz;
- int pix, piy, piz;
- int nnix, nniy, nniz;
- /* Tricubic b-spline interpolation. */
- const float tx = frac(x - 0.5f, &ix);
- const float ty = frac(y - 0.5f, &iy);
- const float tz = frac(z - 0.5f, &iz);
- pix = ix - 1;
- piy = iy - 1;
- piz = iz - 1;
- nix = ix + 1;
- niy = iy + 1;
- niz = iz + 1;
- nnix = ix + 2;
- nniy = iy + 2;
- nniz = iz + 2;
-
- const int xc[4] = {pix, ix, nix, nnix};
- const int yc[4] = {piy, iy, niy, nniy};
- const int zc[4] = {piz, iz, niz, nniz};
- float u[4], v[4], w[4];
-
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
- */
-# define DATA(x, y, z) (read(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z]))))
-# define COL_TERM(col, row) \
- (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
- u[3] * DATA(3, col, row)))
-# define ROW_TERM(row) \
- (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
-
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- SET_CUBIC_SPLINE_WEIGHTS(w, tz);
-
- /* Actual interpolation. */
- return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
-
-# undef COL_TERM
-# undef ROW_TERM
-# undef DATA
- }
-
- static ccl_always_inline float4
- interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
- {
- using namespace nanovdb;
-
- NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
- AccessorType acc = grid->getAccessor();
-
- switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
- case INTERPOLATION_CLOSEST:
- return interp_3d_closest(acc, x, y, z);
- case INTERPOLATION_LINEAR:
- return interp_3d_linear(acc, x, y, z);
- default:
- return interp_3d_cubic(acc, x, y, z);
- }
- }
-};
-#endif
-
-#undef SET_CUBIC_SPLINE_WEIGHTS
-
-ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
-{
- const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
-
- switch (info.data_type) {
- case IMAGE_DATA_TYPE_HALF:
- return TextureInterpolator<half>::interp(info, x, y);
- case IMAGE_DATA_TYPE_BYTE:
- return TextureInterpolator<uchar>::interp(info, x, y);
- case IMAGE_DATA_TYPE_USHORT:
- return TextureInterpolator<uint16_t>::interp(info, x, y);
- case IMAGE_DATA_TYPE_FLOAT:
- return TextureInterpolator<float>::interp(info, x, y);
- case IMAGE_DATA_TYPE_HALF4:
- return TextureInterpolator<half4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_BYTE4:
- return TextureInterpolator<uchar4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_USHORT4:
- return TextureInterpolator<ushort4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_FLOAT4:
- return TextureInterpolator<float4>::interp(info, x, y);
- default:
- assert(0);
- return make_float4(
- TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
- }
-}
-
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg,
- int id,
- float3 P,
- InterpolationType interp)
-{
- const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
-
- if (info.use_transform_3d) {
- P = transform_point(&info.transform_3d, P);
- }
-
- switch (info.data_type) {
- case IMAGE_DATA_TYPE_HALF:
- return TextureInterpolator<half>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_BYTE:
- return TextureInterpolator<uchar>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_USHORT:
- return TextureInterpolator<uint16_t>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_FLOAT:
- return TextureInterpolator<float>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_HALF4:
- return TextureInterpolator<half4>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_BYTE4:
- return TextureInterpolator<uchar4>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_USHORT4:
- return TextureInterpolator<ushort4>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_FLOAT4:
- return TextureInterpolator<float4>::interp_3d(info, P.x, P.y, P.z, interp);
-#ifdef WITH_NANOVDB
- case IMAGE_DATA_TYPE_NANOVDB_FLOAT:
- return NanoVDBInterpolator<float>::interp_3d(info, P.x, P.y, P.z, interp);
- case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
- return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp);
-#endif
- default:
- assert(0);
- return make_float4(
- TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
- }
-}
-
-} /* Namespace. */
-
-CCL_NAMESPACE_END
-
-#endif // __KERNEL_CPU_IMAGE_H__
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
deleted file mode 100644
index 51d6c23f72f..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Templated common implementation part of all CPU kernels.
- *
- * The idea is that particular .cpp files sets needed optimization flags and
- * simply includes this file without worry of copying actual implementation over.
- */
-
-// clang-format off
-#include "kernel/kernel_compat_cpu.h"
-
-#ifndef KERNEL_STUB
-# ifndef __SPLIT_KERNEL__
-# include "kernel/kernel_math.h"
-# include "kernel/kernel_types.h"
-
-# include "kernel/split/kernel_split_data.h"
-# include "kernel/kernel_globals.h"
-
-# include "kernel/kernel_color.h"
-# include "kernel/kernels/cpu/kernel_cpu_image.h"
-# include "kernel/kernel_film.h"
-# include "kernel/kernel_path.h"
-# include "kernel/kernel_path_branched.h"
-# include "kernel/kernel_bake.h"
-# else
-# include "kernel/split/kernel_split_common.h"
-
-# include "kernel/split/kernel_data_init.h"
-# include "kernel/split/kernel_path_init.h"
-# include "kernel/split/kernel_scene_intersect.h"
-# include "kernel/split/kernel_lamp_emission.h"
-# include "kernel/split/kernel_do_volume.h"
-# include "kernel/split/kernel_queue_enqueue.h"
-# include "kernel/split/kernel_indirect_background.h"
-# include "kernel/split/kernel_shader_setup.h"
-# include "kernel/split/kernel_shader_sort.h"
-# include "kernel/split/kernel_shader_eval.h"
-# include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
-# include "kernel/split/kernel_subsurface_scatter.h"
-# include "kernel/split/kernel_direct_lighting.h"
-# include "kernel/split/kernel_shadow_blocked_ao.h"
-# include "kernel/split/kernel_shadow_blocked_dl.h"
-# include "kernel/split/kernel_enqueue_inactive.h"
-# include "kernel/split/kernel_next_iteration_setup.h"
-# include "kernel/split/kernel_indirect_subsurface.h"
-# include "kernel/split/kernel_buffer_update.h"
-# include "kernel/split/kernel_adaptive_stopping.h"
-# include "kernel/split/kernel_adaptive_filter_x.h"
-# include "kernel/split/kernel_adaptive_filter_y.h"
-# include "kernel/split/kernel_adaptive_adjust_samples.h"
-# endif /* __SPLIT_KERNEL__ */
-#else
-# define STUB_ASSERT(arch, name) \
- assert(!(#name " kernel stub for architecture " #arch " was called!"))
-
-# ifdef __SPLIT_KERNEL__
-# include "kernel/split/kernel_data_init.h"
-# endif /* __SPLIT_KERNEL__ */
-#endif /* KERNEL_STUB */
-// clang-format on
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __SPLIT_KERNEL__
-
-/* Path Tracing */
-
-void KERNEL_FUNCTION_FULL_NAME(path_trace)(
- KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
-{
-# ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, path_trace);
-# else
-# ifdef __BRANCHED_PATH__
- if (kernel_data.integrator.branched) {
- kernel_branched_path_trace(kg, buffer, sample, x, y, offset, stride);
- }
- else
-# endif
- {
- kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
- }
-# endif /* KERNEL_STUB */
-}
-
-/* Film */
-
-void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
- uchar4 *rgba,
- float *buffer,
- float sample_scale,
- int x,
- int y,
- int offset,
- int stride)
-{
-# ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, convert_to_byte);
-# else
- kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-# endif /* KERNEL_STUB */
-}
-
-void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
- uchar4 *rgba,
- float *buffer,
- float sample_scale,
- int x,
- int y,
- int offset,
- int stride)
-{
-# ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, convert_to_half_float);
-# else
- kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-# endif /* KERNEL_STUB */
-}
-
-/* Bake */
-
-void KERNEL_FUNCTION_FULL_NAME(bake)(
- KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
-{
-# ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, bake);
-# else
-# ifdef __BAKING__
- kernel_bake_evaluate(kg, buffer, sample, x, y, offset, stride);
-# endif
-# endif /* KERNEL_STUB */
-}
-
-/* Shader Evaluate */
-
-void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
- uint4 *input,
- float4 *output,
- int type,
- int filter,
- int i,
- int offset,
- int sample)
-{
-# ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, shader);
-# else
- if (type == SHADER_EVAL_DISPLACE) {
- kernel_displace_evaluate(kg, input, output, i);
- }
- else {
- kernel_background_evaluate(kg, input, output, i);
- }
-# endif /* KERNEL_STUB */
-}
-
-#else /* __SPLIT_KERNEL__ */
-
-/* Split Kernel Path Tracing */
-
-# ifdef KERNEL_STUB
-# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
- { \
- STUB_ASSERT(KERNEL_ARCH, name); \
- }
-
-# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
- { \
- STUB_ASSERT(KERNEL_ARCH, name); \
- }
-# else
-# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
- { \
- kernel_##name(kg); \
- }
-
-# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
- { \
- ccl_local type locals; \
- kernel_##name(kg, &locals); \
- }
-# endif /* KERNEL_STUB */
-
-DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
-DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
-DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
-DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao,
- BackgroundAOLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
-DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
-#endif /* __SPLIT_KERNEL__ */
-
-#undef KERNEL_STUB
-#undef STUB_ASSERT
-#undef KERNEL_ARCH
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
deleted file mode 100644
index 989f5e5aaa8..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* CPU kernel entry points */
-
-/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this
- * one with SSE2 intrinsics.
- */
-#if defined(__x86_64__) || defined(_M_X64)
-# define __KERNEL_SSE2__
-#endif
-
-#define __SPLIT_KERNEL__
-
-/* When building kernel for native machine detect kernel features from the flags
- * set by compiler.
- */
-#ifdef WITH_KERNEL_NATIVE
-# ifdef __SSE2__
-# ifndef __KERNEL_SSE2__
-# define __KERNEL_SSE2__
-# endif
-# endif
-# ifdef __SSE3__
-# define __KERNEL_SSE3__
-# endif
-# ifdef __SSSE3__
-# define __KERNEL_SSSE3__
-# endif
-# ifdef __SSE4_1__
-# define __KERNEL_SSE41__
-# endif
-# ifdef __AVX__
-# define __KERNEL_AVX__
-# endif
-# ifdef __AVX2__
-# define __KERNEL_SSE__
-# define __KERNEL_AVX2__
-# endif
-#endif
-
-/* quiet unused define warnings */
-#if defined(__KERNEL_SSE2__)
-/* do nothing */
-#endif
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
deleted file mode 100644
index 40e485d27c0..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#define __SPLIT_KERNEL__
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_avx
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
deleted file mode 100644
index 8c44238470e..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright 2011-2014 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with AVX2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#define __SPLIT_KERNEL__
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE__
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# define __KERNEL_AVX__
-# define __KERNEL_AVX2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_avx2
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
deleted file mode 100644
index 7a3f218d5fc..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#define __SPLIT_KERNEL__
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse2
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
deleted file mode 100644
index 1cab59e0ea0..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#define __SPLIT_KERNEL__
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse3
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
deleted file mode 100644
index 637126d9d4c..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#define __SPLIT_KERNEL__
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse41
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
deleted file mode 100644
index 26d7fd4de48..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE2
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse2
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
deleted file mode 100644
index 3f259aa4480..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse3
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
deleted file mode 100644
index 68bae8c07c6..00000000000
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
- * optimization flags and nearly all functions inlined, while kernel.cpp
- * is compiled without for other CPU's. */
-
-#include "util/util_optimization.h"
-
-#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
-# define KERNEL_STUB
-#else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
-# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-# define __KERNEL_SSE2__
-# define __KERNEL_SSE3__
-# define __KERNEL_SSSE3__
-# define __KERNEL_SSE41__
-# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
-
-#include "kernel/kernel.h"
-#define KERNEL_ARCH cpu_sse41
-#include "kernel/kernels/cpu/kernel_cpu_impl.h"
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
deleted file mode 100644
index 6c9642d1f03..00000000000
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ /dev/null
@@ -1,413 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* CUDA kernel entry points */
-
-#ifdef __CUDA_ARCH__
-
-#include "kernel_config.h"
-
-#include "kernel/kernel_compat_cuda.h"
-
-#include "kernel/filter/filter_kernel.h"
-
-/* kernels */
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_copy_input(float *buffer,
- CCL_FILTER_TILE_INFO,
- int4 prefilter_rect,
- int buffer_pass_stride)
-{
- int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
- int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
- if(x < prefilter_rect.z && y < prefilter_rect.w) {
- int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2);
- int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2);
- int itile = ytile * 3 + xtile;
- float *const in = ((float *)ccl_get_tile_buffer(itile)) +
- (tile_info->offsets[itile] + y * tile_info->strides[itile] + x) * buffer_pass_stride;
- buffer += ((y - prefilter_rect.y) * (prefilter_rect.z - prefilter_rect.x) + (x - prefilter_rect.x)) * buffer_pass_stride;
- for (int i = 0; i < buffer_pass_stride; ++i)
- buffer[i] = in[i];
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_convert_to_rgb(float *rgb, float *buf, int sw, int sh, int stride, int pass_stride, int3 pass_offset, int num_inputs, int num_samples)
-{
- int x = blockDim.x*blockIdx.x + threadIdx.x;
- int y = blockDim.y*blockIdx.y + threadIdx.y;
- if(x < sw && y < sh) {
- if (num_inputs > 0) {
- float *in = buf + x * pass_stride + (y * stride + pass_offset.x) / sizeof(float);
- float *out = rgb + (x + y * sw) * 3;
- out[0] = clamp(in[0] / num_samples, 0.0f, 10000.0f);
- out[1] = clamp(in[1] / num_samples, 0.0f, 10000.0f);
- out[2] = clamp(in[2] / num_samples, 0.0f, 10000.0f);
- }
- if (num_inputs > 1) {
- float *in = buf + x * pass_stride + (y * stride + pass_offset.y) / sizeof(float);
- float *out = rgb + (x + y * sw) * 3 + (sw * sh) * 3;
- out[0] = in[0] / num_samples;
- out[1] = in[1] / num_samples;
- out[2] = in[2] / num_samples;
- }
- if (num_inputs > 2) {
- float *in = buf + x * pass_stride + (y * stride + pass_offset.z) / sizeof(float);
- float *out = rgb + (x + y * sw) * 3 + (sw * sh * 2) * 3;
- out[0] = in[0] / num_samples;
- out[1] = in[1] / num_samples;
- out[2] = in[2] / num_samples;
- }
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_convert_from_rgb(float *rgb, float *buf, int ix, int iy, int iw, int ih, int sx, int sy, int sw, int sh, int offset, int stride, int pass_stride, int num_samples)
-{
- int x = blockDim.x*blockIdx.x + threadIdx.x;
- int y = blockDim.y*blockIdx.y + threadIdx.y;
- if(x < sw && y < sh) {
- float *in = rgb + ((ix + x) + (iy + y) * iw) * 3;
- float *out = buf + (offset + (sx + x) + (sy + y) * stride) * pass_stride;
- out[0] = in[0] * num_samples;
- out[1] = in[1] * num_samples;
- out[2] = in[2] * num_samples;
- }
-}
-
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_divide_shadow(int sample,
- CCL_FILTER_TILE_INFO,
- float *unfilteredA,
- float *unfilteredB,
- float *sampleVariance,
- float *sampleVarianceV,
- float *bufferVariance,
- int4 prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset)
-{
- int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
- int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
- if(x < prefilter_rect.z && y < prefilter_rect.w) {
- kernel_filter_divide_shadow(sample,
- tile_info,
- x, y,
- unfilteredA,
- unfilteredB,
- sampleVariance,
- sampleVarianceV,
- bufferVariance,
- prefilter_rect,
- buffer_pass_stride,
- buffer_denoising_offset);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_get_feature(int sample,
- CCL_FILTER_TILE_INFO,
- int m_offset,
- int v_offset,
- float *mean,
- float *variance,
- float scale,
- int4 prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset)
-{
- int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
- int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
- if(x < prefilter_rect.z && y < prefilter_rect.w) {
- kernel_filter_get_feature(sample,
- tile_info,
- m_offset, v_offset,
- x, y,
- mean, variance,
- scale,
- prefilter_rect,
- buffer_pass_stride,
- buffer_denoising_offset);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_write_feature(int sample,
- int4 buffer_params,
- int4 filter_area,
- float *from,
- float *buffer,
- int out_offset,
- int4 prefilter_rect)
-{
- int x = blockDim.x*blockIdx.x + threadIdx.x;
- int y = blockDim.y*blockIdx.y + threadIdx.y;
- if(x < filter_area.z && y < filter_area.w) {
- kernel_filter_write_feature(sample,
- x + filter_area.x,
- y + filter_area.y,
- buffer_params,
- from,
- buffer,
- out_offset,
- prefilter_rect);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_detect_outliers(float *image,
- float *variance,
- float *depth,
- float *output,
- int4 prefilter_rect,
- int pass_stride)
-{
- int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
- int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
- if(x < prefilter_rect.z && y < prefilter_rect.w) {
- kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_combine_halves(float *mean, float *variance, float *a, float *b, int4 prefilter_rect, int r)
-{
- int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
- int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
- if(x < prefilter_rect.z && y < prefilter_rect.w) {
- kernel_filter_combine_halves(x, y, mean, variance, a, b, prefilter_rect, r);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_construct_transform(float const* __restrict__ buffer,
- CCL_FILTER_TILE_INFO,
- float *transform, int *rank,
- int4 filter_area, int4 rect,
- int radius, float pca_threshold,
- int pass_stride, int frame_stride,
- bool use_time)
-{
- int x = blockDim.x*blockIdx.x + threadIdx.x;
- int y = blockDim.y*blockIdx.y + threadIdx.y;
- if(x < filter_area.z && y < filter_area.w) {
- int *l_rank = rank + y*filter_area.z + x;
- float *l_transform = transform + y*filter_area.z + x;
- kernel_filter_construct_transform(buffer,
- tile_info,
- x + filter_area.x, y + filter_area.y,
- rect,
- pass_stride, frame_stride,
- use_time,
- l_transform, l_rank,
- radius, pca_threshold,
- filter_area.z*filter_area.w,
- threadIdx.y*blockDim.x + threadIdx.x);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image,
- const float *ccl_restrict variance_image,
- const float *ccl_restrict scale_image,
- float *difference_image,
- int w,
- int h,
- int stride,
- int pass_stride,
- int r,
- int channel_offset,
- int frame_offset,
- float a,
- float k_2)
-{
- int4 co, rect;
- int ofs;
- if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
- kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w,
- weight_image,
- variance_image,
- scale_image,
- difference_image + ofs,
- rect, stride,
- channel_offset,
- frame_offset,
- a, k_2);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_nlm_blur(const float *ccl_restrict difference_image,
- float *out_image,
- int w,
- int h,
- int stride,
- int pass_stride,
- int r,
- int f)
-{
- int4 co, rect;
- int ofs;
- if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
- kernel_filter_nlm_blur(co.x, co.y,
- difference_image + ofs,
- out_image + ofs,
- rect, stride, f);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_nlm_calc_weight(const float *ccl_restrict difference_image,
- float *out_image,
- int w,
- int h,
- int stride,
- int pass_stride,
- int r,
- int f)
-{
- int4 co, rect;
- int ofs;
- if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
- kernel_filter_nlm_calc_weight(co.x, co.y,
- difference_image + ofs,
- out_image + ofs,
- rect, stride, f);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_nlm_update_output(const float *ccl_restrict difference_image,
- const float *ccl_restrict image,
- float *out_image,
- float *accum_image,
- int w,
- int h,
- int stride,
- int pass_stride,
- int channel_offset,
- int r,
- int f)
-{
- int4 co, rect;
- int ofs;
- if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
- kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w,
- difference_image + ofs,
- image,
- out_image,
- accum_image,
- rect,
- channel_offset,
- stride, f);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_nlm_normalize(float *out_image,
- const float *ccl_restrict accum_image,
- int w,
- int h,
- int stride)
-{
- int x = blockDim.x*blockIdx.x + threadIdx.x;
- int y = blockDim.y*blockIdx.y + threadIdx.y;
- if(x < w && y < h) {
- kernel_filter_nlm_normalize(x, y, out_image, accum_image, stride);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_nlm_construct_gramian(int t,
- const float *ccl_restrict difference_image,
- const float *ccl_restrict buffer,
- float const* __restrict__ transform,
- int *rank,
- float *XtWX,
- float3 *XtWY,
- int4 filter_window,
- int w,
- int h,
- int stride,
- int pass_stride,
- int r,
- int f,
- int frame_offset,
- bool use_time)
-{
- int4 co, rect;
- int ofs;
- if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) {
- kernel_filter_nlm_construct_gramian(co.x, co.y,
- co.z, co.w,
- t,
- difference_image + ofs,
- buffer,
- transform, rank,
- XtWX, XtWY,
- rect, filter_window,
- stride, f,
- pass_stride,
- frame_offset,
- use_time,
- threadIdx.y*blockDim.x + threadIdx.x);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_finalize(float *buffer,
- int *rank,
- float *XtWX,
- float3 *XtWY,
- int4 filter_area,
- int4 buffer_params,
- int sample)
-{
- int x = blockDim.x*blockIdx.x + threadIdx.x;
- int y = blockDim.y*blockIdx.y + threadIdx.y;
- if(x < filter_area.z && y < filter_area.w) {
- int storage_ofs = y*filter_area.z+x;
- rank += storage_ofs;
- XtWX += storage_ofs;
- XtWY += storage_ofs;
- kernel_filter_finalize(x, y, buffer, rank,
- filter_area.z*filter_area.w,
- XtWX, XtWY,
- buffer_params, sample);
- }
-}
-
-#endif
-
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
deleted file mode 100644
index cf62b6e781e..00000000000
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* CUDA kernel entry points */
-
-#ifdef __CUDA_ARCH__
-
-#include "kernel/kernel_compat_cuda.h"
-#include "kernel_config.h"
-
-#include "util/util_atomic.h"
-
-#include "kernel/kernel_math.h"
-#include "kernel/kernel_types.h"
-#include "kernel/kernel_globals.h"
-#include "kernel/kernel_color.h"
-#include "kernel/kernels/cuda/kernel_cuda_image.h"
-#include "kernel/kernel_film.h"
-#include "kernel/kernel_path.h"
-#include "kernel/kernel_path_branched.h"
-#include "kernel/kernel_bake.h"
-#include "kernel/kernel_work_stealing.h"
-#include "kernel/kernel_adaptive_sampling.h"
-
-/* kernels */
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_path_trace(WorkTile *tile, uint total_work_size)
-{
- int work_index = ccl_global_id(0);
- bool thread_is_active = work_index < total_work_size;
- uint x, y, sample;
- KernelGlobals kg;
- if(thread_is_active) {
- get_work_pixel(tile, work_index, &x, &y, &sample);
-
- kernel_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
- }
-
- if(kernel_data.film.cryptomatte_passes) {
- __syncthreads();
- if(thread_is_active) {
- kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
- }
- }
-}
-
-#ifdef __BRANCHED_PATH__
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS)
-kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size)
-{
- int work_index = ccl_global_id(0);
- bool thread_is_active = work_index < total_work_size;
- uint x, y, sample;
- KernelGlobals kg;
- if(thread_is_active) {
- get_work_pixel(tile, work_index, &x, &y, &sample);
-
- kernel_branched_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
- }
-
- if(kernel_data.film.cryptomatte_passes) {
- __syncthreads();
- if(thread_is_active) {
- kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
- }
- }
-}
-#endif
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_adaptive_stopping(WorkTile *tile, int sample, uint total_work_size)
-{
- int work_index = ccl_global_id(0);
- bool thread_is_active = work_index < total_work_size;
- KernelGlobals kg;
- if(thread_is_active && kernel_data.film.pass_adaptive_aux_buffer) {
- uint x = tile->x + work_index % tile->w;
- uint y = tile->y + work_index / tile->w;
- int index = tile->offset + x + y * tile->stride;
- ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
- kernel_do_adaptive_stopping(&kg, buffer, sample);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_adaptive_filter_x(WorkTile *tile, int sample, uint)
-{
- KernelGlobals kg;
- if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
- if(ccl_global_id(0) < tile->h) {
- int y = tile->y + ccl_global_id(0);
- kernel_do_adaptive_filter_x(&kg, y, tile);
- }
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_adaptive_filter_y(WorkTile *tile, int sample, uint)
-{
- KernelGlobals kg;
- if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
- if(ccl_global_id(0) < tile->w) {
- int x = tile->x + ccl_global_id(0);
- kernel_do_adaptive_filter_y(&kg, x, tile);
- }
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_adaptive_scale_samples(WorkTile *tile, int start_sample, int sample, uint total_work_size)
-{
- if(kernel_data.film.pass_adaptive_aux_buffer) {
- int work_index = ccl_global_id(0);
- bool thread_is_active = work_index < total_work_size;
- KernelGlobals kg;
- if(thread_is_active) {
- uint x = tile->x + work_index % tile->w;
- uint y = tile->y + work_index / tile->w;
- int index = tile->offset + x + y * tile->stride;
- ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
- if(buffer[kernel_data.film.pass_sample_count] < 0.0f) {
- buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
- float sample_multiplier = sample / buffer[kernel_data.film.pass_sample_count];
- if(sample_multiplier != 1.0f) {
- kernel_adaptive_post_adjust(&kg, buffer, sample_multiplier);
- }
- }
- else {
- kernel_adaptive_post_adjust(&kg, buffer, sample / (sample - 1.0f));
- }
- }
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
-{
- int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
- int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
-
- if(x < sx + sw && y < sy + sh) {
- kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
-{
- int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
- int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
-
- if(x < sx + sw && y < sy + sh) {
- kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_displace(uint4 *input,
- float4 *output,
- int type,
- int sx,
- int sw,
- int offset,
- int sample)
-{
- int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
-
- if(x < sx + sw) {
- KernelGlobals kg;
- kernel_displace_evaluate(&kg, input, output, x);
- }
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_background(uint4 *input,
- float4 *output,
- int type,
- int sx,
- int sw,
- int offset,
- int sample)
-{
- int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
-
- if(x < sx + sw) {
- KernelGlobals kg;
- kernel_background_evaluate(&kg, input, output, x);
- }
-}
-
-#ifdef __BAKING__
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_bake(WorkTile *tile, uint total_work_size)
-{
- int work_index = ccl_global_id(0);
-
- if(work_index < total_work_size) {
- uint x, y, sample;
- get_work_pixel(tile, work_index, &x, &y, &sample);
-
- KernelGlobals kg;
- kernel_bake_evaluate(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
- }
-}
-#endif
-
-#endif
-
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h
deleted file mode 100644
index 2e47ce2de6c..00000000000
--- a/intern/cycles/kernel/kernels/cuda/kernel_config.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* device data taken from CUDA occupancy calculator */
-
-/* 3.0 and 3.5 */
-#if __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350
-# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
-# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
-# define CUDA_BLOCK_MAX_THREADS 1024
-# define CUDA_THREAD_MAX_REGISTERS 63
-
-/* tunable parameters */
-# define CUDA_THREADS_BLOCK_WIDTH 16
-# define CUDA_KERNEL_MAX_REGISTERS 63
-# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
-
-/* 3.2 */
-#elif __CUDA_ARCH__ == 320
-# define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768
-# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
-# define CUDA_BLOCK_MAX_THREADS 1024
-# define CUDA_THREAD_MAX_REGISTERS 63
-
-/* tunable parameters */
-# define CUDA_THREADS_BLOCK_WIDTH 16
-# define CUDA_KERNEL_MAX_REGISTERS 63
-# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
-
-/* 3.7 */
-#elif __CUDA_ARCH__ == 370
-# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
-# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
-# define CUDA_BLOCK_MAX_THREADS 1024
-# define CUDA_THREAD_MAX_REGISTERS 255
-
-/* tunable parameters */
-# define CUDA_THREADS_BLOCK_WIDTH 16
-# define CUDA_KERNEL_MAX_REGISTERS 63
-# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
-
-/* 5.x, 6.x */
-#elif __CUDA_ARCH__ <= 699
-# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
-# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
-# define CUDA_BLOCK_MAX_THREADS 1024
-# define CUDA_THREAD_MAX_REGISTERS 255
-
-/* tunable parameters */
-# define CUDA_THREADS_BLOCK_WIDTH 16
-/* CUDA 9.0 seems to cause slowdowns on high-end Pascal cards unless we increase the number of
- * registers */
-# if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600
-# define CUDA_KERNEL_MAX_REGISTERS 64
-# else
-# define CUDA_KERNEL_MAX_REGISTERS 48
-# endif
-# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
-
-/* 7.x, 8.x */
-#elif __CUDA_ARCH__ <= 899
-# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
-# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
-# define CUDA_BLOCK_MAX_THREADS 1024
-# define CUDA_THREAD_MAX_REGISTERS 255
-
-/* tunable parameters */
-# define CUDA_THREADS_BLOCK_WIDTH 16
-# define CUDA_KERNEL_MAX_REGISTERS 64
-# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 72
-
-/* unknown architecture */
-#else
-# error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
-#endif
-
-/* For split kernel using all registers seems fastest for now, but this
- * is unlikely to be optimal once we resolve other bottlenecks. */
-
-#define CUDA_KERNEL_SPLIT_MAX_REGISTERS CUDA_THREAD_MAX_REGISTERS
-
-/* Compute number of threads per block and minimum blocks per multiprocessor
- * given the maximum number of registers per thread. */
-
-#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \
- __launch_bounds__(threads_block_width *threads_block_width, \
- CUDA_MULTIPRESSOR_MAX_REGISTERS / \
- (threads_block_width * threads_block_width * thread_num_registers))
-
-/* sanity checks */
-
-#if CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS
-# error "Maximum number of threads per block exceeded"
-#endif
-
-#if CUDA_MULTIPRESSOR_MAX_REGISTERS / \
- (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH * CUDA_KERNEL_MAX_REGISTERS) > \
- CUDA_MULTIPROCESSOR_MAX_BLOCKS
-# error "Maximum number of blocks per multiprocessor exceeded"
-#endif
-
-#if CUDA_KERNEL_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS
-# error "Maximum number of registers per thread exceeded"
-#endif
-
-#if CUDA_KERNEL_BRANCHED_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS
-# error "Maximum number of registers per thread exceeded"
-#endif
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
deleted file mode 100644
index 132653fa7ca..00000000000
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * Copyright 2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_NANOVDB
-# define NDEBUG /* Disable "assert" in device code */
-# define NANOVDB_USE_INTRINSICS
-# include "nanovdb/NanoVDB.h"
-# include "nanovdb/util/SampleFromVoxels.h"
-#endif
-
-/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
-ccl_device float cubic_w0(float a)
-{
- return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
-}
-ccl_device float cubic_w1(float a)
-{
- return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f);
-}
-ccl_device float cubic_w2(float a)
-{
- return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f);
-}
-ccl_device float cubic_w3(float a)
-{
- return (1.0f / 6.0f) * (a * a * a);
-}
-
-/* g0 and g1 are the two amplitude functions. */
-ccl_device float cubic_g0(float a)
-{
- return cubic_w0(a) + cubic_w1(a);
-}
-ccl_device float cubic_g1(float a)
-{
- return cubic_w2(a) + cubic_w3(a);
-}
-
-/* h0 and h1 are the two offset functions */
-ccl_device float cubic_h0(float a)
-{
- return (cubic_w1(a) / cubic_g0(a)) - 1.0f;
-}
-ccl_device float cubic_h1(float a)
-{
- return (cubic_w3(a) / cubic_g1(a)) + 1.0f;
-}
-
-/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
-template<typename T>
-ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, float y)
-{
- CUtexObject tex = (CUtexObject)info.data;
-
- x = (x * info.width) - 0.5f;
- y = (y * info.height) - 0.5f;
-
- float px = floorf(x);
- float py = floorf(y);
- float fx = x - px;
- float fy = y - py;
-
- float g0x = cubic_g0(fx);
- float g1x = cubic_g1(fx);
- /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
- float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
- float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
- float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
- float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
-
- return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + g1x * tex2D<T>(tex, x1, y0)) +
- cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + g1x * tex2D<T>(tex, x1, y1));
-}
-
-/* Fast tricubic texture lookup using 8 trilinear lookups. */
-template<typename T>
-ccl_device T kernel_tex_image_interp_tricubic(const TextureInfo &info, float x, float y, float z)
-{
- CUtexObject tex = (CUtexObject)info.data;
-
- x = (x * info.width) - 0.5f;
- y = (y * info.height) - 0.5f;
- z = (z * info.depth) - 0.5f;
-
- float px = floorf(x);
- float py = floorf(y);
- float pz = floorf(z);
- float fx = x - px;
- float fy = y - py;
- float fz = z - pz;
-
- float g0x = cubic_g0(fx);
- float g1x = cubic_g1(fx);
- float g0y = cubic_g0(fy);
- float g1y = cubic_g1(fy);
- float g0z = cubic_g0(fz);
- float g1z = cubic_g1(fz);
-
- /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
- float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
- float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
- float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
- float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
- float z0 = (pz + cubic_h0(fz) + 0.5f) / info.depth;
- float z1 = (pz + cubic_h1(fz) + 0.5f) / info.depth;
-
- return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + g1x * tex3D<T>(tex, x1, y0, z0)) +
- g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + g1x * tex3D<T>(tex, x1, y1, z0))) +
- g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) + g1x * tex3D<T>(tex, x1, y0, z1)) +
- g1y * (g0x * tex3D<T>(tex, x0, y1, z1) + g1x * tex3D<T>(tex, x1, y1, z1)));
-}
-
-#ifdef WITH_NANOVDB
-template<typename T, typename S>
-ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, float z)
-{
- float px = floorf(x);
- float py = floorf(y);
- float pz = floorf(z);
- float fx = x - px;
- float fy = y - py;
- float fz = z - pz;
-
- float g0x = cubic_g0(fx);
- float g1x = cubic_g1(fx);
- float g0y = cubic_g0(fy);
- float g1y = cubic_g1(fy);
- float g0z = cubic_g0(fz);
- float g1z = cubic_g1(fz);
-
- float x0 = px + cubic_h0(fx);
- float x1 = px + cubic_h1(fx);
- float y0 = py + cubic_h0(fy);
- float y1 = py + cubic_h1(fy);
- float z0 = pz + cubic_h0(fz);
- float z1 = pz + cubic_h1(fz);
-
- using namespace nanovdb;
-
- return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) +
- g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) +
- g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) +
- g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1))));
-}
-
-template<typename T>
-ccl_device_inline T kernel_tex_image_interp_nanovdb(
- const TextureInfo &info, float x, float y, float z, uint interpolation)
-{
- using namespace nanovdb;
-
- NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
- typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType;
- AccessorType acc = grid->getAccessor();
-
- switch (interpolation) {
- case INTERPOLATION_CLOSEST:
- return SampleFromVoxels<AccessorType, 0, false>(acc)(Vec3f(x, y, z));
- case INTERPOLATION_LINEAR:
- return SampleFromVoxels<AccessorType, 1, false>(acc)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f));
- default:
- SampleFromVoxels<AccessorType, 1, false> s(acc);
- return kernel_tex_image_interp_tricubic_nanovdb<T>(s, x - 0.5f, y - 0.5f, z - 0.5f);
- }
-}
-#endif
-
-ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
-{
- const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
-
- /* float4, byte4, ushort4 and half4 */
- const int texture_type = info.data_type;
- if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
- texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
- if (info.interpolation == INTERPOLATION_CUBIC) {
- return kernel_tex_image_interp_bicubic<float4>(info, x, y);
- }
- else {
- CUtexObject tex = (CUtexObject)info.data;
- return tex2D<float4>(tex, x, y);
- }
- }
- /* float, byte and half */
- else {
- float f;
-
- if (info.interpolation == INTERPOLATION_CUBIC) {
- f = kernel_tex_image_interp_bicubic<float>(info, x, y);
- }
- else {
- CUtexObject tex = (CUtexObject)info.data;
- f = tex2D<float>(tex, x, y);
- }
-
- return make_float4(f, f, f, 1.0f);
- }
-}
-
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg,
- int id,
- float3 P,
- InterpolationType interp)
-{
- const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
-
- if (info.use_transform_3d) {
- P = transform_point(&info.transform_3d, P);
- }
-
- const float x = P.x;
- const float y = P.y;
- const float z = P.z;
-
- uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp;
- const int texture_type = info.data_type;
-
-#ifdef WITH_NANOVDB
- if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) {
- float f = kernel_tex_image_interp_nanovdb<float>(info, x, y, z, interpolation);
- return make_float4(f, f, f, 1.0f);
- }
- if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
- nanovdb::Vec3f f = kernel_tex_image_interp_nanovdb<nanovdb::Vec3f>(
- info, x, y, z, interpolation);
- return make_float4(f[0], f[1], f[2], 1.0f);
- }
-#endif
- if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
- texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
- if (interpolation == INTERPOLATION_CUBIC) {
- return kernel_tex_image_interp_tricubic<float4>(info, x, y, z);
- }
- else {
- CUtexObject tex = (CUtexObject)info.data;
- return tex3D<float4>(tex, x, y, z);
- }
- }
- else {
- float f;
-
- if (interpolation == INTERPOLATION_CUBIC) {
- f = kernel_tex_image_interp_tricubic<float>(info, x, y, z);
- }
- else {
- CUtexObject tex = (CUtexObject)info.data;
- f = tex3D<float>(tex, x, y, z);
- }
-
- return make_float4(f, f, f, 1.0f);
- }
-}
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
deleted file mode 100644
index 95ad7599cf1..00000000000
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright 2011-2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* CUDA split kernel entry points */
-
-#ifdef __CUDA_ARCH__
-
-#define __SPLIT_KERNEL__
-
-#include "kernel/kernel_compat_cuda.h"
-#include "kernel_config.h"
-
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_data_init.h"
-#include "kernel/split/kernel_path_init.h"
-#include "kernel/split/kernel_scene_intersect.h"
-#include "kernel/split/kernel_lamp_emission.h"
-#include "kernel/split/kernel_do_volume.h"
-#include "kernel/split/kernel_queue_enqueue.h"
-#include "kernel/split/kernel_indirect_background.h"
-#include "kernel/split/kernel_shader_setup.h"
-#include "kernel/split/kernel_shader_sort.h"
-#include "kernel/split/kernel_shader_eval.h"
-#include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
-#include "kernel/split/kernel_subsurface_scatter.h"
-#include "kernel/split/kernel_direct_lighting.h"
-#include "kernel/split/kernel_shadow_blocked_ao.h"
-#include "kernel/split/kernel_shadow_blocked_dl.h"
-#include "kernel/split/kernel_enqueue_inactive.h"
-#include "kernel/split/kernel_next_iteration_setup.h"
-#include "kernel/split/kernel_indirect_subsurface.h"
-#include "kernel/split/kernel_buffer_update.h"
-#include "kernel/split/kernel_adaptive_stopping.h"
-#include "kernel/split/kernel_adaptive_filter_x.h"
-#include "kernel/split/kernel_adaptive_filter_y.h"
-#include "kernel/split/kernel_adaptive_adjust_samples.h"
-
-#include "kernel/kernel_film.h"
-
-/* kernels */
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_state_buffer_size(uint num_threads, uint64_t *size)
-{
- *size = split_data_buffer_size(NULL, num_threads);
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_path_trace_data_init(
- ccl_global void *split_data_buffer,
- int num_elements,
- ccl_global char *ray_state,
- int start_sample,
- int end_sample,
- int sx, int sy, int sw, int sh, int offset, int stride,
- ccl_global int *Queue_index,
- int queuesize,
- ccl_global char *use_queues_flag,
- ccl_global unsigned int *work_pool_wgs,
- unsigned int num_samples,
- ccl_global float *buffer)
-{
- kernel_data_init(NULL,
- NULL,
- split_data_buffer,
- num_elements,
- ray_state,
- start_sample,
- end_sample,
- sx, sy, sw, sh, offset, stride,
- Queue_index,
- queuesize,
- use_queues_flag,
- work_pool_wgs,
- num_samples,
- buffer);
-}
-
-#define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
- extern "C" __global__ void \
- CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \
- kernel_cuda_##name() \
- { \
- kernel_##name(NULL); \
- }
-
-#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
- extern "C" __global__ void \
- CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \
- kernel_cuda_##name() \
- { \
- ccl_local type locals; \
- kernel_##name(NULL, &locals); \
- }
-
-DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
-DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
-DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
-DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
-DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
-DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
-{
- int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
- int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
-
- if(x < sx + sw && y < sy + sh)
- kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-extern "C" __global__ void
-CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
-{
- int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
- int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
-
- if(x < sx + sw && y < sy + sh)
- kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-#endif
-
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
deleted file mode 100644
index 996bc27f71b..00000000000
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* OpenCL kernel entry points */
-
-#include "kernel/kernel_compat_opencl.h"
-
-#include "kernel/filter/filter_kernel.h"
-
-/* kernels */
-
-__kernel void kernel_ocl_filter_divide_shadow(int sample,
- CCL_FILTER_TILE_INFO,
- ccl_global float *unfilteredA,
- ccl_global float *unfilteredB,
- ccl_global float *sampleVariance,
- ccl_global float *sampleVarianceV,
- ccl_global float *bufferVariance,
- int4 prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset)
-{
- int x = prefilter_rect.x + get_global_id(0);
- int y = prefilter_rect.y + get_global_id(1);
- if(x < prefilter_rect.z && y < prefilter_rect.w) {
- kernel_filter_divide_shadow(sample,
- CCL_FILTER_TILE_INFO_ARG,
- x, y,
- unfilteredA,
- unfilteredB,
- sampleVariance,
- sampleVarianceV,
- bufferVariance,
- prefilter_rect,
- buffer_pass_stride,
- buffer_denoising_offset);
- }
-}
-
-__kernel void kernel_ocl_filter_get_feature(int sample,
- CCL_FILTER_TILE_INFO,
- int m_offset,
- int v_offset,
- ccl_global float *mean,
- ccl_global float *variance,
- float scale,
- int4 prefilter_rect,
- int buffer_pass_stride,
- int buffer_denoising_offset)
-{
- int x = prefilter_rect.x + get_global_id(0);
- int y = prefilter_rect.y + get_global_id(1);
- if(x < prefilter_rect.z && y < prefilter_rect.w) {
- kernel_filter_get_feature(sample,
- CCL_FILTER_TILE_INFO_ARG,
- m_offset, v_offset,
- x, y,
- mean, variance,
- scale,
- prefilter_rect,
- buffer_pass_stride,
- buffer_denoising_offset);
- }
-}
-
-__kernel void kernel_ocl_filter_write_feature(int sample,
- int4 buffer_params,
- int4 filter_area,
- ccl_global float *from,
- ccl_global float *buffer,
- int out_offset,
- int4 prefilter_rect)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
- if(x < filter_area.z && y < filter_area.w) {
- kernel_filter_write_feature(sample,
- x + filter_area.x,
- y + filter_area.y,
- buffer_params,
- from,
- buffer,
- out_offset,
- prefilter_rect);
- }
-}
-
-__kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image,
- ccl_global float *variance,
- ccl_global float *depth,
- ccl_global float *output,
- int4 prefilter_rect,
- int pass_stride)
-{
- int x = prefilter_rect.x + get_global_id(0);
- int y = prefilter_rect.y + get_global_id(1);
- if(x < prefilter_rect.z && y < prefilter_rect.w) {
- kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
- }
-}
-
-__kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean,
- ccl_global float *variance,
- ccl_global float *a,
- ccl_global float *b,
- int4 prefilter_rect,
- int r)
-{
- int x = prefilter_rect.x + get_global_id(0);
- int y = prefilter_rect.y + get_global_id(1);
- if(x < prefilter_rect.z && y < prefilter_rect.w) {
- kernel_filter_combine_halves(x, y, mean, variance, a, b, prefilter_rect, r);
- }
-}
-
-__kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
- CCL_FILTER_TILE_INFO,
- ccl_global float *transform,
- ccl_global int *rank,
- int4 filter_area,
- int4 rect,
- int pass_stride,
- int frame_stride,
- char use_time,
- int radius,
- float pca_threshold)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
- if(x < filter_area.z && y < filter_area.w) {
- ccl_global int *l_rank = rank + y*filter_area.z + x;
- ccl_global float *l_transform = transform + y*filter_area.z + x;
- kernel_filter_construct_transform(buffer,
- CCL_FILTER_TILE_INFO_ARG,
- x + filter_area.x, y + filter_area.y,
- rect,
- pass_stride, frame_stride,
- use_time,
- l_transform, l_rank,
- radius, pca_threshold,
- filter_area.z*filter_area.w,
- get_local_id(1)*get_local_size(0) + get_local_id(0));
- }
-}
-
-__kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_restrict weight_image,
- const ccl_global float *ccl_restrict variance_image,
- const ccl_global float *ccl_restrict scale_image,
- ccl_global float *difference_image,
- int w,
- int h,
- int stride,
- int pass_stride,
- int r,
- int channel_offset,
- int frame_offset,
- float a,
- float k_2)
-{
- int4 co, rect;
- int ofs;
- if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
- kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w,
- weight_image,
- variance_image,
- scale_image,
- difference_image + ofs,
- rect, stride,
- channel_offset,
- frame_offset,
- a, k_2);
- }
-}
-
-__kernel void kernel_ocl_filter_nlm_blur(const ccl_global float *ccl_restrict difference_image,
- ccl_global float *out_image,
- int w,
- int h,
- int stride,
- int pass_stride,
- int r,
- int f)
-{
- int4 co, rect;
- int ofs;
- if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
- kernel_filter_nlm_blur(co.x, co.y,
- difference_image + ofs,
- out_image + ofs,
- rect, stride, f);
- }
-}
-
-__kernel void kernel_ocl_filter_nlm_calc_weight(const ccl_global float *ccl_restrict difference_image,
- ccl_global float *out_image,
- int w,
- int h,
- int stride,
- int pass_stride,
- int r,
- int f)
-{
- int4 co, rect;
- int ofs;
- if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
- kernel_filter_nlm_calc_weight(co.x, co.y,
- difference_image + ofs,
- out_image + ofs,
- rect, stride, f);
- }
-}
-
-__kernel void kernel_ocl_filter_nlm_update_output(const ccl_global float *ccl_restrict difference_image,
- const ccl_global float *ccl_restrict image,
- ccl_global float *out_image,
- ccl_global float *accum_image,
- int w,
- int h,
- int stride,
- int pass_stride,
- int channel_offset,
- int r,
- int f)
-{
- int4 co, rect;
- int ofs;
- if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) {
- kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w,
- difference_image + ofs,
- image,
- out_image,
- accum_image,
- rect,
- channel_offset,
- stride, f);
- }
-}
-
-__kernel void kernel_ocl_filter_nlm_normalize(ccl_global float *out_image,
- const ccl_global float *ccl_restrict accum_image,
- int w,
- int h,
- int stride)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
- if(x < w && y < h) {
- kernel_filter_nlm_normalize(x, y, out_image, accum_image, stride);
- }
-}
-
-__kernel void kernel_ocl_filter_nlm_construct_gramian(int t,
- const ccl_global float *ccl_restrict difference_image,
- const ccl_global float *ccl_restrict buffer,
- const ccl_global float *ccl_restrict transform,
- ccl_global int *rank,
- ccl_global float *XtWX,
- ccl_global float3 *XtWY,
- int4 filter_window,
- int w,
- int h,
- int stride,
- int pass_stride,
- int r,
- int f,
- int frame_offset,
- char use_time)
-{
- int4 co, rect;
- int ofs;
- if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) {
- kernel_filter_nlm_construct_gramian(co.x, co.y,
- co.z, co.w,
- t,
- difference_image + ofs,
- buffer,
- transform, rank,
- XtWX, XtWY,
- rect, filter_window,
- stride, f,
- pass_stride,
- frame_offset,
- use_time,
- get_local_id(1)*get_local_size(0) + get_local_id(0));
- }
-}
-
-__kernel void kernel_ocl_filter_finalize(ccl_global float *buffer,
- ccl_global int *rank,
- ccl_global float *XtWX,
- ccl_global float3 *XtWY,
- int4 filter_area,
- int4 buffer_params,
- int sample)
-{
- int x = get_global_id(0);
- int y = get_global_id(1);
- if(x < filter_area.z && y < filter_area.w) {
- int storage_ofs = y*filter_area.z+x;
- rank += storage_ofs;
- XtWX += storage_ofs;
- XtWY += storage_ofs;
- kernel_filter_finalize(x, y, buffer, rank,
- filter_area.z*filter_area.w,
- XtWX, XtWY,
- buffer_params, sample);
- }
-}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
deleted file mode 100644
index ebdb99d4730..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright 2019 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_adaptive_adjust_samples.h"
-
-#define KERNEL_NAME adaptive_adjust_samples
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
deleted file mode 100644
index 76d82d4184e..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright 2019 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_adaptive_filter_x.h"
-
-#define KERNEL_NAME adaptive_filter_x
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
deleted file mode 100644
index 1e6d15ba0f2..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright 2019 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_adaptive_filter_y.h"
-
-#define KERNEL_NAME adaptive_filter_y
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
deleted file mode 100644
index 51de0059667..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright 2019 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_adaptive_stopping.h"
-
-#define KERNEL_NAME adaptive_stopping
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_background.cl
deleted file mode 100644
index 0e600676e82..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_background.cl
+++ /dev/null
@@ -1,35 +0,0 @@
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/kernel_math.h"
-#include "kernel/kernel_types.h"
-#include "kernel/kernel_globals.h"
-#include "kernel/kernel_color.h"
-#include "kernel/kernels/opencl/kernel_opencl_image.h"
-
-#include "kernel/kernel_path.h"
-#include "kernel/kernel_path_branched.h"
-
-#include "kernel/kernel_bake.h"
-
-__kernel void kernel_ocl_background(
- ccl_constant KernelData *data,
- ccl_global uint4 *input,
- ccl_global float4 *output,
-
- KERNEL_BUFFER_PARAMS,
-
- int type, int sx, int sw, int offset, int sample)
-{
- KernelGlobals kglobals, *kg = &kglobals;
-
- kg->data = data;
-
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_info(kg);
-
- int x = sx + ccl_global_id(0);
-
- if(x < sx + sw) {
- kernel_background_evaluate(kg, input, output, x);
- }
-}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_bake.cl b/intern/cycles/kernel/kernels/opencl/kernel_bake.cl
deleted file mode 100644
index 7b81e387467..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_bake.cl
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/kernel_math.h"
-#include "kernel/kernel_types.h"
-#include "kernel/kernel_globals.h"
-#include "kernel/kernel_color.h"
-#include "kernel/kernels/opencl/kernel_opencl_image.h"
-
-#include "kernel/kernel_path.h"
-#include "kernel/kernel_path_branched.h"
-
-#include "kernel/kernel_bake.h"
-
-__kernel void kernel_ocl_bake(
- ccl_constant KernelData *data,
- ccl_global float *buffer,
-
- KERNEL_BUFFER_PARAMS,
-
- int sx, int sy, int sw, int sh, int offset, int stride, int sample)
-{
- KernelGlobals kglobals, *kg = &kglobals;
-
- kg->data = data;
-
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_info(kg);
-
- int x = sx + ccl_global_id(0);
- int y = sy + ccl_global_id(1);
-
- if(x < sx + sw && y < sy + sh) {
-#ifndef __NO_BAKING__
- kernel_bake_evaluate(kg, buffer, sample, x, y, offset, stride);
-#endif
- }
-}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_base.cl b/intern/cycles/kernel/kernels/opencl/kernel_base.cl
deleted file mode 100644
index 1c2d89e8a92..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_base.cl
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* OpenCL base kernels entry points */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/kernel_types.h"
-#include "kernel/kernel_globals.h"
-
-#include "kernel/kernel_film.h"
-
-
-__kernel void kernel_ocl_convert_to_byte(
- ccl_constant KernelData *data,
- ccl_global uchar4 *rgba,
- ccl_global float *buffer,
-
- KERNEL_BUFFER_PARAMS,
-
- float sample_scale,
- int sx, int sy, int sw, int sh, int offset, int stride)
-{
- KernelGlobals kglobals, *kg = &kglobals;
-
- kg->data = data;
-
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_info(kg);
-
- int x = sx + ccl_global_id(0);
- int y = sy + ccl_global_id(1);
-
- if(x < sx + sw && y < sy + sh)
- kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-__kernel void kernel_ocl_convert_to_half_float(
- ccl_constant KernelData *data,
- ccl_global uchar4 *rgba,
- ccl_global float *buffer,
-
- KERNEL_BUFFER_PARAMS,
-
- float sample_scale,
- int sx, int sy, int sw, int sh, int offset, int stride)
-{
- KernelGlobals kglobals, *kg = &kglobals;
-
- kg->data = data;
-
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_info(kg);
-
- int x = sx + ccl_global_id(0);
- int y = sy + ccl_global_id(1);
-
- if(x < sx + sw && y < sy + sh)
- kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
-}
-
-__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, uint64_t size, uint64_t offset)
-{
- size_t i = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
-
- if(i < size / sizeof(float4)) {
- buffer[i+offset/sizeof(float4)] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- else if(i == size / sizeof(float4)) {
- ccl_global uchar *b = (ccl_global uchar*)&buffer[i+offset/sizeof(float4)];
-
- for(i = 0; i < size % sizeof(float4); i++) {
- *(b++) = 0;
- }
- }
-}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
deleted file mode 100644
index dcea2630aef..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_buffer_update.h"
-
-#define KERNEL_NAME buffer_update
-#define LOCALS_TYPE unsigned int
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-#undef LOCALS_TYPE
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
deleted file mode 100644
index 7125348a49f..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_data_init.h"
-
-__kernel void kernel_ocl_path_trace_data_init(
- ccl_global char *kg,
- ccl_constant KernelData *data,
- ccl_global void *split_data_buffer,
- int num_elements,
- ccl_global char *ray_state,
- KERNEL_BUFFER_PARAMS,
- int start_sample,
- int end_sample,
- int sx, int sy, int sw, int sh, int offset, int stride,
- ccl_global int *Queue_index, /* Tracks the number of elements in queues */
- int queuesize, /* size (capacity) of the queue */
- ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */
- ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */
- unsigned int num_samples, /* Total number of samples per pixel */
- ccl_global float *buffer)
-{
- kernel_data_init((KernelGlobals*)kg,
- data,
- split_data_buffer,
- num_elements,
- ray_state,
- KERNEL_BUFFER_ARGS,
- start_sample,
- end_sample,
- sx, sy, sw, sh, offset, stride,
- Queue_index,
- queuesize,
- use_queues_flag,
- work_pool_wgs,
- num_samples,
- buffer);
-}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
deleted file mode 100644
index ed64ae01aae..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_direct_lighting.h"
-
-#define KERNEL_NAME direct_lighting
-#define LOCALS_TYPE unsigned int
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-#undef LOCALS_TYPE
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_displace.cl b/intern/cycles/kernel/kernels/opencl/kernel_displace.cl
deleted file mode 100644
index 76cc36971f5..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_displace.cl
+++ /dev/null
@@ -1,36 +0,0 @@
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/kernel_math.h"
-#include "kernel/kernel_types.h"
-#include "kernel/kernel_globals.h"
-#include "kernel/kernel_color.h"
-#include "kernel/kernels/opencl/kernel_opencl_image.h"
-
-#include "kernel/kernel_path.h"
-#include "kernel/kernel_path_branched.h"
-
-#include "kernel/kernel_bake.h"
-
-__kernel void kernel_ocl_displace(
- ccl_constant KernelData *data,
- ccl_global uint4 *input,
- ccl_global float4 *output,
-
- KERNEL_BUFFER_PARAMS,
-
- int type, int sx, int sw, int offset, int sample)
-{
- KernelGlobals kglobals, *kg = &kglobals;
-
- kg->data = data;
-
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_info(kg);
-
- int x = sx + ccl_global_id(0);
-
- if(x < sx + sw) {
- kernel_displace_evaluate(kg, input, output, x);
- }
-}
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
deleted file mode 100644
index 8afaa686e28..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_do_volume.h"
-
-#define KERNEL_NAME do_volume
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl
deleted file mode 100644
index e68d4104a91..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_enqueue_inactive.h"
-
-#define KERNEL_NAME enqueue_inactive
-#define LOCALS_TYPE unsigned int
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-#undef LOCALS_TYPE
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
deleted file mode 100644
index 9e1e57beba6..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
-
-#define KERNEL_NAME holdout_emission_blurring_pathtermination_ao
-#define LOCALS_TYPE BackgroundAOLocals
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-#undef LOCALS_TYPE
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
deleted file mode 100644
index 192d01444ba..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_indirect_background.h"
-
-#define KERNEL_NAME indirect_background
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
deleted file mode 100644
index 84938b889e5..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_indirect_subsurface.h"
-
-#define KERNEL_NAME indirect_subsurface
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
deleted file mode 100644
index c314dc96c33..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_lamp_emission.h"
-
-#define KERNEL_NAME lamp_emission
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
deleted file mode 100644
index 8b1332bf013..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_next_iteration_setup.h"
-
-#define KERNEL_NAME next_iteration_setup
-#define LOCALS_TYPE unsigned int
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-#undef LOCALS_TYPE
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
deleted file mode 100644
index bb6b8a40e8e..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * Copyright 2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_NANOVDB
-/* Data type to replace `double` used in the NanoVDB headers. Cycles don't need doubles, and is
- * safer and more portable to never use double datatype on GPU.
- * Use a special structure, so that the following is true:
- * - No unnoticed implicit cast or mathematical operations used on scalar 64bit type
- * (which rules out trick like using `uint64_t` as a drop-in replacement for double).
- * - Padding rules are matching exactly `double`
- * (which rules out array of `uint8_t`). */
-typedef struct ccl_vdb_double_t {
- uint64_t i;
-} ccl_vdb_double_t;
-
-# define double ccl_vdb_double_t
-# include "nanovdb/CNanoVDB.h"
-# undef double
-#endif
-
-/* For OpenCL we do manual lookup and interpolation. */
-
-ccl_device_inline ccl_global TextureInfo *kernel_tex_info(KernelGlobals *kg, uint id)
-{
- const uint tex_offset = id
-#define KERNEL_TEX(type, name) +1
-#include "kernel/kernel_textures.h"
- ;
-
- return &((ccl_global TextureInfo *)kg->buffers[0])[tex_offset];
-}
-
-#define tex_fetch(type, info, index) \
- ((ccl_global type *)(kg->buffers[info->cl_buffer] + info->data))[(index)]
-
-ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
-{
- x %= width;
- if (x < 0)
- x += width;
- return x;
-}
-
-ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
-{
- return clamp(x, 0, width - 1);
-}
-
-ccl_device_inline float4 svm_image_texture_read(
- KernelGlobals *kg, const ccl_global TextureInfo *info, void *acc, int x, int y, int z)
-{
- const int data_offset = x + info->width * y + info->width * info->height * z;
- const int texture_type = info->data_type;
-
- /* Float4 */
- if (texture_type == IMAGE_DATA_TYPE_FLOAT4) {
- return tex_fetch(float4, info, data_offset);
- }
- /* Byte4 */
- else if (texture_type == IMAGE_DATA_TYPE_BYTE4) {
- uchar4 r = tex_fetch(uchar4, info, data_offset);
- float f = 1.0f / 255.0f;
- return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
- }
- /* Ushort4 */
- else if (texture_type == IMAGE_DATA_TYPE_USHORT4) {
- ushort4 r = tex_fetch(ushort4, info, data_offset);
- float f = 1.0f / 65535.f;
- return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
- }
- /* Float */
- else if (texture_type == IMAGE_DATA_TYPE_FLOAT) {
- float f = tex_fetch(float, info, data_offset);
- return make_float4(f, f, f, 1.0f);
- }
- /* UShort */
- else if (texture_type == IMAGE_DATA_TYPE_USHORT) {
- ushort r = tex_fetch(ushort, info, data_offset);
- float f = r * (1.0f / 65535.0f);
- return make_float4(f, f, f, 1.0f);
- }
-#ifdef WITH_NANOVDB
- /* NanoVDB Float */
- else if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) {
- cnanovdb_coord coord;
- coord.mVec[0] = x;
- coord.mVec[1] = y;
- coord.mVec[2] = z;
- float f = cnanovdb_readaccessor_getValueF((cnanovdb_readaccessor *)acc, &coord);
- return make_float4(f, f, f, 1.0f);
- }
- /* NanoVDB Float3 */
- else if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
- cnanovdb_coord coord;
- coord.mVec[0] = x;
- coord.mVec[1] = y;
- coord.mVec[2] = z;
- cnanovdb_Vec3F f = cnanovdb_readaccessor_getValueF3((cnanovdb_readaccessor *)acc, &coord);
- return make_float4(f.mVec[0], f.mVec[1], f.mVec[2], 1.0f);
- }
-#endif
-#ifdef __KERNEL_CL_KHR_FP16__
- /* Half and Half4 are optional in OpenCL */
- else if (texture_type == IMAGE_DATA_TYPE_HALF) {
- float f = tex_fetch(half, info, data_offset);
- return make_float4(f, f, f, 1.0f);
- }
- else if (texture_type == IMAGE_DATA_TYPE_HALF4) {
- half4 r = tex_fetch(half4, info, data_offset);
- return make_float4(r.x, r.y, r.z, r.w);
- }
-#endif
- /* Byte */
- else {
- uchar r = tex_fetch(uchar, info, data_offset);
- float f = r * (1.0f / 255.0f);
- return make_float4(f, f, f, 1.0f);
- }
-}
-
-ccl_device_inline float4
-svm_image_texture_read_2d(KernelGlobals *kg, int id, void *acc, int x, int y)
-{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
-#ifdef WITH_NANOVDB
- if (info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
- info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
-#endif
- /* Wrap */
- if (info->extension == EXTENSION_REPEAT) {
- x = svm_image_texture_wrap_periodic(x, info->width);
- y = svm_image_texture_wrap_periodic(y, info->height);
- }
- else {
- x = svm_image_texture_wrap_clamp(x, info->width);
- y = svm_image_texture_wrap_clamp(y, info->height);
- }
-#ifdef WITH_NANOVDB
- }
-#endif
-
- return svm_image_texture_read(kg, info, acc, x, y, 0);
-}
-
-ccl_device_inline float4
-svm_image_texture_read_3d(KernelGlobals *kg, int id, void *acc, int x, int y, int z)
-{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
-#ifdef WITH_NANOVDB
- if (info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
- info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
-#endif
- /* Wrap */
- if (info->extension == EXTENSION_REPEAT) {
- x = svm_image_texture_wrap_periodic(x, info->width);
- y = svm_image_texture_wrap_periodic(y, info->height);
- z = svm_image_texture_wrap_periodic(z, info->depth);
- }
- else {
- x = svm_image_texture_wrap_clamp(x, info->width);
- y = svm_image_texture_wrap_clamp(y, info->height);
- z = svm_image_texture_wrap_clamp(z, info->depth);
- }
-#ifdef WITH_NANOVDB
- }
-#endif
-
- return svm_image_texture_read(kg, info, acc, x, y, z);
-}
-
-ccl_device_inline float svm_image_texture_frac(float x, int *ix)
-{
- int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
- *ix = i;
- return x - (float)i;
-}
-
-#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
- { \
- u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
- u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
- u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
- u[3] = (1.0f / 6.0f) * t * t * t; \
- } \
- (void)0
-
-ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
-{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
- if (info->extension == EXTENSION_CLIP) {
- if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- }
-
- if (info->interpolation == INTERPOLATION_CLOSEST) {
- /* Closest interpolation. */
- int ix, iy;
- svm_image_texture_frac(x * info->width, &ix);
- svm_image_texture_frac(y * info->height, &iy);
-
- return svm_image_texture_read_2d(kg, id, NULL, ix, iy);
- }
- else if (info->interpolation == INTERPOLATION_LINEAR) {
- /* Bilinear interpolation. */
- int ix, iy;
- float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
- float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
-
- float4 r;
- r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(kg, id, NULL, ix, iy);
- r += (1.0f - ty) * tx * svm_image_texture_read_2d(kg, id, NULL, ix + 1, iy);
- r += ty * (1.0f - tx) * svm_image_texture_read_2d(kg, id, NULL, ix, iy + 1);
- r += ty * tx * svm_image_texture_read_2d(kg, id, NULL, ix + 1, iy + 1);
- return r;
- }
- else {
- /* Bicubic interpolation. */
- int ix, iy;
- float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
- float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
-
- float u[4], v[4];
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-
- float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
- for (int y = 0; y < 4; y++) {
- for (int x = 0; x < 4; x++) {
- float weight = u[x] * v[y];
- r += weight * svm_image_texture_read_2d(kg, id, NULL, ix + x - 1, iy + y - 1);
- }
- }
- return r;
- }
-}
-
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float3 P, int interp)
-{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
- if (info->use_transform_3d) {
- Transform tfm = info->transform_3d;
- P = transform_point(&tfm, P);
- }
-
- float x = P.x;
- float y = P.y;
- float z = P.z;
-
- uint interpolation = (interp == INTERPOLATION_NONE) ? info->interpolation : interp;
-
-#ifdef WITH_NANOVDB
- cnanovdb_readaccessor acc;
- if (info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT ||
- info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
- ccl_global cnanovdb_griddata *grid =
- (ccl_global cnanovdb_griddata *)(kg->buffers[info->cl_buffer] + info->data);
- cnanovdb_readaccessor_init(&acc, cnanovdb_treedata_rootF(cnanovdb_griddata_tree(grid)));
- }
- else {
- if (info->extension == EXTENSION_CLIP) {
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- }
-
- x *= info->width;
- y *= info->height;
- z *= info->depth;
- }
-# define NANOVDB_ACCESS_POINTER &acc
-#else
-# define NANOVDB_ACCESS_POINTER NULL
-#endif
-
- if (interpolation == INTERPOLATION_CLOSEST) {
- /* Closest interpolation. */
- int ix, iy, iz;
- svm_image_texture_frac(x, &ix);
- svm_image_texture_frac(y, &iy);
- svm_image_texture_frac(z, &iz);
-
- return svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix, iy, iz);
- }
- else if (interpolation == INTERPOLATION_LINEAR) {
- /* Trilinear interpolation. */
- int ix, iy, iz;
- float tx = svm_image_texture_frac(x - 0.5f, &ix);
- float ty = svm_image_texture_frac(y - 0.5f, &iy);
- float tz = svm_image_texture_frac(z - 0.5f, &iz);
-
- float4 r;
- r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
- svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix, iy, iz);
- r += (1.0f - tz) * (1.0f - ty) * tx *
- svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix + 1, iy, iz);
- r += (1.0f - tz) * ty * (1.0f - tx) *
- svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix, iy + 1, iz);
- r += (1.0f - tz) * ty * tx *
- svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix + 1, iy + 1, iz);
-
- r += tz * (1.0f - ty) * (1.0f - tx) *
- svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix, iy, iz + 1);
- r += tz * (1.0f - ty) * tx *
- svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix + 1, iy, iz + 1);
- r += tz * ty * (1.0f - tx) *
- svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix, iy + 1, iz + 1);
- r += tz * ty * tx *
- svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix + 1, iy + 1, iz + 1);
- return r;
- }
- else {
- /* Tricubic interpolation. */
- int ix, iy, iz;
- float tx = svm_image_texture_frac(x - 0.5f, &ix);
- float ty = svm_image_texture_frac(y - 0.5f, &iy);
- float tz = svm_image_texture_frac(z - 0.5f, &iz);
-
- float u[4], v[4], w[4];
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- SET_CUBIC_SPLINE_WEIGHTS(w, tz);
-
- float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
- for (int z = 0; z < 4; z++) {
- for (int y = 0; y < 4; y++) {
- for (int x = 0; x < 4; x++) {
- float weight = u[x] * v[y] * w[z];
- r += weight * svm_image_texture_read_3d(
- kg, id, NANOVDB_ACCESS_POINTER, ix + x - 1, iy + y - 1, iz + z - 1);
- }
- }
- }
- return r;
- }
-#undef NANOVDB_ACCESS_POINTER
-}
-
-#undef SET_CUBIC_SPLINE_WEIGHTS
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
deleted file mode 100644
index fa210e747c0..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_path_init.h"
-
-#define KERNEL_NAME path_init
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
deleted file mode 100644
index 68ee6f1d536..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_queue_enqueue.h"
-
-#define KERNEL_NAME queue_enqueue
-#define LOCALS_TYPE QueueEnqueueLocals
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-#undef LOCALS_TYPE
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
deleted file mode 100644
index 10d09377ba9..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_scene_intersect.h"
-
-#define KERNEL_NAME scene_intersect
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
deleted file mode 100644
index 40eaa561863..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_shader_eval.h"
-
-#define KERNEL_NAME shader_eval
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
deleted file mode 100644
index 8c36100f762..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_shader_setup.h"
-
-#define KERNEL_NAME shader_setup
-#define LOCALS_TYPE unsigned int
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-#undef LOCALS_TYPE
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
deleted file mode 100644
index bcacaa4a054..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_shader_sort.h"
-
-__attribute__((reqd_work_group_size(64, 1, 1)))
-#define KERNEL_NAME shader_sort
-#define LOCALS_TYPE ShaderSortLocals
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-#undef LOCALS_TYPE
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
deleted file mode 100644
index 8de250a375c..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_shadow_blocked_ao.h"
-
-#define KERNEL_NAME shadow_blocked_ao
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
deleted file mode 100644
index 29da77022ed..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_shadow_blocked_dl.h"
-
-#define KERNEL_NAME shadow_blocked_dl
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
deleted file mode 100644
index c3b7b09460a..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h" // PRECOMPILED
-#include "kernel/split/kernel_split_common.h" // PRECOMPILED
-
-#include "kernel/kernels/opencl/kernel_data_init.cl"
-#include "kernel/kernels/opencl/kernel_path_init.cl"
-#include "kernel/kernels/opencl/kernel_state_buffer_size.cl"
-#include "kernel/kernels/opencl/kernel_scene_intersect.cl"
-#include "kernel/kernels/opencl/kernel_queue_enqueue.cl"
-#include "kernel/kernels/opencl/kernel_shader_setup.cl"
-#include "kernel/kernels/opencl/kernel_shader_sort.cl"
-#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl"
-#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
-#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
-#include "kernel/kernels/opencl/kernel_buffer_update.cl"
-#include "kernel/kernels/opencl/kernel_adaptive_stopping.cl"
-#include "kernel/kernels/opencl/kernel_adaptive_filter_x.cl"
-#include "kernel/kernels/opencl/kernel_adaptive_filter_y.cl"
-#include "kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl"
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
deleted file mode 100644
index e123b4cd6ec..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#define KERNEL_NAME_JOIN(a, b) a##_##b
-#define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b)
-
-__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace,
- KERNEL_NAME)(ccl_global char *kg_global,
- ccl_constant KernelData *data,
-
- ccl_global void *split_data_buffer,
- ccl_global char *ray_state,
-
- KERNEL_BUFFER_PARAMS,
-
- ccl_global int *queue_index,
- ccl_global char *use_queues_flag,
- ccl_global unsigned int *work_pools,
- ccl_global float *buffer)
-{
-#ifdef LOCALS_TYPE
- ccl_local LOCALS_TYPE locals;
-#endif
-
- KernelGlobals *kg = (KernelGlobals *)kg_global;
-
- if (ccl_local_id(0) + ccl_local_id(1) == 0) {
- kg->data = data;
-
- kernel_split_params.queue_index = queue_index;
- kernel_split_params.use_queues_flag = use_queues_flag;
- kernel_split_params.work_pools = work_pools;
- kernel_split_params.tile.buffer = buffer;
-
- split_data_init(kg,
- &kernel_split_state,
- ccl_global_size(0) * ccl_global_size(1),
- split_data_buffer,
- ray_state);
- }
-
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
-
- KERNEL_NAME_EVAL(kernel, KERNEL_NAME)
- (kg
-#ifdef LOCALS_TYPE
- ,
- &locals
-#endif
- );
-}
-
-#undef KERNEL_NAME_JOIN
-#undef KERNEL_NAME_EVAL
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
deleted file mode 100644
index c10ecc426c6..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-
-__kernel void kernel_ocl_path_trace_state_buffer_size(
- ccl_global char *kg,
- ccl_constant KernelData *data,
- uint num_threads,
- ccl_global uint64_t *size)
-{
- ((KernelGlobals*)kg)->data = data;
- *size = split_data_buffer_size((KernelGlobals*)kg, num_threads);
-}
-
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
deleted file mode 100644
index 2b3be38df84..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel/kernel_compat_opencl.h"
-#include "kernel/split/kernel_split_common.h"
-#include "kernel/split/kernel_subsurface_scatter.h"
-
-#define KERNEL_NAME subsurface_scatter
-#include "kernel/kernels/opencl/kernel_split_function.h"
-#undef KERNEL_NAME
-
diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
deleted file mode 100644
index 7f609eab474..00000000000
--- a/intern/cycles/kernel/kernels/optix/kernel_optix.cu
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright 2019, NVIDIA Corporation.
- * Copyright 2019, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// clang-format off
-#include "kernel/kernel_compat_optix.h"
-#include "util/util_atomic.h"
-#include "kernel/kernel_types.h"
-#include "kernel/kernel_globals.h"
-#include "../cuda/kernel_cuda_image.h" // Texture lookup uses normal CUDA intrinsics
-
-#include "kernel/kernel_path.h"
-#include "kernel/kernel_bake.h"
-// clang-format on
-
-template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
-{
- return (T *)(((uint64_t)optixGetPayload_1() << 32) | optixGetPayload_0());
-}
-template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
-{
- return (T *)(((uint64_t)optixGetPayload_3() << 32) | optixGetPayload_2());
-}
-
-template<bool always = false> ccl_device_forceinline uint get_object_id()
-{
-#ifdef __OBJECT_MOTION__
- // Always get the the instance ID from the TLAS
- // There might be a motion transform node between TLAS and BLAS which does not have one
- uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
-#else
- uint object = optixGetInstanceId();
-#endif
- // Choose between always returning object ID or only for instances
- if (always || (object & 1) == 0)
- // Can just remove the low bit since instance always contains object ID
- return object >> 1;
- else
- // Set to OBJECT_NONE if this is not an instanced object
- return OBJECT_NONE;
-}
-
-extern "C" __global__ void __raygen__kernel_optix_path_trace()
-{
- KernelGlobals kg; // Allocate stack storage for common data
-
- const uint3 launch_index = optixGetLaunchIndex();
- // Keep threads for same pixel together to improve occupancy of warps
- uint pixel_offset = launch_index.x / __params.tile.num_samples;
- uint sample_offset = launch_index.x % __params.tile.num_samples;
-
- kernel_path_trace(&kg,
- __params.tile.buffer,
- __params.tile.start_sample + sample_offset,
- __params.tile.x + pixel_offset,
- __params.tile.y + launch_index.y,
- __params.tile.offset,
- __params.tile.stride);
-}
-
-#ifdef __BAKING__
-extern "C" __global__ void __raygen__kernel_optix_bake()
-{
- KernelGlobals kg;
- const ShaderParams &p = __params.shader;
- kernel_bake_evaluate(&kg,
- p.input,
- p.output,
- (ShaderEvalType)p.type,
- p.filter,
- p.sx + optixGetLaunchIndex().x,
- p.offset,
- p.sample);
-}
-#endif
-
-extern "C" __global__ void __raygen__kernel_optix_displace()
-{
- KernelGlobals kg;
- const ShaderParams &p = __params.shader;
- kernel_displace_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x);
-}
-
-extern "C" __global__ void __raygen__kernel_optix_background()
-{
- KernelGlobals kg;
- const ShaderParams &p = __params.shader;
- kernel_background_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x);
-}
-
-extern "C" __global__ void __miss__kernel_optix_miss()
-{
- // 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss
- optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
- optixSetPayload_5(PRIMITIVE_NONE);
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_local_hit()
-{
-#ifdef __BVH_LOCAL__
- const uint object = get_object_id<true>();
- if (object != optixGetPayload_4() /* local_object */) {
- // Only intersect with matching object
- return optixIgnoreIntersection();
- }
-
- const uint max_hits = optixGetPayload_5();
- if (max_hits == 0) {
- // Special case for when no hit information is requested, just report that something was hit
- optixSetPayload_5(true);
- return optixTerminateRay();
- }
-
- int hit = 0;
- uint *const lcg_state = get_payload_ptr_0<uint>();
- LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();
-
- if (lcg_state) {
- for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
- if (optixGetRayTmax() == local_isect->hits[i].t) {
- return optixIgnoreIntersection();
- }
- }
-
- hit = local_isect->num_hits++;
-
- if (local_isect->num_hits > max_hits) {
- hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
- if (hit >= max_hits) {
- return optixIgnoreIntersection();
- }
- }
- }
- else {
- if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
- // Record closest intersection only
- // Do not terminate ray here, since there is no guarantee about distance ordering in any-hit
- return optixIgnoreIntersection();
- }
-
- local_isect->num_hits = 1;
- }
-
- Intersection *isect = &local_isect->hits[hit];
- isect->t = optixGetRayTmax();
- isect->prim = optixGetPrimitiveIndex();
- isect->object = get_object_id();
- isect->type = kernel_tex_fetch(__prim_type, isect->prim);
-
- const float2 barycentrics = optixGetTriangleBarycentrics();
- isect->u = 1.0f - barycentrics.y - barycentrics.x;
- isect->v = barycentrics.x;
-
- // Record geometric normal
- const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
- const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0));
- const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1));
- const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
- local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
-
- // Continue tracing (without this the trace call would return after the first hit)
- optixIgnoreIntersection();
-#endif
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
-{
-#ifdef __SHADOW_RECORD_ALL__
- const uint prim = optixGetPrimitiveIndex();
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
- return optixIgnoreIntersection();
- }
-# endif
-
- // Offset into array with num_hits
- Intersection *const isect = get_payload_ptr_0<Intersection>() + optixGetPayload_2();
- isect->t = optixGetRayTmax();
- isect->prim = prim;
- isect->object = get_object_id();
- isect->type = kernel_tex_fetch(__prim_type, prim);
-
- if (optixIsTriangleHit()) {
- const float2 barycentrics = optixGetTriangleBarycentrics();
- isect->u = 1.0f - barycentrics.y - barycentrics.x;
- isect->v = barycentrics.x;
- }
-# ifdef __HAIR__
- else {
- const float u = __uint_as_float(optixGetAttribute_0());
- isect->u = u;
- isect->v = __uint_as_float(optixGetAttribute_1());
-
- // Filter out curve endcaps
- if (u == 0.0f || u == 1.0f) {
- return optixIgnoreIntersection();
- }
- }
-# endif
-
-# ifdef __TRANSPARENT_SHADOWS__
- // Detect if this surface has a shader with transparent shadows
- if (!shader_transparent_shadow(NULL, isect) || optixGetPayload_2() >= optixGetPayload_3()) {
-# endif
- // This is an opaque hit or the hit limit has been reached, abort traversal
- optixSetPayload_5(true);
- return optixTerminateRay();
-# ifdef __TRANSPARENT_SHADOWS__
- }
-
- optixSetPayload_2(optixGetPayload_2() + 1); // num_hits++
-
- // Continue tracing
- optixIgnoreIntersection();
-# endif
-#endif
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
-{
- uint visibility = optixGetPayload_4();
-#ifdef __VISIBILITY_FLAG__
- const uint prim = optixGetPrimitiveIndex();
- if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
- return optixIgnoreIntersection();
- }
-#endif
-
-#ifdef __HAIR__
- if (!optixIsTriangleHit()) {
- // Filter out curve endcaps
- const float u = __uint_as_float(optixGetAttribute_0());
- if (u == 0.0f || u == 1.0f) {
- return optixIgnoreIntersection();
- }
- }
-#endif
-
- // Shadow ray early termination
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- return optixTerminateRay();
- }
-}
-
-extern "C" __global__ void __closesthit__kernel_optix_hit()
-{
- optixSetPayload_0(__float_as_uint(optixGetRayTmax())); // Intersection distance
- optixSetPayload_3(optixGetPrimitiveIndex());
- optixSetPayload_4(get_object_id());
- // Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index
- optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex()));
-
- if (optixIsTriangleHit()) {
- const float2 barycentrics = optixGetTriangleBarycentrics();
- optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x));
- optixSetPayload_2(__float_as_uint(barycentrics.x));
- }
- else {
- optixSetPayload_1(optixGetAttribute_0()); // Same as 'optixGetCurveParameter()'
- optixSetPayload_2(optixGetAttribute_1());
- }
-}
-
-#ifdef __HAIR__
-ccl_device_inline void optix_intersection_curve(const uint prim, const uint type)
-{
- const uint object = get_object_id<true>();
- const uint visibility = optixGetPayload_4();
-
- float3 P = optixGetObjectRayOrigin();
- float3 dir = optixGetObjectRayDirection();
-
- // The direction is not normalized by default, but the curve intersection routine expects that
- float len;
- dir = normalize_len(dir, &len);
-
-# ifdef __OBJECT_MOTION__
- const float time = optixGetRayTime();
-# else
- const float time = 0.0f;
-# endif
-
- Intersection isect;
- isect.t = optixGetRayTmax();
- // Transform maximum distance into object space
- if (isect.t != FLT_MAX)
- isect.t *= len;
-
- if (curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type)) {
- optixReportIntersection(isect.t / len,
- type & PRIMITIVE_ALL,
- __float_as_int(isect.u), // Attribute_0
- __float_as_int(isect.v)); // Attribute_1
- }
-}
-
-extern "C" __global__ void __intersection__curve_ribbon()
-{
- const uint prim = optixGetPrimitiveIndex();
- const uint type = kernel_tex_fetch(__prim_type, prim);
-
- if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
- optix_intersection_curve(prim, type);
- }
-}
-
-extern "C" __global__ void __intersection__curve_all()
-{
- const uint prim = optixGetPrimitiveIndex();
- const uint type = kernel_tex_fetch(__prim_type, prim);
- optix_intersection_curve(prim, type);
-}
-#endif