diff options
Diffstat (limited to 'intern/cycles/kernel/kernels')
61 files changed, 0 insertions, 5364 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter.cpp b/intern/cycles/kernel/kernels/cpu/filter.cpp deleted file mode 100644 index 145a6b6ac40..00000000000 --- a/intern/cycles/kernel/kernels/cpu/filter.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* CPU kernel entry points */ - -/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this - * one with SSE2 intrinsics. - */ -#if defined(__x86_64__) || defined(_M_X64) -# define __KERNEL_SSE2__ -#endif - -/* When building kernel for native machine detect kernel features from the flags - * set by compiler. - */ -#ifdef WITH_KERNEL_NATIVE -# ifdef __SSE2__ -# ifndef __KERNEL_SSE2__ -# define __KERNEL_SSE2__ -# endif -# endif -# ifdef __SSE3__ -# define __KERNEL_SSE3__ -# endif -# ifdef __SSSE3__ -# define __KERNEL_SSSE3__ -# endif -# ifdef __SSE4_1__ -# define __KERNEL_SSE41__ -# endif -# ifdef __AVX__ -# define __KERNEL_SSE__ -# define __KERNEL_AVX__ -# endif -# ifdef __AVX2__ -# define __KERNEL_SSE__ -# define __KERNEL_AVX2__ -# endif -#endif - -/* quiet unused define warnings */ -#if defined(__KERNEL_SSE2__) -/* do nothing */ -#endif - -#include "kernel/filter/filter.h" -#define KERNEL_ARCH cpu -#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp deleted file mode 100644 index 012daba62d8..00000000000 --- a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with AVX - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# define __KERNEL_AVX__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ - -#include "kernel/filter/filter.h" -#define KERNEL_ARCH cpu_avx -#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp deleted file mode 100644 index 16351a7f949..00000000000 --- a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with AVX2 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# define __KERNEL_AVX__ -# define __KERNEL_AVX2__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ - -#include "kernel/filter/filter.h" -#define KERNEL_ARCH cpu_avx2 -#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h deleted file mode 100644 index 1423b182ab8..00000000000 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Templated common declaration part of all CPU kernels. */ - -void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, - TileInfo *tile_info, - int x, - int y, - float *unfilteredA, - float *unfilteredB, - float *sampleV, - float *sampleVV, - float *bufferV, - int *prefilter_rect, - int buffer_pass_stride, - int buffer_denoising_offset); - -void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, - TileInfo *tile_info, - int m_offset, - int v_offset, - int x, - int y, - float *mean, - float *variance, - float scale, - int *prefilter_rect, - int buffer_pass_stride, - int buffer_denoising_offset); - -void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample, - int x, - int y, - int *buffer_params, - float *from, - float *buffer, - int out_offset, - int *prefilter_rect); - -void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, - int y, - ccl_global float *image, - ccl_global float *variance, - ccl_global float *depth, - ccl_global float *output, - int *rect, - int pass_stride); - -void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)( - int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r); - -void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer, - TileInfo *tiles, - int x, - int y, - int storage_ofs, - float *transform, - int *rank, - int *rect, - int pass_stride, - int frame_stride, - bool use_time, - int radius, - float pca_threshold); - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, - int dy, - float *weight_image, - float *variance_image, - float *scale_image, - float *difference_image, - int *rect, - int stride, - int channel_offset, - int frame_offset, - float a, - float k_2); - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)( - float *difference_image, float *out_image, int *rect, int stride, int f); - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)( - float *difference_image, float *out_image, int *rect, int stride, int f); - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, - int dy, - float *difference_image, - float *image, - float *temp_image, - float *out_image, - float *accum_image, - int *rect, - int channel_offset, - int stride, - int f); - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, - int dy, - int t, - float *difference_image, - float *buffer, - float *transform, - int *rank, - float *XtWX, - float3 *XtWY, - int *rect, - int *filter_window, - int stride, - int f, - int pass_stride, - int frame_offset, - bool use_time); - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image, - float *accum_image, - int *rect, - int stride); - -void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x, - int y, - int storage_ofs, - float *buffer, - int *rank, - float *XtWX, - float3 *XtWY, - int *buffer_params, - int sample); - -#undef KERNEL_ARCH diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h deleted file mode 100644 index 3d4cb87e104..00000000000 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Templated common implementation part of all CPU kernels. - * - * The idea is that particular .cpp files sets needed optimization flags and - * simply includes this file without worry of copying actual implementation over. - */ - -#include "kernel/kernel_compat_cpu.h" - -#include "kernel/filter/filter_kernel.h" - -#ifdef KERNEL_STUB -# define STUB_ASSERT(arch, name) \ - assert(!(#name " kernel stub for architecture " #arch " was called!")) -#endif - -CCL_NAMESPACE_BEGIN - -/* Denoise filter */ - -void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, - TileInfo *tile_info, - int x, - int y, - float *unfilteredA, - float *unfilteredB, - float *sampleVariance, - float *sampleVarianceV, - float *bufferVariance, - int *prefilter_rect, - int buffer_pass_stride, - int buffer_denoising_offset) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow); -#else - kernel_filter_divide_shadow(sample, - tile_info, - x, - y, - unfilteredA, - unfilteredB, - sampleVariance, - sampleVarianceV, - bufferVariance, - load_int4(prefilter_rect), - buffer_pass_stride, - buffer_denoising_offset); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, - TileInfo *tile_info, - int m_offset, - int v_offset, - int x, - int y, - float *mean, - float *variance, - float scale, - int *prefilter_rect, - int buffer_pass_stride, - int buffer_denoising_offset) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_get_feature); -#else - kernel_filter_get_feature(sample, - tile_info, - m_offset, - v_offset, - x, - y, - mean, - variance, - scale, - load_int4(prefilter_rect), - buffer_pass_stride, - buffer_denoising_offset); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample, - int x, - int y, - int *buffer_params, - float *from, - float *buffer, - int out_offset, - int *prefilter_rect) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_write_feature); -#else - kernel_filter_write_feature( - sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect)); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, - int y, - ccl_global float *image, - ccl_global float *variance, - ccl_global float *depth, - ccl_global float *output, - int *rect, - int pass_stride) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers); -#else - kernel_filter_detect_outliers( - x, y, image, variance, depth, output, load_int4(rect), pass_stride); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)( - int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_combine_halves); -#else - kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer, - TileInfo *tile_info, - int x, - int y, - int storage_ofs, - float *transform, - int *rank, - int *prefilter_rect, - int pass_stride, - int frame_stride, - bool use_time, - int radius, - float pca_threshold) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_construct_transform); -#else - rank += storage_ofs; - transform += storage_ofs * TRANSFORM_SIZE; - kernel_filter_construct_transform(buffer, - tile_info, - x, - y, - load_int4(prefilter_rect), - pass_stride, - frame_stride, - use_time, - transform, - rank, - radius, - pca_threshold); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, - int dy, - float *weight_image, - float *variance_image, - float *scale_image, - float *difference_image, - int *rect, - int stride, - int channel_offset, - int frame_offset, - float a, - float k_2) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference); -#else - kernel_filter_nlm_calc_difference(dx, - dy, - weight_image, - variance_image, - scale_image, - difference_image, - load_int4(rect), - stride, - channel_offset, - frame_offset, - a, - k_2); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)( - float *difference_image, float *out_image, int *rect, int stride, int f) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur); -#else - kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)( - float *difference_image, float *out_image, int *rect, int stride, int f) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight); -#else - kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, - int dy, - float *difference_image, - float *image, - float *temp_image, - float *out_image, - float *accum_image, - int *rect, - int channel_offset, - int stride, - int f) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output); -#else - kernel_filter_nlm_update_output(dx, - dy, - difference_image, - image, - temp_image, - out_image, - accum_image, - load_int4(rect), - channel_offset, - stride, - f); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, - int dy, - int t, - float *difference_image, - float *buffer, - float *transform, - int *rank, - float *XtWX, - float3 *XtWY, - int *rect, - int *filter_window, - int stride, - int f, - int pass_stride, - int frame_offset, - bool use_time) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); -#else - kernel_filter_nlm_construct_gramian(dx, - dy, - t, - difference_image, - buffer, - transform, - rank, - XtWX, - XtWY, - load_int4(rect), - load_int4(filter_window), - stride, - f, - pass_stride, - frame_offset, - use_time); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image, - float *accum_image, - int *rect, - int stride) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize); -#else - kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride); -#endif -} - -void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x, - int y, - int storage_ofs, - float *buffer, - int *rank, - float *XtWX, - float3 *XtWY, - int *buffer_params, - int sample) -{ -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_finalize); -#else - XtWX += storage_ofs * XTWX_SIZE; - XtWY += storage_ofs * XTWY_SIZE; - rank += storage_ofs; - kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample); -#endif -} - -#undef KERNEL_STUB -#undef STUB_ASSERT -#undef KERNEL_ARCH - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp deleted file mode 100644 index 75833d83648..00000000000 --- a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with SSE2 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ - -#include "kernel/filter/filter.h" -#define KERNEL_ARCH cpu_sse2 -#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp deleted file mode 100644 index c998cd54d3a..00000000000 --- a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ - -#include "kernel/filter/filter.h" -#define KERNEL_ARCH cpu_sse3 -#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp deleted file mode 100644 index fc4ef1fca5b..00000000000 --- a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ - -#include "kernel/filter/filter.h" -#define KERNEL_ARCH cpu_sse41 -#include "kernel/kernels/cpu/filter_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp deleted file mode 100644 index 8040bfb7b33..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* CPU kernel entry points */ - -/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this - * one with SSE2 intrinsics. - */ -#if defined(__x86_64__) || defined(_M_X64) -# define __KERNEL_SSE2__ -#endif - -/* When building kernel for native machine detect kernel features from the flags - * set by compiler. - */ -#ifdef WITH_KERNEL_NATIVE -# ifdef __SSE2__ -# ifndef __KERNEL_SSE2__ -# define __KERNEL_SSE2__ -# endif -# endif -# ifdef __SSE3__ -# define __KERNEL_SSE3__ -# endif -# ifdef __SSSE3__ -# define __KERNEL_SSSE3__ -# endif -# ifdef __SSE4_1__ -# define __KERNEL_SSE41__ -# endif -# ifdef __AVX__ -# define __KERNEL_SSE__ -# define __KERNEL_AVX__ -# endif -# ifdef __AVX2__ -# define __KERNEL_SSE__ -# define __KERNEL_AVX2__ -# endif -#endif - -/* quiet unused define warnings */ -#if defined(__KERNEL_SSE2__) -/* do nothing */ -#endif - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu -#include "kernel/kernels/cpu/kernel_cpu_impl.h" - -CCL_NAMESPACE_BEGIN - -/* Memory Copy */ - -void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t) -{ - if (strcmp(name, "__data") == 0) { - kg->__data = *(KernelData *)host; - } - else { - assert(0); - } -} - -void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size) -{ - if (0) { - } - -#define KERNEL_TEX(type, tname) \ - else if (strcmp(name, #tname) == 0) \ - { \ - kg->tname.data = (type *)mem; \ - kg->tname.width = size; \ - } -#include "kernel/kernel_textures.h" - else { - assert(0); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp deleted file mode 100644 index 5f6b6800363..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with AVX - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# define __KERNEL_AVX__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu_avx -#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp deleted file mode 100644 index 97e8fc25140..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2011-2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with AVX2 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# define __KERNEL_AVX__ -# define __KERNEL_AVX2__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu_avx2 -#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h deleted file mode 100644 index ea3103f12c3..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Templated common declaration part of all CPU kernels. */ - -void KERNEL_FUNCTION_FULL_NAME(path_trace)( - KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride); - -void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg, - uchar4 *rgba, - float *buffer, - float sample_scale, - int x, - int y, - int offset, - int stride); - -void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, - uchar4 *rgba, - float *buffer, - float sample_scale, - int x, - int y, - int offset, - int stride); - -void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, - uint4 *input, - float4 *output, - int type, - int filter, - int i, - int offset, - int sample); - -void KERNEL_FUNCTION_FULL_NAME(bake)( - KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride); - -/* Split kernels */ - -void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg, - ccl_constant KernelData *data, - ccl_global void *split_data_buffer, - int num_elements, - ccl_global char *ray_state, - int start_sample, - int end_sample, - int sx, - int sy, - int sw, - int sh, - int offset, - int stride, - ccl_global int *Queue_index, - int queuesize, - ccl_global char *use_queues_flag, - ccl_global unsigned int *work_pool_wgs, - unsigned int num_samples, - ccl_global float *buffer); - -#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * data); - -DECLARE_SPLIT_KERNEL_FUNCTION(path_init) -DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect) -DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission) -DECLARE_SPLIT_KERNEL_FUNCTION(do_volume) -DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue) -DECLARE_SPLIT_KERNEL_FUNCTION(indirect_background) -DECLARE_SPLIT_KERNEL_FUNCTION(shader_setup) -DECLARE_SPLIT_KERNEL_FUNCTION(shader_sort) -DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval) -DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) -DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) -DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting) -DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) -DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) -DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive) -DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) -DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) -DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update) -DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_stopping) -DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x) -DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y) -DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples) - -#undef KERNEL_ARCH diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h deleted file mode 100644 index 59b96c86c50..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ /dev/null @@ -1,660 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __KERNEL_CPU_IMAGE_H__ -#define __KERNEL_CPU_IMAGE_H__ - -#ifdef WITH_NANOVDB -# define NANOVDB_USE_INTRINSICS -# include <nanovdb/NanoVDB.h> -# include <nanovdb/util/SampleFromVoxels.h> -#endif - -CCL_NAMESPACE_BEGIN - -/* Make template functions private so symbols don't conflict between kernels with different - * instruction sets. */ -namespace { - -#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ - { \ - u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \ - u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \ - u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \ - u[3] = (1.0f / 6.0f) * t * t * t; \ - } \ - (void)0 - -ccl_device_inline float frac(float x, int *ix) -{ - int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0); - *ix = i; - return x - (float)i; -} - -template<typename T> struct TextureInterpolator { - - static ccl_always_inline float4 read(float4 r) - { - return r; - } - - static ccl_always_inline float4 read(uchar4 r) - { - float f = 1.0f / 255.0f; - return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); - } - - static ccl_always_inline float4 read(uchar r) - { - float f = r * (1.0f / 255.0f); - return make_float4(f, f, f, 1.0f); - } - - static ccl_always_inline float4 read(float r) - { - /* TODO(dingto): Optimize this, so interpolation - * happens on float instead of float4 */ - return make_float4(r, r, r, 1.0f); - } - - static ccl_always_inline float4 read(half4 r) - { - return half4_to_float4(r); - } - - static ccl_always_inline float4 read(half r) - { - float f = half_to_float(r); - return make_float4(f, f, f, 1.0f); - } - - static ccl_always_inline float4 read(uint16_t r) - { - float f = r * (1.0f / 65535.0f); - return make_float4(f, f, f, 1.0f); - } - - static ccl_always_inline float4 read(ushort4 r) - { - float f = 1.0f / 65535.0f; - return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); - } - - static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height) - { - if (x < 0 || y < 0 || x >= width || y >= height) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return read(data[y * width + x]); - } - - static ccl_always_inline int wrap_periodic(int x, int width) - { - x %= width; - if (x < 0) - x += width; - return x; - } - - static ccl_always_inline int wrap_clamp(int x, int width) - { - return clamp(x, 0, width - 1); - } - - /* ******** 2D interpolation ******** */ - - static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y) - { - const T *data = (const T *)info.data; - const int width = info.width; - const int height = info.height; - int ix, iy; - frac(x * (float)width, &ix); - frac(y * (float)height, &iy); - switch (info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - break; - case EXTENSION_CLIP: - if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return read(data[ix + iy * width]); - } - - static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y) - { - const T *data = (const T *)info.data; - const int width = info.width; - const int height = info.height; - int ix, iy, nix, niy; - const float tx = frac(x * (float)width - 0.5f, &ix); - const float ty = frac(y * (float)height - 0.5f, &iy); - switch (info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - nix = wrap_periodic(ix + 1, width); - niy = wrap_periodic(iy + 1, height); - break; - case EXTENSION_CLIP: - nix = ix + 1; - niy = iy + 1; - break; - case EXTENSION_EXTEND: - nix = wrap_clamp(ix + 1, width); - niy = wrap_clamp(iy + 1, height); - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) + - (1.0f - ty) * tx * read(data, nix, iy, width, height) + - ty * (1.0f - tx) * read(data, ix, niy, width, height) + - ty * tx * read(data, nix, niy, width, height); - } - - static ccl_always_inline float4 interp_cubic(const TextureInfo &info, float x, float y) - { - const T *data = (const T *)info.data; - const int width = info.width; - const int height = info.height; - int ix, iy, nix, niy; - const float tx = frac(x * (float)width - 0.5f, &ix); - const float ty = frac(y * (float)height - 0.5f, &iy); - int pix, piy, nnix, nniy; - switch (info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - pix = wrap_periodic(ix - 1, width); - piy = wrap_periodic(iy - 1, height); - nix = wrap_periodic(ix + 1, width); - niy = wrap_periodic(iy + 1, height); - nnix = wrap_periodic(ix + 2, width); - nniy = wrap_periodic(iy + 2, height); - break; - case EXTENSION_CLIP: - pix = ix - 1; - piy = iy - 1; - nix = ix + 1; - niy = iy + 1; - nnix = ix + 2; - nniy = iy + 2; - break; - case EXTENSION_EXTEND: - pix = wrap_clamp(ix - 1, width); - piy = wrap_clamp(iy - 1, height); - nix = wrap_clamp(ix + 1, width); - niy = wrap_clamp(iy + 1, height); - nnix = wrap_clamp(ix + 2, width); - nniy = wrap_clamp(iy + 2, height); - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {piy, iy, niy, nniy}; - float u[4], v[4]; - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. - */ -#define DATA(x, y) (read(data, xc[x], yc[y], width, height)) -#define TERM(col) \ - (v[col] * \ - (u[0] * DATA(0, col) + u[1] * DATA(1, col) + u[2] * DATA(2, col) + u[3] * DATA(3, col))) - - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - - /* Actual interpolation. */ - return TERM(0) + TERM(1) + TERM(2) + TERM(3); -#undef TERM -#undef DATA - } - - static ccl_always_inline float4 interp(const TextureInfo &info, float x, float y) - { - if (UNLIKELY(!info.data)) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - switch (info.interpolation) { - case INTERPOLATION_CLOSEST: - return interp_closest(info, x, y); - case INTERPOLATION_LINEAR: - return interp_linear(info, x, y); - default: - return interp_cubic(info, x, y); - } - } - - /* ******** 3D interpolation ******** */ - - static ccl_always_inline float4 interp_3d_closest(const TextureInfo &info, - float x, - float y, - float z) - { - int width = info.width; - int height = info.height; - int depth = info.depth; - int ix, iy, iz; - - frac(x * (float)width, &ix); - frac(y * (float)height, &iy); - frac(z * (float)depth, &iz); - - switch (info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - break; - case EXTENSION_CLIP: - if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const T *data = (const T *)info.data; - return read(data[ix + iy * width + iz * width * height]); - } - - static ccl_always_inline float4 interp_3d_linear(const TextureInfo &info, - float x, - float y, - float z) - { - int width = info.width; - int height = info.height; - int depth = info.depth; - int ix, iy, iz; - int nix, niy, niz; - - float tx = frac(x * (float)width - 0.5f, &ix); - float ty = frac(y * (float)height - 0.5f, &iy); - float tz = frac(z * (float)depth - 0.5f, &iz); - - switch (info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - nix = wrap_periodic(ix + 1, width); - niy = wrap_periodic(iy + 1, height); - niz = wrap_periodic(iz + 1, depth); - break; - case EXTENSION_CLIP: - if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - nix = wrap_clamp(ix + 1, width); - niy = wrap_clamp(iy + 1, height); - niz = wrap_clamp(iz + 1, depth); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const T *data = (const T *)info.data; - float4 r; - - r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * - read(data[ix + iy * width + iz * width * height]); - r += (1.0f - tz) * (1.0f - ty) * tx * read(data[nix + iy * width + iz * width * height]); - r += (1.0f - tz) * ty * (1.0f - tx) * read(data[ix + niy * width + iz * width * height]); - r += (1.0f - tz) * ty * tx * read(data[nix + niy * width + iz * width * height]); - - r += tz * (1.0f - ty) * (1.0f - tx) * read(data[ix + iy * width + niz * width * height]); - r += tz * (1.0f - ty) * tx * read(data[nix + iy * width + niz * width * height]); - r += tz * ty * (1.0f - tx) * read(data[ix + niy * width + niz * width * height]); - r += tz * ty * tx * read(data[nix + niy * width + niz * width * height]); - - return r; - } - - /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are - * causing stack overflow issue in this function unless it is inlined. - * - * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization - * enabled. - */ -#if defined(__GNUC__) || defined(__clang__) - static ccl_always_inline -#else - static ccl_never_inline -#endif - float4 - interp_3d_cubic(const TextureInfo &info, float x, float y, float z) - { - int width = info.width; - int height = info.height; - int depth = info.depth; - int ix, iy, iz; - int nix, niy, niz; - /* Tricubic b-spline interpolation. */ - const float tx = frac(x * (float)width - 0.5f, &ix); - const float ty = frac(y * (float)height - 0.5f, &iy); - const float tz = frac(z * (float)depth - 0.5f, &iz); - int pix, piy, piz, nnix, nniy, nniz; - - switch (info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - pix = wrap_periodic(ix - 1, width); - piy = wrap_periodic(iy - 1, height); - piz = wrap_periodic(iz - 1, depth); - - nix = wrap_periodic(ix + 1, width); - niy = wrap_periodic(iy + 1, height); - niz = wrap_periodic(iz + 1, depth); - - nnix = wrap_periodic(ix + 2, width); - nniy = wrap_periodic(iy + 2, height); - nniz = wrap_periodic(iz + 2, depth); - break; - case EXTENSION_CLIP: - if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - pix = wrap_clamp(ix - 1, width); - piy = wrap_clamp(iy - 1, height); - piz = wrap_clamp(iz - 1, depth); - - nix = wrap_clamp(ix + 1, width); - niy = wrap_clamp(iy + 1, height); - niz = wrap_clamp(iz + 1, depth); - - nnix = wrap_clamp(ix + 2, width); - nniy = wrap_clamp(iy + 2, height); - nniz = wrap_clamp(iz + 2, depth); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {width * piy, width * iy, width * niy, width * nniy}; - const int zc[4] = { - width * height * piz, width * height * iz, width * height * niz, width * height * nniz}; - float u[4], v[4], w[4]; - - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. - */ -#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]])) -#define COL_TERM(col, row) \ - (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \ - u[3] * DATA(3, col, row))) -#define ROW_TERM(row) \ - (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row))) - - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); - - /* Actual interpolation. */ - const T *data = (const T *)info.data; - return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); - -#undef COL_TERM -#undef ROW_TERM -#undef DATA - } - - static ccl_always_inline float4 - interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp) - { - if (UNLIKELY(!info.data)) - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) { - case INTERPOLATION_CLOSEST: - return interp_3d_closest(info, x, y, z); - case INTERPOLATION_LINEAR: - return interp_3d_linear(info, x, y, z); - default: - return interp_3d_cubic(info, x, y, z); - } - } -}; - -#ifdef WITH_NANOVDB -template<typename T> struct NanoVDBInterpolator { - - typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType; - - static ccl_always_inline float4 read(float r) - { - return make_float4(r, r, r, 1.0f); - } - - static ccl_always_inline float4 read(nanovdb::Vec3f r) - { - return make_float4(r[0], r[1], r[2], 1.0f); - } - - static ccl_always_inline float4 interp_3d_closest(const AccessorType &acc, - float x, - float y, - float z) - { - const nanovdb::Vec3f xyz(x, y, z); - return read(nanovdb::SampleFromVoxels<AccessorType, 0, false>(acc)(xyz)); - } - - static ccl_always_inline float4 interp_3d_linear(const AccessorType &acc, - float x, - float y, - float z) - { - const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f); - return read(nanovdb::SampleFromVoxels<AccessorType, 1, false>(acc)(xyz)); - } - -# if defined(__GNUC__) || defined(__clang__) - static ccl_always_inline -# else - static ccl_never_inline -# endif - float4 - interp_3d_cubic(const AccessorType &acc, float x, float y, float z) - { - int ix, iy, iz; - int nix, niy, niz; - int pix, piy, piz; - int nnix, nniy, nniz; - /* Tricubic b-spline interpolation. */ - const float tx = frac(x - 0.5f, &ix); - const float ty = frac(y - 0.5f, &iy); - const float tz = frac(z - 0.5f, &iz); - pix = ix - 1; - piy = iy - 1; - piz = iz - 1; - nix = ix + 1; - niy = iy + 1; - niz = iz + 1; - nnix = ix + 2; - nniy = iy + 2; - nniz = iz + 2; - - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {piy, iy, niy, nniy}; - const int zc[4] = {piz, iz, niz, nniz}; - float u[4], v[4], w[4]; - - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. - */ -# define DATA(x, y, z) (read(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z])))) -# define COL_TERM(col, row) \ - (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \ - u[3] * DATA(3, col, row))) -# define ROW_TERM(row) \ - (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row))) - - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); - - /* Actual interpolation. */ - return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); - -# undef COL_TERM -# undef ROW_TERM -# undef DATA - } - - static ccl_always_inline float4 - interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp) - { - using namespace nanovdb; - - NanoGrid<T> *const grid = (NanoGrid<T> *)info.data; - AccessorType acc = grid->getAccessor(); - - switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) { - case INTERPOLATION_CLOSEST: - return interp_3d_closest(acc, x, y, z); - case INTERPOLATION_LINEAR: - return interp_3d_linear(acc, x, y, z); - default: - return interp_3d_cubic(acc, x, y, z); - } - } -}; -#endif - -#undef SET_CUBIC_SPLINE_WEIGHTS - -ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) -{ - const TextureInfo &info = kernel_tex_fetch(__texture_info, id); - - switch (info.data_type) { - case IMAGE_DATA_TYPE_HALF: - return TextureInterpolator<half>::interp(info, x, y); - case IMAGE_DATA_TYPE_BYTE: - return TextureInterpolator<uchar>::interp(info, x, y); - case IMAGE_DATA_TYPE_USHORT: - return TextureInterpolator<uint16_t>::interp(info, x, y); - case IMAGE_DATA_TYPE_FLOAT: - return TextureInterpolator<float>::interp(info, x, y); - case IMAGE_DATA_TYPE_HALF4: - return TextureInterpolator<half4>::interp(info, x, y); - case IMAGE_DATA_TYPE_BYTE4: - return TextureInterpolator<uchar4>::interp(info, x, y); - case IMAGE_DATA_TYPE_USHORT4: - return TextureInterpolator<ushort4>::interp(info, x, y); - case IMAGE_DATA_TYPE_FLOAT4: - return TextureInterpolator<float4>::interp(info, x, y); - default: - assert(0); - return make_float4( - TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); - } -} - -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, - int id, - float3 P, - InterpolationType interp) -{ - const TextureInfo &info = kernel_tex_fetch(__texture_info, id); - - if (info.use_transform_3d) { - P = transform_point(&info.transform_3d, P); - } - - switch (info.data_type) { - case IMAGE_DATA_TYPE_HALF: - return TextureInterpolator<half>::interp_3d(info, P.x, P.y, P.z, interp); - case IMAGE_DATA_TYPE_BYTE: - return TextureInterpolator<uchar>::interp_3d(info, P.x, P.y, P.z, interp); - case IMAGE_DATA_TYPE_USHORT: - return TextureInterpolator<uint16_t>::interp_3d(info, P.x, P.y, P.z, interp); - case IMAGE_DATA_TYPE_FLOAT: - return TextureInterpolator<float>::interp_3d(info, P.x, P.y, P.z, interp); - case IMAGE_DATA_TYPE_HALF4: - return TextureInterpolator<half4>::interp_3d(info, P.x, P.y, P.z, interp); - case IMAGE_DATA_TYPE_BYTE4: - return TextureInterpolator<uchar4>::interp_3d(info, P.x, P.y, P.z, interp); - case IMAGE_DATA_TYPE_USHORT4: - return TextureInterpolator<ushort4>::interp_3d(info, P.x, P.y, P.z, interp); - case IMAGE_DATA_TYPE_FLOAT4: - return TextureInterpolator<float4>::interp_3d(info, P.x, P.y, P.z, interp); -#ifdef WITH_NANOVDB - case IMAGE_DATA_TYPE_NANOVDB_FLOAT: - return NanoVDBInterpolator<float>::interp_3d(info, P.x, P.y, P.z, interp); - case IMAGE_DATA_TYPE_NANOVDB_FLOAT3: - return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp); -#endif - default: - assert(0); - return make_float4( - TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); - } -} - -} /* Namespace. */ - -CCL_NAMESPACE_END - -#endif // __KERNEL_CPU_IMAGE_H__ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h deleted file mode 100644 index 51d6c23f72f..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Templated common implementation part of all CPU kernels. - * - * The idea is that particular .cpp files sets needed optimization flags and - * simply includes this file without worry of copying actual implementation over. - */ - -// clang-format off -#include "kernel/kernel_compat_cpu.h" - -#ifndef KERNEL_STUB -# ifndef __SPLIT_KERNEL__ -# include "kernel/kernel_math.h" -# include "kernel/kernel_types.h" - -# include "kernel/split/kernel_split_data.h" -# include "kernel/kernel_globals.h" - -# include "kernel/kernel_color.h" -# include "kernel/kernels/cpu/kernel_cpu_image.h" -# include "kernel/kernel_film.h" -# include "kernel/kernel_path.h" -# include "kernel/kernel_path_branched.h" -# include "kernel/kernel_bake.h" -# else -# include "kernel/split/kernel_split_common.h" - -# include "kernel/split/kernel_data_init.h" -# include "kernel/split/kernel_path_init.h" -# include "kernel/split/kernel_scene_intersect.h" -# include "kernel/split/kernel_lamp_emission.h" -# include "kernel/split/kernel_do_volume.h" -# include "kernel/split/kernel_queue_enqueue.h" -# include "kernel/split/kernel_indirect_background.h" -# include "kernel/split/kernel_shader_setup.h" -# include "kernel/split/kernel_shader_sort.h" -# include "kernel/split/kernel_shader_eval.h" -# include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" -# include "kernel/split/kernel_subsurface_scatter.h" -# include "kernel/split/kernel_direct_lighting.h" -# include "kernel/split/kernel_shadow_blocked_ao.h" -# include "kernel/split/kernel_shadow_blocked_dl.h" -# include "kernel/split/kernel_enqueue_inactive.h" -# include "kernel/split/kernel_next_iteration_setup.h" -# include "kernel/split/kernel_indirect_subsurface.h" -# include "kernel/split/kernel_buffer_update.h" -# include "kernel/split/kernel_adaptive_stopping.h" -# include "kernel/split/kernel_adaptive_filter_x.h" -# include "kernel/split/kernel_adaptive_filter_y.h" -# include "kernel/split/kernel_adaptive_adjust_samples.h" -# endif /* __SPLIT_KERNEL__ */ -#else -# define STUB_ASSERT(arch, name) \ - assert(!(#name " kernel stub for architecture " #arch " was called!")) - -# ifdef __SPLIT_KERNEL__ -# include "kernel/split/kernel_data_init.h" -# endif /* __SPLIT_KERNEL__ */ -#endif /* KERNEL_STUB */ -// clang-format on - -CCL_NAMESPACE_BEGIN - -#ifndef __SPLIT_KERNEL__ - -/* Path Tracing */ - -void KERNEL_FUNCTION_FULL_NAME(path_trace)( - KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride) -{ -# ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, path_trace); -# else -# ifdef __BRANCHED_PATH__ - if (kernel_data.integrator.branched) { - kernel_branched_path_trace(kg, buffer, sample, x, y, offset, stride); - } - else -# endif - { - kernel_path_trace(kg, buffer, sample, x, y, offset, stride); - } -# endif /* KERNEL_STUB */ -} - -/* Film */ - -void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg, - uchar4 *rgba, - float *buffer, - float sample_scale, - int x, - int y, - int offset, - int stride) -{ -# ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, convert_to_byte); -# else - kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); -# endif /* KERNEL_STUB */ -} - -void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, - uchar4 *rgba, - float *buffer, - float sample_scale, - int x, - int y, - int offset, - int stride) -{ -# ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, convert_to_half_float); -# else - kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); -# endif /* KERNEL_STUB */ -} - -/* Bake */ - -void KERNEL_FUNCTION_FULL_NAME(bake)( - KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride) -{ -# ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, bake); -# else -# ifdef __BAKING__ - kernel_bake_evaluate(kg, buffer, sample, x, y, offset, stride); -# endif -# endif /* KERNEL_STUB */ -} - -/* Shader Evaluate */ - -void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, - uint4 *input, - float4 *output, - int type, - int filter, - int i, - int offset, - int sample) -{ -# ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, shader); -# else - if (type == SHADER_EVAL_DISPLACE) { - kernel_displace_evaluate(kg, input, output, i); - } - else { - kernel_background_evaluate(kg, input, output, i); - } -# endif /* KERNEL_STUB */ -} - -#else /* __SPLIT_KERNEL__ */ - -/* Split Kernel Path Tracing */ - -# ifdef KERNEL_STUB -# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ - { \ - STUB_ASSERT(KERNEL_ARCH, name); \ - } - -# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ - { \ - STUB_ASSERT(KERNEL_ARCH, name); \ - } -# else -# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ - { \ - kernel_##name(kg); \ - } - -# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ - { \ - ccl_local type locals; \ - kernel_##name(kg, &locals); \ - } -# endif /* KERNEL_STUB */ - -DEFINE_SPLIT_KERNEL_FUNCTION(path_init) -DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) -DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) -DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) -DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals) -DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, - BackgroundAOLocals) -DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) -DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) -DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) -DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) -DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping) -DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x) -DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y) -DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples) -#endif /* __SPLIT_KERNEL__ */ - -#undef KERNEL_STUB -#undef STUB_ASSERT -#undef KERNEL_ARCH - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp deleted file mode 100644 index 989f5e5aaa8..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* CPU kernel entry points */ - -/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this - * one with SSE2 intrinsics. - */ -#if defined(__x86_64__) || defined(_M_X64) -# define __KERNEL_SSE2__ -#endif - -#define __SPLIT_KERNEL__ - -/* When building kernel for native machine detect kernel features from the flags - * set by compiler. - */ -#ifdef WITH_KERNEL_NATIVE -# ifdef __SSE2__ -# ifndef __KERNEL_SSE2__ -# define __KERNEL_SSE2__ -# endif -# endif -# ifdef __SSE3__ -# define __KERNEL_SSE3__ -# endif -# ifdef __SSSE3__ -# define __KERNEL_SSSE3__ -# endif -# ifdef __SSE4_1__ -# define __KERNEL_SSE41__ -# endif -# ifdef __AVX__ -# define __KERNEL_AVX__ -# endif -# ifdef __AVX2__ -# define __KERNEL_SSE__ -# define __KERNEL_AVX2__ -# endif -#endif - -/* quiet unused define warnings */ -#if defined(__KERNEL_SSE2__) -/* do nothing */ -#endif - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu -#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp deleted file mode 100644 index 40e485d27c0..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with AVX - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#define __SPLIT_KERNEL__ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# define __KERNEL_AVX__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu_avx -#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp deleted file mode 100644 index 8c44238470e..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2011-2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with AVX2 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#define __SPLIT_KERNEL__ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# define __KERNEL_AVX__ -# define __KERNEL_AVX2__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu_avx2 -#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp deleted file mode 100644 index 7a3f218d5fc..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with SSE2 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#define __SPLIT_KERNEL__ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu_sse2 -#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp deleted file mode 100644 index 1cab59e0ea0..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#define __SPLIT_KERNEL__ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu_sse3 -#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp deleted file mode 100644 index 637126d9d4c..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#define __SPLIT_KERNEL__ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu_sse41 -#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp deleted file mode 100644 index 26d7fd4de48..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with SSE2 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu_sse2 -#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp deleted file mode 100644 index 3f259aa4480..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu_sse3 -#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp deleted file mode 100644 index 68bae8c07c6..00000000000 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#include "util/util_optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug T36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE41__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ - -#include "kernel/kernel.h" -#define KERNEL_ARCH cpu_sse41 -#include "kernel/kernels/cpu/kernel_cpu_impl.h" diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu deleted file mode 100644 index 6c9642d1f03..00000000000 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* CUDA kernel entry points */ - -#ifdef __CUDA_ARCH__ - -#include "kernel_config.h" - -#include "kernel/kernel_compat_cuda.h" - -#include "kernel/filter/filter_kernel.h" - -/* kernels */ - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_copy_input(float *buffer, - CCL_FILTER_TILE_INFO, - int4 prefilter_rect, - int buffer_pass_stride) -{ - int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x; - int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y; - if(x < prefilter_rect.z && y < prefilter_rect.w) { - int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2); - int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2); - int itile = ytile * 3 + xtile; - float *const in = ((float *)ccl_get_tile_buffer(itile)) + - (tile_info->offsets[itile] + y * tile_info->strides[itile] + x) * buffer_pass_stride; - buffer += ((y - prefilter_rect.y) * (prefilter_rect.z - prefilter_rect.x) + (x - prefilter_rect.x)) * buffer_pass_stride; - for (int i = 0; i < buffer_pass_stride; ++i) - buffer[i] = in[i]; - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_convert_to_rgb(float *rgb, float *buf, int sw, int sh, int stride, int pass_stride, int3 pass_offset, int num_inputs, int num_samples) -{ - int x = blockDim.x*blockIdx.x + threadIdx.x; - int y = blockDim.y*blockIdx.y + threadIdx.y; - if(x < sw && y < sh) { - if (num_inputs > 0) { - float *in = buf + x * pass_stride + (y * stride + pass_offset.x) / sizeof(float); - float *out = rgb + (x + y * sw) * 3; - out[0] = clamp(in[0] / num_samples, 0.0f, 10000.0f); - out[1] = clamp(in[1] / num_samples, 0.0f, 10000.0f); - out[2] = clamp(in[2] / num_samples, 0.0f, 10000.0f); - } - if (num_inputs > 1) { - float *in = buf + x * pass_stride + (y * stride + pass_offset.y) / sizeof(float); - float *out = rgb + (x + y * sw) * 3 + (sw * sh) * 3; - out[0] = in[0] / num_samples; - out[1] = in[1] / num_samples; - out[2] = in[2] / num_samples; - } - if (num_inputs > 2) { - float *in = buf + x * pass_stride + (y * stride + pass_offset.z) / sizeof(float); - float *out = rgb + (x + y * sw) * 3 + (sw * sh * 2) * 3; - out[0] = in[0] / num_samples; - out[1] = in[1] / num_samples; - out[2] = in[2] / num_samples; - } - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_convert_from_rgb(float *rgb, float *buf, int ix, int iy, int iw, int ih, int sx, int sy, int sw, int sh, int offset, int stride, int pass_stride, int num_samples) -{ - int x = blockDim.x*blockIdx.x + threadIdx.x; - int y = blockDim.y*blockIdx.y + threadIdx.y; - if(x < sw && y < sh) { - float *in = rgb + ((ix + x) + (iy + y) * iw) * 3; - float *out = buf + (offset + (sx + x) + (sy + y) * stride) * pass_stride; - out[0] = in[0] * num_samples; - out[1] = in[1] * num_samples; - out[2] = in[2] * num_samples; - } -} - - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_divide_shadow(int sample, - CCL_FILTER_TILE_INFO, - float *unfilteredA, - float *unfilteredB, - float *sampleVariance, - float *sampleVarianceV, - float *bufferVariance, - int4 prefilter_rect, - int buffer_pass_stride, - int buffer_denoising_offset) -{ - int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x; - int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y; - if(x < prefilter_rect.z && y < prefilter_rect.w) { - kernel_filter_divide_shadow(sample, - tile_info, - x, y, - unfilteredA, - unfilteredB, - sampleVariance, - sampleVarianceV, - bufferVariance, - prefilter_rect, - buffer_pass_stride, - buffer_denoising_offset); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_get_feature(int sample, - CCL_FILTER_TILE_INFO, - int m_offset, - int v_offset, - float *mean, - float *variance, - float scale, - int4 prefilter_rect, - int buffer_pass_stride, - int buffer_denoising_offset) -{ - int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x; - int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y; - if(x < prefilter_rect.z && y < prefilter_rect.w) { - kernel_filter_get_feature(sample, - tile_info, - m_offset, v_offset, - x, y, - mean, variance, - scale, - prefilter_rect, - buffer_pass_stride, - buffer_denoising_offset); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_write_feature(int sample, - int4 buffer_params, - int4 filter_area, - float *from, - float *buffer, - int out_offset, - int4 prefilter_rect) -{ - int x = blockDim.x*blockIdx.x + threadIdx.x; - int y = blockDim.y*blockIdx.y + threadIdx.y; - if(x < filter_area.z && y < filter_area.w) { - kernel_filter_write_feature(sample, - x + filter_area.x, - y + filter_area.y, - buffer_params, - from, - buffer, - out_offset, - prefilter_rect); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_detect_outliers(float *image, - float *variance, - float *depth, - float *output, - int4 prefilter_rect, - int pass_stride) -{ - int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x; - int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y; - if(x < prefilter_rect.z && y < prefilter_rect.w) { - kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_combine_halves(float *mean, float *variance, float *a, float *b, int4 prefilter_rect, int r) -{ - int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x; - int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y; - if(x < prefilter_rect.z && y < prefilter_rect.w) { - kernel_filter_combine_halves(x, y, mean, variance, a, b, prefilter_rect, r); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_construct_transform(float const* __restrict__ buffer, - CCL_FILTER_TILE_INFO, - float *transform, int *rank, - int4 filter_area, int4 rect, - int radius, float pca_threshold, - int pass_stride, int frame_stride, - bool use_time) -{ - int x = blockDim.x*blockIdx.x + threadIdx.x; - int y = blockDim.y*blockIdx.y + threadIdx.y; - if(x < filter_area.z && y < filter_area.w) { - int *l_rank = rank + y*filter_area.z + x; - float *l_transform = transform + y*filter_area.z + x; - kernel_filter_construct_transform(buffer, - tile_info, - x + filter_area.x, y + filter_area.y, - rect, - pass_stride, frame_stride, - use_time, - l_transform, l_rank, - radius, pca_threshold, - filter_area.z*filter_area.w, - threadIdx.y*blockDim.x + threadIdx.x); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_nlm_calc_difference(const float *ccl_restrict weight_image, - const float *ccl_restrict variance_image, - const float *ccl_restrict scale_image, - float *difference_image, - int w, - int h, - int stride, - int pass_stride, - int r, - int channel_offset, - int frame_offset, - float a, - float k_2) -{ - int4 co, rect; - int ofs; - if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { - kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w, - weight_image, - variance_image, - scale_image, - difference_image + ofs, - rect, stride, - channel_offset, - frame_offset, - a, k_2); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_nlm_blur(const float *ccl_restrict difference_image, - float *out_image, - int w, - int h, - int stride, - int pass_stride, - int r, - int f) -{ - int4 co, rect; - int ofs; - if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { - kernel_filter_nlm_blur(co.x, co.y, - difference_image + ofs, - out_image + ofs, - rect, stride, f); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_nlm_calc_weight(const float *ccl_restrict difference_image, - float *out_image, - int w, - int h, - int stride, - int pass_stride, - int r, - int f) -{ - int4 co, rect; - int ofs; - if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { - kernel_filter_nlm_calc_weight(co.x, co.y, - difference_image + ofs, - out_image + ofs, - rect, stride, f); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_nlm_update_output(const float *ccl_restrict difference_image, - const float *ccl_restrict image, - float *out_image, - float *accum_image, - int w, - int h, - int stride, - int pass_stride, - int channel_offset, - int r, - int f) -{ - int4 co, rect; - int ofs; - if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { - kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w, - difference_image + ofs, - image, - out_image, - accum_image, - rect, - channel_offset, - stride, f); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_nlm_normalize(float *out_image, - const float *ccl_restrict accum_image, - int w, - int h, - int stride) -{ - int x = blockDim.x*blockIdx.x + threadIdx.x; - int y = blockDim.y*blockIdx.y + threadIdx.y; - if(x < w && y < h) { - kernel_filter_nlm_normalize(x, y, out_image, accum_image, stride); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_nlm_construct_gramian(int t, - const float *ccl_restrict difference_image, - const float *ccl_restrict buffer, - float const* __restrict__ transform, - int *rank, - float *XtWX, - float3 *XtWY, - int4 filter_window, - int w, - int h, - int stride, - int pass_stride, - int r, - int f, - int frame_offset, - bool use_time) -{ - int4 co, rect; - int ofs; - if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) { - kernel_filter_nlm_construct_gramian(co.x, co.y, - co.z, co.w, - t, - difference_image + ofs, - buffer, - transform, rank, - XtWX, XtWY, - rect, filter_window, - stride, f, - pass_stride, - frame_offset, - use_time, - threadIdx.y*blockDim.x + threadIdx.x); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_filter_finalize(float *buffer, - int *rank, - float *XtWX, - float3 *XtWY, - int4 filter_area, - int4 buffer_params, - int sample) -{ - int x = blockDim.x*blockIdx.x + threadIdx.x; - int y = blockDim.y*blockIdx.y + threadIdx.y; - if(x < filter_area.z && y < filter_area.w) { - int storage_ofs = y*filter_area.z+x; - rank += storage_ofs; - XtWX += storage_ofs; - XtWY += storage_ofs; - kernel_filter_finalize(x, y, buffer, rank, - filter_area.z*filter_area.w, - XtWX, XtWY, - buffer_params, sample); - } -} - -#endif - diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu deleted file mode 100644 index cf62b6e781e..00000000000 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* CUDA kernel entry points */ - -#ifdef __CUDA_ARCH__ - -#include "kernel/kernel_compat_cuda.h" -#include "kernel_config.h" - -#include "util/util_atomic.h" - -#include "kernel/kernel_math.h" -#include "kernel/kernel_types.h" -#include "kernel/kernel_globals.h" -#include "kernel/kernel_color.h" -#include "kernel/kernels/cuda/kernel_cuda_image.h" -#include "kernel/kernel_film.h" -#include "kernel/kernel_path.h" -#include "kernel/kernel_path_branched.h" -#include "kernel/kernel_bake.h" -#include "kernel/kernel_work_stealing.h" -#include "kernel/kernel_adaptive_sampling.h" - -/* kernels */ -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_path_trace(WorkTile *tile, uint total_work_size) -{ - int work_index = ccl_global_id(0); - bool thread_is_active = work_index < total_work_size; - uint x, y, sample; - KernelGlobals kg; - if(thread_is_active) { - get_work_pixel(tile, work_index, &x, &y, &sample); - - kernel_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); - } - - if(kernel_data.film.cryptomatte_passes) { - __syncthreads(); - if(thread_is_active) { - kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); - } - } -} - -#ifdef __BRANCHED_PATH__ -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS) -kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size) -{ - int work_index = ccl_global_id(0); - bool thread_is_active = work_index < total_work_size; - uint x, y, sample; - KernelGlobals kg; - if(thread_is_active) { - get_work_pixel(tile, work_index, &x, &y, &sample); - - kernel_branched_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); - } - - if(kernel_data.film.cryptomatte_passes) { - __syncthreads(); - if(thread_is_active) { - kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); - } - } -} -#endif - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_adaptive_stopping(WorkTile *tile, int sample, uint total_work_size) -{ - int work_index = ccl_global_id(0); - bool thread_is_active = work_index < total_work_size; - KernelGlobals kg; - if(thread_is_active && kernel_data.film.pass_adaptive_aux_buffer) { - uint x = tile->x + work_index % tile->w; - uint y = tile->y + work_index / tile->w; - int index = tile->offset + x + y * tile->stride; - ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride; - kernel_do_adaptive_stopping(&kg, buffer, sample); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_adaptive_filter_x(WorkTile *tile, int sample, uint) -{ - KernelGlobals kg; - if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) { - if(ccl_global_id(0) < tile->h) { - int y = tile->y + ccl_global_id(0); - kernel_do_adaptive_filter_x(&kg, y, tile); - } - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_adaptive_filter_y(WorkTile *tile, int sample, uint) -{ - KernelGlobals kg; - if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) { - if(ccl_global_id(0) < tile->w) { - int x = tile->x + ccl_global_id(0); - kernel_do_adaptive_filter_y(&kg, x, tile); - } - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_adaptive_scale_samples(WorkTile *tile, int start_sample, int sample, uint total_work_size) -{ - if(kernel_data.film.pass_adaptive_aux_buffer) { - int work_index = ccl_global_id(0); - bool thread_is_active = work_index < total_work_size; - KernelGlobals kg; - if(thread_is_active) { - uint x = tile->x + work_index % tile->w; - uint y = tile->y + work_index / tile->w; - int index = tile->offset + x + y * tile->stride; - ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride; - if(buffer[kernel_data.film.pass_sample_count] < 0.0f) { - buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count]; - float sample_multiplier = sample / buffer[kernel_data.film.pass_sample_count]; - if(sample_multiplier != 1.0f) { - kernel_adaptive_post_adjust(&kg, buffer, sample_multiplier); - } - } - else { - kernel_adaptive_post_adjust(&kg, buffer, sample / (sample - 1.0f)); - } - } - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) -{ - int x = sx + blockDim.x*blockIdx.x + threadIdx.x; - int y = sy + blockDim.y*blockIdx.y + threadIdx.y; - - if(x < sx + sw && y < sy + sh) { - kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) -{ - int x = sx + blockDim.x*blockIdx.x + threadIdx.x; - int y = sy + blockDim.y*blockIdx.y + threadIdx.y; - - if(x < sx + sw && y < sy + sh) { - kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_displace(uint4 *input, - float4 *output, - int type, - int sx, - int sw, - int offset, - int sample) -{ - int x = sx + blockDim.x*blockIdx.x + threadIdx.x; - - if(x < sx + sw) { - KernelGlobals kg; - kernel_displace_evaluate(&kg, input, output, x); - } -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_background(uint4 *input, - float4 *output, - int type, - int sx, - int sw, - int offset, - int sample) -{ - int x = sx + blockDim.x*blockIdx.x + threadIdx.x; - - if(x < sx + sw) { - KernelGlobals kg; - kernel_background_evaluate(&kg, input, output, x); - } -} - -#ifdef __BAKING__ -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_bake(WorkTile *tile, uint total_work_size) -{ - int work_index = ccl_global_id(0); - - if(work_index < total_work_size) { - uint x, y, sample; - get_work_pixel(tile, work_index, &x, &y, &sample); - - KernelGlobals kg; - kernel_bake_evaluate(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); - } -} -#endif - -#endif - diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h deleted file mode 100644 index 2e47ce2de6c..00000000000 --- a/intern/cycles/kernel/kernels/cuda/kernel_config.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* device data taken from CUDA occupancy calculator */ - -/* 3.0 and 3.5 */ -#if __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350 -# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 -# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 -# define CUDA_BLOCK_MAX_THREADS 1024 -# define CUDA_THREAD_MAX_REGISTERS 63 - -/* tunable parameters */ -# define CUDA_THREADS_BLOCK_WIDTH 16 -# define CUDA_KERNEL_MAX_REGISTERS 63 -# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 - -/* 3.2 */ -#elif __CUDA_ARCH__ == 320 -# define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768 -# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 -# define CUDA_BLOCK_MAX_THREADS 1024 -# define CUDA_THREAD_MAX_REGISTERS 63 - -/* tunable parameters */ -# define CUDA_THREADS_BLOCK_WIDTH 16 -# define CUDA_KERNEL_MAX_REGISTERS 63 -# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 - -/* 3.7 */ -#elif __CUDA_ARCH__ == 370 -# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 -# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 -# define CUDA_BLOCK_MAX_THREADS 1024 -# define CUDA_THREAD_MAX_REGISTERS 255 - -/* tunable parameters */ -# define CUDA_THREADS_BLOCK_WIDTH 16 -# define CUDA_KERNEL_MAX_REGISTERS 63 -# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 - -/* 5.x, 6.x */ -#elif __CUDA_ARCH__ <= 699 -# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 -# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32 -# define CUDA_BLOCK_MAX_THREADS 1024 -# define CUDA_THREAD_MAX_REGISTERS 255 - -/* tunable parameters */ -# define CUDA_THREADS_BLOCK_WIDTH 16 -/* CUDA 9.0 seems to cause slowdowns on high-end Pascal cards unless we increase the number of - * registers */ -# if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600 -# define CUDA_KERNEL_MAX_REGISTERS 64 -# else -# define CUDA_KERNEL_MAX_REGISTERS 48 -# endif -# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 - -/* 7.x, 8.x */ -#elif __CUDA_ARCH__ <= 899 -# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 -# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32 -# define CUDA_BLOCK_MAX_THREADS 1024 -# define CUDA_THREAD_MAX_REGISTERS 255 - -/* tunable parameters */ -# define CUDA_THREADS_BLOCK_WIDTH 16 -# define CUDA_KERNEL_MAX_REGISTERS 64 -# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 72 - -/* unknown architecture */ -#else -# error "Unknown or unsupported CUDA architecture, can't determine launch bounds" -#endif - -/* For split kernel using all registers seems fastest for now, but this - * is unlikely to be optimal once we resolve other bottlenecks. */ - -#define CUDA_KERNEL_SPLIT_MAX_REGISTERS CUDA_THREAD_MAX_REGISTERS - -/* Compute number of threads per block and minimum blocks per multiprocessor - * given the maximum number of registers per thread. */ - -#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \ - __launch_bounds__(threads_block_width *threads_block_width, \ - CUDA_MULTIPRESSOR_MAX_REGISTERS / \ - (threads_block_width * threads_block_width * thread_num_registers)) - -/* sanity checks */ - -#if CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS -# error "Maximum number of threads per block exceeded" -#endif - -#if CUDA_MULTIPRESSOR_MAX_REGISTERS / \ - (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH * CUDA_KERNEL_MAX_REGISTERS) > \ - CUDA_MULTIPROCESSOR_MAX_BLOCKS -# error "Maximum number of blocks per multiprocessor exceeded" -#endif - -#if CUDA_KERNEL_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS -# error "Maximum number of registers per thread exceeded" -#endif - -#if CUDA_KERNEL_BRANCHED_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS -# error "Maximum number of registers per thread exceeded" -#endif diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h deleted file mode 100644 index 132653fa7ca..00000000000 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright 2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef WITH_NANOVDB -# define NDEBUG /* Disable "assert" in device code */ -# define NANOVDB_USE_INTRINSICS -# include "nanovdb/NanoVDB.h" -# include "nanovdb/util/SampleFromVoxels.h" -#endif - -/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */ -ccl_device float cubic_w0(float a) -{ - return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); -} -ccl_device float cubic_w1(float a) -{ - return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); -} -ccl_device float cubic_w2(float a) -{ - return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); -} -ccl_device float cubic_w3(float a) -{ - return (1.0f / 6.0f) * (a * a * a); -} - -/* g0 and g1 are the two amplitude functions. */ -ccl_device float cubic_g0(float a) -{ - return cubic_w0(a) + cubic_w1(a); -} -ccl_device float cubic_g1(float a) -{ - return cubic_w2(a) + cubic_w3(a); -} - -/* h0 and h1 are the two offset functions */ -ccl_device float cubic_h0(float a) -{ - return (cubic_w1(a) / cubic_g0(a)) - 1.0f; -} -ccl_device float cubic_h1(float a) -{ - return (cubic_w3(a) / cubic_g1(a)) + 1.0f; -} - -/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */ -template<typename T> -ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, float y) -{ - CUtexObject tex = (CUtexObject)info.data; - - x = (x * info.width) - 0.5f; - y = (y * info.height) - 0.5f; - - float px = floorf(x); - float py = floorf(y); - float fx = x - px; - float fy = y - py; - - float g0x = cubic_g0(fx); - float g1x = cubic_g1(fx); - /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ - float x0 = (px + cubic_h0(fx) + 0.5f) / info.width; - float x1 = (px + cubic_h1(fx) + 0.5f) / info.width; - float y0 = (py + cubic_h0(fy) + 0.5f) / info.height; - float y1 = (py + cubic_h1(fy) + 0.5f) / info.height; - - return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + g1x * tex2D<T>(tex, x1, y0)) + - cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + g1x * tex2D<T>(tex, x1, y1)); -} - -/* Fast tricubic texture lookup using 8 trilinear lookups. */ -template<typename T> -ccl_device T kernel_tex_image_interp_tricubic(const TextureInfo &info, float x, float y, float z) -{ - CUtexObject tex = (CUtexObject)info.data; - - x = (x * info.width) - 0.5f; - y = (y * info.height) - 0.5f; - z = (z * info.depth) - 0.5f; - - float px = floorf(x); - float py = floorf(y); - float pz = floorf(z); - float fx = x - px; - float fy = y - py; - float fz = z - pz; - - float g0x = cubic_g0(fx); - float g1x = cubic_g1(fx); - float g0y = cubic_g0(fy); - float g1y = cubic_g1(fy); - float g0z = cubic_g0(fz); - float g1z = cubic_g1(fz); - - /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ - float x0 = (px + cubic_h0(fx) + 0.5f) / info.width; - float x1 = (px + cubic_h1(fx) + 0.5f) / info.width; - float y0 = (py + cubic_h0(fy) + 0.5f) / info.height; - float y1 = (py + cubic_h1(fy) + 0.5f) / info.height; - float z0 = (pz + cubic_h0(fz) + 0.5f) / info.depth; - float z1 = (pz + cubic_h1(fz) + 0.5f) / info.depth; - - return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + g1x * tex3D<T>(tex, x1, y0, z0)) + - g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + g1x * tex3D<T>(tex, x1, y1, z0))) + - g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) + g1x * tex3D<T>(tex, x1, y0, z1)) + - g1y * (g0x * tex3D<T>(tex, x0, y1, z1) + g1x * tex3D<T>(tex, x1, y1, z1))); -} - -#ifdef WITH_NANOVDB -template<typename T, typename S> -ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, float z) -{ - float px = floorf(x); - float py = floorf(y); - float pz = floorf(z); - float fx = x - px; - float fy = y - py; - float fz = z - pz; - - float g0x = cubic_g0(fx); - float g1x = cubic_g1(fx); - float g0y = cubic_g0(fy); - float g1y = cubic_g1(fy); - float g0z = cubic_g0(fz); - float g1z = cubic_g1(fz); - - float x0 = px + cubic_h0(fx); - float x1 = px + cubic_h1(fx); - float y0 = py + cubic_h0(fy); - float y1 = py + cubic_h1(fy); - float z0 = pz + cubic_h0(fz); - float z1 = pz + cubic_h1(fz); - - using namespace nanovdb; - - return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) + - g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) + - g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) + - g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1)))); -} - -template<typename T> -ccl_device_inline T kernel_tex_image_interp_nanovdb( - const TextureInfo &info, float x, float y, float z, uint interpolation) -{ - using namespace nanovdb; - - NanoGrid<T> *const grid = (NanoGrid<T> *)info.data; - typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType; - AccessorType acc = grid->getAccessor(); - - switch (interpolation) { - case INTERPOLATION_CLOSEST: - return SampleFromVoxels<AccessorType, 0, false>(acc)(Vec3f(x, y, z)); - case INTERPOLATION_LINEAR: - return SampleFromVoxels<AccessorType, 1, false>(acc)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f)); - default: - SampleFromVoxels<AccessorType, 1, false> s(acc); - return kernel_tex_image_interp_tricubic_nanovdb<T>(s, x - 0.5f, y - 0.5f, z - 0.5f); - } -} -#endif - -ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) -{ - const TextureInfo &info = kernel_tex_fetch(__texture_info, id); - - /* float4, byte4, ushort4 and half4 */ - const int texture_type = info.data_type; - if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || - texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) { - if (info.interpolation == INTERPOLATION_CUBIC) { - return kernel_tex_image_interp_bicubic<float4>(info, x, y); - } - else { - CUtexObject tex = (CUtexObject)info.data; - return tex2D<float4>(tex, x, y); - } - } - /* float, byte and half */ - else { - float f; - - if (info.interpolation == INTERPOLATION_CUBIC) { - f = kernel_tex_image_interp_bicubic<float>(info, x, y); - } - else { - CUtexObject tex = (CUtexObject)info.data; - f = tex2D<float>(tex, x, y); - } - - return make_float4(f, f, f, 1.0f); - } -} - -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, - int id, - float3 P, - InterpolationType interp) -{ - const TextureInfo &info = kernel_tex_fetch(__texture_info, id); - - if (info.use_transform_3d) { - P = transform_point(&info.transform_3d, P); - } - - const float x = P.x; - const float y = P.y; - const float z = P.z; - - uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp; - const int texture_type = info.data_type; - -#ifdef WITH_NANOVDB - if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) { - float f = kernel_tex_image_interp_nanovdb<float>(info, x, y, z, interpolation); - return make_float4(f, f, f, 1.0f); - } - if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { - nanovdb::Vec3f f = kernel_tex_image_interp_nanovdb<nanovdb::Vec3f>( - info, x, y, z, interpolation); - return make_float4(f[0], f[1], f[2], 1.0f); - } -#endif - if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || - texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) { - if (interpolation == INTERPOLATION_CUBIC) { - return kernel_tex_image_interp_tricubic<float4>(info, x, y, z); - } - else { - CUtexObject tex = (CUtexObject)info.data; - return tex3D<float4>(tex, x, y, z); - } - } - else { - float f; - - if (interpolation == INTERPOLATION_CUBIC) { - f = kernel_tex_image_interp_tricubic<float>(info, x, y, z); - } - else { - CUtexObject tex = (CUtexObject)info.data; - f = tex3D<float>(tex, x, y, z); - } - - return make_float4(f, f, f, 1.0f); - } -} diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu deleted file mode 100644 index 95ad7599cf1..00000000000 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* CUDA split kernel entry points */ - -#ifdef __CUDA_ARCH__ - -#define __SPLIT_KERNEL__ - -#include "kernel/kernel_compat_cuda.h" -#include "kernel_config.h" - -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_data_init.h" -#include "kernel/split/kernel_path_init.h" -#include "kernel/split/kernel_scene_intersect.h" -#include "kernel/split/kernel_lamp_emission.h" -#include "kernel/split/kernel_do_volume.h" -#include "kernel/split/kernel_queue_enqueue.h" -#include "kernel/split/kernel_indirect_background.h" -#include "kernel/split/kernel_shader_setup.h" -#include "kernel/split/kernel_shader_sort.h" -#include "kernel/split/kernel_shader_eval.h" -#include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" -#include "kernel/split/kernel_subsurface_scatter.h" -#include "kernel/split/kernel_direct_lighting.h" -#include "kernel/split/kernel_shadow_blocked_ao.h" -#include "kernel/split/kernel_shadow_blocked_dl.h" -#include "kernel/split/kernel_enqueue_inactive.h" -#include "kernel/split/kernel_next_iteration_setup.h" -#include "kernel/split/kernel_indirect_subsurface.h" -#include "kernel/split/kernel_buffer_update.h" -#include "kernel/split/kernel_adaptive_stopping.h" -#include "kernel/split/kernel_adaptive_filter_x.h" -#include "kernel/split/kernel_adaptive_filter_y.h" -#include "kernel/split/kernel_adaptive_adjust_samples.h" - -#include "kernel/kernel_film.h" - -/* kernels */ -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_state_buffer_size(uint num_threads, uint64_t *size) -{ - *size = split_data_buffer_size(NULL, num_threads); -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_path_trace_data_init( - ccl_global void *split_data_buffer, - int num_elements, - ccl_global char *ray_state, - int start_sample, - int end_sample, - int sx, int sy, int sw, int sh, int offset, int stride, - ccl_global int *Queue_index, - int queuesize, - ccl_global char *use_queues_flag, - ccl_global unsigned int *work_pool_wgs, - unsigned int num_samples, - ccl_global float *buffer) -{ - kernel_data_init(NULL, - NULL, - split_data_buffer, - num_elements, - ray_state, - start_sample, - end_sample, - sx, sy, sw, sh, offset, stride, - Queue_index, - queuesize, - use_queues_flag, - work_pool_wgs, - num_samples, - buffer); -} - -#define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ - extern "C" __global__ void \ - CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \ - kernel_cuda_##name() \ - { \ - kernel_##name(NULL); \ - } - -#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ - extern "C" __global__ void \ - CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \ - kernel_cuda_##name() \ - { \ - ccl_local type locals; \ - kernel_##name(NULL, &locals); \ - } - -DEFINE_SPLIT_KERNEL_FUNCTION(path_init) -DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) -DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) -DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) -DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals) -DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) -DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) -DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) -DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) -DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) -DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping) -DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x) -DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y) -DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples) - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) -{ - int x = sx + blockDim.x*blockIdx.x + threadIdx.x; - int y = sy + blockDim.y*blockIdx.y + threadIdx.y; - - if(x < sx + sw && y < sy + sh) - kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride); -} - -extern "C" __global__ void -CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) -{ - int x = sx + blockDim.x*blockIdx.x + threadIdx.x; - int y = sy + blockDim.y*blockIdx.y + threadIdx.y; - - if(x < sx + sw && y < sy + sh) - kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride); -} - -#endif - diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl deleted file mode 100644 index 996bc27f71b..00000000000 --- a/intern/cycles/kernel/kernels/opencl/filter.cl +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* OpenCL kernel entry points */ - -#include "kernel/kernel_compat_opencl.h" - -#include "kernel/filter/filter_kernel.h" - -/* kernels */ - -__kernel void kernel_ocl_filter_divide_shadow(int sample, - CCL_FILTER_TILE_INFO, - ccl_global float *unfilteredA, - ccl_global float *unfilteredB, - ccl_global float *sampleVariance, - ccl_global float *sampleVarianceV, - ccl_global float *bufferVariance, - int4 prefilter_rect, - int buffer_pass_stride, - int buffer_denoising_offset) -{ - int x = prefilter_rect.x + get_global_id(0); - int y = prefilter_rect.y + get_global_id(1); - if(x < prefilter_rect.z && y < prefilter_rect.w) { - kernel_filter_divide_shadow(sample, - CCL_FILTER_TILE_INFO_ARG, - x, y, - unfilteredA, - unfilteredB, - sampleVariance, - sampleVarianceV, - bufferVariance, - prefilter_rect, - buffer_pass_stride, - buffer_denoising_offset); - } -} - -__kernel void kernel_ocl_filter_get_feature(int sample, - CCL_FILTER_TILE_INFO, - int m_offset, - int v_offset, - ccl_global float *mean, - ccl_global float *variance, - float scale, - int4 prefilter_rect, - int buffer_pass_stride, - int buffer_denoising_offset) -{ - int x = prefilter_rect.x + get_global_id(0); - int y = prefilter_rect.y + get_global_id(1); - if(x < prefilter_rect.z && y < prefilter_rect.w) { - kernel_filter_get_feature(sample, - CCL_FILTER_TILE_INFO_ARG, - m_offset, v_offset, - x, y, - mean, variance, - scale, - prefilter_rect, - buffer_pass_stride, - buffer_denoising_offset); - } -} - -__kernel void kernel_ocl_filter_write_feature(int sample, - int4 buffer_params, - int4 filter_area, - ccl_global float *from, - ccl_global float *buffer, - int out_offset, - int4 prefilter_rect) -{ - int x = get_global_id(0); - int y = get_global_id(1); - if(x < filter_area.z && y < filter_area.w) { - kernel_filter_write_feature(sample, - x + filter_area.x, - y + filter_area.y, - buffer_params, - from, - buffer, - out_offset, - prefilter_rect); - } -} - -__kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image, - ccl_global float *variance, - ccl_global float *depth, - ccl_global float *output, - int4 prefilter_rect, - int pass_stride) -{ - int x = prefilter_rect.x + get_global_id(0); - int y = prefilter_rect.y + get_global_id(1); - if(x < prefilter_rect.z && y < prefilter_rect.w) { - kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride); - } -} - -__kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean, - ccl_global float *variance, - ccl_global float *a, - ccl_global float *b, - int4 prefilter_rect, - int r) -{ - int x = prefilter_rect.x + get_global_id(0); - int y = prefilter_rect.y + get_global_id(1); - if(x < prefilter_rect.z && y < prefilter_rect.w) { - kernel_filter_combine_halves(x, y, mean, variance, a, b, prefilter_rect, r); - } -} - -__kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_restrict buffer, - CCL_FILTER_TILE_INFO, - ccl_global float *transform, - ccl_global int *rank, - int4 filter_area, - int4 rect, - int pass_stride, - int frame_stride, - char use_time, - int radius, - float pca_threshold) -{ - int x = get_global_id(0); - int y = get_global_id(1); - if(x < filter_area.z && y < filter_area.w) { - ccl_global int *l_rank = rank + y*filter_area.z + x; - ccl_global float *l_transform = transform + y*filter_area.z + x; - kernel_filter_construct_transform(buffer, - CCL_FILTER_TILE_INFO_ARG, - x + filter_area.x, y + filter_area.y, - rect, - pass_stride, frame_stride, - use_time, - l_transform, l_rank, - radius, pca_threshold, - filter_area.z*filter_area.w, - get_local_id(1)*get_local_size(0) + get_local_id(0)); - } -} - -__kernel void kernel_ocl_filter_nlm_calc_difference(const ccl_global float *ccl_restrict weight_image, - const ccl_global float *ccl_restrict variance_image, - const ccl_global float *ccl_restrict scale_image, - ccl_global float *difference_image, - int w, - int h, - int stride, - int pass_stride, - int r, - int channel_offset, - int frame_offset, - float a, - float k_2) -{ - int4 co, rect; - int ofs; - if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { - kernel_filter_nlm_calc_difference(co.x, co.y, co.z, co.w, - weight_image, - variance_image, - scale_image, - difference_image + ofs, - rect, stride, - channel_offset, - frame_offset, - a, k_2); - } -} - -__kernel void kernel_ocl_filter_nlm_blur(const ccl_global float *ccl_restrict difference_image, - ccl_global float *out_image, - int w, - int h, - int stride, - int pass_stride, - int r, - int f) -{ - int4 co, rect; - int ofs; - if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { - kernel_filter_nlm_blur(co.x, co.y, - difference_image + ofs, - out_image + ofs, - rect, stride, f); - } -} - -__kernel void kernel_ocl_filter_nlm_calc_weight(const ccl_global float *ccl_restrict difference_image, - ccl_global float *out_image, - int w, - int h, - int stride, - int pass_stride, - int r, - int f) -{ - int4 co, rect; - int ofs; - if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { - kernel_filter_nlm_calc_weight(co.x, co.y, - difference_image + ofs, - out_image + ofs, - rect, stride, f); - } -} - -__kernel void kernel_ocl_filter_nlm_update_output(const ccl_global float *ccl_restrict difference_image, - const ccl_global float *ccl_restrict image, - ccl_global float *out_image, - ccl_global float *accum_image, - int w, - int h, - int stride, - int pass_stride, - int channel_offset, - int r, - int f) -{ - int4 co, rect; - int ofs; - if(get_nlm_coords(w, h, r, pass_stride, &rect, &co, &ofs)) { - kernel_filter_nlm_update_output(co.x, co.y, co.z, co.w, - difference_image + ofs, - image, - out_image, - accum_image, - rect, - channel_offset, - stride, f); - } -} - -__kernel void kernel_ocl_filter_nlm_normalize(ccl_global float *out_image, - const ccl_global float *ccl_restrict accum_image, - int w, - int h, - int stride) -{ - int x = get_global_id(0); - int y = get_global_id(1); - if(x < w && y < h) { - kernel_filter_nlm_normalize(x, y, out_image, accum_image, stride); - } -} - -__kernel void kernel_ocl_filter_nlm_construct_gramian(int t, - const ccl_global float *ccl_restrict difference_image, - const ccl_global float *ccl_restrict buffer, - const ccl_global float *ccl_restrict transform, - ccl_global int *rank, - ccl_global float *XtWX, - ccl_global float3 *XtWY, - int4 filter_window, - int w, - int h, - int stride, - int pass_stride, - int r, - int f, - int frame_offset, - char use_time) -{ - int4 co, rect; - int ofs; - if(get_nlm_coords_window(w, h, r, pass_stride, &rect, &co, &ofs, filter_window)) { - kernel_filter_nlm_construct_gramian(co.x, co.y, - co.z, co.w, - t, - difference_image + ofs, - buffer, - transform, rank, - XtWX, XtWY, - rect, filter_window, - stride, f, - pass_stride, - frame_offset, - use_time, - get_local_id(1)*get_local_size(0) + get_local_id(0)); - } -} - -__kernel void kernel_ocl_filter_finalize(ccl_global float *buffer, - ccl_global int *rank, - ccl_global float *XtWX, - ccl_global float3 *XtWY, - int4 filter_area, - int4 buffer_params, - int sample) -{ - int x = get_global_id(0); - int y = get_global_id(1); - if(x < filter_area.z && y < filter_area.w) { - int storage_ofs = y*filter_area.z+x; - rank += storage_ofs; - XtWX += storage_ofs; - XtWY += storage_ofs; - kernel_filter_finalize(x, y, buffer, rank, - filter_area.z*filter_area.w, - XtWX, XtWY, - buffer_params, sample); - } -} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl deleted file mode 100644 index ebdb99d4730..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2019 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_adaptive_adjust_samples.h" - -#define KERNEL_NAME adaptive_adjust_samples -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl deleted file mode 100644 index 76d82d4184e..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2019 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_adaptive_filter_x.h" - -#define KERNEL_NAME adaptive_filter_x -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl deleted file mode 100644 index 1e6d15ba0f2..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2019 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_adaptive_filter_y.h" - -#define KERNEL_NAME adaptive_filter_y -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME diff --git a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl deleted file mode 100644 index 51de0059667..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2019 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_adaptive_stopping.h" - -#define KERNEL_NAME adaptive_stopping -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_background.cl deleted file mode 100644 index 0e600676e82..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_background.cl +++ /dev/null @@ -1,35 +0,0 @@ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/kernel_math.h" -#include "kernel/kernel_types.h" -#include "kernel/kernel_globals.h" -#include "kernel/kernel_color.h" -#include "kernel/kernels/opencl/kernel_opencl_image.h" - -#include "kernel/kernel_path.h" -#include "kernel/kernel_path_branched.h" - -#include "kernel/kernel_bake.h" - -__kernel void kernel_ocl_background( - ccl_constant KernelData *data, - ccl_global uint4 *input, - ccl_global float4 *output, - - KERNEL_BUFFER_PARAMS, - - int type, int sx, int sw, int offset, int sample) -{ - KernelGlobals kglobals, *kg = &kglobals; - - kg->data = data; - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); - - int x = sx + ccl_global_id(0); - - if(x < sx + sw) { - kernel_background_evaluate(kg, input, output, x); - } -} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_bake.cl b/intern/cycles/kernel/kernels/opencl/kernel_bake.cl deleted file mode 100644 index 7b81e387467..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_bake.cl +++ /dev/null @@ -1,36 +0,0 @@ -#include "kernel/kernel_compat_opencl.h" -#include "kernel/kernel_math.h" -#include "kernel/kernel_types.h" -#include "kernel/kernel_globals.h" -#include "kernel/kernel_color.h" -#include "kernel/kernels/opencl/kernel_opencl_image.h" - -#include "kernel/kernel_path.h" -#include "kernel/kernel_path_branched.h" - -#include "kernel/kernel_bake.h" - -__kernel void kernel_ocl_bake( - ccl_constant KernelData *data, - ccl_global float *buffer, - - KERNEL_BUFFER_PARAMS, - - int sx, int sy, int sw, int sh, int offset, int stride, int sample) -{ - KernelGlobals kglobals, *kg = &kglobals; - - kg->data = data; - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); - - int x = sx + ccl_global_id(0); - int y = sy + ccl_global_id(1); - - if(x < sx + sw && y < sy + sh) { -#ifndef __NO_BAKING__ - kernel_bake_evaluate(kg, buffer, sample, x, y, offset, stride); -#endif - } -} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_base.cl b/intern/cycles/kernel/kernels/opencl/kernel_base.cl deleted file mode 100644 index 1c2d89e8a92..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_base.cl +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* OpenCL base kernels entry points */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/kernel_types.h" -#include "kernel/kernel_globals.h" - -#include "kernel/kernel_film.h" - - -__kernel void kernel_ocl_convert_to_byte( - ccl_constant KernelData *data, - ccl_global uchar4 *rgba, - ccl_global float *buffer, - - KERNEL_BUFFER_PARAMS, - - float sample_scale, - int sx, int sy, int sw, int sh, int offset, int stride) -{ - KernelGlobals kglobals, *kg = &kglobals; - - kg->data = data; - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); - - int x = sx + ccl_global_id(0); - int y = sy + ccl_global_id(1); - - if(x < sx + sw && y < sy + sh) - kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -__kernel void kernel_ocl_convert_to_half_float( - ccl_constant KernelData *data, - ccl_global uchar4 *rgba, - ccl_global float *buffer, - - KERNEL_BUFFER_PARAMS, - - float sample_scale, - int sx, int sy, int sw, int sh, int offset, int stride) -{ - KernelGlobals kglobals, *kg = &kglobals; - - kg->data = data; - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); - - int x = sx + ccl_global_id(0); - int y = sy + ccl_global_id(1); - - if(x < sx + sw && y < sy + sh) - kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, uint64_t size, uint64_t offset) -{ - size_t i = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0); - - if(i < size / sizeof(float4)) { - buffer[i+offset/sizeof(float4)] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - else if(i == size / sizeof(float4)) { - ccl_global uchar *b = (ccl_global uchar*)&buffer[i+offset/sizeof(float4)]; - - for(i = 0; i < size % sizeof(float4); i++) { - *(b++) = 0; - } - } -} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl deleted file mode 100644 index dcea2630aef..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_buffer_update.h" - -#define KERNEL_NAME buffer_update -#define LOCALS_TYPE unsigned int -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME -#undef LOCALS_TYPE - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl deleted file mode 100644 index 7125348a49f..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_data_init.h" - -__kernel void kernel_ocl_path_trace_data_init( - ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global void *split_data_buffer, - int num_elements, - ccl_global char *ray_state, - KERNEL_BUFFER_PARAMS, - int start_sample, - int end_sample, - int sx, int sy, int sw, int sh, int offset, int stride, - ccl_global int *Queue_index, /* Tracks the number of elements in queues */ - int queuesize, /* size (capacity) of the queue */ - ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ - ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */ - unsigned int num_samples, /* Total number of samples per pixel */ - ccl_global float *buffer) -{ - kernel_data_init((KernelGlobals*)kg, - data, - split_data_buffer, - num_elements, - ray_state, - KERNEL_BUFFER_ARGS, - start_sample, - end_sample, - sx, sy, sw, sh, offset, stride, - Queue_index, - queuesize, - use_queues_flag, - work_pool_wgs, - num_samples, - buffer); -} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl deleted file mode 100644 index ed64ae01aae..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_direct_lighting.h" - -#define KERNEL_NAME direct_lighting -#define LOCALS_TYPE unsigned int -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME -#undef LOCALS_TYPE - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_displace.cl b/intern/cycles/kernel/kernels/opencl/kernel_displace.cl deleted file mode 100644 index 76cc36971f5..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_displace.cl +++ /dev/null @@ -1,36 +0,0 @@ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/kernel_math.h" -#include "kernel/kernel_types.h" -#include "kernel/kernel_globals.h" -#include "kernel/kernel_color.h" -#include "kernel/kernels/opencl/kernel_opencl_image.h" - -#include "kernel/kernel_path.h" -#include "kernel/kernel_path_branched.h" - -#include "kernel/kernel_bake.h" - -__kernel void kernel_ocl_displace( - ccl_constant KernelData *data, - ccl_global uint4 *input, - ccl_global float4 *output, - - KERNEL_BUFFER_PARAMS, - - int type, int sx, int sw, int offset, int sample) -{ - KernelGlobals kglobals, *kg = &kglobals; - - kg->data = data; - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); - - int x = sx + ccl_global_id(0); - - if(x < sx + sw) { - kernel_displace_evaluate(kg, input, output, x); - } -} - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl deleted file mode 100644 index 8afaa686e28..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_do_volume.h" - -#define KERNEL_NAME do_volume -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl deleted file mode 100644 index e68d4104a91..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_enqueue_inactive.h" - -#define KERNEL_NAME enqueue_inactive -#define LOCALS_TYPE unsigned int -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME -#undef LOCALS_TYPE - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl deleted file mode 100644 index 9e1e57beba6..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" - -#define KERNEL_NAME holdout_emission_blurring_pathtermination_ao -#define LOCALS_TYPE BackgroundAOLocals -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME -#undef LOCALS_TYPE - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl deleted file mode 100644 index 192d01444ba..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_indirect_background.h" - -#define KERNEL_NAME indirect_background -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl deleted file mode 100644 index 84938b889e5..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_indirect_subsurface.h" - -#define KERNEL_NAME indirect_subsurface -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl deleted file mode 100644 index c314dc96c33..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_lamp_emission.h" - -#define KERNEL_NAME lamp_emission -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl deleted file mode 100644 index 8b1332bf013..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_next_iteration_setup.h" - -#define KERNEL_NAME next_iteration_setup -#define LOCALS_TYPE unsigned int -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME -#undef LOCALS_TYPE - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h deleted file mode 100644 index bb6b8a40e8e..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright 2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef WITH_NANOVDB -/* Data type to replace `double` used in the NanoVDB headers. Cycles don't need doubles, and is - * safer and more portable to never use double datatype on GPU. - * Use a special structure, so that the following is true: - * - No unnoticed implicit cast or mathematical operations used on scalar 64bit type - * (which rules out trick like using `uint64_t` as a drop-in replacement for double). - * - Padding rules are matching exactly `double` - * (which rules out array of `uint8_t`). */ -typedef struct ccl_vdb_double_t { - uint64_t i; -} ccl_vdb_double_t; - -# define double ccl_vdb_double_t -# include "nanovdb/CNanoVDB.h" -# undef double -#endif - -/* For OpenCL we do manual lookup and interpolation. */ - -ccl_device_inline ccl_global TextureInfo *kernel_tex_info(KernelGlobals *kg, uint id) -{ - const uint tex_offset = id -#define KERNEL_TEX(type, name) +1 -#include "kernel/kernel_textures.h" - ; - - return &((ccl_global TextureInfo *)kg->buffers[0])[tex_offset]; -} - -#define tex_fetch(type, info, index) \ - ((ccl_global type *)(kg->buffers[info->cl_buffer] + info->data))[(index)] - -ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width) -{ - x %= width; - if (x < 0) - x += width; - return x; -} - -ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width) -{ - return clamp(x, 0, width - 1); -} - -ccl_device_inline float4 svm_image_texture_read( - KernelGlobals *kg, const ccl_global TextureInfo *info, void *acc, int x, int y, int z) -{ - const int data_offset = x + info->width * y + info->width * info->height * z; - const int texture_type = info->data_type; - - /* Float4 */ - if (texture_type == IMAGE_DATA_TYPE_FLOAT4) { - return tex_fetch(float4, info, data_offset); - } - /* Byte4 */ - else if (texture_type == IMAGE_DATA_TYPE_BYTE4) { - uchar4 r = tex_fetch(uchar4, info, data_offset); - float f = 1.0f / 255.0f; - return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); - } - /* Ushort4 */ - else if (texture_type == IMAGE_DATA_TYPE_USHORT4) { - ushort4 r = tex_fetch(ushort4, info, data_offset); - float f = 1.0f / 65535.f; - return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); - } - /* Float */ - else if (texture_type == IMAGE_DATA_TYPE_FLOAT) { - float f = tex_fetch(float, info, data_offset); - return make_float4(f, f, f, 1.0f); - } - /* UShort */ - else if (texture_type == IMAGE_DATA_TYPE_USHORT) { - ushort r = tex_fetch(ushort, info, data_offset); - float f = r * (1.0f / 65535.0f); - return make_float4(f, f, f, 1.0f); - } -#ifdef WITH_NANOVDB - /* NanoVDB Float */ - else if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) { - cnanovdb_coord coord; - coord.mVec[0] = x; - coord.mVec[1] = y; - coord.mVec[2] = z; - float f = cnanovdb_readaccessor_getValueF((cnanovdb_readaccessor *)acc, &coord); - return make_float4(f, f, f, 1.0f); - } - /* NanoVDB Float3 */ - else if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { - cnanovdb_coord coord; - coord.mVec[0] = x; - coord.mVec[1] = y; - coord.mVec[2] = z; - cnanovdb_Vec3F f = cnanovdb_readaccessor_getValueF3((cnanovdb_readaccessor *)acc, &coord); - return make_float4(f.mVec[0], f.mVec[1], f.mVec[2], 1.0f); - } -#endif -#ifdef __KERNEL_CL_KHR_FP16__ - /* Half and Half4 are optional in OpenCL */ - else if (texture_type == IMAGE_DATA_TYPE_HALF) { - float f = tex_fetch(half, info, data_offset); - return make_float4(f, f, f, 1.0f); - } - else if (texture_type == IMAGE_DATA_TYPE_HALF4) { - half4 r = tex_fetch(half4, info, data_offset); - return make_float4(r.x, r.y, r.z, r.w); - } -#endif - /* Byte */ - else { - uchar r = tex_fetch(uchar, info, data_offset); - float f = r * (1.0f / 255.0f); - return make_float4(f, f, f, 1.0f); - } -} - -ccl_device_inline float4 -svm_image_texture_read_2d(KernelGlobals *kg, int id, void *acc, int x, int y) -{ - const ccl_global TextureInfo *info = kernel_tex_info(kg, id); - -#ifdef WITH_NANOVDB - if (info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT && - info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { -#endif - /* Wrap */ - if (info->extension == EXTENSION_REPEAT) { - x = svm_image_texture_wrap_periodic(x, info->width); - y = svm_image_texture_wrap_periodic(y, info->height); - } - else { - x = svm_image_texture_wrap_clamp(x, info->width); - y = svm_image_texture_wrap_clamp(y, info->height); - } -#ifdef WITH_NANOVDB - } -#endif - - return svm_image_texture_read(kg, info, acc, x, y, 0); -} - -ccl_device_inline float4 -svm_image_texture_read_3d(KernelGlobals *kg, int id, void *acc, int x, int y, int z) -{ - const ccl_global TextureInfo *info = kernel_tex_info(kg, id); - -#ifdef WITH_NANOVDB - if (info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT && - info->data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { -#endif - /* Wrap */ - if (info->extension == EXTENSION_REPEAT) { - x = svm_image_texture_wrap_periodic(x, info->width); - y = svm_image_texture_wrap_periodic(y, info->height); - z = svm_image_texture_wrap_periodic(z, info->depth); - } - else { - x = svm_image_texture_wrap_clamp(x, info->width); - y = svm_image_texture_wrap_clamp(y, info->height); - z = svm_image_texture_wrap_clamp(z, info->depth); - } -#ifdef WITH_NANOVDB - } -#endif - - return svm_image_texture_read(kg, info, acc, x, y, z); -} - -ccl_device_inline float svm_image_texture_frac(float x, int *ix) -{ - int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0); - *ix = i; - return x - (float)i; -} - -#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ - { \ - u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \ - u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \ - u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \ - u[3] = (1.0f / 6.0f) * t * t * t; \ - } \ - (void)0 - -ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) -{ - const ccl_global TextureInfo *info = kernel_tex_info(kg, id); - - if (info->extension == EXTENSION_CLIP) { - if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } - - if (info->interpolation == INTERPOLATION_CLOSEST) { - /* Closest interpolation. */ - int ix, iy; - svm_image_texture_frac(x * info->width, &ix); - svm_image_texture_frac(y * info->height, &iy); - - return svm_image_texture_read_2d(kg, id, NULL, ix, iy); - } - else if (info->interpolation == INTERPOLATION_LINEAR) { - /* Bilinear interpolation. */ - int ix, iy; - float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix); - float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy); - - float4 r; - r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(kg, id, NULL, ix, iy); - r += (1.0f - ty) * tx * svm_image_texture_read_2d(kg, id, NULL, ix + 1, iy); - r += ty * (1.0f - tx) * svm_image_texture_read_2d(kg, id, NULL, ix, iy + 1); - r += ty * tx * svm_image_texture_read_2d(kg, id, NULL, ix + 1, iy + 1); - return r; - } - else { - /* Bicubic interpolation. */ - int ix, iy; - float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix); - float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy); - - float u[4], v[4]; - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - - float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - for (int y = 0; y < 4; y++) { - for (int x = 0; x < 4; x++) { - float weight = u[x] * v[y]; - r += weight * svm_image_texture_read_2d(kg, id, NULL, ix + x - 1, iy + y - 1); - } - } - return r; - } -} - -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float3 P, int interp) -{ - const ccl_global TextureInfo *info = kernel_tex_info(kg, id); - - if (info->use_transform_3d) { - Transform tfm = info->transform_3d; - P = transform_point(&tfm, P); - } - - float x = P.x; - float y = P.y; - float z = P.z; - - uint interpolation = (interp == INTERPOLATION_NONE) ? info->interpolation : interp; - -#ifdef WITH_NANOVDB - cnanovdb_readaccessor acc; - if (info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT || - info->data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { - ccl_global cnanovdb_griddata *grid = - (ccl_global cnanovdb_griddata *)(kg->buffers[info->cl_buffer] + info->data); - cnanovdb_readaccessor_init(&acc, cnanovdb_treedata_rootF(cnanovdb_griddata_tree(grid))); - } - else { - if (info->extension == EXTENSION_CLIP) { - if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } - - x *= info->width; - y *= info->height; - z *= info->depth; - } -# define NANOVDB_ACCESS_POINTER &acc -#else -# define NANOVDB_ACCESS_POINTER NULL -#endif - - if (interpolation == INTERPOLATION_CLOSEST) { - /* Closest interpolation. */ - int ix, iy, iz; - svm_image_texture_frac(x, &ix); - svm_image_texture_frac(y, &iy); - svm_image_texture_frac(z, &iz); - - return svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix, iy, iz); - } - else if (interpolation == INTERPOLATION_LINEAR) { - /* Trilinear interpolation. */ - int ix, iy, iz; - float tx = svm_image_texture_frac(x - 0.5f, &ix); - float ty = svm_image_texture_frac(y - 0.5f, &iy); - float tz = svm_image_texture_frac(z - 0.5f, &iz); - - float4 r; - r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * - svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix, iy, iz); - r += (1.0f - tz) * (1.0f - ty) * tx * - svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix + 1, iy, iz); - r += (1.0f - tz) * ty * (1.0f - tx) * - svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix, iy + 1, iz); - r += (1.0f - tz) * ty * tx * - svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix + 1, iy + 1, iz); - - r += tz * (1.0f - ty) * (1.0f - tx) * - svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix, iy, iz + 1); - r += tz * (1.0f - ty) * tx * - svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix + 1, iy, iz + 1); - r += tz * ty * (1.0f - tx) * - svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix, iy + 1, iz + 1); - r += tz * ty * tx * - svm_image_texture_read_3d(kg, id, NANOVDB_ACCESS_POINTER, ix + 1, iy + 1, iz + 1); - return r; - } - else { - /* Tricubic interpolation. */ - int ix, iy, iz; - float tx = svm_image_texture_frac(x - 0.5f, &ix); - float ty = svm_image_texture_frac(y - 0.5f, &iy); - float tz = svm_image_texture_frac(z - 0.5f, &iz); - - float u[4], v[4], w[4]; - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); - - float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - for (int z = 0; z < 4; z++) { - for (int y = 0; y < 4; y++) { - for (int x = 0; x < 4; x++) { - float weight = u[x] * v[y] * w[z]; - r += weight * svm_image_texture_read_3d( - kg, id, NANOVDB_ACCESS_POINTER, ix + x - 1, iy + y - 1, iz + z - 1); - } - } - } - return r; - } -#undef NANOVDB_ACCESS_POINTER -} - -#undef SET_CUBIC_SPLINE_WEIGHTS diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl deleted file mode 100644 index fa210e747c0..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_path_init.h" - -#define KERNEL_NAME path_init -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl deleted file mode 100644 index 68ee6f1d536..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_queue_enqueue.h" - -#define KERNEL_NAME queue_enqueue -#define LOCALS_TYPE QueueEnqueueLocals -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME -#undef LOCALS_TYPE - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl deleted file mode 100644 index 10d09377ba9..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_scene_intersect.h" - -#define KERNEL_NAME scene_intersect -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl deleted file mode 100644 index 40eaa561863..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_shader_eval.h" - -#define KERNEL_NAME shader_eval -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl deleted file mode 100644 index 8c36100f762..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_shader_setup.h" - -#define KERNEL_NAME shader_setup -#define LOCALS_TYPE unsigned int -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME -#undef LOCALS_TYPE - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl deleted file mode 100644 index bcacaa4a054..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_shader_sort.h" - -__attribute__((reqd_work_group_size(64, 1, 1))) -#define KERNEL_NAME shader_sort -#define LOCALS_TYPE ShaderSortLocals -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME -#undef LOCALS_TYPE - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl deleted file mode 100644 index 8de250a375c..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_shadow_blocked_ao.h" - -#define KERNEL_NAME shadow_blocked_ao -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl deleted file mode 100644 index 29da77022ed..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_shadow_blocked_dl.h" - -#define KERNEL_NAME shadow_blocked_dl -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl deleted file mode 100644 index c3b7b09460a..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" // PRECOMPILED -#include "kernel/split/kernel_split_common.h" // PRECOMPILED - -#include "kernel/kernels/opencl/kernel_data_init.cl" -#include "kernel/kernels/opencl/kernel_path_init.cl" -#include "kernel/kernels/opencl/kernel_state_buffer_size.cl" -#include "kernel/kernels/opencl/kernel_scene_intersect.cl" -#include "kernel/kernels/opencl/kernel_queue_enqueue.cl" -#include "kernel/kernels/opencl/kernel_shader_setup.cl" -#include "kernel/kernels/opencl/kernel_shader_sort.cl" -#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl" -#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl" -#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl" -#include "kernel/kernels/opencl/kernel_buffer_update.cl" -#include "kernel/kernels/opencl/kernel_adaptive_stopping.cl" -#include "kernel/kernels/opencl/kernel_adaptive_filter_x.cl" -#include "kernel/kernels/opencl/kernel_adaptive_filter_y.cl" -#include "kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl" diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h deleted file mode 100644 index e123b4cd6ec..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#define KERNEL_NAME_JOIN(a, b) a##_##b -#define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b) - -__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, - KERNEL_NAME)(ccl_global char *kg_global, - ccl_constant KernelData *data, - - ccl_global void *split_data_buffer, - ccl_global char *ray_state, - - KERNEL_BUFFER_PARAMS, - - ccl_global int *queue_index, - ccl_global char *use_queues_flag, - ccl_global unsigned int *work_pools, - ccl_global float *buffer) -{ -#ifdef LOCALS_TYPE - ccl_local LOCALS_TYPE locals; -#endif - - KernelGlobals *kg = (KernelGlobals *)kg_global; - - if (ccl_local_id(0) + ccl_local_id(1) == 0) { - kg->data = data; - - kernel_split_params.queue_index = queue_index; - kernel_split_params.use_queues_flag = use_queues_flag; - kernel_split_params.work_pools = work_pools; - kernel_split_params.tile.buffer = buffer; - - split_data_init(kg, - &kernel_split_state, - ccl_global_size(0) * ccl_global_size(1), - split_data_buffer, - ray_state); - } - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - - KERNEL_NAME_EVAL(kernel, KERNEL_NAME) - (kg -#ifdef LOCALS_TYPE - , - &locals -#endif - ); -} - -#undef KERNEL_NAME_JOIN -#undef KERNEL_NAME_EVAL diff --git a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl deleted file mode 100644 index c10ecc426c6..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" - -__kernel void kernel_ocl_path_trace_state_buffer_size( - ccl_global char *kg, - ccl_constant KernelData *data, - uint num_threads, - ccl_global uint64_t *size) -{ - ((KernelGlobals*)kg)->data = data; - *size = split_data_buffer_size((KernelGlobals*)kg, num_threads); -} - diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl deleted file mode 100644 index 2b3be38df84..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/kernel_compat_opencl.h" -#include "kernel/split/kernel_split_common.h" -#include "kernel/split/kernel_subsurface_scatter.h" - -#define KERNEL_NAME subsurface_scatter -#include "kernel/kernels/opencl/kernel_split_function.h" -#undef KERNEL_NAME - diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu b/intern/cycles/kernel/kernels/optix/kernel_optix.cu deleted file mode 100644 index 7f609eab474..00000000000 --- a/intern/cycles/kernel/kernels/optix/kernel_optix.cu +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Copyright 2019, NVIDIA Corporation. - * Copyright 2019, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// clang-format off -#include "kernel/kernel_compat_optix.h" -#include "util/util_atomic.h" -#include "kernel/kernel_types.h" -#include "kernel/kernel_globals.h" -#include "../cuda/kernel_cuda_image.h" // Texture lookup uses normal CUDA intrinsics - -#include "kernel/kernel_path.h" -#include "kernel/kernel_bake.h" -// clang-format on - -template<typename T> ccl_device_forceinline T *get_payload_ptr_0() -{ - return (T *)(((uint64_t)optixGetPayload_1() << 32) | optixGetPayload_0()); -} -template<typename T> ccl_device_forceinline T *get_payload_ptr_2() -{ - return (T *)(((uint64_t)optixGetPayload_3() << 32) | optixGetPayload_2()); -} - -template<bool always = false> ccl_device_forceinline uint get_object_id() -{ -#ifdef __OBJECT_MOTION__ - // Always get the the instance ID from the TLAS - // There might be a motion transform node between TLAS and BLAS which does not have one - uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0)); -#else - uint object = optixGetInstanceId(); -#endif - // Choose between always returning object ID or only for instances - if (always || (object & 1) == 0) - // Can just remove the low bit since instance always contains object ID - return object >> 1; - else - // Set to OBJECT_NONE if this is not an instanced object - return OBJECT_NONE; -} - -extern "C" __global__ void __raygen__kernel_optix_path_trace() -{ - KernelGlobals kg; // Allocate stack storage for common data - - const uint3 launch_index = optixGetLaunchIndex(); - // Keep threads for same pixel together to improve occupancy of warps - uint pixel_offset = launch_index.x / __params.tile.num_samples; - uint sample_offset = launch_index.x % __params.tile.num_samples; - - kernel_path_trace(&kg, - __params.tile.buffer, - __params.tile.start_sample + sample_offset, - __params.tile.x + pixel_offset, - __params.tile.y + launch_index.y, - __params.tile.offset, - __params.tile.stride); -} - -#ifdef __BAKING__ -extern "C" __global__ void __raygen__kernel_optix_bake() -{ - KernelGlobals kg; - const ShaderParams &p = __params.shader; - kernel_bake_evaluate(&kg, - p.input, - p.output, - (ShaderEvalType)p.type, - p.filter, - p.sx + optixGetLaunchIndex().x, - p.offset, - p.sample); -} -#endif - -extern "C" __global__ void __raygen__kernel_optix_displace() -{ - KernelGlobals kg; - const ShaderParams &p = __params.shader; - kernel_displace_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x); -} - -extern "C" __global__ void __raygen__kernel_optix_background() -{ - KernelGlobals kg; - const ShaderParams &p = __params.shader; - kernel_background_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x); -} - -extern "C" __global__ void __miss__kernel_optix_miss() -{ - // 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss - optixSetPayload_0(__float_as_uint(optixGetRayTmax())); - optixSetPayload_5(PRIMITIVE_NONE); -} - -extern "C" __global__ void __anyhit__kernel_optix_local_hit() -{ -#ifdef __BVH_LOCAL__ - const uint object = get_object_id<true>(); - if (object != optixGetPayload_4() /* local_object */) { - // Only intersect with matching object - return optixIgnoreIntersection(); - } - - const uint max_hits = optixGetPayload_5(); - if (max_hits == 0) { - // Special case for when no hit information is requested, just report that something was hit - optixSetPayload_5(true); - return optixTerminateRay(); - } - - int hit = 0; - uint *const lcg_state = get_payload_ptr_0<uint>(); - LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>(); - - if (lcg_state) { - for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { - if (optixGetRayTmax() == local_isect->hits[i].t) { - return optixIgnoreIntersection(); - } - } - - hit = local_isect->num_hits++; - - if (local_isect->num_hits > max_hits) { - hit = lcg_step_uint(lcg_state) % local_isect->num_hits; - if (hit >= max_hits) { - return optixIgnoreIntersection(); - } - } - } - else { - if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) { - // Record closest intersection only - // Do not terminate ray here, since there is no guarantee about distance ordering in any-hit - return optixIgnoreIntersection(); - } - - local_isect->num_hits = 1; - } - - Intersection *isect = &local_isect->hits[hit]; - isect->t = optixGetRayTmax(); - isect->prim = optixGetPrimitiveIndex(); - isect->object = get_object_id(); - isect->type = kernel_tex_fetch(__prim_type, isect->prim); - - const float2 barycentrics = optixGetTriangleBarycentrics(); - isect->u = 1.0f - barycentrics.y - barycentrics.x; - isect->v = barycentrics.x; - - // Record geometric normal - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)); - const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)); - const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); - local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); - - // Continue tracing (without this the trace call would return after the first hit) - optixIgnoreIntersection(); -#endif -} - -extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() -{ -#ifdef __SHADOW_RECORD_ALL__ - const uint prim = optixGetPrimitiveIndex(); -# ifdef __VISIBILITY_FLAG__ - const uint visibility = optixGetPayload_4(); - if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { - return optixIgnoreIntersection(); - } -# endif - - // Offset into array with num_hits - Intersection *const isect = get_payload_ptr_0<Intersection>() + optixGetPayload_2(); - isect->t = optixGetRayTmax(); - isect->prim = prim; - isect->object = get_object_id(); - isect->type = kernel_tex_fetch(__prim_type, prim); - - if (optixIsTriangleHit()) { - const float2 barycentrics = optixGetTriangleBarycentrics(); - isect->u = 1.0f - barycentrics.y - barycentrics.x; - isect->v = barycentrics.x; - } -# ifdef __HAIR__ - else { - const float u = __uint_as_float(optixGetAttribute_0()); - isect->u = u; - isect->v = __uint_as_float(optixGetAttribute_1()); - - // Filter out curve endcaps - if (u == 0.0f || u == 1.0f) { - return optixIgnoreIntersection(); - } - } -# endif - -# ifdef __TRANSPARENT_SHADOWS__ - // Detect if this surface has a shader with transparent shadows - if (!shader_transparent_shadow(NULL, isect) || optixGetPayload_2() >= optixGetPayload_3()) { -# endif - // This is an opaque hit or the hit limit has been reached, abort traversal - optixSetPayload_5(true); - return optixTerminateRay(); -# ifdef __TRANSPARENT_SHADOWS__ - } - - optixSetPayload_2(optixGetPayload_2() + 1); // num_hits++ - - // Continue tracing - optixIgnoreIntersection(); -# endif -#endif -} - -extern "C" __global__ void __anyhit__kernel_optix_visibility_test() -{ - uint visibility = optixGetPayload_4(); -#ifdef __VISIBILITY_FLAG__ - const uint prim = optixGetPrimitiveIndex(); - if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { - return optixIgnoreIntersection(); - } -#endif - -#ifdef __HAIR__ - if (!optixIsTriangleHit()) { - // Filter out curve endcaps - const float u = __uint_as_float(optixGetAttribute_0()); - if (u == 0.0f || u == 1.0f) { - return optixIgnoreIntersection(); - } - } -#endif - - // Shadow ray early termination - if (visibility & PATH_RAY_SHADOW_OPAQUE) { - return optixTerminateRay(); - } -} - -extern "C" __global__ void __closesthit__kernel_optix_hit() -{ - optixSetPayload_0(__float_as_uint(optixGetRayTmax())); // Intersection distance - optixSetPayload_3(optixGetPrimitiveIndex()); - optixSetPayload_4(get_object_id()); - // Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index - optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex())); - - if (optixIsTriangleHit()) { - const float2 barycentrics = optixGetTriangleBarycentrics(); - optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x)); - optixSetPayload_2(__float_as_uint(barycentrics.x)); - } - else { - optixSetPayload_1(optixGetAttribute_0()); // Same as 'optixGetCurveParameter()' - optixSetPayload_2(optixGetAttribute_1()); - } -} - -#ifdef __HAIR__ -ccl_device_inline void optix_intersection_curve(const uint prim, const uint type) -{ - const uint object = get_object_id<true>(); - const uint visibility = optixGetPayload_4(); - - float3 P = optixGetObjectRayOrigin(); - float3 dir = optixGetObjectRayDirection(); - - // The direction is not normalized by default, but the curve intersection routine expects that - float len; - dir = normalize_len(dir, &len); - -# ifdef __OBJECT_MOTION__ - const float time = optixGetRayTime(); -# else - const float time = 0.0f; -# endif - - Intersection isect; - isect.t = optixGetRayTmax(); - // Transform maximum distance into object space - if (isect.t != FLT_MAX) - isect.t *= len; - - if (curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type)) { - optixReportIntersection(isect.t / len, - type & PRIMITIVE_ALL, - __float_as_int(isect.u), // Attribute_0 - __float_as_int(isect.v)); // Attribute_1 - } -} - -extern "C" __global__ void __intersection__curve_ribbon() -{ - const uint prim = optixGetPrimitiveIndex(); - const uint type = kernel_tex_fetch(__prim_type, prim); - - if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { - optix_intersection_curve(prim, type); - } -} - -extern "C" __global__ void __intersection__curve_all() -{ - const uint prim = optixGetPrimitiveIndex(); - const uint type = kernel_tex_fetch(__prim_type, prim); - optix_intersection_curve(prim, type); -} -#endif |