diff options
author | Campbell Barton <ideasman42@gmail.com> | 2019-04-17 07:17:24 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2019-04-17 07:21:24 +0300 |
commit | e12c08e8d170b7ca40f204a5b0423c23a9fbc2c1 (patch) | |
tree | 8cf3453d12edb177a218ef8009357518ec6cab6a /intern/cycles/kernel/kernels/cpu | |
parent | b3dabc200a4b0399ec6b81f2ff2730d07b44fcaa (diff) |
ClangFormat: apply to source, most of intern
Apply clang format as proposed in T53211.
For details on usage and instructions for migrating branches
without conflicts, see:
https://wiki.blender.org/wiki/Tools/ClangFormat
Diffstat (limited to 'intern/cycles/kernel/kernels/cpu')
23 files changed, 750 insertions, 789 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter.cpp b/intern/cycles/kernel/kernels/cpu/filter.cpp index 2ff1a392dc3..145a6b6ac40 100644 --- a/intern/cycles/kernel/kernels/cpu/filter.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter.cpp @@ -53,7 +53,7 @@ /* quiet unused define warnings */ #if defined(__KERNEL_SSE2__) - /* do nothing */ +/* do nothing */ #endif #include "kernel/filter/filter.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp index 4a9e6047ecf..1d68214c8e7 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp @@ -32,7 +32,7 @@ # define __KERNEL_SSE41__ # define __KERNEL_AVX__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_avx diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp index c22ec576254..b6709fbc529 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp @@ -33,7 +33,7 @@ # define __KERNEL_AVX__ # define __KERNEL_AVX2__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_avx2 diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h index 02c85562db8..1423b182ab8 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -25,7 +25,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, float *sampleV, float *sampleVV, float *bufferV, - int* prefilter_rect, + int *prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset); @@ -38,7 +38,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, float *mean, float *variance, float scale, - int* prefilter_rect, + int *prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset); @@ -49,9 +49,10 @@ void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample, float *from, float *buffer, int out_offset, - int* prefilter_rect); + int *prefilter_rect); -void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, +void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, + int y, ccl_global float *image, ccl_global float *variance, ccl_global float *depth, @@ -59,22 +60,17 @@ void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, int *rect, int pass_stride); -void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y, - float *mean, - float *variance, - float *a, - float *b, - int* prefilter_rect, - int r); +void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)( + int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r); -void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, +void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer, TileInfo *tiles, int x, int y, int storage_ofs, float *transform, int *rank, - int* rect, + int *rect, int pass_stride, int frame_stride, bool use_time, @@ -87,24 +83,18 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, float *variance_image, float *scale_image, float *difference_image, - int* rect, + int *rect, int stride, int channel_offset, int frame_offset, float a, float k_2); -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image, - float *out_image, - int* rect, - int stride, - int f); +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)( + float *difference_image, float *out_image, int *rect, int stride, int f); -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image, - float *out_image, - int* rect, - int stride, - int f); +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)( + float *difference_image, float *out_image, int *rect, int stride, int f); void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, int dy, @@ -113,7 +103,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, float *temp_image, float *out_image, float *accum_image, - int* rect, + int *rect, int channel_offset, int stride, int f); @@ -137,7 +127,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image, float *accum_image, - int* rect, + int *rect, int stride); void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x, diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h index c29505880cb..3d4cb87e104 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -25,12 +25,12 @@ #include "kernel/filter/filter_kernel.h" #ifdef KERNEL_STUB -# define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!")) +# define STUB_ASSERT(arch, name) \ + assert(!(#name " kernel stub for architecture " #arch " was called!")) #endif CCL_NAMESPACE_BEGIN - /* Denoise filter */ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, @@ -42,23 +42,25 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, float *sampleVariance, float *sampleVarianceV, float *bufferVariance, - int* prefilter_rect, + int *prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow); + STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow); #else - kernel_filter_divide_shadow(sample, tile_info, - x, y, - unfilteredA, - unfilteredB, - sampleVariance, - sampleVarianceV, - bufferVariance, - load_int4(prefilter_rect), - buffer_pass_stride, - buffer_denoising_offset); + kernel_filter_divide_shadow(sample, + tile_info, + x, + y, + unfilteredA, + unfilteredB, + sampleVariance, + sampleVarianceV, + bufferVariance, + load_int4(prefilter_rect), + buffer_pass_stride, + buffer_denoising_offset); #endif } @@ -68,23 +70,28 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, int v_offset, int x, int y, - float *mean, float *variance, + float *mean, + float *variance, float scale, - int* prefilter_rect, + int *prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_get_feature); + STUB_ASSERT(KERNEL_ARCH, filter_get_feature); #else - kernel_filter_get_feature(sample, tile_info, - m_offset, v_offset, - x, y, - mean, variance, - scale, - load_int4(prefilter_rect), - buffer_pass_stride, - buffer_denoising_offset); + kernel_filter_get_feature(sample, + tile_info, + m_offset, + v_offset, + x, + y, + mean, + variance, + scale, + load_int4(prefilter_rect), + buffer_pass_stride, + buffer_denoising_offset); #endif } @@ -95,16 +102,18 @@ void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample, float *from, float *buffer, int out_offset, - int* prefilter_rect) + int *prefilter_rect) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_write_feature); + STUB_ASSERT(KERNEL_ARCH, filter_write_feature); #else - kernel_filter_write_feature(sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect)); + kernel_filter_write_feature( + sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect)); #endif } -void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, +void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, + int y, ccl_global float *image, ccl_global float *variance, ccl_global float *depth, @@ -113,35 +122,31 @@ void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, int pass_stride) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers); + STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers); #else - kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride); + kernel_filter_detect_outliers( + x, y, image, variance, depth, output, load_int4(rect), pass_stride); #endif } -void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y, - float *mean, - float *variance, - float *a, - float *b, - int* prefilter_rect, - int r) +void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)( + int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_combine_halves); + STUB_ASSERT(KERNEL_ARCH, filter_combine_halves); #else - kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r); + kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r); #endif } -void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, +void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer, TileInfo *tile_info, int x, int y, int storage_ofs, float *transform, int *rank, - int* prefilter_rect, + int *prefilter_rect, int pass_stride, int frame_stride, bool use_time, @@ -149,21 +154,22 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, float pca_threshold) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_construct_transform); + STUB_ASSERT(KERNEL_ARCH, filter_construct_transform); #else - rank += storage_ofs; - transform += storage_ofs*TRANSFORM_SIZE; - kernel_filter_construct_transform(buffer, - tile_info, - x, y, - load_int4(prefilter_rect), - pass_stride, - frame_stride, - use_time, - transform, - rank, - radius, - pca_threshold); + rank += storage_ofs; + transform += storage_ofs * TRANSFORM_SIZE; + kernel_filter_construct_transform(buffer, + tile_info, + x, + y, + load_int4(prefilter_rect), + pass_stride, + frame_stride, + use_time, + transform, + rank, + radius, + pca_threshold); #endif } @@ -181,44 +187,40 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, float k_2) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference); #else - kernel_filter_nlm_calc_difference(dx, dy, - weight_image, - variance_image, - scale_image, - difference_image, - load_int4(rect), - stride, - channel_offset, - frame_offset, - a, k_2); + kernel_filter_nlm_calc_difference(dx, + dy, + weight_image, + variance_image, + scale_image, + difference_image, + load_int4(rect), + stride, + channel_offset, + frame_offset, + a, + k_2); #endif } -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image, - float *out_image, - int *rect, - int stride, - int f) +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)( + float *difference_image, float *out_image, int *rect, int stride, int f) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur); #else - kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f); + kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f); #endif } -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image, - float *out_image, - int *rect, - int stride, - int f) +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)( + float *difference_image, float *out_image, int *rect, int stride, int f) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight); #else - kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f); + kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f); #endif } @@ -235,17 +237,19 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, int f) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output); #else - kernel_filter_nlm_update_output(dx, dy, - difference_image, - image, - temp_image, - out_image, - accum_image, - load_int4(rect), - channel_offset, - stride, f); + kernel_filter_nlm_update_output(dx, + dy, + difference_image, + image, + temp_image, + out_image, + accum_image, + load_int4(rect), + channel_offset, + stride, + f); #endif } @@ -267,19 +271,24 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, bool use_time) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); #else - kernel_filter_nlm_construct_gramian(dx, dy, t, - difference_image, - buffer, - transform, rank, - XtWX, XtWY, - load_int4(rect), - load_int4(filter_window), - stride, f, - pass_stride, - frame_offset, - use_time); + kernel_filter_nlm_construct_gramian(dx, + dy, + t, + difference_image, + buffer, + transform, + rank, + XtWX, + XtWY, + load_int4(rect), + load_int4(filter_window), + stride, + f, + pass_stride, + frame_offset, + use_time); #endif } @@ -289,9 +298,9 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image, int stride) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize); #else - kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride); + kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride); #endif } @@ -306,12 +315,12 @@ void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x, int sample) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_finalize); + STUB_ASSERT(KERNEL_ARCH, filter_finalize); #else - XtWX += storage_ofs*XTWX_SIZE; - XtWY += storage_ofs*XTWY_SIZE; - rank += storage_ofs; - kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample); + XtWX += storage_ofs * XTWX_SIZE; + XtWY += storage_ofs * XTWY_SIZE; + rank += storage_ofs; + kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample); #endif } diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp index f7c9935f1d0..6c6c3e78696 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp @@ -27,7 +27,7 @@ # if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) # define __KERNEL_SSE2__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_sse2 diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp index 070b95a3505..e2243000331 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp @@ -29,7 +29,7 @@ # define __KERNEL_SSE3__ # define __KERNEL_SSSE3__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_sse3 diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp index 254025be4e2..068889365e3 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp @@ -31,7 +31,7 @@ # define __KERNEL_SSSE3__ # define __KERNEL_SSE41__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_sse41 diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index de487f6123f..f2146302a27 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -53,7 +53,7 @@ /* quiet unused define warnings */ #if defined(__KERNEL_SSE2__) - /* do nothing */ +/* do nothing */ #endif #include "kernel/kernel.h" @@ -66,29 +66,27 @@ CCL_NAMESPACE_BEGIN void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size) { - if(strcmp(name, "__data") == 0) - memcpy(&kg->__data, host, size); - else - assert(0); + if (strcmp(name, "__data") == 0) + memcpy(&kg->__data, host, size); + else + assert(0); } -void kernel_tex_copy(KernelGlobals *kg, - const char *name, - void *mem, - size_t size) +void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size) { - if(0) { - } + if (0) { + } #define KERNEL_TEX(type, tname) \ - else if(strcmp(name, #tname) == 0) { \ - kg->tname.data = (type*)mem; \ - kg->tname.width = size; \ - } + else if (strcmp(name, #tname) == 0) \ + { \ + kg->tname.data = (type *)mem; \ + kg->tname.width = size; \ + } #include "kernel/kernel_textures.h" - else { - assert(0); - } + else { + assert(0); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp index a645fb4d8dd..0656fc9dd00 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp @@ -32,7 +32,7 @@ # define __KERNEL_SSE41__ # define __KERNEL_AVX__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp index 6bbb87727b9..5baafdc699e 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp @@ -33,7 +33,7 @@ # define __KERNEL_AVX__ # define __KERNEL_AVX2__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx2 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 6bdb8546a24..f5d981fb71a 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -16,25 +16,24 @@ /* Templated common declaration part of all CPU kernels. */ -void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, - float *buffer, - int sample, - int x, int y, - int offset, - int stride); +void KERNEL_FUNCTION_FULL_NAME(path_trace)( + KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride); void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, - int x, int y, - int offset, int stride); + int x, + int y, + int offset, + int stride); void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, - int x, int y, + int x, + int y, int offset, int stride); @@ -49,24 +48,28 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, /* Split kernels */ -void KERNEL_FUNCTION_FULL_NAME(data_init)( - KernelGlobals *kg, - ccl_constant KernelData *data, - ccl_global void *split_data_buffer, - int num_elements, - ccl_global char *ray_state, - int start_sample, - int end_sample, - int sx, int sy, int sw, int sh, int offset, int stride, - ccl_global int *Queue_index, - int queuesize, - ccl_global char *use_queues_flag, - ccl_global unsigned int *work_pool_wgs, - unsigned int num_samples, - ccl_global float *buffer); +void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg, + ccl_constant KernelData *data, + ccl_global void *split_data_buffer, + int num_elements, + ccl_global char *ray_state, + int start_sample, + int end_sample, + int sx, + int sy, + int sw, + int sh, + int offset, + int stride, + ccl_global int *Queue_index, + int queuesize, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, + ccl_global float *buffer); #define DECLARE_SPLIT_KERNEL_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data); + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * data); DECLARE_SPLIT_KERNEL_FUNCTION(path_init) DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect) diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index ae4fd85780d..4289e2bbb85 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -19,523 +19,508 @@ CCL_NAMESPACE_BEGIN -template<typename T> struct TextureInterpolator { +template<typename T> struct TextureInterpolator { #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ - { \ - u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ - u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ - u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ - u[3] = (1.0f / 6.0f) * t * t * t; \ - } (void) 0 - - static ccl_always_inline float4 read(float4 r) - { - return r; - } - - static ccl_always_inline float4 read(uchar4 r) - { - float f = 1.0f / 255.0f; - return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); - } - - static ccl_always_inline float4 read(uchar r) - { - float f = r * (1.0f / 255.0f); - return make_float4(f, f, f, 1.0f); - } - - static ccl_always_inline float4 read(float r) - { - /* TODO(dingto): Optimize this, so interpolation - * happens on float instead of float4 */ - return make_float4(r, r, r, 1.0f); - } - - static ccl_always_inline float4 read(half4 r) - { - return half4_to_float4(r); - } - - static ccl_always_inline float4 read(half r) - { - float f = half_to_float(r); - return make_float4(f, f, f, 1.0f); - } - - static ccl_always_inline float4 read(uint16_t r) - { - float f = r*(1.0f/65535.0f); - return make_float4(f, f, f, 1.0f); - } - - static ccl_always_inline float4 read(ushort4 r) - { - float f = 1.0f/65535.0f; - return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); - } - - static ccl_always_inline float4 read(const T *data, - int x, int y, - int width, int height) - { - if(x < 0 || y < 0 || x >= width || y >= height) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return read(data[y * width + x]); - } - - static ccl_always_inline int wrap_periodic(int x, int width) - { - x %= width; - if(x < 0) - x += width; - return x; - } - - static ccl_always_inline int wrap_clamp(int x, int width) - { - return clamp(x, 0, width-1); - } - - static ccl_always_inline float frac(float x, int *ix) - { - int i = float_to_int(x) - ((x < 0.0f)? 1: 0); - *ix = i; - return x - (float)i; - } - - /* ******** 2D interpolation ******** */ - - static ccl_always_inline float4 interp_closest(const TextureInfo& info, - float x, float y) - { - const T *data = (const T*)info.data; - const int width = info.width; - const int height = info.height; - int ix, iy; - frac(x*(float)width, &ix); - frac(y*(float)height, &iy); - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return read(data[ix + iy*width]); - } - - static ccl_always_inline float4 interp_linear(const TextureInfo& info, - float x, float y) - { - const T *data = (const T*)info.data; - const int width = info.width; - const int height = info.height; - int ix, iy, nix, niy; - const float tx = frac(x*(float)width - 0.5f, &ix); - const float ty = frac(y*(float)height - 0.5f, &iy); - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - break; - case EXTENSION_CLIP: - nix = ix + 1; - niy = iy + 1; - break; - case EXTENSION_EXTEND: - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) + - (1.0f - ty) * tx * read(data, nix, iy, width, height) + - ty * (1.0f - tx) * read(data, ix, niy, width, height) + - ty * tx * read(data, nix, niy, width, height); - } - - static ccl_always_inline float4 interp_cubic(const TextureInfo& info, - float x, float y) - { - const T *data = (const T*)info.data; - const int width = info.width; - const int height = info.height; - int ix, iy, nix, niy; - const float tx = frac(x*(float)width - 0.5f, &ix); - const float ty = frac(y*(float)height - 0.5f, &iy); - int pix, piy, nnix, nniy; - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - pix = wrap_periodic(ix-1, width); - piy = wrap_periodic(iy-1, height); - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - nnix = wrap_periodic(ix+2, width); - nniy = wrap_periodic(iy+2, height); - break; - case EXTENSION_CLIP: - pix = ix - 1; - piy = iy - 1; - nix = ix + 1; - niy = iy + 1; - nnix = ix + 2; - nniy = iy + 2; - break; - case EXTENSION_EXTEND: - pix = wrap_clamp(ix-1, width); - piy = wrap_clamp(iy-1, height); - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - nnix = wrap_clamp(ix+2, width); - nniy = wrap_clamp(iy+2, height); - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {piy, iy, niy, nniy}; - float u[4], v[4]; - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. - */ + { \ + u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \ + u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \ + u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \ + u[3] = (1.0f / 6.0f) * t * t * t; \ + } \ + (void)0 + + static ccl_always_inline float4 read(float4 r) + { + return r; + } + + static ccl_always_inline float4 read(uchar4 r) + { + float f = 1.0f / 255.0f; + return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); + } + + static ccl_always_inline float4 read(uchar r) + { + float f = r * (1.0f / 255.0f); + return make_float4(f, f, f, 1.0f); + } + + static ccl_always_inline float4 read(float r) + { + /* TODO(dingto): Optimize this, so interpolation + * happens on float instead of float4 */ + return make_float4(r, r, r, 1.0f); + } + + static ccl_always_inline float4 read(half4 r) + { + return half4_to_float4(r); + } + + static ccl_always_inline float4 read(half r) + { + float f = half_to_float(r); + return make_float4(f, f, f, 1.0f); + } + + static ccl_always_inline float4 read(uint16_t r) + { + float f = r * (1.0f / 65535.0f); + return make_float4(f, f, f, 1.0f); + } + + static ccl_always_inline float4 read(ushort4 r) + { + float f = 1.0f / 65535.0f; + return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); + } + + static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height) + { + if (x < 0 || y < 0 || x >= width || y >= height) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + return read(data[y * width + x]); + } + + static ccl_always_inline int wrap_periodic(int x, int width) + { + x %= width; + if (x < 0) + x += width; + return x; + } + + static ccl_always_inline int wrap_clamp(int x, int width) + { + return clamp(x, 0, width - 1); + } + + static ccl_always_inline float frac(float x, int *ix) + { + int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0); + *ix = i; + return x - (float)i; + } + + /* ******** 2D interpolation ******** */ + + static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y) + { + const T *data = (const T *)info.data; + const int width = info.width; + const int height = info.height; + int ix, iy; + frac(x * (float)width, &ix); + frac(y * (float)height, &iy); + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + return read(data[ix + iy * width]); + } + + static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y) + { + const T *data = (const T *)info.data; + const int width = info.width; + const int height = info.height; + int ix, iy, nix, niy; + const float tx = frac(x * (float)width - 0.5f, &ix); + const float ty = frac(y * (float)height - 0.5f, &iy); + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + nix = wrap_periodic(ix + 1, width); + niy = wrap_periodic(iy + 1, height); + break; + case EXTENSION_CLIP: + nix = ix + 1; + niy = iy + 1; + break; + case EXTENSION_EXTEND: + nix = wrap_clamp(ix + 1, width); + niy = wrap_clamp(iy + 1, height); + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) + + (1.0f - ty) * tx * read(data, nix, iy, width, height) + + ty * (1.0f - tx) * read(data, ix, niy, width, height) + + ty * tx * read(data, nix, niy, width, height); + } + + static ccl_always_inline float4 interp_cubic(const TextureInfo &info, float x, float y) + { + const T *data = (const T *)info.data; + const int width = info.width; + const int height = info.height; + int ix, iy, nix, niy; + const float tx = frac(x * (float)width - 0.5f, &ix); + const float ty = frac(y * (float)height - 0.5f, &iy); + int pix, piy, nnix, nniy; + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + pix = wrap_periodic(ix - 1, width); + piy = wrap_periodic(iy - 1, height); + nix = wrap_periodic(ix + 1, width); + niy = wrap_periodic(iy + 1, height); + nnix = wrap_periodic(ix + 2, width); + nniy = wrap_periodic(iy + 2, height); + break; + case EXTENSION_CLIP: + pix = ix - 1; + piy = iy - 1; + nix = ix + 1; + niy = iy + 1; + nnix = ix + 2; + nniy = iy + 2; + break; + case EXTENSION_EXTEND: + pix = wrap_clamp(ix - 1, width); + piy = wrap_clamp(iy - 1, height); + nix = wrap_clamp(ix + 1, width); + niy = wrap_clamp(iy + 1, height); + nnix = wrap_clamp(ix + 2, width); + nniy = wrap_clamp(iy + 2, height); + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {piy, iy, niy, nniy}; + float u[4], v[4]; + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ #define DATA(x, y) (read(data, xc[x], yc[y], width, height)) #define TERM(col) \ - (v[col] * (u[0] * DATA(0, col) + \ - u[1] * DATA(1, col) + \ - u[2] * DATA(2, col) + \ - u[3] * DATA(3, col))) + (v[col] * \ + (u[0] * DATA(0, col) + u[1] * DATA(1, col) + u[2] * DATA(2, col) + u[3] * DATA(3, col))) - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); - /* Actual interpolation. */ - return TERM(0) + TERM(1) + TERM(2) + TERM(3); + /* Actual interpolation. */ + return TERM(0) + TERM(1) + TERM(2) + TERM(3); #undef TERM #undef DATA - } - - static ccl_always_inline float4 interp(const TextureInfo& info, - float x, float y) - { - if(UNLIKELY(!info.data)) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - switch(info.interpolation) { - case INTERPOLATION_CLOSEST: - return interp_closest(info, x, y); - case INTERPOLATION_LINEAR: - return interp_linear(info, x, y); - default: - return interp_cubic(info, x, y); - } - } - - /* ******** 3D interpolation ******** */ - - static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info, - float x, float y, float z) - { - int width = info.width; - int height = info.height; - int depth = info.depth; - int ix, iy, iz; - - frac(x*(float)width, &ix); - frac(y*(float)height, &iy); - frac(z*(float)depth, &iz); - - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const T *data = (const T*)info.data; - return read(data[ix + iy*width + iz*width*height]); - } - - static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info, - float x, float y, float z) - { - int width = info.width; - int height = info.height; - int depth = info.depth; - int ix, iy, iz; - int nix, niy, niz; - - float tx = frac(x*(float)width - 0.5f, &ix); - float ty = frac(y*(float)height - 0.5f, &iy); - float tz = frac(z*(float)depth - 0.5f, &iz); - - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const T *data = (const T*)info.data; - float4 r; - - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]); - r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]); - r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]); - r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]); - - r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]); - r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]); - r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]); - r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]); - - return r; - } - - /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are - * causing stack overflow issue in this function unless it is inlined. - * - * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization - * enabled. - */ + } + + static ccl_always_inline float4 interp(const TextureInfo &info, float x, float y) + { + if (UNLIKELY(!info.data)) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + switch (info.interpolation) { + case INTERPOLATION_CLOSEST: + return interp_closest(info, x, y); + case INTERPOLATION_LINEAR: + return interp_linear(info, x, y); + default: + return interp_cubic(info, x, y); + } + } + + /* ******** 3D interpolation ******** */ + + static ccl_always_inline float4 interp_3d_closest(const TextureInfo &info, + float x, + float y, + float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + + frac(x * (float)width, &ix); + frac(y * (float)height, &iy); + frac(z * (float)depth, &iz); + + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const T *data = (const T *)info.data; + return read(data[ix + iy * width + iz * width * height]); + } + + static ccl_always_inline float4 interp_3d_linear(const TextureInfo &info, + float x, + float y, + float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + int nix, niy, niz; + + float tx = frac(x * (float)width - 0.5f, &ix); + float ty = frac(y * (float)height - 0.5f, &iy); + float tz = frac(z * (float)depth - 0.5f, &iz); + + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + nix = wrap_periodic(ix + 1, width); + niy = wrap_periodic(iy + 1, height); + niz = wrap_periodic(iz + 1, depth); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + nix = wrap_clamp(ix + 1, width); + niy = wrap_clamp(iy + 1, height); + niz = wrap_clamp(iz + 1, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const T *data = (const T *)info.data; + float4 r; + + r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * + read(data[ix + iy * width + iz * width * height]); + r += (1.0f - tz) * (1.0f - ty) * tx * read(data[nix + iy * width + iz * width * height]); + r += (1.0f - tz) * ty * (1.0f - tx) * read(data[ix + niy * width + iz * width * height]); + r += (1.0f - tz) * ty * tx * read(data[nix + niy * width + iz * width * height]); + + r += tz * (1.0f - ty) * (1.0f - tx) * read(data[ix + iy * width + niz * width * height]); + r += tz * (1.0f - ty) * tx * read(data[nix + iy * width + niz * width * height]); + r += tz * ty * (1.0f - tx) * read(data[ix + niy * width + niz * width * height]); + r += tz * ty * tx * read(data[nix + niy * width + niz * width * height]); + + return r; + } + + /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are + * causing stack overflow issue in this function unless it is inlined. + * + * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization + * enabled. + */ #if defined(__GNUC__) || defined(__clang__) - static ccl_always_inline + static ccl_always_inline #else - static ccl_never_inline + static ccl_never_inline #endif - float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z) - { - int width = info.width; - int height = info.height; - int depth = info.depth; - int ix, iy, iz; - int nix, niy, niz; - /* Tricubic b-spline interpolation. */ - const float tx = frac(x*(float)width - 0.5f, &ix); - const float ty = frac(y*(float)height - 0.5f, &iy); - const float tz = frac(z*(float)depth - 0.5f, &iz); - int pix, piy, piz, nnix, nniy, nniz; - - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - pix = wrap_periodic(ix-1, width); - piy = wrap_periodic(iy-1, height); - piz = wrap_periodic(iz-1, depth); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - - nnix = wrap_periodic(ix+2, width); - nniy = wrap_periodic(iy+2, height); - nniz = wrap_periodic(iz+2, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - pix = wrap_clamp(ix-1, width); - piy = wrap_clamp(iy-1, height); - piz = wrap_clamp(iz-1, depth); - - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); - - nnix = wrap_clamp(ix+2, width); - nniy = wrap_clamp(iy+2, height); - nniz = wrap_clamp(iz+2, depth); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {width * piy, - width * iy, - width * niy, - width * nniy}; - const int zc[4] = {width * height * piz, - width * height * iz, - width * height * niz, - width * height * nniz}; - float u[4], v[4], w[4]; - - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. - */ + float4 + interp_3d_tricubic(const TextureInfo &info, float x, float y, float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + int nix, niy, niz; + /* Tricubic b-spline interpolation. */ + const float tx = frac(x * (float)width - 0.5f, &ix); + const float ty = frac(y * (float)height - 0.5f, &iy); + const float tz = frac(z * (float)depth - 0.5f, &iz); + int pix, piy, piz, nnix, nniy, nniz; + + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + pix = wrap_periodic(ix - 1, width); + piy = wrap_periodic(iy - 1, height); + piz = wrap_periodic(iz - 1, depth); + + nix = wrap_periodic(ix + 1, width); + niy = wrap_periodic(iy + 1, height); + niz = wrap_periodic(iz + 1, depth); + + nnix = wrap_periodic(ix + 2, width); + nniy = wrap_periodic(iy + 2, height); + nniz = wrap_periodic(iz + 2, depth); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + pix = wrap_clamp(ix - 1, width); + piy = wrap_clamp(iy - 1, height); + piz = wrap_clamp(iz - 1, depth); + + nix = wrap_clamp(ix + 1, width); + niy = wrap_clamp(iy + 1, height); + niz = wrap_clamp(iz + 1, depth); + + nnix = wrap_clamp(ix + 2, width); + nniy = wrap_clamp(iy + 2, height); + nniz = wrap_clamp(iz + 2, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, width * iy, width * niy, width * nniy}; + const int zc[4] = { + width * height * piz, width * height * iz, width * height * niz, width * height * nniz}; + float u[4], v[4], w[4]; + + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ #define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]])) #define COL_TERM(col, row) \ - (v[col] * (u[0] * DATA(0, col, row) + \ - u[1] * DATA(1, col, row) + \ - u[2] * DATA(2, col, row) + \ - u[3] * DATA(3, col, row))) + (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \ + u[3] * DATA(3, col, row))) #define ROW_TERM(row) \ - (w[row] * (COL_TERM(0, row) + \ - COL_TERM(1, row) + \ - COL_TERM(2, row) + \ - COL_TERM(3, row))) + (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row))) - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); - /* Actual interpolation. */ - const T *data = (const T*)info.data; - return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); + /* Actual interpolation. */ + const T *data = (const T *)info.data; + return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); #undef COL_TERM #undef ROW_TERM #undef DATA - } - - static ccl_always_inline float4 interp_3d(const TextureInfo& info, - float x, float y, float z, - InterpolationType interp) - { - if(UNLIKELY(!info.data)) - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) { - case INTERPOLATION_CLOSEST: - return interp_3d_closest(info, x, y, z); - case INTERPOLATION_LINEAR: - return interp_3d_linear(info, x, y, z); - default: - return interp_3d_tricubic(info, x, y, z); - } - } + } + + static ccl_always_inline float4 + interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp) + { + if (UNLIKELY(!info.data)) + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) { + case INTERPOLATION_CLOSEST: + return interp_3d_closest(info, x, y, z); + case INTERPOLATION_LINEAR: + return interp_3d_linear(info, x, y, z); + default: + return interp_3d_tricubic(info, x, y, z); + } + } #undef SET_CUBIC_SPLINE_WEIGHTS }; ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) { - const TextureInfo& info = kernel_tex_fetch(__texture_info, id); - - switch(kernel_tex_type(id)) { - case IMAGE_DATA_TYPE_HALF: - return TextureInterpolator<half>::interp(info, x, y); - case IMAGE_DATA_TYPE_BYTE: - return TextureInterpolator<uchar>::interp(info, x, y); - case IMAGE_DATA_TYPE_USHORT: - return TextureInterpolator<uint16_t>::interp(info, x, y); - case IMAGE_DATA_TYPE_FLOAT: - return TextureInterpolator<float>::interp(info, x, y); - case IMAGE_DATA_TYPE_HALF4: - return TextureInterpolator<half4>::interp(info, x, y); - case IMAGE_DATA_TYPE_BYTE4: - return TextureInterpolator<uchar4>::interp(info, x, y); - case IMAGE_DATA_TYPE_USHORT4: - return TextureInterpolator<ushort4>::interp(info, x, y); - case IMAGE_DATA_TYPE_FLOAT4: - return TextureInterpolator<float4>::interp(info, x, y); - default: - assert(0); - return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); - } + const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + + switch (kernel_tex_type(id)) { + case IMAGE_DATA_TYPE_HALF: + return TextureInterpolator<half>::interp(info, x, y); + case IMAGE_DATA_TYPE_BYTE: + return TextureInterpolator<uchar>::interp(info, x, y); + case IMAGE_DATA_TYPE_USHORT: + return TextureInterpolator<uint16_t>::interp(info, x, y); + case IMAGE_DATA_TYPE_FLOAT: + return TextureInterpolator<float>::interp(info, x, y); + case IMAGE_DATA_TYPE_HALF4: + return TextureInterpolator<half4>::interp(info, x, y); + case IMAGE_DATA_TYPE_BYTE4: + return TextureInterpolator<uchar4>::interp(info, x, y); + case IMAGE_DATA_TYPE_USHORT4: + return TextureInterpolator<ushort4>::interp(info, x, y); + case IMAGE_DATA_TYPE_FLOAT4: + return TextureInterpolator<float4>::interp(info, x, y); + default: + assert(0); + return make_float4( + TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); + } } -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) +ccl_device float4 kernel_tex_image_interp_3d( + KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) { - const TextureInfo& info = kernel_tex_fetch(__texture_info, id); - - switch(kernel_tex_type(id)) { - case IMAGE_DATA_TYPE_HALF: - return TextureInterpolator<half>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_BYTE: - return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_USHORT: - return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_FLOAT: - return TextureInterpolator<float>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_HALF4: - return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_BYTE4: - return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_USHORT4: - return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_FLOAT4: - return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp); - default: - assert(0); - return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); - } + const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + + switch (kernel_tex_type(id)) { + case IMAGE_DATA_TYPE_HALF: + return TextureInterpolator<half>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_BYTE: + return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_USHORT: + return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_FLOAT: + return TextureInterpolator<float>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_HALF4: + return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_BYTE4: + return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_USHORT4: + return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_FLOAT4: + return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp); + default: + assert(0); + return make_float4( + TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 759b7e4c20d..9ca3f46b5b6 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -58,14 +58,15 @@ # include "kernel/split/kernel_next_iteration_setup.h" # include "kernel/split/kernel_indirect_subsurface.h" # include "kernel/split/kernel_buffer_update.h" -# endif /* __SPLIT_KERNEL__ */ +# endif /* __SPLIT_KERNEL__ */ #else -# define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!")) +# define STUB_ASSERT(arch, name) \ + assert(!(#name " kernel stub for architecture " #arch " was called!")) # ifdef __SPLIT_KERNEL__ # include "kernel/split/kernel_data_init.h" -# endif /* __SPLIT_KERNEL__ */ -#endif /* KERNEL_STUB */ +# endif /* __SPLIT_KERNEL__ */ +#endif /* KERNEL_STUB */ CCL_NAMESPACE_BEGIN @@ -73,31 +74,22 @@ CCL_NAMESPACE_BEGIN /* Path Tracing */ -void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, - float *buffer, - int sample, - int x, int y, - int offset, - int stride) +void KERNEL_FUNCTION_FULL_NAME(path_trace)( + KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride) { -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, path_trace); -#else -# ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) { - kernel_branched_path_trace(kg, - buffer, - sample, - x, y, - offset, - stride); - } - else -# endif - { - kernel_path_trace(kg, buffer, sample, x, y, offset, stride); - } -#endif /* KERNEL_STUB */ +# ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, path_trace); +# else +# ifdef __BRANCHED_PATH__ + if (kernel_data.integrator.branched) { + kernel_branched_path_trace(kg, buffer, sample, x, y, offset, stride); + } + else +# endif + { + kernel_path_trace(kg, buffer, sample, x, y, offset, stride); + } +# endif /* KERNEL_STUB */ } /* Film */ @@ -106,42 +98,32 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, - int x, int y, + int x, + int y, int offset, int stride) { -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, convert_to_byte); -#else - kernel_film_convert_to_byte(kg, - rgba, - buffer, - sample_scale, - x, y, - offset, - stride); -#endif /* KERNEL_STUB */ +# ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, convert_to_byte); +# else + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +# endif /* KERNEL_STUB */ } void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, - int x, int y, + int x, + int y, int offset, int stride) { -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, convert_to_half_float); -#else - kernel_film_convert_to_half_float(kg, - rgba, - buffer, - sample_scale, - x, y, - offset, - stride); -#endif /* KERNEL_STUB */ +# ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, convert_to_half_float); +# else + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); +# endif /* KERNEL_STUB */ } /* Shader Evaluate */ @@ -155,60 +137,53 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, int offset, int sample) { -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, shader); -#else - if(type >= SHADER_EVAL_BAKE) { -# ifdef __BAKING__ - kernel_bake_evaluate(kg, - input, - output, - (ShaderEvalType)type, - filter, - i, - offset, - sample); -# endif - } - else if(type == SHADER_EVAL_DISPLACE) { - kernel_displace_evaluate(kg, input, output, i); - } - else { - kernel_background_evaluate(kg, input, output, i); - } -#endif /* KERNEL_STUB */ +# ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, shader); +# else + if (type >= SHADER_EVAL_BAKE) { +# ifdef __BAKING__ + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, i, offset, sample); +# endif + } + else if (type == SHADER_EVAL_DISPLACE) { + kernel_displace_evaluate(kg, input, output, i); + } + else { + kernel_background_evaluate(kg, input, output, i); + } +# endif /* KERNEL_STUB */ } -#else /* __SPLIT_KERNEL__ */ +#else /* __SPLIT_KERNEL__ */ /* Split Kernel Path Tracing */ -#ifdef KERNEL_STUB -# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ - { \ - STUB_ASSERT(KERNEL_ARCH, name); \ - } - -# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ - { \ - STUB_ASSERT(KERNEL_ARCH, name); \ - } -#else -# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ - { \ - kernel_##name(kg); \ - } - -# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ - { \ - ccl_local type locals; \ - kernel_##name(kg, &locals); \ - } -#endif /* KERNEL_STUB */ +# ifdef KERNEL_STUB +# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ + { \ + STUB_ASSERT(KERNEL_ARCH, name); \ + } + +# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ + { \ + STUB_ASSERT(KERNEL_ARCH, name); \ + } +# else +# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ + { \ + kernel_##name(kg); \ + } + +# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ + { \ + ccl_local type locals; \ + kernel_##name(kg, &locals); \ + } +# endif /* KERNEL_STUB */ DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) @@ -219,7 +194,8 @@ DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals) DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, + BackgroundAOLocals) DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) @@ -228,7 +204,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) -#endif /* __SPLIT_KERNEL__ */ +#endif /* __SPLIT_KERNEL__ */ #undef KERNEL_STUB #undef STUB_ASSERT diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp index c5e199b0a69..989f5e5aaa8 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp @@ -54,7 +54,7 @@ /* quiet unused define warnings */ #if defined(__KERNEL_SSE2__) - /* do nothing */ +/* do nothing */ #endif #include "kernel/kernel.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp index 6ba3425a343..1b2e2516751 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp @@ -34,7 +34,7 @@ # define __KERNEL_SSE41__ # define __KERNEL_AVX__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp index 76b2d77ebb8..43b8bfbf864 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp @@ -35,7 +35,7 @@ # define __KERNEL_AVX__ # define __KERNEL_AVX2__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx2 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp index b468b6f44c8..9743789179d 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp @@ -29,7 +29,7 @@ # if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) # define __KERNEL_SSE2__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_sse2 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp index 3e5792d0b17..1bec7633500 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp @@ -31,7 +31,7 @@ # define __KERNEL_SSE3__ # define __KERNEL_SSSE3__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_sse3 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp index 3629f21cd29..c0efc2350e9 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp @@ -32,7 +32,7 @@ # define __KERNEL_SSSE3__ # define __KERNEL_SSE41__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_sse41 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp index 57530c88710..173be8e93ce 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp @@ -27,7 +27,7 @@ # if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) # define __KERNEL_SSE2__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_sse2 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp index c607753bc4b..31273fe3344 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp @@ -29,7 +29,7 @@ # define __KERNEL_SSE3__ # define __KERNEL_SSSE3__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_sse3 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp index a278554731c..1d020b7fee6 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp @@ -30,7 +30,7 @@ # define __KERNEL_SSSE3__ # define __KERNEL_SSE41__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_sse41 |