git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/kernel/kernels')
-rw-r--r--  intern/cycles/kernel/kernels/cpu/filter.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/filter_avx.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/filter_avx2.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/filter_cpu.h | 42
-rw-r--r--  intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h | 229
-rw-r--r--  intern/cycles/kernel/kernels/cpu/filter_sse2.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/filter_sse3.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/filter_sse41.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel.cpp | 34
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_avx.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_cpu.h | 53
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h | 965
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 182
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_split.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp | 2
-rw-r--r--  intern/cycles/kernel/kernels/cuda/kernel_config.h | 14
-rw-r--r--  intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h | 237
-rw-r--r--  intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h | 425
-rw-r--r--  intern/cycles/kernel/kernels/opencl/kernel_split_function.h | 59
27 files changed, 1116 insertions(+), 1158 deletions(-)
diff --git a/intern/cycles/kernel/kernels/cpu/filter.cpp b/intern/cycles/kernel/kernels/cpu/filter.cpp
index 2ff1a392dc3..145a6b6ac40 100644
--- a/intern/cycles/kernel/kernels/cpu/filter.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter.cpp
@@ -53,7 +53,7 @@
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__)
- /* do nothing */
+/* do nothing */
#endif
#include "kernel/filter/filter.h"
diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
index 4a9e6047ecf..1d68214c8e7 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp
@@ -32,7 +32,7 @@
# define __KERNEL_SSE41__
# define __KERNEL_AVX__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
#include "kernel/filter/filter.h"
#define KERNEL_ARCH cpu_avx
diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
index c22ec576254..b6709fbc529 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp
@@ -33,7 +33,7 @@
# define __KERNEL_AVX__
# define __KERNEL_AVX2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
#include "kernel/filter/filter.h"
#define KERNEL_ARCH cpu_avx2
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index 02c85562db8..1423b182ab8 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -25,7 +25,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
float *sampleV,
float *sampleVV,
float *bufferV,
- int* prefilter_rect,
+ int *prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset);
@@ -38,7 +38,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
float *mean,
float *variance,
float scale,
- int* prefilter_rect,
+ int *prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset);
@@ -49,9 +49,10 @@ void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample,
float *from,
float *buffer,
int out_offset,
- int* prefilter_rect);
+ int *prefilter_rect);
-void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x,
+ int y,
ccl_global float *image,
ccl_global float *variance,
ccl_global float *depth,
@@ -59,22 +60,17 @@ void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
int *rect,
int pass_stride);
-void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
- float *mean,
- float *variance,
- float *a,
- float *b,
- int* prefilter_rect,
- int r);
+void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(
+ int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r);
-void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
+void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer,
TileInfo *tiles,
int x,
int y,
int storage_ofs,
float *transform,
int *rank,
- int* rect,
+ int *rect,
int pass_stride,
int frame_stride,
bool use_time,
@@ -87,24 +83,18 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
float *variance_image,
float *scale_image,
float *difference_image,
- int* rect,
+ int *rect,
int stride,
int channel_offset,
int frame_offset,
float a,
float k_2);
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image,
- float *out_image,
- int* rect,
- int stride,
- int f);
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(
+ float *difference_image, float *out_image, int *rect, int stride, int f);
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image,
- float *out_image,
- int* rect,
- int stride,
- int f);
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(
+ float *difference_image, float *out_image, int *rect, int stride, int f);
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
int dy,
@@ -113,7 +103,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
float *temp_image,
float *out_image,
float *accum_image,
- int* rect,
+ int *rect,
int channel_offset,
int stride,
int f);
@@ -137,7 +127,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
float *accum_image,
- int* rect,
+ int *rect,
int stride);
void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index c29505880cb..3d4cb87e104 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -25,12 +25,12 @@
#include "kernel/filter/filter_kernel.h"
#ifdef KERNEL_STUB
-# define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!"))
+# define STUB_ASSERT(arch, name) \
+ assert(!(#name " kernel stub for architecture " #arch " was called!"))
#endif
CCL_NAMESPACE_BEGIN
-
/* Denoise filter */
void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
@@ -42,23 +42,25 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
float *sampleVariance,
float *sampleVarianceV,
float *bufferVariance,
- int* prefilter_rect,
+ int *prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
+ STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
#else
- kernel_filter_divide_shadow(sample, tile_info,
- x, y,
- unfilteredA,
- unfilteredB,
- sampleVariance,
- sampleVarianceV,
- bufferVariance,
- load_int4(prefilter_rect),
- buffer_pass_stride,
- buffer_denoising_offset);
+ kernel_filter_divide_shadow(sample,
+ tile_info,
+ x,
+ y,
+ unfilteredA,
+ unfilteredB,
+ sampleVariance,
+ sampleVarianceV,
+ bufferVariance,
+ load_int4(prefilter_rect),
+ buffer_pass_stride,
+ buffer_denoising_offset);
#endif
}
@@ -68,23 +70,28 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
int v_offset,
int x,
int y,
- float *mean, float *variance,
+ float *mean,
+ float *variance,
float scale,
- int* prefilter_rect,
+ int *prefilter_rect,
int buffer_pass_stride,
int buffer_denoising_offset)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
+ STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
#else
- kernel_filter_get_feature(sample, tile_info,
- m_offset, v_offset,
- x, y,
- mean, variance,
- scale,
- load_int4(prefilter_rect),
- buffer_pass_stride,
- buffer_denoising_offset);
+ kernel_filter_get_feature(sample,
+ tile_info,
+ m_offset,
+ v_offset,
+ x,
+ y,
+ mean,
+ variance,
+ scale,
+ load_int4(prefilter_rect),
+ buffer_pass_stride,
+ buffer_denoising_offset);
#endif
}
@@ -95,16 +102,18 @@ void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample,
float *from,
float *buffer,
int out_offset,
- int* prefilter_rect)
+ int *prefilter_rect)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_write_feature);
+ STUB_ASSERT(KERNEL_ARCH, filter_write_feature);
#else
- kernel_filter_write_feature(sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect));
+ kernel_filter_write_feature(
+ sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect));
#endif
}
-void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
+void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x,
+ int y,
ccl_global float *image,
ccl_global float *variance,
ccl_global float *depth,
@@ -113,35 +122,31 @@ void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
int pass_stride)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
+ STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
#else
- kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride);
+ kernel_filter_detect_outliers(
+ x, y, image, variance, depth, output, load_int4(rect), pass_stride);
#endif
}
-void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
- float *mean,
- float *variance,
- float *a,
- float *b,
- int* prefilter_rect,
- int r)
+void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(
+ int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_combine_halves);
+ STUB_ASSERT(KERNEL_ARCH, filter_combine_halves);
#else
- kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r);
+ kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r);
#endif
}
-void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
+void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer,
TileInfo *tile_info,
int x,
int y,
int storage_ofs,
float *transform,
int *rank,
- int* prefilter_rect,
+ int *prefilter_rect,
int pass_stride,
int frame_stride,
bool use_time,
@@ -149,21 +154,22 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer,
float pca_threshold)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
+ STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
#else
- rank += storage_ofs;
- transform += storage_ofs*TRANSFORM_SIZE;
- kernel_filter_construct_transform(buffer,
- tile_info,
- x, y,
- load_int4(prefilter_rect),
- pass_stride,
- frame_stride,
- use_time,
- transform,
- rank,
- radius,
- pca_threshold);
+ rank += storage_ofs;
+ transform += storage_ofs * TRANSFORM_SIZE;
+ kernel_filter_construct_transform(buffer,
+ tile_info,
+ x,
+ y,
+ load_int4(prefilter_rect),
+ pass_stride,
+ frame_stride,
+ use_time,
+ transform,
+ rank,
+ radius,
+ pca_threshold);
#endif
}
@@ -181,44 +187,40 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx,
float k_2)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference);
#else
- kernel_filter_nlm_calc_difference(dx, dy,
- weight_image,
- variance_image,
- scale_image,
- difference_image,
- load_int4(rect),
- stride,
- channel_offset,
- frame_offset,
- a, k_2);
+ kernel_filter_nlm_calc_difference(dx,
+ dy,
+ weight_image,
+ variance_image,
+ scale_image,
+ difference_image,
+ load_int4(rect),
+ stride,
+ channel_offset,
+ frame_offset,
+ a,
+ k_2);
#endif
}
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image,
- float *out_image,
- int *rect,
- int stride,
- int f)
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(
+ float *difference_image, float *out_image, int *rect, int stride, int f)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur);
#else
- kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f);
+ kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f);
#endif
}
-void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image,
- float *out_image,
- int *rect,
- int stride,
- int f)
+void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(
+ float *difference_image, float *out_image, int *rect, int stride, int f)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight);
#else
- kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f);
+ kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f);
#endif
}
@@ -235,17 +237,19 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx,
int f)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output);
#else
- kernel_filter_nlm_update_output(dx, dy,
- difference_image,
- image,
- temp_image,
- out_image,
- accum_image,
- load_int4(rect),
- channel_offset,
- stride, f);
+ kernel_filter_nlm_update_output(dx,
+ dy,
+ difference_image,
+ image,
+ temp_image,
+ out_image,
+ accum_image,
+ load_int4(rect),
+ channel_offset,
+ stride,
+ f);
#endif
}
@@ -267,19 +271,24 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx,
bool use_time)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian);
#else
- kernel_filter_nlm_construct_gramian(dx, dy, t,
- difference_image,
- buffer,
- transform, rank,
- XtWX, XtWY,
- load_int4(rect),
- load_int4(filter_window),
- stride, f,
- pass_stride,
- frame_offset,
- use_time);
+ kernel_filter_nlm_construct_gramian(dx,
+ dy,
+ t,
+ difference_image,
+ buffer,
+ transform,
+ rank,
+ XtWX,
+ XtWY,
+ load_int4(rect),
+ load_int4(filter_window),
+ stride,
+ f,
+ pass_stride,
+ frame_offset,
+ use_time);
#endif
}
@@ -289,9 +298,9 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image,
int stride)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize);
+ STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize);
#else
- kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride);
+ kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride);
#endif
}
@@ -306,12 +315,12 @@ void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x,
int sample)
{
#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, filter_finalize);
+ STUB_ASSERT(KERNEL_ARCH, filter_finalize);
#else
- XtWX += storage_ofs*XTWX_SIZE;
- XtWY += storage_ofs*XTWY_SIZE;
- rank += storage_ofs;
- kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample);
+ XtWX += storage_ofs * XTWX_SIZE;
+ XtWY += storage_ofs * XTWY_SIZE;
+ rank += storage_ofs;
+ kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample);
#endif
}
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
index f7c9935f1d0..6c6c3e78696 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp
@@ -27,7 +27,7 @@
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
#include "kernel/filter/filter.h"
#define KERNEL_ARCH cpu_sse2
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
index 070b95a3505..e2243000331 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp
@@ -29,7 +29,7 @@
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
#include "kernel/filter/filter.h"
#define KERNEL_ARCH cpu_sse3
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
index 254025be4e2..068889365e3 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
@@ -31,7 +31,7 @@
# define __KERNEL_SSSE3__
# define __KERNEL_SSE41__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
#include "kernel/filter/filter.h"
#define KERNEL_ARCH cpu_sse41
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index de487f6123f..f2146302a27 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -53,7 +53,7 @@
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__)
- /* do nothing */
+/* do nothing */
#endif
#include "kernel/kernel.h"
@@ -66,29 +66,27 @@ CCL_NAMESPACE_BEGIN
void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size)
{
- if(strcmp(name, "__data") == 0)
- memcpy(&kg->__data, host, size);
- else
- assert(0);
+ if (strcmp(name, "__data") == 0)
+ memcpy(&kg->__data, host, size);
+ else
+ assert(0);
}
-void kernel_tex_copy(KernelGlobals *kg,
- const char *name,
- void *mem,
- size_t size)
+void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
{
- if(0) {
- }
+ if (0) {
+ }
#define KERNEL_TEX(type, tname) \
- else if(strcmp(name, #tname) == 0) { \
- kg->tname.data = (type*)mem; \
- kg->tname.width = size; \
- }
+ else if (strcmp(name, #tname) == 0) \
+ { \
+ kg->tname.data = (type *)mem; \
+ kg->tname.width = size; \
+ }
#include "kernel/kernel_textures.h"
- else {
- assert(0);
- }
+ else {
+ assert(0);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
index a645fb4d8dd..0656fc9dd00 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
@@ -32,7 +32,7 @@
# define __KERNEL_SSE41__
# define __KERNEL_AVX__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_avx
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
index 6bbb87727b9..5baafdc699e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
@@ -33,7 +33,7 @@
# define __KERNEL_AVX__
# define __KERNEL_AVX2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_avx2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 6bdb8546a24..f5d981fb71a 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -16,25 +16,24 @@
/* Templated common declaration part of all CPU kernels. */
-void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
- float *buffer,
- int sample,
- int x, int y,
- int offset,
- int stride);
+void KERNEL_FUNCTION_FULL_NAME(path_trace)(
+ KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
uchar4 *rgba,
float *buffer,
float sample_scale,
- int x, int y,
- int offset, int stride);
+ int x,
+ int y,
+ int offset,
+ int stride);
void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
uchar4 *rgba,
float *buffer,
float sample_scale,
- int x, int y,
+ int x,
+ int y,
int offset,
int stride);
@@ -49,24 +48,28 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
/* Split kernels */
-void KERNEL_FUNCTION_FULL_NAME(data_init)(
- KernelGlobals *kg,
- ccl_constant KernelData *data,
- ccl_global void *split_data_buffer,
- int num_elements,
- ccl_global char *ray_state,
- int start_sample,
- int end_sample,
- int sx, int sy, int sw, int sh, int offset, int stride,
- ccl_global int *Queue_index,
- int queuesize,
- ccl_global char *use_queues_flag,
- ccl_global unsigned int *work_pool_wgs,
- unsigned int num_samples,
- ccl_global float *buffer);
+void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg,
+ ccl_constant KernelData *data,
+ ccl_global void *split_data_buffer,
+ int num_elements,
+ ccl_global char *ray_state,
+ int start_sample,
+ int end_sample,
+ int sx,
+ int sy,
+ int sw,
+ int sh,
+ int offset,
+ int stride,
+ ccl_global int *Queue_index,
+ int queuesize,
+ ccl_global char *use_queues_flag,
+ ccl_global unsigned int *work_pool_wgs,
+ unsigned int num_samples,
+ ccl_global float *buffer);
#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data);
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * data);
DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index ae4fd85780d..4289e2bbb85 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -19,523 +19,508 @@
CCL_NAMESPACE_BEGIN
-template<typename T> struct TextureInterpolator {
+template<typename T> struct TextureInterpolator {
#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
- { \
- u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
- u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
- u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
- u[3] = (1.0f / 6.0f) * t * t * t; \
- } (void) 0
-
- static ccl_always_inline float4 read(float4 r)
- {
- return r;
- }
-
- static ccl_always_inline float4 read(uchar4 r)
- {
- float f = 1.0f / 255.0f;
- return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
- }
-
- static ccl_always_inline float4 read(uchar r)
- {
- float f = r * (1.0f / 255.0f);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(float r)
- {
- /* TODO(dingto): Optimize this, so interpolation
- * happens on float instead of float4 */
- return make_float4(r, r, r, 1.0f);
- }
-
- static ccl_always_inline float4 read(half4 r)
- {
- return half4_to_float4(r);
- }
-
- static ccl_always_inline float4 read(half r)
- {
- float f = half_to_float(r);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(uint16_t r)
- {
- float f = r*(1.0f/65535.0f);
- return make_float4(f, f, f, 1.0f);
- }
-
- static ccl_always_inline float4 read(ushort4 r)
- {
- float f = 1.0f/65535.0f;
- return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
- }
-
- static ccl_always_inline float4 read(const T *data,
- int x, int y,
- int width, int height)
- {
- if(x < 0 || y < 0 || x >= width || y >= height) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return read(data[y * width + x]);
- }
-
- static ccl_always_inline int wrap_periodic(int x, int width)
- {
- x %= width;
- if(x < 0)
- x += width;
- return x;
- }
-
- static ccl_always_inline int wrap_clamp(int x, int width)
- {
- return clamp(x, 0, width-1);
- }
-
- static ccl_always_inline float frac(float x, int *ix)
- {
- int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
- *ix = i;
- return x - (float)i;
- }
-
- /* ******** 2D interpolation ******** */
-
- static ccl_always_inline float4 interp_closest(const TextureInfo& info,
- float x, float y)
- {
- const T *data = (const T*)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy;
- frac(x*(float)width, &ix);
- frac(y*(float)height, &iy);
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- break;
- case EXTENSION_CLIP:
- if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return read(data[ix + iy*width]);
- }
-
- static ccl_always_inline float4 interp_linear(const TextureInfo& info,
- float x, float y)
- {
- const T *data = (const T*)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy, nix, niy;
- const float tx = frac(x*(float)width - 0.5f, &ix);
- const float ty = frac(y*(float)height - 0.5f, &iy);
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- nix = wrap_periodic(ix+1, width);
- niy = wrap_periodic(iy+1, height);
- break;
- case EXTENSION_CLIP:
- nix = ix + 1;
- niy = iy + 1;
- break;
- case EXTENSION_EXTEND:
- nix = wrap_clamp(ix+1, width);
- niy = wrap_clamp(iy+1, height);
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) +
- (1.0f - ty) * tx * read(data, nix, iy, width, height) +
- ty * (1.0f - tx) * read(data, ix, niy, width, height) +
- ty * tx * read(data, nix, niy, width, height);
- }
-
- static ccl_always_inline float4 interp_cubic(const TextureInfo& info,
- float x, float y)
- {
- const T *data = (const T*)info.data;
- const int width = info.width;
- const int height = info.height;
- int ix, iy, nix, niy;
- const float tx = frac(x*(float)width - 0.5f, &ix);
- const float ty = frac(y*(float)height - 0.5f, &iy);
- int pix, piy, nnix, nniy;
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- pix = wrap_periodic(ix-1, width);
- piy = wrap_periodic(iy-1, height);
- nix = wrap_periodic(ix+1, width);
- niy = wrap_periodic(iy+1, height);
- nnix = wrap_periodic(ix+2, width);
- nniy = wrap_periodic(iy+2, height);
- break;
- case EXTENSION_CLIP:
- pix = ix - 1;
- piy = iy - 1;
- nix = ix + 1;
- niy = iy + 1;
- nnix = ix + 2;
- nniy = iy + 2;
- break;
- case EXTENSION_EXTEND:
- pix = wrap_clamp(ix-1, width);
- piy = wrap_clamp(iy-1, height);
- nix = wrap_clamp(ix+1, width);
- niy = wrap_clamp(iy+1, height);
- nnix = wrap_clamp(ix+2, width);
- nniy = wrap_clamp(iy+2, height);
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- const int xc[4] = {pix, ix, nix, nnix};
- const int yc[4] = {piy, iy, niy, nniy};
- float u[4], v[4];
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
- */
+ { \
+ u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
+ u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
+ u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
+ u[3] = (1.0f / 6.0f) * t * t * t; \
+ } \
+ (void)0
+
+ static ccl_always_inline float4 read(float4 r)
+ {
+ return r;
+ }
+
+ static ccl_always_inline float4 read(uchar4 r)
+ {
+ float f = 1.0f / 255.0f;
+ return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+ }
+
+ static ccl_always_inline float4 read(uchar r)
+ {
+ float f = r * (1.0f / 255.0f);
+ return make_float4(f, f, f, 1.0f);
+ }
+
+ static ccl_always_inline float4 read(float r)
+ {
+ /* TODO(dingto): Optimize this, so interpolation
+ * happens on float instead of float4 */
+ return make_float4(r, r, r, 1.0f);
+ }
+
+ static ccl_always_inline float4 read(half4 r)
+ {
+ return half4_to_float4(r);
+ }
+
+ static ccl_always_inline float4 read(half r)
+ {
+ float f = half_to_float(r);
+ return make_float4(f, f, f, 1.0f);
+ }
+
+ static ccl_always_inline float4 read(uint16_t r)
+ {
+ float f = r * (1.0f / 65535.0f);
+ return make_float4(f, f, f, 1.0f);
+ }
+
+ static ccl_always_inline float4 read(ushort4 r)
+ {
+ float f = 1.0f / 65535.0f;
+ return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+ }
+
+ static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height)
+ {
+ if (x < 0 || y < 0 || x >= width || y >= height) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ return read(data[y * width + x]);
+ }
+
+ static ccl_always_inline int wrap_periodic(int x, int width)
+ {
+ x %= width;
+ if (x < 0)
+ x += width;
+ return x;
+ }
+
+ static ccl_always_inline int wrap_clamp(int x, int width)
+ {
+ return clamp(x, 0, width - 1);
+ }
+
+ static ccl_always_inline float frac(float x, int *ix)
+ {
+ int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
+ *ix = i;
+ return x - (float)i;
+ }
+
+ /* ******** 2D interpolation ******** */
+
+ static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y)
+ {
+ const T *data = (const T *)info.data;
+ const int width = info.width;
+ const int height = info.height;
+ int ix, iy;
+ frac(x * (float)width, &ix);
+ frac(y * (float)height, &iy);
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ break;
+ case EXTENSION_CLIP:
+ if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ ATTR_FALLTHROUGH;
+ case EXTENSION_EXTEND:
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ return read(data[ix + iy * width]);
+ }
+
+ static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y)
+ {
+ const T *data = (const T *)info.data;
+ const int width = info.width;
+ const int height = info.height;
+ int ix, iy, nix, niy;
+ const float tx = frac(x * (float)width - 0.5f, &ix);
+ const float ty = frac(y * (float)height - 0.5f, &iy);
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ nix = wrap_periodic(ix + 1, width);
+ niy = wrap_periodic(iy + 1, height);
+ break;
+ case EXTENSION_CLIP:
+ nix = ix + 1;
+ niy = iy + 1;
+ break;
+ case EXTENSION_EXTEND:
+ nix = wrap_clamp(ix + 1, width);
+ niy = wrap_clamp(iy + 1, height);
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) +
+ (1.0f - ty) * tx * read(data, nix, iy, width, height) +
+ ty * (1.0f - tx) * read(data, ix, niy, width, height) +
+ ty * tx * read(data, nix, niy, width, height);
+ }
+
+ static ccl_always_inline float4 interp_cubic(const TextureInfo &info, float x, float y)
+ {
+ const T *data = (const T *)info.data;
+ const int width = info.width;
+ const int height = info.height;
+ int ix, iy, nix, niy;
+ const float tx = frac(x * (float)width - 0.5f, &ix);
+ const float ty = frac(y * (float)height - 0.5f, &iy);
+ int pix, piy, nnix, nniy;
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ pix = wrap_periodic(ix - 1, width);
+ piy = wrap_periodic(iy - 1, height);
+ nix = wrap_periodic(ix + 1, width);
+ niy = wrap_periodic(iy + 1, height);
+ nnix = wrap_periodic(ix + 2, width);
+ nniy = wrap_periodic(iy + 2, height);
+ break;
+ case EXTENSION_CLIP:
+ pix = ix - 1;
+ piy = iy - 1;
+ nix = ix + 1;
+ niy = iy + 1;
+ nnix = ix + 2;
+ nniy = iy + 2;
+ break;
+ case EXTENSION_EXTEND:
+ pix = wrap_clamp(ix - 1, width);
+ piy = wrap_clamp(iy - 1, height);
+ nix = wrap_clamp(ix + 1, width);
+ niy = wrap_clamp(iy + 1, height);
+ nnix = wrap_clamp(ix + 2, width);
+ nniy = wrap_clamp(iy + 2, height);
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ const int xc[4] = {pix, ix, nix, nnix};
+ const int yc[4] = {piy, iy, niy, nniy};
+ float u[4], v[4];
+ /* Some helper macro to keep code reasonable size,
+ * let compiler to inline all the matrix multiplications.
+ */
#define DATA(x, y) (read(data, xc[x], yc[y], width, height))
#define TERM(col) \
- (v[col] * (u[0] * DATA(0, col) + \
- u[1] * DATA(1, col) + \
- u[2] * DATA(2, col) + \
- u[3] * DATA(3, col)))
+ (v[col] * \
+ (u[0] * DATA(0, col) + u[1] * DATA(1, col) + u[2] * DATA(2, col) + u[3] * DATA(3, col)))
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+ SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+ SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- /* Actual interpolation. */
- return TERM(0) + TERM(1) + TERM(2) + TERM(3);
+ /* Actual interpolation. */
+ return TERM(0) + TERM(1) + TERM(2) + TERM(3);
#undef TERM
#undef DATA
- }
-
- static ccl_always_inline float4 interp(const TextureInfo& info,
- float x, float y)
- {
- if(UNLIKELY(!info.data)) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- switch(info.interpolation) {
- case INTERPOLATION_CLOSEST:
- return interp_closest(info, x, y);
- case INTERPOLATION_LINEAR:
- return interp_linear(info, x, y);
- default:
- return interp_cubic(info, x, y);
- }
- }
-
- /* ******** 3D interpolation ******** */
-
- static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info,
- float x, float y, float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
-
- frac(x*(float)width, &ix);
- frac(y*(float)height, &iy);
- frac(z*(float)depth, &iz);
-
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
- break;
- case EXTENSION_CLIP:
- if(x < 0.0f || y < 0.0f || z < 0.0f ||
- x > 1.0f || y > 1.0f || z > 1.0f)
- {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const T *data = (const T*)info.data;
- return read(data[ix + iy*width + iz*width*height]);
- }
-
- static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info,
- float x, float y, float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
- int nix, niy, niz;
-
- float tx = frac(x*(float)width - 0.5f, &ix);
- float ty = frac(y*(float)height - 0.5f, &iy);
- float tz = frac(z*(float)depth - 0.5f, &iz);
-
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
-
- nix = wrap_periodic(ix+1, width);
- niy = wrap_periodic(iy+1, height);
- niz = wrap_periodic(iz+1, depth);
- break;
- case EXTENSION_CLIP:
- if(x < 0.0f || y < 0.0f || z < 0.0f ||
- x > 1.0f || y > 1.0f || z > 1.0f)
- {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- nix = wrap_clamp(ix+1, width);
- niy = wrap_clamp(iy+1, height);
- niz = wrap_clamp(iz+1, depth);
-
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const T *data = (const T*)info.data;
- float4 r;
-
- r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
- r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
- r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
- r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
-
- r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
- r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
- r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
- r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
-
- return r;
- }
-
- /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
- * causing stack overflow issue in this function unless it is inlined.
- *
- * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
- * enabled.
- */
+ }
+
+ static ccl_always_inline float4 interp(const TextureInfo &info, float x, float y)
+ {
+ if (UNLIKELY(!info.data)) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ switch (info.interpolation) {
+ case INTERPOLATION_CLOSEST:
+ return interp_closest(info, x, y);
+ case INTERPOLATION_LINEAR:
+ return interp_linear(info, x, y);
+ default:
+ return interp_cubic(info, x, y);
+ }
+ }
+
+ /* ******** 3D interpolation ******** */
+
+ static ccl_always_inline float4 interp_3d_closest(const TextureInfo &info,
+ float x,
+ float y,
+ float z)
+ {
+ int width = info.width;
+ int height = info.height;
+ int depth = info.depth;
+ int ix, iy, iz;
+
+ frac(x * (float)width, &ix);
+ frac(y * (float)height, &iy);
+ frac(z * (float)depth, &iz);
+
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ iz = wrap_periodic(iz, depth);
+ break;
+ case EXTENSION_CLIP:
+ if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ ATTR_FALLTHROUGH;
+ case EXTENSION_EXTEND:
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ iz = wrap_clamp(iz, depth);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+
+ const T *data = (const T *)info.data;
+ return read(data[ix + iy * width + iz * width * height]);
+ }
+
+ static ccl_always_inline float4 interp_3d_linear(const TextureInfo &info,
+ float x,
+ float y,
+ float z)
+ {
+ int width = info.width;
+ int height = info.height;
+ int depth = info.depth;
+ int ix, iy, iz;
+ int nix, niy, niz;
+
+ float tx = frac(x * (float)width - 0.5f, &ix);
+ float ty = frac(y * (float)height - 0.5f, &iy);
+ float tz = frac(z * (float)depth - 0.5f, &iz);
+
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ iz = wrap_periodic(iz, depth);
+
+ nix = wrap_periodic(ix + 1, width);
+ niy = wrap_periodic(iy + 1, height);
+ niz = wrap_periodic(iz + 1, depth);
+ break;
+ case EXTENSION_CLIP:
+ if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ ATTR_FALLTHROUGH;
+ case EXTENSION_EXTEND:
+ nix = wrap_clamp(ix + 1, width);
+ niy = wrap_clamp(iy + 1, height);
+ niz = wrap_clamp(iz + 1, depth);
+
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ iz = wrap_clamp(iz, depth);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+
+ const T *data = (const T *)info.data;
+ float4 r;
+
+ r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
+ read(data[ix + iy * width + iz * width * height]);
+ r += (1.0f - tz) * (1.0f - ty) * tx * read(data[nix + iy * width + iz * width * height]);
+ r += (1.0f - tz) * ty * (1.0f - tx) * read(data[ix + niy * width + iz * width * height]);
+ r += (1.0f - tz) * ty * tx * read(data[nix + niy * width + iz * width * height]);
+
+ r += tz * (1.0f - ty) * (1.0f - tx) * read(data[ix + iy * width + niz * width * height]);
+ r += tz * (1.0f - ty) * tx * read(data[nix + iy * width + niz * width * height]);
+ r += tz * ty * (1.0f - tx) * read(data[ix + niy * width + niz * width * height]);
+ r += tz * ty * tx * read(data[nix + niy * width + niz * width * height]);
+
+ return r;
+ }
+
+ /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
+ * causing stack overflow issue in this function unless it is inlined.
+ *
+ * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
+ * enabled.
+ */
#if defined(__GNUC__) || defined(__clang__)
- static ccl_always_inline
+ static ccl_always_inline
#else
- static ccl_never_inline
+ static ccl_never_inline
#endif
- float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z)
- {
- int width = info.width;
- int height = info.height;
- int depth = info.depth;
- int ix, iy, iz;
- int nix, niy, niz;
- /* Tricubic b-spline interpolation. */
- const float tx = frac(x*(float)width - 0.5f, &ix);
- const float ty = frac(y*(float)height - 0.5f, &iy);
- const float tz = frac(z*(float)depth - 0.5f, &iz);
- int pix, piy, piz, nnix, nniy, nniz;
-
- switch(info.extension) {
- case EXTENSION_REPEAT:
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
- iz = wrap_periodic(iz, depth);
-
- pix = wrap_periodic(ix-1, width);
- piy = wrap_periodic(iy-1, height);
- piz = wrap_periodic(iz-1, depth);
-
- nix = wrap_periodic(ix+1, width);
- niy = wrap_periodic(iy+1, height);
- niz = wrap_periodic(iz+1, depth);
-
- nnix = wrap_periodic(ix+2, width);
- nniy = wrap_periodic(iy+2, height);
- nniz = wrap_periodic(iz+2, depth);
- break;
- case EXTENSION_CLIP:
- if(x < 0.0f || y < 0.0f || z < 0.0f ||
- x > 1.0f || y > 1.0f || z > 1.0f)
- {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- ATTR_FALLTHROUGH;
- case EXTENSION_EXTEND:
- pix = wrap_clamp(ix-1, width);
- piy = wrap_clamp(iy-1, height);
- piz = wrap_clamp(iz-1, depth);
-
- nix = wrap_clamp(ix+1, width);
- niy = wrap_clamp(iy+1, height);
- niz = wrap_clamp(iz+1, depth);
-
- nnix = wrap_clamp(ix+2, width);
- nniy = wrap_clamp(iy+2, height);
- nniz = wrap_clamp(iz+2, depth);
-
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
- iz = wrap_clamp(iz, depth);
- break;
- default:
- kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
-
- const int xc[4] = {pix, ix, nix, nnix};
- const int yc[4] = {width * piy,
- width * iy,
- width * niy,
- width * nniy};
- const int zc[4] = {width * height * piz,
- width * height * iz,
- width * height * niz,
- width * height * nniz};
- float u[4], v[4], w[4];
-
- /* Some helper macro to keep code reasonable size,
- * let compiler to inline all the matrix multiplications.
- */
+ float4
+ interp_3d_tricubic(const TextureInfo &info, float x, float y, float z)
+ {
+ int width = info.width;
+ int height = info.height;
+ int depth = info.depth;
+ int ix, iy, iz;
+ int nix, niy, niz;
+ /* Tricubic b-spline interpolation. */
+ const float tx = frac(x * (float)width - 0.5f, &ix);
+ const float ty = frac(y * (float)height - 0.5f, &iy);
+ const float tz = frac(z * (float)depth - 0.5f, &iz);
+ int pix, piy, piz, nnix, nniy, nniz;
+
+ switch (info.extension) {
+ case EXTENSION_REPEAT:
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ iz = wrap_periodic(iz, depth);
+
+ pix = wrap_periodic(ix - 1, width);
+ piy = wrap_periodic(iy - 1, height);
+ piz = wrap_periodic(iz - 1, depth);
+
+ nix = wrap_periodic(ix + 1, width);
+ niy = wrap_periodic(iy + 1, height);
+ niz = wrap_periodic(iz + 1, depth);
+
+ nnix = wrap_periodic(ix + 2, width);
+ nniy = wrap_periodic(iy + 2, height);
+ nniz = wrap_periodic(iz + 2, depth);
+ break;
+ case EXTENSION_CLIP:
+ if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ ATTR_FALLTHROUGH;
+ case EXTENSION_EXTEND:
+ pix = wrap_clamp(ix - 1, width);
+ piy = wrap_clamp(iy - 1, height);
+ piz = wrap_clamp(iz - 1, depth);
+
+ nix = wrap_clamp(ix + 1, width);
+ niy = wrap_clamp(iy + 1, height);
+ niz = wrap_clamp(iz + 1, depth);
+
+ nnix = wrap_clamp(ix + 2, width);
+ nniy = wrap_clamp(iy + 2, height);
+ nniz = wrap_clamp(iz + 2, depth);
+
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ iz = wrap_clamp(iz, depth);
+ break;
+ default:
+ kernel_assert(0);
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+
+ const int xc[4] = {pix, ix, nix, nnix};
+ const int yc[4] = {width * piy, width * iy, width * niy, width * nniy};
+ const int zc[4] = {
+ width * height * piz, width * height * iz, width * height * niz, width * height * nniz};
+ float u[4], v[4], w[4];
+
+ /* Some helper macro to keep code reasonable size,
+ * let compiler to inline all the matrix multiplications.
+ */
#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
#define COL_TERM(col, row) \
- (v[col] * (u[0] * DATA(0, col, row) + \
- u[1] * DATA(1, col, row) + \
- u[2] * DATA(2, col, row) + \
- u[3] * DATA(3, col, row)))
+ (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
+ u[3] * DATA(3, col, row)))
#define ROW_TERM(row) \
- (w[row] * (COL_TERM(0, row) + \
- COL_TERM(1, row) + \
- COL_TERM(2, row) + \
- COL_TERM(3, row)))
+ (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- SET_CUBIC_SPLINE_WEIGHTS(w, tz);
+ SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+ SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+ SET_CUBIC_SPLINE_WEIGHTS(w, tz);
- /* Actual interpolation. */
- const T *data = (const T*)info.data;
- return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
+ /* Actual interpolation. */
+ const T *data = (const T *)info.data;
+ return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
#undef COL_TERM
#undef ROW_TERM
#undef DATA
- }
-
- static ccl_always_inline float4 interp_3d(const TextureInfo& info,
- float x, float y, float z,
- InterpolationType interp)
- {
- if(UNLIKELY(!info.data))
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
- switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) {
- case INTERPOLATION_CLOSEST:
- return interp_3d_closest(info, x, y, z);
- case INTERPOLATION_LINEAR:
- return interp_3d_linear(info, x, y, z);
- default:
- return interp_3d_tricubic(info, x, y, z);
- }
- }
+ }
+
+ static ccl_always_inline float4
+ interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
+ {
+ if (UNLIKELY(!info.data))
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
+ case INTERPOLATION_CLOSEST:
+ return interp_3d_closest(info, x, y, z);
+ case INTERPOLATION_LINEAR:
+ return interp_3d_linear(info, x, y, z);
+ default:
+ return interp_3d_tricubic(info, x, y, z);
+ }
+ }
#undef SET_CUBIC_SPLINE_WEIGHTS
};
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
- const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
-
- switch(kernel_tex_type(id)) {
- case IMAGE_DATA_TYPE_HALF:
- return TextureInterpolator<half>::interp(info, x, y);
- case IMAGE_DATA_TYPE_BYTE:
- return TextureInterpolator<uchar>::interp(info, x, y);
- case IMAGE_DATA_TYPE_USHORT:
- return TextureInterpolator<uint16_t>::interp(info, x, y);
- case IMAGE_DATA_TYPE_FLOAT:
- return TextureInterpolator<float>::interp(info, x, y);
- case IMAGE_DATA_TYPE_HALF4:
- return TextureInterpolator<half4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_BYTE4:
- return TextureInterpolator<uchar4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_USHORT4:
- return TextureInterpolator<ushort4>::interp(info, x, y);
- case IMAGE_DATA_TYPE_FLOAT4:
- return TextureInterpolator<float4>::interp(info, x, y);
- default:
- assert(0);
- return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
- }
+ const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+
+ switch (kernel_tex_type(id)) {
+ case IMAGE_DATA_TYPE_HALF:
+ return TextureInterpolator<half>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_BYTE:
+ return TextureInterpolator<uchar>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_USHORT:
+ return TextureInterpolator<uint16_t>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_FLOAT:
+ return TextureInterpolator<float>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_HALF4:
+ return TextureInterpolator<half4>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_BYTE4:
+ return TextureInterpolator<uchar4>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_USHORT4:
+ return TextureInterpolator<ushort4>::interp(info, x, y);
+ case IMAGE_DATA_TYPE_FLOAT4:
+ return TextureInterpolator<float4>::interp(info, x, y);
+ default:
+ assert(0);
+ return make_float4(
+ TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
+ }
}
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
+ccl_device float4 kernel_tex_image_interp_3d(
+ KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
{
- const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
-
- switch(kernel_tex_type(id)) {
- case IMAGE_DATA_TYPE_HALF:
- return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_BYTE:
- return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_USHORT:
- return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_FLOAT:
- return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_HALF4:
- return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_BYTE4:
- return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_USHORT4:
- return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp);
- case IMAGE_DATA_TYPE_FLOAT4:
- return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
- default:
- assert(0);
- return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
- }
+ const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+
+ switch (kernel_tex_type(id)) {
+ case IMAGE_DATA_TYPE_HALF:
+ return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_BYTE:
+ return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_USHORT:
+ return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_FLOAT:
+ return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_HALF4:
+ return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_BYTE4:
+ return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_USHORT4:
+ return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp);
+ case IMAGE_DATA_TYPE_FLOAT4:
+ return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
+ default:
+ assert(0);
+ return make_float4(
+ TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 759b7e4c20d..9ca3f46b5b6 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -58,14 +58,15 @@
# include "kernel/split/kernel_next_iteration_setup.h"
# include "kernel/split/kernel_indirect_subsurface.h"
# include "kernel/split/kernel_buffer_update.h"
-# endif /* __SPLIT_KERNEL__ */
+# endif /* __SPLIT_KERNEL__ */
#else
-# define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!"))
+# define STUB_ASSERT(arch, name) \
+ assert(!(#name " kernel stub for architecture " #arch " was called!"))
# ifdef __SPLIT_KERNEL__
# include "kernel/split/kernel_data_init.h"
-# endif /* __SPLIT_KERNEL__ */
-#endif /* KERNEL_STUB */
+# endif /* __SPLIT_KERNEL__ */
+#endif /* KERNEL_STUB */
CCL_NAMESPACE_BEGIN
@@ -73,31 +74,22 @@ CCL_NAMESPACE_BEGIN
/* Path Tracing */
-void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
- float *buffer,
- int sample,
- int x, int y,
- int offset,
- int stride)
+void KERNEL_FUNCTION_FULL_NAME(path_trace)(
+ KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, path_trace);
-#else
-# ifdef __BRANCHED_PATH__
- if(kernel_data.integrator.branched) {
- kernel_branched_path_trace(kg,
- buffer,
- sample,
- x, y,
- offset,
- stride);
- }
- else
-# endif
- {
- kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
- }
-#endif /* KERNEL_STUB */
+# ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, path_trace);
+# else
+# ifdef __BRANCHED_PATH__
+ if (kernel_data.integrator.branched) {
+ kernel_branched_path_trace(kg, buffer, sample, x, y, offset, stride);
+ }
+ else
+# endif
+ {
+ kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
+ }
+# endif /* KERNEL_STUB */
}
/* Film */
@@ -106,42 +98,32 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
uchar4 *rgba,
float *buffer,
float sample_scale,
- int x, int y,
+ int x,
+ int y,
int offset,
int stride)
{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, convert_to_byte);
-#else
- kernel_film_convert_to_byte(kg,
- rgba,
- buffer,
- sample_scale,
- x, y,
- offset,
- stride);
-#endif /* KERNEL_STUB */
+# ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, convert_to_byte);
+# else
+ kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
+# endif /* KERNEL_STUB */
}
void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
uchar4 *rgba,
float *buffer,
float sample_scale,
- int x, int y,
+ int x,
+ int y,
int offset,
int stride)
{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, convert_to_half_float);
-#else
- kernel_film_convert_to_half_float(kg,
- rgba,
- buffer,
- sample_scale,
- x, y,
- offset,
- stride);
-#endif /* KERNEL_STUB */
+# ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, convert_to_half_float);
+# else
+ kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
+# endif /* KERNEL_STUB */
}
/* Shader Evaluate */
@@ -155,60 +137,53 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
int offset,
int sample)
{
-#ifdef KERNEL_STUB
- STUB_ASSERT(KERNEL_ARCH, shader);
-#else
- if(type >= SHADER_EVAL_BAKE) {
-# ifdef __BAKING__
- kernel_bake_evaluate(kg,
- input,
- output,
- (ShaderEvalType)type,
- filter,
- i,
- offset,
- sample);
-# endif
- }
- else if(type == SHADER_EVAL_DISPLACE) {
- kernel_displace_evaluate(kg, input, output, i);
- }
- else {
- kernel_background_evaluate(kg, input, output, i);
- }
-#endif /* KERNEL_STUB */
+# ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, shader);
+# else
+ if (type >= SHADER_EVAL_BAKE) {
+# ifdef __BAKING__
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, i, offset, sample);
+# endif
+ }
+ else if (type == SHADER_EVAL_DISPLACE) {
+ kernel_displace_evaluate(kg, input, output, i);
+ }
+ else {
+ kernel_background_evaluate(kg, input, output, i);
+ }
+# endif /* KERNEL_STUB */
}
-#else /* __SPLIT_KERNEL__ */
+#else /* __SPLIT_KERNEL__ */
/* Split Kernel Path Tracing */
-#ifdef KERNEL_STUB
-# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
- { \
- STUB_ASSERT(KERNEL_ARCH, name); \
- }
-
-# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
- { \
- STUB_ASSERT(KERNEL_ARCH, name); \
- }
-#else
-# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
- { \
- kernel_##name(kg); \
- }
-
-# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
- void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
- { \
- ccl_local type locals; \
- kernel_##name(kg, &locals); \
- }
-#endif /* KERNEL_STUB */
+# ifdef KERNEL_STUB
+# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+ { \
+ STUB_ASSERT(KERNEL_ARCH, name); \
+ }
+
+# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+ { \
+ STUB_ASSERT(KERNEL_ARCH, name); \
+ }
+# else
+# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+ { \
+ kernel_##name(kg); \
+ }
+
+# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \
+ { \
+ ccl_local type locals; \
+ kernel_##name(kg, &locals); \
+ }
+# endif /* KERNEL_STUB */
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
@@ -219,7 +194,8 @@ DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao,
+ BackgroundAOLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
@@ -228,7 +204,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
-#endif /* __SPLIT_KERNEL__ */
+#endif /* __SPLIT_KERNEL__ */
#undef KERNEL_STUB
#undef STUB_ASSERT
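
The hunk above shows the two ways kernel_cpu_impl.h stamps out split-kernel entry points: with KERNEL_STUB defined the body is just STUB_ASSERT (the build lacks that instruction set), otherwise it forwards to kernel_<name>, and the *_LOCALS variant additionally hands the kernel a ccl_local scratch struct that on the CPU is simply a stack variable. The sketch below is a standalone illustration of that stub-vs-wrapper macro pattern only; every demo_* / Demo* name is hypothetical, and just the shape of the macros follows the code in the diff.

    #include <cassert>
    #include <cstdio>

    struct DemoGlobals { int pad; };
    struct DemoLocals { int scratch[64]; };

    void demo_path_init(DemoGlobals *kg) { (void)kg; std::printf("path_init\n"); }
    void demo_shader_sort(DemoGlobals *kg, DemoLocals *locals) { (void)kg; (void)locals; }

    #ifdef DEMO_STUB
    /* Architecture not compiled in: every entry point is a loud assert. */
    #  define DEFINE_DEMO_FUNCTION(name) \
        void demo_entry_##name(DemoGlobals *kg) { (void)kg; assert(!"missing arch: " #name); }
    #  define DEFINE_DEMO_FUNCTION_LOCALS(name, type) DEFINE_DEMO_FUNCTION(name)
    #else
    /* Architecture available: forward to the real kernel; the _LOCALS variant
     * gives it per-group scratch, which here is just a stack variable. */
    #  define DEFINE_DEMO_FUNCTION(name) \
        void demo_entry_##name(DemoGlobals *kg) { demo_##name(kg); }
    #  define DEFINE_DEMO_FUNCTION_LOCALS(name, type) \
        void demo_entry_##name(DemoGlobals *kg) { type locals; demo_##name(kg, &locals); }
    #endif

    DEFINE_DEMO_FUNCTION(path_init)
    DEFINE_DEMO_FUNCTION_LOCALS(shader_sort, DemoLocals)

    int main()
    {
      DemoGlobals kg = {0};
      demo_entry_path_init(&kg);
      demo_entry_shader_sort(&kg);
      return 0;
    }

Compiling the same file with -DDEMO_STUB swaps every wrapper for an assert, which is exactly how the real build keeps linker symbols present for architectures that were not compiled in.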
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
index c5e199b0a69..989f5e5aaa8 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp
@@ -54,7 +54,7 @@
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__)
- /* do nothing */
+/* do nothing */
#endif
#include "kernel/kernel.h"
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
index 6ba3425a343..1b2e2516751 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
@@ -34,7 +34,7 @@
# define __KERNEL_SSE41__
# define __KERNEL_AVX__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_avx
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
index 76b2d77ebb8..43b8bfbf864 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
@@ -35,7 +35,7 @@
# define __KERNEL_AVX__
# define __KERNEL_AVX2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_avx2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
index b468b6f44c8..9743789179d 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
@@ -29,7 +29,7 @@
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
index 3e5792d0b17..1bec7633500 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
@@ -31,7 +31,7 @@
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse3
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
index 3629f21cd29..c0efc2350e9 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
@@ -32,7 +32,7 @@
# define __KERNEL_SSSE3__
# define __KERNEL_SSE41__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse41
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
index 57530c88710..173be8e93ce 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
@@ -27,7 +27,7 @@
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE2__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse2
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
index c607753bc4b..31273fe3344 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
@@ -29,7 +29,7 @@
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse3
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
index a278554731c..1d020b7fee6 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
@@ -30,7 +30,7 @@
# define __KERNEL_SSSE3__
# define __KERNEL_SSE41__
# endif
-#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
+#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
#include "kernel/kernel.h"
#define KERNEL_ARCH cpu_sse41
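
The run of small hunks above (kernel_split_sse2.cpp through kernel_sse41.cpp) all touch the same pattern: each .cpp file enables its __KERNEL_SSE*__ / __KERNEL_AVX*__ feature defines under a WITH_CYCLES_OPTIMIZED_KERNEL_* guard, includes kernel/kernel.h, and sets KERNEL_ARCH so that the shared KERNEL_FUNCTION_FULL_NAME definitions seen earlier are compiled once per instruction set with arch-suffixed names. Below is a minimal sketch of that token-pasting trick under stated assumptions: the DEMO_* names are invented, and the "implementation header" is inlined twice here instead of living in a separate file as it does in the real source tree.

    #include <cstdio>

    #define DEMO_JOIN(a, b) a##_##b
    #define DEMO_EVAL(a, b) DEMO_JOIN(a, b)
    #define DEMO_FULL_NAME(name) DEMO_EVAL(name, DEMO_ARCH)

    /* "Implementation header" body, repeated with different DEMO_ARCH values;
     * the real code achieves this by compiling one .cpp per instruction set. */
    #define DEMO_ARCH cpu_sse2
    void DEMO_FULL_NAME(render)(int n) { std::printf("sse2 path, %d items\n", n); }
    #undef DEMO_ARCH

    #define DEMO_ARCH cpu_avx2
    void DEMO_FULL_NAME(render)(int n) { std::printf("avx2 path, %d items\n", n); }
    #undef DEMO_ARCH

    int main()
    {
      /* The device layer would pick the widest instruction set the CPU supports. */
      render_cpu_avx2(16);
      render_cpu_sse2(16);
      return 0;
    }

The DEMO_EVAL indirection matters: routing the paste through a second macro forces DEMO_ARCH to expand to cpu_sse2 / cpu_avx2 before ## is applied, which is the same reason a JOIN macro is always paired with an EVAL wrapper in this code.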
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h
index 6d41dc15785..d9f349837a8 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_config.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h
@@ -81,7 +81,6 @@
# define CUDA_KERNEL_MAX_REGISTERS 64
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 72
-
/* unknown architecture */
#else
# error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
@@ -96,18 +95,19 @@
* given the maximum number of registers per thread. */
#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \
- __launch_bounds__( \
- threads_block_width*threads_block_width, \
- CUDA_MULTIPRESSOR_MAX_REGISTERS/(threads_block_width*threads_block_width*thread_num_registers) \
- )
+ __launch_bounds__(threads_block_width *threads_block_width, \
+ CUDA_MULTIPRESSOR_MAX_REGISTERS / \
+ (threads_block_width * threads_block_width * thread_num_registers))
/* sanity checks */
-#if CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS
+#if CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS
# error "Maximum number of threads per block exceeded"
#endif
-#if CUDA_MULTIPRESSOR_MAX_REGISTERS/(CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH*CUDA_KERNEL_MAX_REGISTERS) > CUDA_MULTIPROCESSOR_MAX_BLOCKS
+#if CUDA_MULTIPRESSOR_MAX_REGISTERS / \
+ (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH * CUDA_KERNEL_MAX_REGISTERS) > \
+ CUDA_MULTIPROCESSOR_MAX_BLOCKS
# error "Maximum number of blocks per multiprocessor exceeded"
#endif
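
The reflowed CUDA_LAUNCH_BOUNDS macro above encodes a simple occupancy calculation: the first __launch_bounds__ argument is the block size (threads_block_width squared) and the second is how many such blocks must fit in the multiprocessor's register file given the per-thread register budget. The worked example below mirrors that arithmetic; the block width and register-file size are illustrative assumptions, while 64 is the per-thread budget visible in the hunk above.

    /* Compile-time restatement of the macro's arithmetic, with assumed numbers. */
    constexpr int threads_block_width = 16;
    constexpr int kernel_max_registers = 64;
    constexpr int multiprocessor_max_registers = 65536;

    constexpr int threads_per_block = threads_block_width * threads_block_width;
    constexpr int min_blocks_per_sm = multiprocessor_max_registers /
                                      (threads_per_block * kernel_max_registers);

    /* With these numbers the macro expands to __launch_bounds__(256, 4): at most
     * 256 threads per block, and register use capped so that at least four such
     * blocks can be resident on one multiprocessor. */
    static_assert(threads_per_block == 256, "worked example");
    static_assert(min_blocks_per_sm == 4, "worked example");

    int main() { return 0; }

The two #if sanity checks after the macro guard exactly these two quantities against CUDA_BLOCK_MAX_THREADS and CUDA_MULTIPROCESSOR_MAX_BLOCKS.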
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index 37cfbbcb235..7c68f08ea10 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -17,174 +17,165 @@
/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
ccl_device float cubic_w0(float a)
{
- return (1.0f/6.0f)*(a*(a*(-a + 3.0f) - 3.0f) + 1.0f);
+ return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
}
ccl_device float cubic_w1(float a)
{
- return (1.0f/6.0f)*(a*a*(3.0f*a - 6.0f) + 4.0f);
+ return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f);
}
ccl_device float cubic_w2(float a)
{
- return (1.0f/6.0f)*(a*(a*(-3.0f*a + 3.0f) + 3.0f) + 1.0f);
+ return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f);
}
ccl_device float cubic_w3(float a)
{
- return (1.0f/6.0f)*(a*a*a);
+ return (1.0f / 6.0f) * (a * a * a);
}
/* g0 and g1 are the two amplitude functions. */
ccl_device float cubic_g0(float a)
{
- return cubic_w0(a) + cubic_w1(a);
+ return cubic_w0(a) + cubic_w1(a);
}
ccl_device float cubic_g1(float a)
{
- return cubic_w2(a) + cubic_w3(a);
+ return cubic_w2(a) + cubic_w3(a);
}
/* h0 and h1 are the two offset functions */
ccl_device float cubic_h0(float a)
{
- /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
- return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f;
+ /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
+ return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f;
}
ccl_device float cubic_h1(float a)
{
- return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f;
+ return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f;
}
/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
template<typename T>
-ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo& info, CUtexObject tex, float x, float y)
+ccl_device T
+kernel_tex_image_interp_bicubic(const TextureInfo &info, CUtexObject tex, float x, float y)
{
- x = (x * info.width) - 0.5f;
- y = (y * info.height) - 0.5f;
-
- float px = floor(x);
- float py = floor(y);
- float fx = x - px;
- float fy = y - py;
-
- float g0x = cubic_g0(fx);
- float g1x = cubic_g1(fx);
- float x0 = (px + cubic_h0(fx)) / info.width;
- float x1 = (px + cubic_h1(fx)) / info.width;
- float y0 = (py + cubic_h0(fy)) / info.height;
- float y1 = (py + cubic_h1(fy)) / info.height;
-
- return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) +
- g1x * tex2D<T>(tex, x1, y0)) +
- cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) +
- g1x * tex2D<T>(tex, x1, y1));
+ x = (x * info.width) - 0.5f;
+ y = (y * info.height) - 0.5f;
+
+ float px = floor(x);
+ float py = floor(y);
+ float fx = x - px;
+ float fy = y - py;
+
+ float g0x = cubic_g0(fx);
+ float g1x = cubic_g1(fx);
+ float x0 = (px + cubic_h0(fx)) / info.width;
+ float x1 = (px + cubic_h1(fx)) / info.width;
+ float y0 = (py + cubic_h0(fy)) / info.height;
+ float y1 = (py + cubic_h1(fy)) / info.height;
+
+ return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + g1x * tex2D<T>(tex, x1, y0)) +
+ cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + g1x * tex2D<T>(tex, x1, y1));
}
/* Fast tricubic texture lookup using 8 trilinear lookups. */
template<typename T>
-ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo& info, CUtexObject tex, float x, float y, float z)
+ccl_device T kernel_tex_image_interp_bicubic_3d(
+ const TextureInfo &info, CUtexObject tex, float x, float y, float z)
{
- x = (x * info.width) - 0.5f;
- y = (y * info.height) - 0.5f;
- z = (z * info.depth) - 0.5f;
-
- float px = floor(x);
- float py = floor(y);
- float pz = floor(z);
- float fx = x - px;
- float fy = y - py;
- float fz = z - pz;
-
- float g0x = cubic_g0(fx);
- float g1x = cubic_g1(fx);
- float g0y = cubic_g0(fy);
- float g1y = cubic_g1(fy);
- float g0z = cubic_g0(fz);
- float g1z = cubic_g1(fz);
-
- float x0 = (px + cubic_h0(fx)) / info.width;
- float x1 = (px + cubic_h1(fx)) / info.width;
- float y0 = (py + cubic_h0(fy)) / info.height;
- float y1 = (py + cubic_h1(fy)) / info.height;
- float z0 = (pz + cubic_h0(fz)) / info.depth;
- float z1 = (pz + cubic_h1(fz)) / info.depth;
-
- return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) +
- g1x * tex3D<T>(tex, x1, y0, z0)) +
- g1y * (g0x * tex3D<T>(tex, x0, y1, z0) +
- g1x * tex3D<T>(tex, x1, y1, z0))) +
- g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) +
- g1x * tex3D<T>(tex, x1, y0, z1)) +
- g1y * (g0x * tex3D<T>(tex, x0, y1, z1) +
- g1x * tex3D<T>(tex, x1, y1, z1)));
+ x = (x * info.width) - 0.5f;
+ y = (y * info.height) - 0.5f;
+ z = (z * info.depth) - 0.5f;
+
+ float px = floor(x);
+ float py = floor(y);
+ float pz = floor(z);
+ float fx = x - px;
+ float fy = y - py;
+ float fz = z - pz;
+
+ float g0x = cubic_g0(fx);
+ float g1x = cubic_g1(fx);
+ float g0y = cubic_g0(fy);
+ float g1y = cubic_g1(fy);
+ float g0z = cubic_g0(fz);
+ float g1z = cubic_g1(fz);
+
+ float x0 = (px + cubic_h0(fx)) / info.width;
+ float x1 = (px + cubic_h1(fx)) / info.width;
+ float y0 = (py + cubic_h0(fy)) / info.height;
+ float y1 = (py + cubic_h1(fy)) / info.height;
+ float z0 = (pz + cubic_h0(fz)) / info.depth;
+ float z1 = (pz + cubic_h1(fz)) / info.depth;
+
+ return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + g1x * tex3D<T>(tex, x1, y0, z0)) +
+ g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + g1x * tex3D<T>(tex, x1, y1, z0))) +
+ g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) + g1x * tex3D<T>(tex, x1, y0, z1)) +
+ g1y * (g0x * tex3D<T>(tex, x0, y1, z1) + g1x * tex3D<T>(tex, x1, y1, z1)));
}
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
- const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
- CUtexObject tex = (CUtexObject)info.data;
-
- /* float4, byte4, ushort4 and half4 */
- const int texture_type = kernel_tex_type(id);
- if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
- texture_type == IMAGE_DATA_TYPE_BYTE4 ||
- texture_type == IMAGE_DATA_TYPE_HALF4 ||
- texture_type == IMAGE_DATA_TYPE_USHORT4)
- {
- if(info.interpolation == INTERPOLATION_CUBIC) {
- return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y);
- }
- else {
- return tex2D<float4>(tex, x, y);
- }
- }
- /* float, byte and half */
- else {
- float f;
-
- if(info.interpolation == INTERPOLATION_CUBIC) {
- f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y);
- }
- else {
- f = tex2D<float>(tex, x, y);
- }
-
- return make_float4(f, f, f, 1.0f);
- }
+ const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+ CUtexObject tex = (CUtexObject)info.data;
+
+ /* float4, byte4, ushort4 and half4 */
+ const int texture_type = kernel_tex_type(id);
+ if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+ texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
+ if (info.interpolation == INTERPOLATION_CUBIC) {
+ return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y);
+ }
+ else {
+ return tex2D<float4>(tex, x, y);
+ }
+ }
+ /* float, byte and half */
+ else {
+ float f;
+
+ if (info.interpolation == INTERPOLATION_CUBIC) {
+ f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y);
+ }
+ else {
+ f = tex2D<float>(tex, x, y);
+ }
+
+ return make_float4(f, f, f, 1.0f);
+ }
}
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
+ccl_device float4 kernel_tex_image_interp_3d(
+ KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
{
- const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
- CUtexObject tex = (CUtexObject)info.data;
- uint interpolation = (interp == INTERPOLATION_NONE)? info.interpolation: interp;
-
- const int texture_type = kernel_tex_type(id);
- if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
- texture_type == IMAGE_DATA_TYPE_BYTE4 ||
- texture_type == IMAGE_DATA_TYPE_HALF4 ||
- texture_type == IMAGE_DATA_TYPE_USHORT4)
- {
- if(interpolation == INTERPOLATION_CUBIC) {
- return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z);
- }
- else {
- return tex3D<float4>(tex, x, y, z);
- }
- }
- else {
- float f;
-
- if(interpolation == INTERPOLATION_CUBIC) {
- f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z);
- }
- else {
- f = tex3D<float>(tex, x, y, z);
- }
-
- return make_float4(f, f, f, 1.0f);
- }
+ const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+ CUtexObject tex = (CUtexObject)info.data;
+ uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp;
+
+ const int texture_type = kernel_tex_type(id);
+ if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+ texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
+ if (interpolation == INTERPOLATION_CUBIC) {
+ return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z);
+ }
+ else {
+ return tex3D<float4>(tex, x, y, z);
+ }
+ }
+ else {
+ float f;
+
+ if (interpolation == INTERPOLATION_CUBIC) {
+ f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z);
+ }
+ else {
+ f = tex3D<float>(tex, x, y, z);
+ }
+
+ return make_float4(f, f, f, 1.0f);
+ }
}
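
The bicubic helpers above rely on a classic folding trick: the four cubic B-spline weights w0..w3 are regrouped into two amplitudes (g0 = w0 + w1, g1 = w2 + w3) and two fractional offsets (h0, h1), so each axis needs only two hardware-filtered fetches instead of four point taps, with tex2D/tex3D doing the inner linear blend for free. The plain C++ sketch below checks that identity in 1-D with made-up sample data and a software stand-in for the texture unit; it omits the +0.5 texel-centre shift noted in cubic_h0 because it addresses texels directly rather than through CUDA's normalized-coordinate convention.

    #include <cassert>
    #include <cmath>
    #include <cstdio>

    float w0(float a) { return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); }
    float w1(float a) { return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); }
    float w2(float a) { return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); }
    float w3(float a) { return (1.0f / 6.0f) * (a * a * a); }

    /* Software stand-in for a linearly filtered 1-D texture fetch. */
    float fetch_linear(const float *tex, float x)
    {
      int i = (int)std::floor(x);
      float t = x - (float)i;
      return (1.0f - t) * tex[i] + t * tex[i + 1];
    }

    int main()
    {
      const float tex[6] = {0.1f, 0.7f, 0.3f, 0.9f, 0.4f, 0.2f};
      const int px = 2;       /* integer part of the sample position */
      const float fx = 0.35f; /* fractional part */

      /* Direct cubic B-spline: four point taps. */
      float direct = w0(fx) * tex[px - 1] + w1(fx) * tex[px] + w2(fx) * tex[px + 1] +
                     w3(fx) * tex[px + 2];

      /* Folded form: two linear fetches with amplitudes g0/g1 and offsets h0/h1. */
      float g0 = w0(fx) + w1(fx), g1 = w2(fx) + w3(fx);
      float h0 = -1.0f + w1(fx) / g0, h1 = 1.0f + w3(fx) / g1;
      float folded = g0 * fetch_linear(tex, px + h0) + g1 * fetch_linear(tex, px + h1);

      std::printf("direct = %f, folded = %f\n", direct, folded);
      assert(std::fabs(direct - folded) < 1e-5f);
      return 0;
    }

Nesting the same folding over y (and z) is what turns the full tap count into the "4 bilinear lookups" and "8 trilinear lookups" mentioned in the comments on the two functions above.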
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index 79af831c2fb..b6390679331 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -16,254 +16,257 @@
/* For OpenCL we do manual lookup and interpolation. */
-ccl_device_inline ccl_global TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) {
- const uint tex_offset = id
-#define KERNEL_TEX(type, name) + 1
+ccl_device_inline ccl_global TextureInfo *kernel_tex_info(KernelGlobals *kg, uint id)
+{
+ const uint tex_offset = id
+#define KERNEL_TEX(type, name) +1
#include "kernel/kernel_textures.h"
- ;
+ ;
- return &((ccl_global TextureInfo*)kg->buffers[0])[tex_offset];
+ return &((ccl_global TextureInfo *)kg->buffers[0])[tex_offset];
}
-#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->cl_buffer] + info->data))[(index)]
+#define tex_fetch(type, info, index) \
+ ((ccl_global type *)(kg->buffers[info->cl_buffer] + info->data))[(index)]
ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
{
- x %= width;
- if(x < 0)
- x += width;
- return x;
+ x %= width;
+ if (x < 0)
+ x += width;
+ return x;
}
ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
{
- return clamp(x, 0, width-1);
+ return clamp(x, 0, width - 1);
}
-ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, const ccl_global TextureInfo *info, int id, int offset)
+ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg,
+ const ccl_global TextureInfo *info,
+ int id,
+ int offset)
{
- const int texture_type = kernel_tex_type(id);
-
- /* Float4 */
- if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
- return tex_fetch(float4, info, offset);
- }
- /* Byte4 */
- else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
- uchar4 r = tex_fetch(uchar4, info, offset);
- float f = 1.0f/255.0f;
- return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
- }
- /* Ushort4 */
- else if(texture_type == IMAGE_DATA_TYPE_USHORT4) {
- ushort4 r = tex_fetch(ushort4, info, offset);
- float f = 1.0f/65535.f;
- return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
- }
- /* Float */
- else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
- float f = tex_fetch(float, info, offset);
- return make_float4(f, f, f, 1.0f);
- }
- /* UShort */
- else if(texture_type == IMAGE_DATA_TYPE_USHORT) {
- ushort r = tex_fetch(ushort, info, offset);
- float f = r * (1.0f / 65535.0f);
- return make_float4(f, f, f, 1.0f);
- }
- /* Byte */
+ const int texture_type = kernel_tex_type(id);
+
+ /* Float4 */
+ if (texture_type == IMAGE_DATA_TYPE_FLOAT4) {
+ return tex_fetch(float4, info, offset);
+ }
+ /* Byte4 */
+ else if (texture_type == IMAGE_DATA_TYPE_BYTE4) {
+ uchar4 r = tex_fetch(uchar4, info, offset);
+ float f = 1.0f / 255.0f;
+ return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+ }
+ /* Ushort4 */
+ else if (texture_type == IMAGE_DATA_TYPE_USHORT4) {
+ ushort4 r = tex_fetch(ushort4, info, offset);
+ float f = 1.0f / 65535.f;
+ return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
+ }
+ /* Float */
+ else if (texture_type == IMAGE_DATA_TYPE_FLOAT) {
+ float f = tex_fetch(float, info, offset);
+ return make_float4(f, f, f, 1.0f);
+ }
+ /* UShort */
+ else if (texture_type == IMAGE_DATA_TYPE_USHORT) {
+ ushort r = tex_fetch(ushort, info, offset);
+ float f = r * (1.0f / 65535.0f);
+ return make_float4(f, f, f, 1.0f);
+ }
+ /* Byte */
#ifdef cl_khr_fp16
- /* half and half4 are optional in OpenCL */
- else if(texture_type == IMAGE_DATA_TYPE_HALF) {
- float f = tex_fetch(half, info, offset);
- return make_float4(f, f, f, 1.0f);
- }
- else if(texture_type == IMAGE_DATA_TYPE_HALF4) {
- half4 r = tex_fetch(half4, info, offset);
- return make_float4(r.x, r.y, r.z, r.w);
- }
+ /* half and half4 are optional in OpenCL */
+ else if (texture_type == IMAGE_DATA_TYPE_HALF) {
+ float f = tex_fetch(half, info, offset);
+ return make_float4(f, f, f, 1.0f);
+ }
+ else if (texture_type == IMAGE_DATA_TYPE_HALF4) {
+ half4 r = tex_fetch(half4, info, offset);
+ return make_float4(r.x, r.y, r.z, r.w);
+ }
#endif
- else {
- uchar r = tex_fetch(uchar, info, offset);
- float f = r * (1.0f/255.0f);
- return make_float4(f, f, f, 1.0f);
- }
+ else {
+ uchar r = tex_fetch(uchar, info, offset);
+ float f = r * (1.0f / 255.0f);
+ return make_float4(f, f, f, 1.0f);
+ }
}
ccl_device_inline float4 svm_image_texture_read_2d(KernelGlobals *kg, int id, int x, int y)
{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
- /* Wrap */
- if(info->extension == EXTENSION_REPEAT) {
- x = svm_image_texture_wrap_periodic(x, info->width);
- y = svm_image_texture_wrap_periodic(y, info->height);
- }
- else {
- x = svm_image_texture_wrap_clamp(x, info->width);
- y = svm_image_texture_wrap_clamp(y, info->height);
- }
-
- int offset = x + info->width * y;
- return svm_image_texture_read(kg, info, id, offset);
+ const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+ /* Wrap */
+ if (info->extension == EXTENSION_REPEAT) {
+ x = svm_image_texture_wrap_periodic(x, info->width);
+ y = svm_image_texture_wrap_periodic(y, info->height);
+ }
+ else {
+ x = svm_image_texture_wrap_clamp(x, info->width);
+ y = svm_image_texture_wrap_clamp(y, info->height);
+ }
+
+ int offset = x + info->width * y;
+ return svm_image_texture_read(kg, info, id, offset);
}
ccl_device_inline float4 svm_image_texture_read_3d(KernelGlobals *kg, int id, int x, int y, int z)
{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
- /* Wrap */
- if(info->extension == EXTENSION_REPEAT) {
- x = svm_image_texture_wrap_periodic(x, info->width);
- y = svm_image_texture_wrap_periodic(y, info->height);
- z = svm_image_texture_wrap_periodic(z, info->depth);
- }
- else {
- x = svm_image_texture_wrap_clamp(x, info->width);
- y = svm_image_texture_wrap_clamp(y, info->height);
- z = svm_image_texture_wrap_clamp(z, info->depth);
- }
-
- int offset = x + info->width * y + info->width * info->height * z;
- return svm_image_texture_read(kg, info, id, offset);
+ const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+ /* Wrap */
+ if (info->extension == EXTENSION_REPEAT) {
+ x = svm_image_texture_wrap_periodic(x, info->width);
+ y = svm_image_texture_wrap_periodic(y, info->height);
+ z = svm_image_texture_wrap_periodic(z, info->depth);
+ }
+ else {
+ x = svm_image_texture_wrap_clamp(x, info->width);
+ y = svm_image_texture_wrap_clamp(y, info->height);
+ z = svm_image_texture_wrap_clamp(z, info->depth);
+ }
+
+ int offset = x + info->width * y + info->width * info->height * z;
+ return svm_image_texture_read(kg, info, id, offset);
}
-
ccl_device_inline float svm_image_texture_frac(float x, int *ix)
{
- int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
- *ix = i;
- return x - (float)i;
+ int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
+ *ix = i;
+ return x - (float)i;
}
#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
- { \
- u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
- u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \
- u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
- u[3] = (1.0f / 6.0f) * t * t * t; \
- } (void) 0
+ { \
+ u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
+ u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
+ u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
+ u[3] = (1.0f / 6.0f) * t * t * t; \
+ } \
+ (void)0
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
- if(info->extension == EXTENSION_CLIP) {
- if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- }
-
- if(info->interpolation == INTERPOLATION_CLOSEST) {
- /* Closest interpolation. */
- int ix, iy;
- svm_image_texture_frac(x*info->width, &ix);
- svm_image_texture_frac(y*info->height, &iy);
-
- return svm_image_texture_read_2d(kg, id, ix, iy);
- }
- else if(info->interpolation == INTERPOLATION_LINEAR) {
- /* Bilinear interpolation. */
- int ix, iy;
- float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
- float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
-
- float4 r;
- r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read_2d(kg, id, ix, iy);
- r += (1.0f - ty)*tx*svm_image_texture_read_2d(kg, id, ix+1, iy);
- r += ty*(1.0f - tx)*svm_image_texture_read_2d(kg, id, ix, iy+1);
- r += ty*tx*svm_image_texture_read_2d(kg, id, ix+1, iy+1);
- return r;
- }
- else {
- /* Bicubic interpolation. */
- int ix, iy;
- float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
- float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
-
- float u[4], v[4];
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
-
- float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
- for(int y = 0; y < 4; y++) {
- for(int x = 0; x < 4; x++) {
- float weight = u[x]*v[y];
- r += weight*svm_image_texture_read_2d(kg, id, ix+x-1, iy+y-1);
- }
- }
- return r;
- }
+ const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+ if (info->extension == EXTENSION_CLIP) {
+ if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
+
+ if (info->interpolation == INTERPOLATION_CLOSEST) {
+ /* Closest interpolation. */
+ int ix, iy;
+ svm_image_texture_frac(x * info->width, &ix);
+ svm_image_texture_frac(y * info->height, &iy);
+
+ return svm_image_texture_read_2d(kg, id, ix, iy);
+ }
+ else if (info->interpolation == INTERPOLATION_LINEAR) {
+ /* Bilinear interpolation. */
+ int ix, iy;
+ float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+ float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+
+ float4 r;
+ r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy);
+ r += (1.0f - ty) * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy);
+ r += ty * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy + 1);
+ r += ty * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy + 1);
+ return r;
+ }
+ else {
+ /* Bicubic interpolation. */
+ int ix, iy;
+ float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+ float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+
+ float u[4], v[4];
+ SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+ SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+
+ float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ for (int y = 0; y < 4; y++) {
+ for (int x = 0; x < 4; x++) {
+ float weight = u[x] * v[y];
+ r += weight * svm_image_texture_read_2d(kg, id, ix + x - 1, iy + y - 1);
+ }
+ }
+ return r;
+ }
}
-
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
+ccl_device float4
+kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
{
- const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
-
- if(info->extension == EXTENSION_CLIP) {
- if(x < 0.0f || y < 0.0f || z < 0.0f ||
- x > 1.0f || y > 1.0f || z > 1.0f)
- {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- }
-
- uint interpolation = (interp == INTERPOLATION_NONE)? info->interpolation: interp;
-
- if(interpolation == INTERPOLATION_CLOSEST) {
- /* Closest interpolation. */
- int ix, iy, iz;
- svm_image_texture_frac(x*info->width, &ix);
- svm_image_texture_frac(y*info->height, &iy);
- svm_image_texture_frac(z*info->depth, &iz);
-
- return svm_image_texture_read_3d(kg, id, ix, iy, iz);
- }
- else if(interpolation == INTERPOLATION_LINEAR) {
- /* Bilinear interpolation. */
- int ix, iy, iz;
- float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
- float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
- float tz = svm_image_texture_frac(z*info->depth - 0.5f, &iz);
-
- float4 r;
- r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy, iz);
- r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read_3d(kg, id, ix+1, iy, iz);
- r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy+1, iz);
- r += (1.0f - tz)*ty*tx*svm_image_texture_read_3d(kg, id, ix+1, iy+1, iz);
-
- r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy, iz+1);
- r += tz*(1.0f - ty)*tx*svm_image_texture_read_3d(kg, id, ix+1, iy, iz+1);
- r += tz*ty*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy+1, iz+1);
- r += tz*ty*tx*svm_image_texture_read_3d(kg, id, ix+1, iy+1, iz+1);
- return r;
- }
- else {
- /* Bicubic interpolation. */
- int ix, iy, iz;
- float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix);
- float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy);
- float tz = svm_image_texture_frac(z*info->depth - 0.5f, &iz);
-
- float u[4], v[4], w[4];
- SET_CUBIC_SPLINE_WEIGHTS(u, tx);
- SET_CUBIC_SPLINE_WEIGHTS(v, ty);
- SET_CUBIC_SPLINE_WEIGHTS(w, tz);
-
- float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
- for(int z = 0; z < 4; z++) {
- for(int y = 0; y < 4; y++) {
- for(int x = 0; x < 4; x++) {
- float weight = u[x]*v[y]*w[z];
- r += weight*svm_image_texture_read_3d(kg, id, ix+x-1, iy+y-1, iz+z-1);
- }
- }
- }
- return r;
- }
+ const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
+
+ if (info->extension == EXTENSION_CLIP) {
+ if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
+
+ uint interpolation = (interp == INTERPOLATION_NONE) ? info->interpolation : interp;
+
+ if (interpolation == INTERPOLATION_CLOSEST) {
+ /* Closest interpolation. */
+ int ix, iy, iz;
+ svm_image_texture_frac(x * info->width, &ix);
+ svm_image_texture_frac(y * info->height, &iy);
+ svm_image_texture_frac(z * info->depth, &iz);
+
+ return svm_image_texture_read_3d(kg, id, ix, iy, iz);
+ }
+ else if (interpolation == INTERPOLATION_LINEAR) {
+ /* Bilinear interpolation. */
+ int ix, iy, iz;
+ float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+ float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+ float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz);
+
+ float4 r;
+ r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz);
+ r += (1.0f - tz) * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz);
+ r += (1.0f - tz) * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz);
+ r += (1.0f - tz) * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz);
+
+ r += tz * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz + 1);
+ r += tz * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz + 1);
+ r += tz * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz + 1);
+ r += tz * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz + 1);
+ return r;
+ }
+ else {
+ /* Bicubic interpolation. */
+ int ix, iy, iz;
+ float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix);
+ float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy);
+ float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz);
+
+ float u[4], v[4], w[4];
+ SET_CUBIC_SPLINE_WEIGHTS(u, tx);
+ SET_CUBIC_SPLINE_WEIGHTS(v, ty);
+ SET_CUBIC_SPLINE_WEIGHTS(w, tz);
+
+ float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+ for (int z = 0; z < 4; z++) {
+ for (int y = 0; y < 4; y++) {
+ for (int x = 0; x < 4; x++) {
+ float weight = u[x] * v[y] * w[z];
+ r += weight * svm_image_texture_read_3d(kg, id, ix + x - 1, iy + y - 1, iz + z - 1);
+ }
+ }
+ }
+ return r;
+ }
}
#undef SET_CUBIC_SPLINE_WEIGHTS
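
Unlike the CUDA path, the OpenCL code above performs the whole texture pipeline in software: kernel_tex_info locates the TextureInfo record (counting entries by expanding every KERNEL_TEX to +1), tex_fetch indexes the raw buffer, and wrapping, filtering and the CLIP extension are all done by hand. The host-side C++ sketch below covers just the wrap + bilinear part with made-up image data; the real code additionally dispatches on the stored data type and supports cubic filtering via SET_CUBIC_SPLINE_WEIGHTS.

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int wrap_periodic(int x, int width)
    {
      x %= width;
      if (x < 0)
        x += width;
      return x;
    }

    int wrap_clamp(int x, int width)
    {
      return std::min(std::max(x, 0), width - 1);
    }

    float read_texel(const float *img, int width, int height, int x, int y, bool repeat)
    {
      x = repeat ? wrap_periodic(x, width) : wrap_clamp(x, width);
      y = repeat ? wrap_periodic(y, height) : wrap_clamp(y, height);
      return img[x + width * y];
    }

    /* Bilinear sample at normalized (u, v): subtract half a texel, split into
     * integer and fractional parts, blend the four neighbours. */
    float sample_bilinear(const float *img, int width, int height, float u, float v, bool repeat)
    {
      float x = u * width - 0.5f;
      float y = v * height - 0.5f;
      int ix = (int)std::floor(x), iy = (int)std::floor(y);
      float tx = x - ix, ty = y - iy;

      float r = (1.0f - ty) * (1.0f - tx) * read_texel(img, width, height, ix, iy, repeat);
      r += (1.0f - ty) * tx * read_texel(img, width, height, ix + 1, iy, repeat);
      r += ty * (1.0f - tx) * read_texel(img, width, height, ix, iy + 1, repeat);
      r += ty * tx * read_texel(img, width, height, ix + 1, iy + 1, repeat);
      return r;
    }

    int main()
    {
      const int width = 4, height = 2;
      const float img[8] = {0, 1, 2, 3, 4, 5, 6, 7};
      std::printf("repeat: %f\n", sample_bilinear(img, width, height, 0.1f, 0.6f, true));
      std::printf("clamp:  %f\n", sample_bilinear(img, width, height, 0.1f, 0.6f, false));
      return 0;
    }
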
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
index 05e1ddf6da2..e123b4cd6ec 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
@@ -14,50 +14,53 @@
* limitations under the License.
*/
-#define KERNEL_NAME_JOIN(a, b) a ## _ ## b
+#define KERNEL_NAME_JOIN(a, b) a##_##b
#define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b)
-__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
- ccl_global char *kg_global,
- ccl_constant KernelData *data,
+__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace,
+ KERNEL_NAME)(ccl_global char *kg_global,
+ ccl_constant KernelData *data,
- ccl_global void *split_data_buffer,
- ccl_global char *ray_state,
+ ccl_global void *split_data_buffer,
+ ccl_global char *ray_state,
- KERNEL_BUFFER_PARAMS,
+ KERNEL_BUFFER_PARAMS,
- ccl_global int *queue_index,
- ccl_global char *use_queues_flag,
- ccl_global unsigned int *work_pools,
- ccl_global float *buffer
- )
+ ccl_global int *queue_index,
+ ccl_global char *use_queues_flag,
+ ccl_global unsigned int *work_pools,
+ ccl_global float *buffer)
{
#ifdef LOCALS_TYPE
- ccl_local LOCALS_TYPE locals;
+ ccl_local LOCALS_TYPE locals;
#endif
- KernelGlobals *kg = (KernelGlobals*)kg_global;
+ KernelGlobals *kg = (KernelGlobals *)kg_global;
- if(ccl_local_id(0) + ccl_local_id(1) == 0) {
- kg->data = data;
+ if (ccl_local_id(0) + ccl_local_id(1) == 0) {
+ kg->data = data;
- kernel_split_params.queue_index = queue_index;
- kernel_split_params.use_queues_flag = use_queues_flag;
- kernel_split_params.work_pools = work_pools;
- kernel_split_params.tile.buffer = buffer;
+ kernel_split_params.queue_index = queue_index;
+ kernel_split_params.use_queues_flag = use_queues_flag;
+ kernel_split_params.work_pools = work_pools;
+ kernel_split_params.tile.buffer = buffer;
- split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state);
+ split_data_init(kg,
+ &kernel_split_state,
+ ccl_global_size(0) * ccl_global_size(1),
+ split_data_buffer,
+ ray_state);
+ }
- }
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
- kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
-
- KERNEL_NAME_EVAL(kernel, KERNEL_NAME)(
- kg
+ KERNEL_NAME_EVAL(kernel, KERNEL_NAME)
+ (kg
#ifdef LOCALS_TYPE
- , &locals
+ ,
+ &locals
#endif
- );
+ );
}
#undef KERNEL_NAME_JOIN
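
Each time this header is included with a different KERNEL_NAME, the pasted KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME) yields one __kernel entry point whose job is bookkeeping: the work-item with local id 0 wires the shared split parameters into KernelGlobals, buffer pointers are set, and then the real kernel_<name> runs, with an optional ccl_local struct when LOCALS_TYPE is defined. The serial C++ sketch below imitates only that shape; the Demo* names are hypothetical stand-ins and a plain loop takes the place of the OpenCL work-group.

    #include <cstdio>
    #include <vector>

    struct DemoSplitParams {
      int *queue_index = nullptr;
      float *buffer = nullptr;
    };

    struct DemoGlobals {
      DemoSplitParams split_params;
    };

    /* The "real" kernel body that the wrapper forwards to. */
    void demo_kernel_step(DemoGlobals *kg, int local_id)
    {
      kg->split_params.buffer[local_id] += 1.0f;
    }

    /* Rough equivalent of one generated kernel_ocl_path_trace_<name> wrapper. */
    void demo_entry_step(DemoGlobals *kg, int *queue_index, float *buffer, int group_size)
    {
      for (int local_id = 0; local_id < group_size; local_id++) {
        if (local_id == 0) {
          /* Only the first work-item wires up the shared parameters. */
          kg->split_params.queue_index = queue_index;
          kg->split_params.buffer = buffer;
        }
        demo_kernel_step(kg, local_id);
      }
    }

    int main()
    {
      DemoGlobals kg;
      int queue_index = 0;
      std::vector<float> buffer(8, 0.0f);
      demo_entry_step(&kg, &queue_index, buffer.data(), (int)buffer.size());
      std::printf("buffer[3] = %f\n", buffer[3]);
      return 0;
    }
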