Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/kernels')
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu.h6
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h12
-rw-r--r--intern/cycles/kernel/kernels/cpu/filter_sse41.cpp1
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h3
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h17
-rw-r--r--intern/cycles/kernel/kernels/cuda/filter.cu12
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel.cu69
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_config.h9
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu6
-rw-r--r--intern/cycles/kernel/kernels/opencl/filter.cl14
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel.cl83
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_data_init.cl13
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split.cl3
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_split_function.h13
14 files changed, 123 insertions, 138 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
index 2ed713299fd..bf13ba62806 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h
@@ -27,8 +27,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
float *bufferV,
int* prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance);
+ int buffer_denoising_offset);
void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
TilesInfo *tiles,
@@ -40,8 +39,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
float *variance,
int* prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance);
+ int buffer_denoising_offset);
void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
ccl_global float *image,
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
index 8dc1a8d583c..2fbb0ea2bdb 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h
@@ -45,8 +45,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
float *bufferVariance,
int* prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance)
+ int buffer_denoising_offset)
{
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
@@ -60,8 +59,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample,
bufferVariance,
load_int4(prefilter_rect),
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
#endif
}
@@ -74,8 +72,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
float *mean, float *variance,
int* prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance)
+ int buffer_denoising_offset)
{
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
@@ -86,8 +83,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
mean, variance,
load_int4(prefilter_rect),
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
#endif
}
diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
index 1a7b2040da1..254025be4e2 100644
--- a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
+++ b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp
@@ -25,6 +25,7 @@
#else
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
+# define __KERNEL_SSE__
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index c8938534fe8..6bdb8546a24 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -18,7 +18,6 @@
void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
float *buffer,
- unsigned int *rng_state,
int sample,
int x, int y,
int offset,
@@ -42,7 +41,6 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
uint4 *input,
float4 *output,
- float *output_luma,
int type,
int filter,
int i,
@@ -57,7 +55,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
ccl_global void *split_data_buffer,
int num_elements,
ccl_global char *ray_state,
- ccl_global uint *rng_state,
int start_sample,
int end_sample,
int sx, int sy, int sw, int sh, int offset, int stride,
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index d4315ee5ec4..fdeb7dcd3e4 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -75,7 +75,6 @@ CCL_NAMESPACE_BEGIN
void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
float *buffer,
- unsigned int *rng_state,
int sample,
int x, int y,
int offset,
@@ -88,7 +87,6 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
if(kernel_data.integrator.branched) {
kernel_branched_path_trace(kg,
buffer,
- rng_state,
sample,
x, y,
offset,
@@ -97,7 +95,7 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
else
# endif
{
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+ kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
}
#endif /* KERNEL_STUB */
}
@@ -151,7 +149,6 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
uint4 *input,
float4 *output,
- float *output_luma,
int type,
int filter,
int i,
@@ -162,7 +159,6 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
STUB_ASSERT(KERNEL_ARCH, shader);
#else
if(type >= SHADER_EVAL_BAKE) {
- kernel_assert(output_luma == NULL);
# ifdef __BAKING__
kernel_bake_evaluate(kg,
input,
@@ -174,14 +170,11 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
sample);
# endif
}
+ else if(type == SHADER_EVAL_DISPLACE) {
+ kernel_displace_evaluate(kg, input, output, i);
+ }
else {
- kernel_shader_evaluate(kg,
- input,
- output,
- output_luma,
- (ShaderEvalType)type,
- i,
- sample);
+ kernel_background_evaluate(kg, input, output, i);
}
#endif /* KERNEL_STUB */
}
diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu
index 009c3fde9d5..c8172355a7f 100644
--- a/intern/cycles/kernel/kernels/cuda/filter.cu
+++ b/intern/cycles/kernel/kernels/cuda/filter.cu
@@ -37,8 +37,7 @@ kernel_cuda_filter_divide_shadow(int sample,
float *bufferVariance,
int4 prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance)
+ int buffer_denoising_offset)
{
int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
@@ -53,8 +52,7 @@ kernel_cuda_filter_divide_shadow(int sample,
bufferVariance,
prefilter_rect,
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
}
}
@@ -68,8 +66,7 @@ kernel_cuda_filter_get_feature(int sample,
float *variance,
int4 prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- bool use_split_variance)
+ int buffer_denoising_offset)
{
int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
@@ -81,8 +78,7 @@ kernel_cuda_filter_get_feature(int sample,
mean, variance,
prefilter_rect,
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
}
}
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index dc343cb387a..1ac6afd167a 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -20,6 +20,9 @@
#include "kernel/kernel_compat_cuda.h"
#include "kernel_config.h"
+
+#include "util/util_atomic.h"
+
#include "kernel/kernel_math.h"
#include "kernel/kernel_types.h"
#include "kernel/kernel_globals.h"
@@ -27,32 +30,37 @@
#include "kernel/kernel_path.h"
#include "kernel/kernel_path_branched.h"
#include "kernel/kernel_bake.h"
+#include "kernel/kernel_work_stealing.h"
/* kernels */
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
+kernel_cuda_path_trace(WorkTile *tile, uint total_work_size)
{
- int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
- int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
+ int work_index = ccl_global_id(0);
+
+ if(work_index < total_work_size) {
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
- if(x < sx + sw && y < sy + sh) {
KernelGlobals kg;
- kernel_path_trace(&kg, buffer, rng_state, sample, x, y, offset, stride);
+ kernel_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
}
}
#ifdef __BRANCHED_PATH__
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS)
-kernel_cuda_branched_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
+kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size)
{
- int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
- int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
+ int work_index = ccl_global_id(0);
+
+ if(work_index < total_work_size) {
+ uint x, y, sample;
+ get_work_pixel(tile, work_index, &x, &y, &sample);
- if(x < sx + sw && y < sy + sh) {
KernelGlobals kg;
- kernel_branched_path_trace(&kg, buffer, rng_state, sample, x, y, offset, stride);
+ kernel_branched_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
}
}
#endif
@@ -83,26 +91,37 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_shader(uint4 *input,
- float4 *output,
- float *output_luma,
- int type,
- int sx,
- int sw,
- int offset,
- int sample)
+kernel_cuda_displace(uint4 *input,
+ float4 *output,
+ int type,
+ int sx,
+ int sw,
+ int offset,
+ int sample)
+{
+ int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
+
+ if(x < sx + sw) {
+ KernelGlobals kg;
+ kernel_displace_evaluate(&kg, input, output, x);
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_background(uint4 *input,
+ float4 *output,
+ int type,
+ int sx,
+ int sw,
+ int offset,
+ int sample)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
if(x < sx + sw) {
KernelGlobals kg;
- kernel_shader_evaluate(&kg,
- input,
- output,
- output_luma,
- (ShaderEvalType)type,
- x,
- sample);
+ kernel_background_evaluate(&kg, input, output, x);
}
}
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h
index 9fa39dc9ebb..7ae205b7e14 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_config.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h
@@ -81,8 +81,13 @@
# error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
#endif
-/* compute number of threads per block and minimum blocks per multiprocessor
- * given the maximum number of registers per thread */
+/* For split kernel using all registers seems fastest for now, but this
+ * is unlikely to be optimal once we resolve other bottlenecks. */
+
+#define CUDA_KERNEL_SPLIT_MAX_REGISTERS CUDA_THREAD_MAX_REGISTERS
+
+/* Compute number of threads per block and minimum blocks per multiprocessor
+ * given the maximum number of registers per thread. */
#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \
__launch_bounds__( \
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 628891b1458..43b3d0aa0e6 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -60,7 +60,6 @@ kernel_cuda_path_trace_data_init(
ccl_global void *split_data_buffer,
int num_elements,
ccl_global char *ray_state,
- ccl_global uint *rng_state,
int start_sample,
int end_sample,
int sx, int sy, int sw, int sh, int offset, int stride,
@@ -76,7 +75,6 @@ kernel_cuda_path_trace_data_init(
split_data_buffer,
num_elements,
ray_state,
- rng_state,
start_sample,
end_sample,
sx, sy, sw, sh, offset, stride,
@@ -90,7 +88,7 @@ kernel_cuda_path_trace_data_init(
#define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
extern "C" __global__ void \
- CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \
+ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \
kernel_cuda_##name() \
{ \
kernel_##name(NULL); \
@@ -98,7 +96,7 @@ kernel_cuda_path_trace_data_init(
#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
extern "C" __global__ void \
- CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \
+ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \
kernel_cuda_##name() \
{ \
ccl_local type locals; \
diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl
index ba53ba4b26f..7a7b596a350 100644
--- a/intern/cycles/kernel/kernels/opencl/filter.cl
+++ b/intern/cycles/kernel/kernels/opencl/filter.cl
@@ -31,8 +31,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample,
ccl_global float *bufferVariance,
int4 prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- char use_split_variance)
+ int buffer_denoising_offset)
{
int x = prefilter_rect.x + get_global_id(0);
int y = prefilter_rect.y + get_global_id(1);
@@ -47,8 +46,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample,
bufferVariance,
prefilter_rect,
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
}
}
@@ -60,8 +58,7 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
ccl_global float *variance,
int4 prefilter_rect,
int buffer_pass_stride,
- int buffer_denoising_offset,
- char use_split_variance)
+ int buffer_denoising_offset)
{
int x = prefilter_rect.x + get_global_id(0);
int y = prefilter_rect.y + get_global_id(1);
@@ -73,8 +70,7 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
mean, variance,
prefilter_rect,
buffer_pass_stride,
- buffer_denoising_offset,
- use_split_variance);
+ buffer_denoising_offset);
}
}
@@ -235,7 +231,7 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx,
}
__kernel void kernel_ocl_filter_finalize(int w,
- int h,
+ int h,
ccl_global float *buffer,
ccl_global int *rank,
ccl_global float *XtWX,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index 078acc1631e..66b6e19de84 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -50,11 +50,8 @@
__kernel void kernel_ocl_path_trace(
ccl_constant KernelData *data,
ccl_global float *buffer,
- ccl_global uint *rng_state,
-#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name,
-#include "kernel/kernel_textures.h"
+ KERNEL_BUFFER_PARAMS,
int sample,
int sx, int sy, int sw, int sh, int offset, int stride)
@@ -63,28 +60,24 @@ __kernel void kernel_ocl_path_trace(
kg->data = data;
-#define KERNEL_TEX(type, ttype, name) \
- kg->name = name;
-#include "kernel/kernel_textures.h"
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
int x = sx + ccl_global_id(0);
int y = sy + ccl_global_id(1);
if(x < sx + sw && y < sy + sh)
- kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+ kernel_path_trace(kg, buffer, sample, x, y, offset, stride);
}
#else /* __COMPILE_ONLY_MEGAKERNEL__ */
-__kernel void kernel_ocl_shader(
+__kernel void kernel_ocl_displace(
ccl_constant KernelData *data,
ccl_global uint4 *input,
ccl_global float4 *output,
- ccl_global float *output_luma,
-#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name,
-#include "kernel/kernel_textures.h"
+ KERNEL_BUFFER_PARAMS,
int type, int sx, int sw, int offset, int sample)
{
@@ -92,20 +85,35 @@ __kernel void kernel_ocl_shader(
kg->data = data;
-#define KERNEL_TEX(type, ttype, name) \
- kg->name = name;
-#include "kernel/kernel_textures.h"
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
int x = sx + ccl_global_id(0);
if(x < sx + sw) {
- kernel_shader_evaluate(kg,
- input,
- output,
- output_luma,
- (ShaderEvalType)type,
- x,
- sample);
+ kernel_displace_evaluate(kg, input, output, x);
+ }
+}
+__kernel void kernel_ocl_background(
+ ccl_constant KernelData *data,
+ ccl_global uint4 *input,
+ ccl_global float4 *output,
+
+ KERNEL_BUFFER_PARAMS,
+
+ int type, int sx, int sw, int offset, int sample)
+{
+ KernelGlobals kglobals, *kg = &kglobals;
+
+ kg->data = data;
+
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
+
+ int x = sx + ccl_global_id(0);
+
+ if(x < sx + sw) {
+ kernel_background_evaluate(kg, input, output, x);
}
}
@@ -114,9 +122,7 @@ __kernel void kernel_ocl_bake(
ccl_global uint4 *input,
ccl_global float4 *output,
-#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name,
-#include "kernel/kernel_textures.h"
+ KERNEL_BUFFER_PARAMS,
int type, int filter, int sx, int sw, int offset, int sample)
{
@@ -124,9 +130,8 @@ __kernel void kernel_ocl_bake(
kg->data = data;
-#define KERNEL_TEX(type, ttype, name) \
- kg->name = name;
-#include "kernel/kernel_textures.h"
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
int x = sx + ccl_global_id(0);
@@ -144,9 +149,7 @@ __kernel void kernel_ocl_convert_to_byte(
ccl_global uchar4 *rgba,
ccl_global float *buffer,
-#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name,
-#include "kernel/kernel_textures.h"
+ KERNEL_BUFFER_PARAMS,
float sample_scale,
int sx, int sy, int sw, int sh, int offset, int stride)
@@ -155,9 +158,8 @@ __kernel void kernel_ocl_convert_to_byte(
kg->data = data;
-#define KERNEL_TEX(type, ttype, name) \
- kg->name = name;
-#include "kernel/kernel_textures.h"
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
int x = sx + ccl_global_id(0);
int y = sy + ccl_global_id(1);
@@ -171,9 +173,7 @@ __kernel void kernel_ocl_convert_to_half_float(
ccl_global uchar4 *rgba,
ccl_global float *buffer,
-#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name,
-#include "kernel/kernel_textures.h"
+ KERNEL_BUFFER_PARAMS,
float sample_scale,
int sx, int sy, int sw, int sh, int offset, int stride)
@@ -182,9 +182,8 @@ __kernel void kernel_ocl_convert_to_half_float(
kg->data = data;
-#define KERNEL_TEX(type, ttype, name) \
- kg->name = name;
-#include "kernel/kernel_textures.h"
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
+ kernel_set_buffer_info(kg);
int x = sx + ccl_global_id(0);
int y = sy + ccl_global_id(1);
@@ -193,7 +192,7 @@ __kernel void kernel_ocl_convert_to_half_float(
kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
-__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, ulong size, ulong offset)
+__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, uint64_t size, uint64_t offset)
{
size_t i = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
index 8b85d362f8a..7125348a49f 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
@@ -24,12 +24,7 @@ __kernel void kernel_ocl_path_trace_data_init(
ccl_global void *split_data_buffer,
int num_elements,
ccl_global char *ray_state,
- ccl_global uint *rng_state,
-
-#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name,
-#include "kernel/kernel_textures.h"
-
+ KERNEL_BUFFER_PARAMS,
int start_sample,
int end_sample,
int sx, int sy, int sw, int sh, int offset, int stride,
@@ -45,11 +40,7 @@ __kernel void kernel_ocl_path_trace_data_init(
split_data_buffer,
num_elements,
ray_state,
- rng_state,
-
-#define KERNEL_TEX(type, ttype, name) name,
-#include "kernel/kernel_textures.h"
-
+ KERNEL_BUFFER_ARGS,
start_sample,
end_sample,
sx, sy, sw, sh, offset, stride,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
index 651addb02f4..4cbda1bc2e7 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
@@ -14,6 +14,9 @@
* limitations under the License.
*/
+#include "kernel/kernel_compat_opencl.h" // PRECOMPILED
+#include "kernel/split/kernel_split_common.h" // PRECOMPILED
+
#include "kernel/kernels/opencl/kernel_state_buffer_size.cl"
#include "kernel/kernels/opencl/kernel_data_init.cl"
#include "kernel/kernels/opencl/kernel_path_init.cl"
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
index f1e914a70d4..6aa7681cbed 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
@@ -23,11 +23,8 @@ __kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
ccl_global void *split_data_buffer,
ccl_global char *ray_state,
- ccl_global uint *rng_state,
-#define KERNEL_TEX(type, ttype, name) \
- ccl_global type *name,
-#include "kernel/kernel_textures.h"
+ KERNEL_BUFFER_PARAMS,
ccl_global int *queue_index,
ccl_global char *use_queues_flag,
@@ -44,20 +41,16 @@ __kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
if(ccl_local_id(0) + ccl_local_id(1) == 0) {
kg->data = data;
- kernel_split_params.rng_state = rng_state;
kernel_split_params.queue_index = queue_index;
kernel_split_params.use_queues_flag = use_queues_flag;
kernel_split_params.work_pools = work_pools;
- kernel_split_params.buffer = buffer;
+ kernel_split_params.tile.buffer = buffer;
split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state);
-#define KERNEL_TEX(type, ttype, name) \
- kg->name = name;
-#include "kernel/kernel_textures.h"
}
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
KERNEL_NAME_EVAL(kernel, KERNEL_NAME)(
kg