diff options
Diffstat (limited to 'intern/cycles/kernel/kernels')
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu.h | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h | 12 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/filter_sse41.cpp | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 17 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/filter.cu | 12 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel.cu | 69 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel_config.h | 9 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel_split.cu | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/filter.cl | 14 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel.cl | 83 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_data_init.cl | 13 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_split.cl | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_split_function.h | 13 |
14 files changed, 123 insertions, 138 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h index 2ed713299fd..bf13ba62806 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -27,8 +27,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, float *bufferV, int* prefilter_rect, int buffer_pass_stride, - int buffer_denoising_offset, - bool use_split_variance); + int buffer_denoising_offset); void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, TilesInfo *tiles, @@ -40,8 +39,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, float *variance, int* prefilter_rect, int buffer_pass_stride, - int buffer_denoising_offset, - bool use_split_variance); + int buffer_denoising_offset); void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, ccl_global float *image, diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h index 8dc1a8d583c..2fbb0ea2bdb 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -45,8 +45,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, float *bufferVariance, int* prefilter_rect, int buffer_pass_stride, - int buffer_denoising_offset, - bool use_split_variance) + int buffer_denoising_offset) { #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow); @@ -60,8 +59,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, bufferVariance, load_int4(prefilter_rect), buffer_pass_stride, - buffer_denoising_offset, - use_split_variance); + buffer_denoising_offset); #endif } @@ -74,8 +72,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, float *mean, float *variance, int* prefilter_rect, int buffer_pass_stride, - int buffer_denoising_offset, - bool use_split_variance) + int buffer_denoising_offset) { #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_get_feature); @@ -86,8 +83,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, mean, variance, load_int4(prefilter_rect), buffer_pass_stride, - buffer_denoising_offset, - use_split_variance); + buffer_denoising_offset); #endif } diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp index 1a7b2040da1..254025be4e2 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp @@ -25,6 +25,7 @@ #else /* SSE optimization disabled for now on 32 bit, see bug #36316 */ # if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE__ # define __KERNEL_SSE2__ # define __KERNEL_SSE3__ # define __KERNEL_SSSE3__ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index c8938534fe8..6bdb8546a24 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -18,7 +18,6 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, float *buffer, - unsigned int *rng_state, int sample, int x, int y, int offset, @@ -42,7 +41,6 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, uint4 *input, float4 *output, - float *output_luma, int type, int filter, int i, @@ -57,7 +55,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)( ccl_global void *split_data_buffer, int num_elements, ccl_global char *ray_state, - ccl_global uint *rng_state, int start_sample, int end_sample, int sx, int sy, int sw, int sh, int offset, int stride, diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index d4315ee5ec4..fdeb7dcd3e4 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -75,7 +75,6 @@ CCL_NAMESPACE_BEGIN void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, float *buffer, - unsigned int *rng_state, int sample, int x, int y, int offset, @@ -88,7 +87,6 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, if(kernel_data.integrator.branched) { kernel_branched_path_trace(kg, buffer, - rng_state, sample, x, y, offset, @@ -97,7 +95,7 @@ void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, else # endif { - kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); + kernel_path_trace(kg, buffer, sample, x, y, offset, stride); } #endif /* KERNEL_STUB */ } @@ -151,7 +149,6 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, uint4 *input, float4 *output, - float *output_luma, int type, int filter, int i, @@ -162,7 +159,6 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, STUB_ASSERT(KERNEL_ARCH, shader); #else if(type >= SHADER_EVAL_BAKE) { - kernel_assert(output_luma == NULL); # ifdef __BAKING__ kernel_bake_evaluate(kg, input, @@ -174,14 +170,11 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, sample); # endif } + else if(type == SHADER_EVAL_DISPLACE) { + kernel_displace_evaluate(kg, input, output, i); + } else { - kernel_shader_evaluate(kg, - input, - output, - output_luma, - (ShaderEvalType)type, - i, - sample); + kernel_background_evaluate(kg, input, output, i); } #endif /* KERNEL_STUB */ } diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu index 009c3fde9d5..c8172355a7f 100644 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ b/intern/cycles/kernel/kernels/cuda/filter.cu @@ -37,8 +37,7 @@ kernel_cuda_filter_divide_shadow(int sample, float *bufferVariance, int4 prefilter_rect, int buffer_pass_stride, - int buffer_denoising_offset, - bool use_split_variance) + int buffer_denoising_offset) { int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x; int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y; @@ -53,8 +52,7 @@ kernel_cuda_filter_divide_shadow(int sample, bufferVariance, prefilter_rect, buffer_pass_stride, - buffer_denoising_offset, - use_split_variance); + buffer_denoising_offset); } } @@ -68,8 +66,7 @@ kernel_cuda_filter_get_feature(int sample, float *variance, int4 prefilter_rect, int buffer_pass_stride, - int buffer_denoising_offset, - bool use_split_variance) + int buffer_denoising_offset) { int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x; int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y; @@ -81,8 +78,7 @@ kernel_cuda_filter_get_feature(int sample, mean, variance, prefilter_rect, buffer_pass_stride, - buffer_denoising_offset, - use_split_variance); + buffer_denoising_offset); } } diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index dc343cb387a..1ac6afd167a 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -20,6 +20,9 @@ #include "kernel/kernel_compat_cuda.h" #include "kernel_config.h" + +#include "util/util_atomic.h" + #include "kernel/kernel_math.h" #include "kernel/kernel_types.h" #include "kernel/kernel_globals.h" @@ -27,32 +30,37 @@ #include "kernel/kernel_path.h" #include "kernel/kernel_path_branched.h" #include "kernel/kernel_bake.h" +#include "kernel/kernel_work_stealing.h" /* kernels */ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) +kernel_cuda_path_trace(WorkTile *tile, uint total_work_size) { - int x = sx + blockDim.x*blockIdx.x + threadIdx.x; - int y = sy + blockDim.y*blockIdx.y + threadIdx.y; + int work_index = ccl_global_id(0); + + if(work_index < total_work_size) { + uint x, y, sample; + get_work_pixel(tile, work_index, &x, &y, &sample); - if(x < sx + sw && y < sy + sh) { KernelGlobals kg; - kernel_path_trace(&kg, buffer, rng_state, sample, x, y, offset, stride); + kernel_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); } } #ifdef __BRANCHED_PATH__ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS) -kernel_cuda_branched_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) +kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size) { - int x = sx + blockDim.x*blockIdx.x + threadIdx.x; - int y = sy + blockDim.y*blockIdx.y + threadIdx.y; + int work_index = ccl_global_id(0); + + if(work_index < total_work_size) { + uint x, y, sample; + get_work_pixel(tile, work_index, &x, &y, &sample); - if(x < sx + sw && y < sy + sh) { KernelGlobals kg; - kernel_branched_path_trace(&kg, buffer, rng_state, sample, x, y, offset, stride); + kernel_branched_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); } } #endif @@ -83,26 +91,37 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) -kernel_cuda_shader(uint4 *input, - float4 *output, - float *output_luma, - int type, - int sx, - int sw, - int offset, - int sample) +kernel_cuda_displace(uint4 *input, + float4 *output, + int type, + int sx, + int sw, + int offset, + int sample) +{ + int x = sx + blockDim.x*blockIdx.x + threadIdx.x; + + if(x < sx + sw) { + KernelGlobals kg; + kernel_displace_evaluate(&kg, input, output, x); + } +} + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_background(uint4 *input, + float4 *output, + int type, + int sx, + int sw, + int offset, + int sample) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; if(x < sx + sw) { KernelGlobals kg; - kernel_shader_evaluate(&kg, - input, - output, - output_luma, - (ShaderEvalType)type, - x, - sample); + kernel_background_evaluate(&kg, input, output, x); } } diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h index 9fa39dc9ebb..7ae205b7e14 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_config.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h @@ -81,8 +81,13 @@ # error "Unknown or unsupported CUDA architecture, can't determine launch bounds" #endif -/* compute number of threads per block and minimum blocks per multiprocessor - * given the maximum number of registers per thread */ +/* For split kernel using all registers seems fastest for now, but this + * is unlikely to be optimal once we resolve other bottlenecks. */ + +#define CUDA_KERNEL_SPLIT_MAX_REGISTERS CUDA_THREAD_MAX_REGISTERS + +/* Compute number of threads per block and minimum blocks per multiprocessor + * given the maximum number of registers per thread. */ #define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \ __launch_bounds__( \ diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index 628891b1458..43b3d0aa0e6 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -60,7 +60,6 @@ kernel_cuda_path_trace_data_init( ccl_global void *split_data_buffer, int num_elements, ccl_global char *ray_state, - ccl_global uint *rng_state, int start_sample, int end_sample, int sx, int sy, int sw, int sh, int offset, int stride, @@ -76,7 +75,6 @@ kernel_cuda_path_trace_data_init( split_data_buffer, num_elements, ray_state, - rng_state, start_sample, end_sample, sx, sy, sw, sh, offset, stride, @@ -90,7 +88,7 @@ kernel_cuda_path_trace_data_init( #define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ extern "C" __global__ void \ - CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ + CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \ kernel_cuda_##name() \ { \ kernel_##name(NULL); \ @@ -98,7 +96,7 @@ kernel_cuda_path_trace_data_init( #define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ extern "C" __global__ void \ - CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ + CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \ kernel_cuda_##name() \ { \ ccl_local type locals; \ diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl index ba53ba4b26f..7a7b596a350 100644 --- a/intern/cycles/kernel/kernels/opencl/filter.cl +++ b/intern/cycles/kernel/kernels/opencl/filter.cl @@ -31,8 +31,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample, ccl_global float *bufferVariance, int4 prefilter_rect, int buffer_pass_stride, - int buffer_denoising_offset, - char use_split_variance) + int buffer_denoising_offset) { int x = prefilter_rect.x + get_global_id(0); int y = prefilter_rect.y + get_global_id(1); @@ -47,8 +46,7 @@ __kernel void kernel_ocl_filter_divide_shadow(int sample, bufferVariance, prefilter_rect, buffer_pass_stride, - buffer_denoising_offset, - use_split_variance); + buffer_denoising_offset); } } @@ -60,8 +58,7 @@ __kernel void kernel_ocl_filter_get_feature(int sample, ccl_global float *variance, int4 prefilter_rect, int buffer_pass_stride, - int buffer_denoising_offset, - char use_split_variance) + int buffer_denoising_offset) { int x = prefilter_rect.x + get_global_id(0); int y = prefilter_rect.y + get_global_id(1); @@ -73,8 +70,7 @@ __kernel void kernel_ocl_filter_get_feature(int sample, mean, variance, prefilter_rect, buffer_pass_stride, - buffer_denoising_offset, - use_split_variance); + buffer_denoising_offset); } } @@ -235,7 +231,7 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx, } __kernel void kernel_ocl_filter_finalize(int w, - int h, + int h, ccl_global float *buffer, ccl_global int *rank, ccl_global float *XtWX, diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index 078acc1631e..66b6e19de84 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -50,11 +50,8 @@ __kernel void kernel_ocl_path_trace( ccl_constant KernelData *data, ccl_global float *buffer, - ccl_global uint *rng_state, -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, int sample, int sx, int sy, int sw, int sh, int offset, int stride) @@ -63,28 +60,24 @@ __kernel void kernel_ocl_path_trace( kg->data = data; -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); int x = sx + ccl_global_id(0); int y = sy + ccl_global_id(1); if(x < sx + sw && y < sy + sh) - kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); + kernel_path_trace(kg, buffer, sample, x, y, offset, stride); } #else /* __COMPILE_ONLY_MEGAKERNEL__ */ -__kernel void kernel_ocl_shader( +__kernel void kernel_ocl_displace( ccl_constant KernelData *data, ccl_global uint4 *input, ccl_global float4 *output, - ccl_global float *output_luma, -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, int type, int sx, int sw, int offset, int sample) { @@ -92,20 +85,35 @@ __kernel void kernel_ocl_shader( kg->data = data; -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); int x = sx + ccl_global_id(0); if(x < sx + sw) { - kernel_shader_evaluate(kg, - input, - output, - output_luma, - (ShaderEvalType)type, - x, - sample); + kernel_displace_evaluate(kg, input, output, x); + } +} +__kernel void kernel_ocl_background( + ccl_constant KernelData *data, + ccl_global uint4 *input, + ccl_global float4 *output, + + KERNEL_BUFFER_PARAMS, + + int type, int sx, int sw, int offset, int sample) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); + + int x = sx + ccl_global_id(0); + + if(x < sx + sw) { + kernel_background_evaluate(kg, input, output, x); } } @@ -114,9 +122,7 @@ __kernel void kernel_ocl_bake( ccl_global uint4 *input, ccl_global float4 *output, -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, int type, int filter, int sx, int sw, int offset, int sample) { @@ -124,9 +130,8 @@ __kernel void kernel_ocl_bake( kg->data = data; -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); int x = sx + ccl_global_id(0); @@ -144,9 +149,7 @@ __kernel void kernel_ocl_convert_to_byte( ccl_global uchar4 *rgba, ccl_global float *buffer, -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) @@ -155,9 +158,8 @@ __kernel void kernel_ocl_convert_to_byte( kg->data = data; -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); int x = sx + ccl_global_id(0); int y = sy + ccl_global_id(1); @@ -171,9 +173,7 @@ __kernel void kernel_ocl_convert_to_half_float( ccl_global uchar4 *rgba, ccl_global float *buffer, -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) @@ -182,9 +182,8 @@ __kernel void kernel_ocl_convert_to_half_float( kg->data = data; -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); int x = sx + ccl_global_id(0); int y = sy + ccl_global_id(1); @@ -193,7 +192,7 @@ __kernel void kernel_ocl_convert_to_half_float( kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); } -__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, ulong size, ulong offset) +__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, uint64_t size, uint64_t offset) { size_t i = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0); diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl index 8b85d362f8a..7125348a49f 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl @@ -24,12 +24,7 @@ __kernel void kernel_ocl_path_trace_data_init( ccl_global void *split_data_buffer, int num_elements, ccl_global char *ray_state, - ccl_global uint *rng_state, - -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" - + KERNEL_BUFFER_PARAMS, int start_sample, int end_sample, int sx, int sy, int sw, int sh, int offset, int stride, @@ -45,11 +40,7 @@ __kernel void kernel_ocl_path_trace_data_init( split_data_buffer, num_elements, ray_state, - rng_state, - -#define KERNEL_TEX(type, ttype, name) name, -#include "kernel/kernel_textures.h" - + KERNEL_BUFFER_ARGS, start_sample, end_sample, sx, sy, sw, sh, offset, stride, diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl index 651addb02f4..4cbda1bc2e7 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl @@ -14,6 +14,9 @@ * limitations under the License. */ +#include "kernel/kernel_compat_opencl.h" // PRECOMPILED +#include "kernel/split/kernel_split_common.h" // PRECOMPILED + #include "kernel/kernels/opencl/kernel_state_buffer_size.cl" #include "kernel/kernels/opencl/kernel_data_init.cl" #include "kernel/kernels/opencl/kernel_path_init.cl" diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h index f1e914a70d4..6aa7681cbed 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h +++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h @@ -23,11 +23,8 @@ __kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)( ccl_global void *split_data_buffer, ccl_global char *ray_state, - ccl_global uint *rng_state, -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, ccl_global int *queue_index, ccl_global char *use_queues_flag, @@ -44,20 +41,16 @@ __kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)( if(ccl_local_id(0) + ccl_local_id(1) == 0) { kg->data = data; - kernel_split_params.rng_state = rng_state; kernel_split_params.queue_index = queue_index; kernel_split_params.use_queues_flag = use_queues_flag; kernel_split_params.work_pools = work_pools; - kernel_split_params.buffer = buffer; + kernel_split_params.tile.buffer = buffer; split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state); -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); KERNEL_NAME_EVAL(kernel, KERNEL_NAME)( kg |