diff options
Diffstat (limited to 'intern/cycles/kernel/kernels')
31 files changed, 213 insertions, 166 deletions
diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h index ffd34c293fc..2ed713299fd 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -107,8 +107,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int dy, float *difference_image, float *buffer, - float *color_pass, - float *variance_pass, float *transform, int *rank, float *XtWX, diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h index 261176846b1..8dc1a8d583c 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -213,8 +213,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, int dy, float *difference_image, float *buffer, - float *color_pass, - float *variance_pass, float *transform, int *rank, float *XtWX, @@ -229,7 +227,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); #else - kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, color_pass, variance_pass, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride); + kernel_filter_nlm_construct_gramian(dx, dy, difference_image, buffer, transform, rank, XtWX, XtWY, load_int4(rect), load_int4(filter_rect), w, h, f, pass_stride); #endif } diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp index 1a7b2040da1..254025be4e2 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp @@ -25,6 +25,7 @@ #else /* SSE optimization disabled for now on 32 bit, see bug #36316 */ # if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) +# define __KERNEL_SSE__ # define __KERNEL_SSE2__ # define __KERNEL_SSE3__ # define __KERNEL_SSSE3__ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 9895080d328..c8938534fe8 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -85,6 +85,7 @@ DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting) DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive) DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update) diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 9b85a864153..d4315ee5ec4 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -53,6 +53,7 @@ # include "kernel/split/kernel_direct_lighting.h" # include "kernel/split/kernel_shadow_blocked_ao.h" # include "kernel/split/kernel_shadow_blocked_dl.h" +# include "kernel/split/kernel_enqueue_inactive.h" # include "kernel/split/kernel_next_iteration_setup.h" # include "kernel/split/kernel_indirect_subsurface.h" # include "kernel/split/kernel_buffer_update.h" @@ -230,6 +231,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) diff --git a/intern/cycles/kernel/kernels/cuda/filter.cu b/intern/cycles/kernel/kernels/cuda/filter.cu index 2edbff08087..009c3fde9d5 100644 --- a/intern/cycles/kernel/kernels/cuda/filter.cu +++ b/intern/cycles/kernel/kernels/cuda/filter.cu @@ -207,8 +207,6 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_filter_nlm_construct_gramian(int dx, int dy, const float *ccl_restrict difference_image, const float *ccl_restrict buffer, - float *color_pass, - float *variance_pass, float const* __restrict__ transform, int *rank, float *XtWX, @@ -225,7 +223,6 @@ kernel_cuda_filter_nlm_construct_gramian(int dx, int dy, dx, dy, difference_image, buffer, - color_pass, variance_pass, transform, rank, XtWX, XtWY, rect, filter_rect, diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h index 9fa39dc9ebb..7ae205b7e14 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_config.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h @@ -81,8 +81,13 @@ # error "Unknown or unsupported CUDA architecture, can't determine launch bounds" #endif -/* compute number of threads per block and minimum blocks per multiprocessor - * given the maximum number of registers per thread */ +/* For split kernel using all registers seems fastest for now, but this + * is unlikely to be optimal once we resolve other bottlenecks. */ + +#define CUDA_KERNEL_SPLIT_MAX_REGISTERS CUDA_THREAD_MAX_REGISTERS + +/* Compute number of threads per block and minimum blocks per multiprocessor + * given the maximum number of registers per thread. */ #define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \ __launch_bounds__( \ diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index 8b7f1a8d405..e97e87285a5 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -39,6 +39,7 @@ #include "kernel/split/kernel_direct_lighting.h" #include "kernel/split/kernel_shadow_blocked_ao.h" #include "kernel/split/kernel_shadow_blocked_dl.h" +#include "kernel/split/kernel_enqueue_inactive.h" #include "kernel/split/kernel_next_iteration_setup.h" #include "kernel/split/kernel_indirect_subsurface.h" #include "kernel/split/kernel_buffer_update.h" @@ -89,7 +90,7 @@ kernel_cuda_path_trace_data_init( #define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ extern "C" __global__ void \ - CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ + CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \ kernel_cuda_##name() \ { \ kernel_##name(NULL); \ @@ -97,7 +98,7 @@ kernel_cuda_path_trace_data_init( #define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ extern "C" __global__ void \ - CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ + CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_SPLIT_MAX_REGISTERS) \ kernel_cuda_##name() \ { \ ccl_local type locals; \ @@ -118,6 +119,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) diff --git a/intern/cycles/kernel/kernels/opencl/filter.cl b/intern/cycles/kernel/kernels/opencl/filter.cl index 0462ca6f9bc..ba53ba4b26f 100644 --- a/intern/cycles/kernel/kernels/opencl/filter.cl +++ b/intern/cycles/kernel/kernels/opencl/filter.cl @@ -207,8 +207,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx, int dy, const ccl_global float *ccl_restrict difference_image, const ccl_global float *ccl_restrict buffer, - ccl_global float *color_pass, - ccl_global float *variance_pass, const ccl_global float *ccl_restrict transform, ccl_global int *rank, ccl_global float *XtWX, @@ -227,7 +225,6 @@ __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx, dx, dy, difference_image, buffer, - color_pass, variance_pass, transform, rank, XtWX, XtWY, rect, filter_rect, diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index 078acc1631e..b7108f3d0f8 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -52,9 +52,7 @@ __kernel void kernel_ocl_path_trace( ccl_global float *buffer, ccl_global uint *rng_state, -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, int sample, int sx, int sy, int sw, int sh, int offset, int stride) @@ -63,9 +61,8 @@ __kernel void kernel_ocl_path_trace( kg->data = data; -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); int x = sx + ccl_global_id(0); int y = sy + ccl_global_id(1); @@ -82,9 +79,7 @@ __kernel void kernel_ocl_shader( ccl_global float4 *output, ccl_global float *output_luma, -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, int type, int sx, int sw, int offset, int sample) { @@ -92,9 +87,8 @@ __kernel void kernel_ocl_shader( kg->data = data; -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); int x = sx + ccl_global_id(0); @@ -114,9 +108,7 @@ __kernel void kernel_ocl_bake( ccl_global uint4 *input, ccl_global float4 *output, -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, int type, int filter, int sx, int sw, int offset, int sample) { @@ -124,9 +116,8 @@ __kernel void kernel_ocl_bake( kg->data = data; -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); int x = sx + ccl_global_id(0); @@ -144,9 +135,7 @@ __kernel void kernel_ocl_convert_to_byte( ccl_global uchar4 *rgba, ccl_global float *buffer, -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) @@ -155,9 +144,8 @@ __kernel void kernel_ocl_convert_to_byte( kg->data = data; -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); int x = sx + ccl_global_id(0); int y = sy + ccl_global_id(1); @@ -171,9 +159,7 @@ __kernel void kernel_ocl_convert_to_half_float( ccl_global uchar4 *rgba, ccl_global float *buffer, -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" + KERNEL_BUFFER_PARAMS, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) @@ -182,9 +168,8 @@ __kernel void kernel_ocl_convert_to_half_float( kg->data = data; -#define KERNEL_TEX(type, ttype, name) \ - kg->name = name; -#include "kernel/kernel_textures.h" + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); int x = sx + ccl_global_id(0); int y = sy + ccl_global_id(1); @@ -193,7 +178,7 @@ __kernel void kernel_ocl_convert_to_half_float( kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); } -__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, ulong size, ulong offset) +__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, uint64_t size, uint64_t offset) { size_t i = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0); diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl index db65c91baf7..dcea2630aef 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_buffer_update.h" -__kernel void kernel_ocl_path_trace_buffer_update( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME buffer_update +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl index 8b85d362f8a..95b35e40a45 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl @@ -25,11 +25,7 @@ __kernel void kernel_ocl_path_trace_data_init( int num_elements, ccl_global char *ray_state, ccl_global uint *rng_state, - -#define KERNEL_TEX(type, ttype, name) \ - ccl_global type *name, -#include "kernel/kernel_textures.h" - + KERNEL_BUFFER_PARAMS, int start_sample, int end_sample, int sx, int sy, int sw, int sh, int offset, int stride, @@ -46,10 +42,7 @@ __kernel void kernel_ocl_path_trace_data_init( num_elements, ray_state, rng_state, - -#define KERNEL_TEX(type, ttype, name) name, -#include "kernel/kernel_textures.h" - + KERNEL_BUFFER_ARGS, start_sample, end_sample, sx, sy, sw, sh, offset, stride, diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl index eb34f750881..ed64ae01aae 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_direct_lighting.h" -__kernel void kernel_ocl_path_trace_direct_lighting( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME direct_lighting +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl index 83ef5f5f3f2..8afaa686e28 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_do_volume.h" -__kernel void kernel_ocl_path_trace_do_volume( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_do_volume((KernelGlobals*)kg); -} +#define KERNEL_NAME do_volume +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl new file mode 100644 index 00000000000..e68d4104a91 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_enqueue_inactive.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_enqueue_inactive.h" + +#define KERNEL_NAME enqueue_inactive +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl index d071b39aa6f..9e1e57beba6 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -18,12 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" -__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local BackgroundAOLocals locals; - kernel_holdout_emission_blurring_pathtermination_ao( - (KernelGlobals*)kg, - &locals); -} +#define KERNEL_NAME holdout_emission_blurring_pathtermination_ao +#define LOCALS_TYPE BackgroundAOLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl index 8c213ff5cb2..192d01444ba 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_indirect_background.h" -__kernel void kernel_ocl_path_trace_indirect_background( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_indirect_background((KernelGlobals*)kg); -} +#define KERNEL_NAME indirect_background +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl index 998ebc4c0c3..84938b889e5 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_indirect_subsurface.h" -__kernel void kernel_ocl_path_trace_indirect_subsurface( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_indirect_subsurface((KernelGlobals*)kg); -} +#define KERNEL_NAME indirect_subsurface +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl index 822d2287715..c314dc96c33 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_lamp_emission.h" -__kernel void kernel_ocl_path_trace_lamp_emission( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_lamp_emission((KernelGlobals*)kg); -} +#define KERNEL_NAME lamp_emission +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl index 6d207253a40..8b1332bf013 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_next_iteration_setup.h" -__kernel void kernel_ocl_path_trace_next_iteration_setup( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME next_iteration_setup +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl index bd9aa9538c8..fa210e747c0 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_path_init.h" -__kernel void kernel_ocl_path_trace_path_init( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_path_init((KernelGlobals*)kg); -} +#define KERNEL_NAME path_init +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl index 9be154e3d75..68ee6f1d536 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_queue_enqueue.h" -__kernel void kernel_ocl_path_trace_queue_enqueue( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local QueueEnqueueLocals locals; - kernel_queue_enqueue((KernelGlobals*)kg, &locals); -} +#define KERNEL_NAME queue_enqueue +#define LOCALS_TYPE QueueEnqueueLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl index eb4fb4d153a..10d09377ba9 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_scene_intersect.h" -__kernel void kernel_ocl_path_trace_scene_intersect( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_scene_intersect((KernelGlobals*)kg); -} +#define KERNEL_NAME scene_intersect +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl index 5bfb31b193a..40eaa561863 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shader_eval.h" -__kernel void kernel_ocl_path_trace_shader_eval( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shader_eval((KernelGlobals*)kg); -} +#define KERNEL_NAME shader_eval +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl index 38bfd04ad4c..8c36100f762 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl @@ -18,10 +18,9 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shader_setup.h" -__kernel void kernel_ocl_path_trace_shader_setup( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local unsigned int local_queue_atomics; - kernel_shader_setup((KernelGlobals*)kg, &local_queue_atomics); -} +#define KERNEL_NAME shader_setup +#define LOCALS_TYPE unsigned int +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl index 6f722915d45..bcacaa4a054 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl @@ -19,10 +19,9 @@ #include "kernel/split/kernel_shader_sort.h" __attribute__((reqd_work_group_size(64, 1, 1))) -__kernel void kernel_ocl_path_trace_shader_sort( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - ccl_local ShaderSortLocals locals; - kernel_shader_sort((KernelGlobals*)kg, &locals); -} +#define KERNEL_NAME shader_sort +#define LOCALS_TYPE ShaderSortLocals +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME +#undef LOCALS_TYPE + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl index 6a8ef81b32a..8de250a375c 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shadow_blocked_ao.h" -__kernel void kernel_ocl_path_trace_shadow_blocked_ao( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shadow_blocked_ao((KernelGlobals*)kg); -} +#define KERNEL_NAME shadow_blocked_ao +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl index b255cc5ef8b..29da77022ed 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_shadow_blocked_dl.h" -__kernel void kernel_ocl_path_trace_shadow_blocked_dl( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_shadow_blocked_dl((KernelGlobals*)kg); -} +#define KERNEL_NAME shadow_blocked_dl +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl index 8de82db7afe..4cbda1bc2e7 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl @@ -14,6 +14,9 @@ * limitations under the License. */ +#include "kernel/kernel_compat_opencl.h" // PRECOMPILED +#include "kernel/split/kernel_split_common.h" // PRECOMPILED + #include "kernel/kernels/opencl/kernel_state_buffer_size.cl" #include "kernel/kernels/opencl/kernel_data_init.cl" #include "kernel/kernels/opencl/kernel_path_init.cl" @@ -31,6 +34,7 @@ #include "kernel/kernels/opencl/kernel_direct_lighting.cl" #include "kernel/kernels/opencl/kernel_shadow_blocked_ao.cl" #include "kernel/kernels/opencl/kernel_shadow_blocked_dl.cl" +#include "kernel/kernels/opencl/kernel_enqueue_inactive.cl" #include "kernel/kernels/opencl/kernel_next_iteration_setup.cl" #include "kernel/kernels/opencl/kernel_indirect_subsurface.cl" #include "kernel/kernels/opencl/kernel_buffer_update.cl" diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h new file mode 100644 index 00000000000..591c3846ef2 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h @@ -0,0 +1,67 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define KERNEL_NAME_JOIN(a, b) a ## _ ## b +#define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b) + +__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)( + ccl_global char *kg_global, + ccl_constant KernelData *data, + + ccl_global void *split_data_buffer, + ccl_global char *ray_state, + ccl_global uint *rng_state, + + KERNEL_BUFFER_PARAMS, + + ccl_global int *queue_index, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pools, + ccl_global float *buffer + ) +{ +#ifdef LOCALS_TYPE + ccl_local LOCALS_TYPE locals; +#endif + + KernelGlobals *kg = (KernelGlobals*)kg_global; + + if(ccl_local_id(0) + ccl_local_id(1) == 0) { + kg->data = data; + + kernel_split_params.rng_state = rng_state; + kernel_split_params.queue_index = queue_index; + kernel_split_params.use_queues_flag = use_queues_flag; + kernel_split_params.work_pools = work_pools; + kernel_split_params.buffer = buffer; + + split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state); + + } + + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + + KERNEL_NAME_EVAL(kernel, KERNEL_NAME)( + kg +#ifdef LOCALS_TYPE + , &locals +#endif + ); +} + +#undef KERNEL_NAME_JOIN +#undef KERNEL_NAME_EVAL + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl index 99b74a1802b..2b3be38df84 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl @@ -18,9 +18,7 @@ #include "kernel/split/kernel_split_common.h" #include "kernel/split/kernel_subsurface_scatter.h" -__kernel void kernel_ocl_path_trace_subsurface_scatter( - ccl_global char *kg, - ccl_constant KernelData *data) -{ - kernel_subsurface_scatter((KernelGlobals*)kg); -} +#define KERNEL_NAME subsurface_scatter +#include "kernel/kernels/opencl/kernel_split_function.h" +#undef KERNEL_NAME + |