diff options
Diffstat (limited to 'intern/cycles/kernel/kernels/opencl')
22 files changed, 388 insertions, 823 deletions
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index a68f97857b6..078acc1631e 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -16,34 +16,34 @@ /* OpenCL kernel entry points - unfinished */ -#include "../../kernel_compat_opencl.h" -#include "../../kernel_math.h" -#include "../../kernel_types.h" -#include "../../kernel_globals.h" -#include "../../kernel_image_opencl.h" +#include "kernel/kernel_compat_opencl.h" +#include "kernel/kernel_math.h" +#include "kernel/kernel_types.h" +#include "kernel/kernel_globals.h" +#include "kernel/kernel_image_opencl.h" -#include "../../kernel_film.h" +#include "kernel/kernel_film.h" #if defined(__COMPILE_ONLY_MEGAKERNEL__) || !defined(__NO_BAKING__) -# include "../../kernel_path.h" -# include "../../kernel_path_branched.h" +# include "kernel/kernel_path.h" +# include "kernel/kernel_path_branched.h" #else /* __COMPILE_ONLY_MEGAKERNEL__ */ /* Include only actually used headers for the case * when path tracing kernels are not needed. */ -# include "../../kernel_random.h" -# include "../../kernel_differential.h" -# include "../../kernel_montecarlo.h" -# include "../../kernel_projection.h" -# include "../../geom/geom.h" -# include "../../bvh/bvh.h" - -# include "../../kernel_accumulate.h" -# include "../../kernel_camera.h" -# include "../../kernel_shader.h" +# include "kernel/kernel_random.h" +# include "kernel/kernel_differential.h" +# include "kernel/kernel_montecarlo.h" +# include "kernel/kernel_projection.h" +# include "kernel/geom/geom.h" +# include "kernel/bvh/bvh.h" + +# include "kernel/kernel_accumulate.h" +# include "kernel/kernel_camera.h" +# include "kernel/kernel_shader.h" #endif /* defined(__COMPILE_ONLY_MEGAKERNEL__) || !defined(__NO_BAKING__) */ -#include "../../kernel_bake.h" +#include "kernel/kernel_bake.h" #ifdef __COMPILE_ONLY_MEGAKERNEL__ @@ -54,7 +54,7 @@ __kernel void kernel_ocl_path_trace( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" int sample, int sx, int sy, int sw, int sh, int offset, int stride) @@ -65,10 +65,10 @@ __kernel void kernel_ocl_path_trace( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" - int x = sx + get_global_id(0); - int y = sy + get_global_id(1); + int x = sx + ccl_global_id(0); + int y = sy + ccl_global_id(1); if(x < sx + sw && y < sy + sh) kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); @@ -84,7 +84,7 @@ __kernel void kernel_ocl_shader( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" int type, int sx, int sw, int offset, int sample) { @@ -94,9 +94,9 @@ __kernel void kernel_ocl_shader( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" - int x = sx + get_global_id(0); + int x = sx + ccl_global_id(0); if(x < sx + sw) { kernel_shader_evaluate(kg, @@ -116,7 +116,7 @@ __kernel void kernel_ocl_bake( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" int type, int filter, int sx, int sw, int offset, int sample) { @@ -126,9 +126,9 @@ __kernel void kernel_ocl_bake( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" - int x = sx + get_global_id(0); + int x = sx + ccl_global_id(0); if(x < sx + sw) { #ifdef __NO_BAKING__ @@ -146,7 +146,7 @@ __kernel void kernel_ocl_convert_to_byte( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) @@ -157,10 +157,10 @@ __kernel void kernel_ocl_convert_to_byte( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" - int x = sx + get_global_id(0); - int y = sy + get_global_id(1); + int x = sx + ccl_global_id(0); + int y = sy + ccl_global_id(1); if(x < sx + sw && y < sy + sh) kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); @@ -173,7 +173,7 @@ __kernel void kernel_ocl_convert_to_half_float( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) @@ -184,13 +184,29 @@ __kernel void kernel_ocl_convert_to_half_float( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" - int x = sx + get_global_id(0); - int y = sy + get_global_id(1); + int x = sx + ccl_global_id(0); + int y = sy + ccl_global_id(1); if(x < sx + sw && y < sy + sh) kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); } +__kernel void kernel_ocl_zero_buffer(ccl_global float4 *buffer, ulong size, ulong offset) +{ + size_t i = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0); + + if(i < size / sizeof(float4)) { + buffer[i+offset/sizeof(float4)] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + else if(i == size / sizeof(float4)) { + ccl_global uchar *b = (ccl_global uchar*)&buffer[i+offset/sizeof(float4)]; + + for(i = 0; i < size % sizeof(float4); i++) { + *(b++) = 0; + } + } +} + #endif /* __COMPILE_ONLY_MEGAKERNEL__ */ diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl deleted file mode 100644 index 1914d241eb1..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "split/kernel_background_buffer_update.h" - -__kernel void kernel_ocl_path_trace_background_buffer_update( - ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global float *per_sample_output_buffers, - ccl_global uint *rng_state, - ccl_global uint *rng_coop, /* Required for buffer Update */ - ccl_global float3 *throughput_coop, /* Required for background hit processing */ - PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */ - ccl_global Ray *Ray_coop, /* Required for background hit processing */ - ccl_global PathState *PathState_coop, /* Required for background hit processing */ - ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */ - ccl_global char *ray_state, /* Stores information on the current state of a ray */ - int sw, int sh, int sx, int sy, int stride, - int rng_state_offset_x, - int rng_state_offset_y, - int rng_state_stride, - ccl_global unsigned int *work_array, /* Denotes work of each ray */ - ccl_global int *Queue_data, /* Queues memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize, /* Size (capacity) of each queue */ - int end_sample, - int start_sample, -#ifdef __WORK_STEALING__ - ccl_global unsigned int *work_pool_wgs, - unsigned int num_samples, -#endif -#ifdef __KERNEL_DEBUG__ - DebugData *debugdata_coop, -#endif - int parallel_samples) /* Number of samples to be processed in parallel */ -{ - ccl_local unsigned int local_queue_atomics; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_queue_atomics = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - if(ray_index == 0) { - /* We will empty this queue in this kernel. */ - Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; - } - char enqueue_flag = 0; - ray_index = get_ray_index(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - Queue_data, - queuesize, - 1); - -#ifdef __COMPUTE_DEVICE_GPU__ - /* If we are executing on a GPU device, we exit all threads that are not - * required. - * - * If we are executing on a CPU device, then we need to keep all threads - * active since we have barrier() calls later in the kernel. CPU devices, - * expect all threads to execute barrier statement. - */ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } -#endif - -#ifndef __COMPUTE_DEVICE_GPU__ - if(ray_index != QUEUE_EMPTY_SLOT) { -#endif - enqueue_flag = - kernel_background_buffer_update((KernelGlobals *)kg, - per_sample_output_buffers, - rng_state, - rng_coop, - throughput_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - L_transparent_coop, - ray_state, - sw, sh, sx, sy, stride, - rng_state_offset_x, - rng_state_offset_y, - rng_state_stride, - work_array, - end_sample, - start_sample, -#ifdef __WORK_STEALING__ - work_pool_wgs, - num_samples, -#endif -#ifdef __KERNEL_DEBUG__ - debugdata_coop, -#endif - parallel_samples, - ray_index); -#ifndef __COMPUTE_DEVICE_GPU__ - } -#endif - - /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS; - * These rays will be made active during next SceneIntersectkernel. - */ - enqueue_ray_index_local(ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - enqueue_flag, - queuesize, - &local_queue_atomics, - Queue_data, - Queue_index); -} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl new file mode 100644 index 00000000000..db65c91baf7 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl @@ -0,0 +1,27 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_buffer_update.h" + +__kernel void kernel_ocl_path_trace_buffer_update( + ccl_global char *kg, + ccl_constant KernelData *data) +{ + ccl_local unsigned int local_queue_atomics; + kernel_buffer_update((KernelGlobals*)kg, &local_queue_atomics); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl index 18139687eab..8b85d362f8a 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl @@ -14,77 +14,49 @@ * limitations under the License. */ -#include "split/kernel_data_init.h" +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_data_init.h" __kernel void kernel_ocl_path_trace_data_init( - ccl_global char *globals, - ccl_global char *sd_DL_shadow, + ccl_global char *kg, ccl_constant KernelData *data, - ccl_global float *per_sample_output_buffers, + ccl_global void *split_data_buffer, + int num_elements, + ccl_global char *ray_state, ccl_global uint *rng_state, - ccl_global uint *rng_coop, /* rng array to store rng values for all rays */ - ccl_global float3 *throughput_coop, /* throughput array to store throughput values for all rays */ - ccl_global float *L_transparent_coop, /* L_transparent array to store L_transparent values for all rays */ - PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */ - ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */ - ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */ - Intersection *Intersection_coop_shadow, - ccl_global char *ray_state, /* Stores information on current state of a ray */ #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" - int start_sample, int sx, int sy, int sw, int sh, int offset, int stride, - int rng_state_offset_x, - int rng_state_offset_y, - int rng_state_stride, - ccl_global int *Queue_data, /* Memory for queues */ + int start_sample, + int end_sample, + int sx, int sy, int sw, int sh, int offset, int stride, ccl_global int *Queue_index, /* Tracks the number of elements in queues */ int queuesize, /* size (capacity) of the queue */ ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ - ccl_global unsigned int *work_array, /* work array to store which work each ray belongs to */ -#ifdef __WORK_STEALING__ ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */ unsigned int num_samples, /* Total number of samples per pixel */ -#endif -#ifdef __KERNEL_DEBUG__ - DebugData *debugdata_coop, -#endif - int parallel_samples) /* Number of samples to be processed in parallel */ + ccl_global float *buffer) { - kernel_data_init((KernelGlobals *)globals, - (ShaderData *)sd_DL_shadow, + kernel_data_init((KernelGlobals*)kg, data, - per_sample_output_buffers, - rng_state, - rng_coop, - throughput_coop, - L_transparent_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - Intersection_coop_shadow, + split_data_buffer, + num_elements, ray_state, + rng_state, #define KERNEL_TEX(type, ttype, name) name, -#include "../../kernel_textures.h" +#include "kernel/kernel_textures.h" - start_sample, sx, sy, sw, sh, offset, stride, - rng_state_offset_x, - rng_state_offset_y, - rng_state_stride, - Queue_data, + start_sample, + end_sample, + sx, sy, sw, sh, offset, stride, Queue_index, queuesize, use_queues_flag, - work_array, -#ifdef __WORK_STEALING__ work_pool_wgs, num_samples, -#endif -#ifdef __KERNEL_DEBUG__ - debugdata_coop, -#endif - parallel_samples); + buffer); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl index c6a2c8d050c..eb34f750881 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -14,74 +14,14 @@ * limitations under the License. */ -#include "split/kernel_direct_lighting.h" +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_direct_lighting.h" __kernel void kernel_ocl_path_trace_direct_lighting( ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global char *sd, /* Required for direct lighting */ - ccl_global uint *rng_coop, /* Required for direct lighting */ - ccl_global PathState *PathState_coop, /* Required for direct lighting */ - ccl_global int *ISLamp_coop, /* Required for direct lighting */ - ccl_global Ray *LightRay_coop, /* Required for direct lighting */ - ccl_global BsdfEval *BSDFEval_coop, /* Required for direct lighting */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - ccl_global int *Queue_data, /* Queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize) /* Size (capacity) of each queue */ + ccl_constant KernelData *data) { ccl_local unsigned int local_queue_atomics; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_queue_atomics = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - char enqueue_flag = 0; - int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 0); - -#ifdef __COMPUTE_DEVICE_GPU__ - /* If we are executing on a GPU device, we exit all threads that are not - * required. - * - * If we are executing on a CPU device, then we need to keep all threads - * active since we have barrier() calls later in the kernel. CPU devices, - * expect all threads to execute barrier statement. - */ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } -#endif - -#ifndef __COMPUTE_DEVICE_GPU__ - if(ray_index != QUEUE_EMPTY_SLOT) { -#endif - enqueue_flag = kernel_direct_lighting((KernelGlobals *)kg, - (ShaderData *)sd, - rng_coop, - PathState_coop, - ISLamp_coop, - LightRay_coop, - BSDFEval_coop, - ray_state, - ray_index); - -#ifndef __COMPUTE_DEVICE_GPU__ - } -#endif - -#ifdef __EMISSION__ - /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */ - enqueue_ray_index_local(ray_index, - QUEUE_SHADOW_RAY_CAST_DL_RAYS, - enqueue_flag, - queuesize, - &local_queue_atomics, - Queue_data, - Queue_index); -#endif + kernel_direct_lighting((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl new file mode 100644 index 00000000000..83ef5f5f3f2 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_do_volume.h" + +__kernel void kernel_ocl_path_trace_do_volume( + ccl_global char *kg, + ccl_constant KernelData *data) +{ + kernel_do_volume((KernelGlobals*)kg); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl index e063614da1a..d071b39aa6f 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -14,110 +14,16 @@ * limitations under the License. */ -#include "split/kernel_holdout_emission_blurring_pathtermination_ao.h" +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h" __kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global char *sd, /* Required throughout the kernel except probabilistic path termination and AO */ - ccl_global float *per_sample_output_buffers, - ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */ - ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */ - ccl_global float *L_transparent_coop, /* Required for handling holdout material */ - PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */ - ccl_global PathState *PathState_coop, /* Required throughout the kernel and AO */ - Intersection *Intersection_coop, /* Required for indirect primitive emission */ - ccl_global float3 *AOAlpha_coop, /* Required for AO */ - ccl_global float3 *AOBSDF_coop, /* Required for AO */ - ccl_global Ray *AOLightRay_coop, /* Required for AO */ - int sw, int sh, int sx, int sy, int stride, - ccl_global char *ray_state, /* Denotes the state of each ray */ - ccl_global unsigned int *work_array, /* Denotes the work that each ray belongs to */ - ccl_global int *Queue_data, /* Queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize, /* Size (capacity) of each queue */ -#ifdef __WORK_STEALING__ - unsigned int start_sample, -#endif - int parallel_samples) /* Number of samples to be processed in parallel */ + ccl_constant KernelData *data) { - ccl_local unsigned int local_queue_atomics_bg; - ccl_local unsigned int local_queue_atomics_ao; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_queue_atomics_bg = 0; - local_queue_atomics_ao = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - char enqueue_flag = 0; - char enqueue_flag_AO_SHADOW_RAY_CAST = 0; - int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 0); - -#ifdef __COMPUTE_DEVICE_GPU__ - /* If we are executing on a GPU device, we exit all threads that are not - * required. - * - * If we are executing on a CPU device, then we need to keep all threads - * active since we have barrier() calls later in the kernel. CPU devices, - * expect all threads to execute barrier statement. - */ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } -#endif /* __COMPUTE_DEVICE_GPU__ */ - -#ifndef __COMPUTE_DEVICE_GPU__ - if(ray_index != QUEUE_EMPTY_SLOT) { -#endif - kernel_holdout_emission_blurring_pathtermination_ao( - (KernelGlobals *)kg, - (ShaderData *)sd, - per_sample_output_buffers, - rng_coop, - throughput_coop, - L_transparent_coop, - PathRadiance_coop, - PathState_coop, - Intersection_coop, - AOAlpha_coop, - AOBSDF_coop, - AOLightRay_coop, - sw, sh, sx, sy, stride, - ray_state, - work_array, -#ifdef __WORK_STEALING__ - start_sample, -#endif - parallel_samples, - ray_index, - &enqueue_flag, - &enqueue_flag_AO_SHADOW_RAY_CAST); -#ifndef __COMPUTE_DEVICE_GPU__ - } -#endif - - /* Enqueue RAY_UPDATE_BUFFER rays. */ - enqueue_ray_index_local(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - enqueue_flag, - queuesize, - &local_queue_atomics_bg, - Queue_data, - Queue_index); - -#ifdef __AO__ - /* Enqueue to-shadow-ray-cast rays. */ - enqueue_ray_index_local(ray_index, - QUEUE_SHADOW_RAY_CAST_AO_RAYS, - enqueue_flag_AO_SHADOW_RAY_CAST, - queuesize, - &local_queue_atomics_ao, - Queue_data, - Queue_index); -#endif + ccl_local BackgroundAOLocals locals; + kernel_holdout_emission_blurring_pathtermination_ao( + (KernelGlobals*)kg, + &locals); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl new file mode 100644 index 00000000000..8c213ff5cb2 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_indirect_background.h" + +__kernel void kernel_ocl_path_trace_indirect_background( + ccl_global char *kg, + ccl_constant KernelData *data) +{ + kernel_indirect_background((KernelGlobals*)kg); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl new file mode 100644 index 00000000000..998ebc4c0c3 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_indirect_subsurface.h" + +__kernel void kernel_ocl_path_trace_indirect_subsurface( + ccl_global char *kg, + ccl_constant KernelData *data) +{ + kernel_indirect_subsurface((KernelGlobals*)kg); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl index 267bddc2ffc..822d2287715 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -14,67 +14,13 @@ * limitations under the License. */ -#include "split/kernel_lamp_emission.h" +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_lamp_emission.h" __kernel void kernel_ocl_path_trace_lamp_emission( ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global float3 *throughput_coop, /* Required for lamp emission */ - PathRadiance *PathRadiance_coop, /* Required for lamp emission */ - ccl_global Ray *Ray_coop, /* Required for lamp emission */ - ccl_global PathState *PathState_coop, /* Required for lamp emission */ - Intersection *Intersection_coop, /* Required for lamp emission */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - int sw, int sh, - ccl_global int *Queue_data, /* Memory for queues */ - ccl_global int *Queue_index, /* Tracks the number of elements in queues */ - int queuesize, /* Size (capacity) of queues */ - ccl_global char *use_queues_flag, /* Used to decide if this kernel should use - * queues to fetch ray index - */ - int parallel_samples) /* Number of samples to be processed in parallel */ + ccl_constant KernelData *data) { - int x = get_global_id(0); - int y = get_global_id(1); - - /* We will empty this queue in this kernel. */ - if(get_global_id(0) == 0 && get_global_id(1) == 0) { - Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; - } - /* Fetch use_queues_flag. */ - ccl_local char local_use_queues_flag; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_use_queues_flag = use_queues_flag[0]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int ray_index; - if(local_use_queues_flag) { - int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(thread_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 1); - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } - } else { - if(x < (sw * parallel_samples) && y < sh) { - ray_index = x + y * (sw * parallel_samples); - } else { - return; - } - } - - kernel_lamp_emission((KernelGlobals *)kg, - throughput_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - Intersection_coop, - ray_state, - sw, sh, - use_queues_flag, - ray_index); + kernel_lamp_emission((KernelGlobals*)kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl index 6d49b6294a8..6d207253a40 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -14,101 +14,14 @@ * limitations under the License. */ -#include "split/kernel_next_iteration_setup.h" +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_next_iteration_setup.h" __kernel void kernel_ocl_path_trace_next_iteration_setup( ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global char *sd, /* Required for setting up ray for next iteration */ - ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */ - ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */ - PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */ - ccl_global Ray *Ray_coop, /* Required for setting up ray for next iteration */ - ccl_global PathState *PathState_coop, /* Required for setting up ray for next iteration */ - ccl_global Ray *LightRay_dl_coop, /* Required for radiance update - direct lighting */ - ccl_global int *ISLamp_coop, /* Required for radiance update - direct lighting */ - ccl_global BsdfEval *BSDFEval_coop, /* Required for radiance update - direct lighting */ - ccl_global Ray *LightRay_ao_coop, /* Required for radiance update - AO */ - ccl_global float3 *AOBSDF_coop, /* Required for radiance update - AO */ - ccl_global float3 *AOAlpha_coop, /* Required for radiance update - AO */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - ccl_global int *Queue_data, /* Queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize, /* Size (capacity) of each queue */ - ccl_global char *use_queues_flag) /* flag to decide if scene_intersect kernel should - * use queues to fetch ray index */ + ccl_constant KernelData *data) { ccl_local unsigned int local_queue_atomics; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_queue_atomics = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - if(get_global_id(0) == 0 && get_global_id(1) == 0) { - /* If we are here, then it means that scene-intersect kernel - * has already been executed atleast once. From the next time, - * scene-intersect kernel may operate on queues to fetch ray index - */ - use_queues_flag[0] = 1; - - /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and - * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the - * previous kernel. - */ - Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0; - Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0; - } - - char enqueue_flag = 0; - int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 0); - -#ifdef __COMPUTE_DEVICE_GPU__ - /* If we are executing on a GPU device, we exit all threads that are not - * required. - * - * If we are executing on a CPU device, then we need to keep all threads - * active since we have barrier() calls later in the kernel. CPU devices, - * expect all threads to execute barrier statement. - */ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } -#endif - -#ifndef __COMPUTE_DEVICE_GPU__ - if(ray_index != QUEUE_EMPTY_SLOT) { -#endif - enqueue_flag = kernel_next_iteration_setup((KernelGlobals *)kg, - (ShaderData *)sd, - rng_coop, - throughput_coop, - PathRadiance_coop, - Ray_coop, - PathState_coop, - LightRay_dl_coop, - ISLamp_coop, - BSDFEval_coop, - LightRay_ao_coop, - AOBSDF_coop, - AOAlpha_coop, - ray_state, - use_queues_flag, - ray_index); -#ifndef __COMPUTE_DEVICE_GPU__ - } -#endif - - /* Enqueue RAY_UPDATE_BUFFER rays. */ - enqueue_ray_index_local(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - enqueue_flag, - queuesize, - &local_queue_atomics, - Queue_data, - Queue_index); + kernel_next_iteration_setup((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl new file mode 100644 index 00000000000..bd9aa9538c8 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_path_init.h" + +__kernel void kernel_ocl_path_trace_path_init( + ccl_global char *kg, + ccl_constant KernelData *data) +{ + kernel_path_init((KernelGlobals*)kg); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl index 3156dc255fb..9be154e3d75 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -14,93 +14,14 @@ * limitations under the License. */ -#include "../../kernel_compat_opencl.h" -#include "../../kernel_math.h" -#include "../../kernel_types.h" -#include "../../kernel_globals.h" -#include "../../kernel_queues.h" +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_queue_enqueue.h" -/* - * The kernel "kernel_queue_enqueue" enqueues rays of - * different ray state into their appropriate Queues; - * 1. Rays that have been determined to hit the background from the - * "kernel_scene_intersect" kernel - * are enqueued in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; - * 2. Rays that have been determined to be actively participating in path-iteration will be enqueued into QUEUE_ACTIVE_AND_REGENERATED_RAYS. - * - * The input and output of the kernel is as follows, - * - * ray_state -------------------------------------------|--- kernel_queue_enqueue --|--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) - * Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) - * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| | - * queuesize -------------------------------------------| | - * - * Note on Queues : - * State of queues during the first time this kernel is called : - * At entry, - * Both QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty. - * At exit, - * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays - * QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_HIT_BACKGROUND rays. - * - * State of queue during other times this kernel is called : - * At entry, - * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be empty. - * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will contain RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays. - * At exit, - * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays. - * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays. - */ __kernel void kernel_ocl_path_trace_queue_enqueue( - ccl_global int *Queue_data, /* Queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - int queuesize) /* Size (capacity) of each queue */ + ccl_global char *kg, + ccl_constant KernelData *data) { - /* We have only 2 cases (Hit/Not-Hit) */ - ccl_local unsigned int local_queue_atomics[2]; - - int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0); - int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - - if(lidx < 2 ) { - local_queue_atomics[lidx] = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int queue_number = -1; - - if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { - queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; - } - else if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS; - } - - unsigned int my_lqidx; - if(queue_number != -1) { - my_lqidx = get_local_queue_index(queue_number, local_queue_atomics); - } - barrier(CLK_LOCAL_MEM_FENCE); - - if(lidx == 0) { - local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = - get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS, - local_queue_atomics, - Queue_index); - local_queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = - get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - local_queue_atomics, - Queue_index); - } - barrier(CLK_LOCAL_MEM_FENCE); - - unsigned int my_gqidx; - if(queue_number != -1) { - my_gqidx = get_global_queue_index(queue_number, - queuesize, - my_lqidx, - local_queue_atomics); - Queue_data[my_gqidx] = ray_index; - } + ccl_local QueueEnqueueLocals locals; + kernel_queue_enqueue((KernelGlobals*)kg, &locals); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl index 7f3f433c7a6..eb4fb4d153a 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl @@ -14,67 +14,13 @@ * limitations under the License. */ -#include "split/kernel_scene_intersect.h" +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_scene_intersect.h" __kernel void kernel_ocl_path_trace_scene_intersect( ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global uint *rng_coop, - ccl_global Ray *Ray_coop, /* Required for scene_intersect */ - ccl_global PathState *PathState_coop, /* Required for scene_intersect */ - Intersection *Intersection_coop, /* Required for scene_intersect */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - int sw, int sh, - ccl_global int *Queue_data, /* Memory for queues */ - ccl_global int *Queue_index, /* Tracks the number of elements in queues */ - int queuesize, /* Size (capacity) of queues */ - ccl_global char *use_queues_flag, /* used to decide if this kernel should use - * queues to fetch ray index */ -#ifdef __KERNEL_DEBUG__ - DebugData *debugdata_coop, -#endif - int parallel_samples) /* Number of samples to be processed in parallel */ + ccl_constant KernelData *data) { - int x = get_global_id(0); - int y = get_global_id(1); - - /* Fetch use_queues_flag */ - ccl_local char local_use_queues_flag; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_use_queues_flag = use_queues_flag[0]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int ray_index; - if(local_use_queues_flag) { - int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(thread_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 0); - - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } - } else { - if(x < (sw * parallel_samples) && y < sh) { - ray_index = x + y * (sw * parallel_samples); - } else { - return; - } - } - - kernel_scene_intersect((KernelGlobals *)kg, - rng_coop, - Ray_coop, - PathState_coop, - Intersection_coop, - ray_state, - sw, sh, - use_queues_flag, -#ifdef __KERNEL_DEBUG__ - debugdata_coop, -#endif - ray_index); + kernel_scene_intersect((KernelGlobals*)kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl index c37856c8f30..6baee460986 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -14,55 +14,14 @@ * limitations under the License. */ -#include "split/kernel_shader_eval.h" +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_shader_eval.h" __kernel void kernel_ocl_path_trace_shader_eval( ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global char *sd, /* Output ShaderData structure to be filled */ - ccl_global uint *rng_coop, /* Required for rbsdf calculation */ - ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */ - ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */ - Intersection *Intersection_coop, /* Required for setting up shader from ray */ - ccl_global char *ray_state, /* Denotes the state of each ray */ - ccl_global int *Queue_data, /* queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize) /* Size (capacity) of each queue */ + ccl_constant KernelData *data) { - /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */ ccl_local unsigned int local_queue_atomics; - if(get_local_id(0) == 0 && get_local_id(1) == 0) { - local_queue_atomics = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - ray_index = get_ray_index(ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - Queue_data, - queuesize, - 0); - - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } - - char enqueue_flag = (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0; - enqueue_ray_index_local(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - enqueue_flag, - queuesize, - &local_queue_atomics, - Queue_data, - Queue_index); - - /* Continue on with shader evaluation. */ - kernel_shader_eval((KernelGlobals *)kg, - (ShaderData *)sd, - rng_coop, - Ray_coop, - PathState_coop, - Intersection_coop, - ray_state, - ray_index); + kernel_shader_eval((KernelGlobals*)kg, &local_queue_atomics); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl deleted file mode 100644 index edf76fba714..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "split/kernel_shadow_blocked.h" - -__kernel void kernel_ocl_path_trace_shadow_blocked( - ccl_global char *kg, - ccl_constant KernelData *data, - ccl_global PathState *PathState_coop, /* Required for shadow blocked */ - ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */ - ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */ - ccl_global char *ray_state, - ccl_global int *Queue_data, /* Queue memory */ - ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ - int queuesize) /* Size (capacity) of each queue */ -{ - int lidx = get_local_id(1) * get_local_id(0) + get_local_id(0); - - ccl_local unsigned int ao_queue_length; - ccl_local unsigned int dl_queue_length; - if(lidx == 0) { - ao_queue_length = Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; - dl_queue_length = Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - /* flag determining if the current ray is to process shadow ray for AO or DL */ - char shadow_blocked_type = -1; - - int ray_index = QUEUE_EMPTY_SLOT; - int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); - if(thread_index < ao_queue_length + dl_queue_length) { - if(thread_index < ao_queue_length) { - ray_index = get_ray_index(thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, Queue_data, queuesize, 1); - shadow_blocked_type = RAY_SHADOW_RAY_CAST_AO; - } else { - ray_index = get_ray_index(thread_index - ao_queue_length, QUEUE_SHADOW_RAY_CAST_DL_RAYS, Queue_data, queuesize, 1); - shadow_blocked_type = RAY_SHADOW_RAY_CAST_DL; - } - } - - if(ray_index == QUEUE_EMPTY_SLOT) - return; - - kernel_shadow_blocked((KernelGlobals *)kg, - PathState_coop, - LightRay_dl_coop, - LightRay_ao_coop, - ray_state, - shadow_blocked_type, - ray_index); -} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl new file mode 100644 index 00000000000..6a8ef81b32a --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_ao.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_shadow_blocked_ao.h" + +__kernel void kernel_ocl_path_trace_shadow_blocked_ao( + ccl_global char *kg, + ccl_constant KernelData *data) +{ + kernel_shadow_blocked_ao((KernelGlobals*)kg); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl new file mode 100644 index 00000000000..b255cc5ef8b --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked_dl.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_shadow_blocked_dl.h" + +__kernel void kernel_ocl_path_trace_shadow_blocked_dl( + ccl_global char *kg, + ccl_constant KernelData *data) +{ + kernel_shadow_blocked_dl((KernelGlobals*)kg); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl new file mode 100644 index 00000000000..732cda30115 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl @@ -0,0 +1,35 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernels/opencl/kernel_state_buffer_size.cl" +#include "kernel/kernels/opencl/kernel_data_init.cl" +#include "kernel/kernels/opencl/kernel_path_init.cl" + +#include "kernel/kernels/opencl/kernel_scene_intersect.cl" +#include "kernel/kernels/opencl/kernel_lamp_emission.cl" +#include "kernel/kernels/opencl/kernel_do_volume.cl" +#include "kernel/kernels/opencl/kernel_indirect_background.cl" +#include "kernel/kernels/opencl/kernel_queue_enqueue.cl" +#include "kernel/kernels/opencl/kernel_shader_eval.cl" +#include "kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" +#include "kernel/kernels/opencl/kernel_subsurface_scatter.cl" +#include "kernel/kernels/opencl/kernel_direct_lighting.cl" +#include "kernel/kernels/opencl/kernel_shadow_blocked_ao.cl" +#include "kernel/kernels/opencl/kernel_shadow_blocked_dl.cl" +#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl" +#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl" +#include "kernel/kernels/opencl/kernel_buffer_update.cl" + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl new file mode 100644 index 00000000000..c10ecc426c6 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl @@ -0,0 +1,29 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" + +__kernel void kernel_ocl_path_trace_state_buffer_size( + ccl_global char *kg, + ccl_constant KernelData *data, + uint num_threads, + ccl_global uint64_t *size) +{ + ((KernelGlobals*)kg)->data = data; + *size = split_data_buffer_size((KernelGlobals*)kg, num_threads); +} + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl new file mode 100644 index 00000000000..7a1838e485f --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl @@ -0,0 +1,27 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/split/kernel_split_common.h" +#include "kernel/split/kernel_subsurface_scatter.h" + +__kernel void kernel_ocl_path_trace_subsurface_scatter( + ccl_global char *kg, + ccl_constant KernelData *data) +{ + ccl_local unsigned int local_queue_atomics; + kernel_subsurface_scatter((KernelGlobals*)kg, &local_queue_atomics); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl deleted file mode 100644 index 88a1ed830af..00000000000 --- a/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "split/kernel_sum_all_radiance.h" - -__kernel void kernel_ocl_path_trace_sum_all_radiance( - ccl_constant KernelData *data, /* To get pass_stride to offet into buffer */ - ccl_global float *buffer, /* Output buffer of RenderTile */ - ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */ - int parallel_samples, int sw, int sh, int stride, - int buffer_offset_x, - int buffer_offset_y, - int buffer_stride, - int start_sample) -{ - kernel_sum_all_radiance(data, - buffer, - per_sample_output_buffer, - parallel_samples, - sw, sh, stride, - buffer_offset_x, - buffer_offset_y, - buffer_stride, - start_sample); -} |