diff options
Diffstat (limited to 'intern/cycles/kernel/kernels/opencl')
12 files changed, 1329 insertions, 0 deletions
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl new file mode 100644 index 00000000000..57db6fd9098 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -0,0 +1,169 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* OpenCL kernel entry points - unfinished */ + +#include "../../kernel_compat_opencl.h" +#include "../../kernel_math.h" +#include "../../kernel_types.h" +#include "../../kernel_globals.h" + +#include "../../kernel_film.h" +#include "../../kernel_path.h" +#include "../../kernel_path_branched.h" +#include "../../kernel_bake.h" + +#ifdef __COMPILE_ONLY_MEGAKERNEL__ + +__kernel void kernel_ocl_path_trace( + ccl_constant KernelData *data, + ccl_global float *buffer, + ccl_global uint *rng_state, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + int sample, + int sx, int sy, int sw, int sh, int offset, int stride) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "../../kernel_textures.h" + + int x = sx + get_global_id(0); + int y = sy + get_global_id(1); + + if(x < sx + sw && y < sy + sh) + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); +} + +#else // __COMPILE_ONLY_MEGAKERNEL__ + +__kernel void kernel_ocl_shader( + ccl_constant KernelData *data, + ccl_global uint4 *input, + ccl_global float4 *output, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + int type, int sx, int sw, int offset, int sample) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "../../kernel_textures.h" + + int x = sx + get_global_id(0); + + if(x < sx + sw) + kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x, sample); +} + +__kernel void kernel_ocl_bake( + ccl_constant KernelData *data, + ccl_global uint4 *input, + ccl_global float4 *output, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + int type, int sx, int sw, int offset, int sample) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "../../kernel_textures.h" + + int x = sx + get_global_id(0); + + if(x < sx + sw) { +#ifdef __NO_BAKING__ + output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#else + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample); +#endif + } +} + +__kernel void kernel_ocl_convert_to_byte( + ccl_constant KernelData *data, + ccl_global uchar4 *rgba, + ccl_global float *buffer, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + float sample_scale, + int sx, int sy, int sw, int sh, int offset, int stride) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "../../kernel_textures.h" + + int x = sx + get_global_id(0); + int y = sy + get_global_id(1); + + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +__kernel void kernel_ocl_convert_to_half_float( + ccl_constant KernelData *data, + ccl_global uchar4 *rgba, + ccl_global float *buffer, + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + float sample_scale, + int sx, int sy, int sw, int sh, int offset, int stride) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; +#include "../../kernel_textures.h" + + int x = sx + get_global_id(0); + int y = sy + get_global_id(1); + + if(x < sx + sw && y < sy + sh) + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); +} + +#endif // __COMPILE_ONLY_MEGAKERNEL__
\ No newline at end of file diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl new file mode 100644 index 00000000000..eff77b89a0a --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl @@ -0,0 +1,128 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_background_buffer_update.h" + +__kernel void kernel_ocl_path_trace_background_buffer_update( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_state, + ccl_global uint *rng_coop, /* Required for buffer Update */ + ccl_global float3 *throughput_coop, /* Required for background hit processing */ + PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */ + ccl_global Ray *Ray_coop, /* Required for background hit processing */ + ccl_global PathState *PathState_coop, /* Required for background hit processing */ + ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */ + ccl_global char *ray_state, /* Stores information on the current state of a ray */ + int sw, int sh, int sx, int sy, int stride, + int rng_state_offset_x, + int rng_state_offset_y, + int rng_state_stride, + ccl_global unsigned int *work_array, /* Denotes work of each ray */ + ccl_global int *Queue_data, /* Queues memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + int end_sample, + int start_sample, +#ifdef __WORK_STEALING__ + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + ccl_local unsigned int local_queue_atomics; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_queue_atomics = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + if(ray_index == 0) { + /* We will empty this queue in this kernel. */ + Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; + } + char enqueue_flag = 0; + ray_index = get_ray_index(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + Queue_data, + queuesize, + 1); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + enqueue_flag = + kernel_background_buffer_update(globals, + data, + shader_data, + per_sample_output_buffers, + rng_state, + rng_coop, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + L_transparent_coop, + ray_state, + sw, sh, sx, sy, stride, + rng_state_offset_x, + rng_state_offset_y, + rng_state_stride, + work_array, + end_sample, + start_sample, +#ifdef __WORK_STEALING__ + work_pool_wgs, + num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples, + ray_index); +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + + /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS; + * These rays will be made active during next SceneIntersectkernel. + */ + enqueue_ray_index_local(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + enqueue_flag, + queuesize, + &local_queue_atomics, + Queue_data, + Queue_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl new file mode 100644 index 00000000000..c3277676029 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl @@ -0,0 +1,241 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_data_init.h" + +__kernel void kernel_ocl_path_trace_data_init( + ccl_global char *globals, + ccl_global char *shader_data_sd, /* Arguments related to ShaderData */ + ccl_global char *shader_data_sd_DL_shadow, /* Arguments related to ShaderData */ + + ccl_global float3 *P_sd, + ccl_global float3 *P_sd_DL_shadow, + + ccl_global float3 *N_sd, + ccl_global float3 *N_sd_DL_shadow, + + ccl_global float3 *Ng_sd, + ccl_global float3 *Ng_sd_DL_shadow, + + ccl_global float3 *I_sd, + ccl_global float3 *I_sd_DL_shadow, + + ccl_global int *shader_sd, + ccl_global int *shader_sd_DL_shadow, + + ccl_global int *flag_sd, + ccl_global int *flag_sd_DL_shadow, + + ccl_global int *prim_sd, + ccl_global int *prim_sd_DL_shadow, + + ccl_global int *type_sd, + ccl_global int *type_sd_DL_shadow, + + ccl_global float *u_sd, + ccl_global float *u_sd_DL_shadow, + + ccl_global float *v_sd, + ccl_global float *v_sd_DL_shadow, + + ccl_global int *object_sd, + ccl_global int *object_sd_DL_shadow, + + ccl_global float *time_sd, + ccl_global float *time_sd_DL_shadow, + + ccl_global float *ray_length_sd, + ccl_global float *ray_length_sd_DL_shadow, + + ccl_global int *ray_depth_sd, + ccl_global int *ray_depth_sd_DL_shadow, + + ccl_global int *transparent_depth_sd, + ccl_global int *transparent_depth_sd_DL_shadow, + + /* Ray differentials. */ + ccl_global differential3 *dP_sd, + ccl_global differential3 *dP_sd_DL_shadow, + + ccl_global differential3 *dI_sd, + ccl_global differential3 *dI_sd_DL_shadow, + + ccl_global differential *du_sd, + ccl_global differential *du_sd_DL_shadow, + + ccl_global differential *dv_sd, + ccl_global differential *dv_sd_DL_shadow, + + /* Dp/Du */ + ccl_global float3 *dPdu_sd, + ccl_global float3 *dPdu_sd_DL_shadow, + + ccl_global float3 *dPdv_sd, + ccl_global float3 *dPdv_sd_DL_shadow, + + /* Object motion. */ + ccl_global Transform *ob_tfm_sd, + ccl_global Transform *ob_tfm_sd_DL_shadow, + + ccl_global Transform *ob_itfm_sd, + ccl_global Transform *ob_itfm_sd_DL_shadow, + + ShaderClosure *closure_sd, + ShaderClosure *closure_sd_DL_shadow, + + ccl_global int *num_closure_sd, + ccl_global int *num_closure_sd_DL_shadow, + + ccl_global float *randb_closure_sd, + ccl_global float *randb_closure_sd_DL_shadow, + + ccl_global float3 *ray_P_sd, + ccl_global float3 *ray_P_sd_DL_shadow, + + ccl_global differential3 *ray_dP_sd, + ccl_global differential3 *ray_dP_sd_DL_shadow, + + ccl_constant KernelData *data, + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_state, + ccl_global uint *rng_coop, /* rng array to store rng values for all rays */ + ccl_global float3 *throughput_coop, /* throughput array to store throughput values for all rays */ + ccl_global float *L_transparent_coop, /* L_transparent array to store L_transparent values for all rays */ + PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */ + ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */ + ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */ + ccl_global char *ray_state, /* Stores information on current state of a ray */ + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + int start_sample, int sx, int sy, int sw, int sh, int offset, int stride, + int rng_state_offset_x, + int rng_state_offset_y, + int rng_state_stride, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* size (capacity) of the queue */ + ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ + ccl_global unsigned int *work_array, /* work array to store which work each ray belongs to */ +#ifdef __WORK_STEALING__ + ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */ + unsigned int num_samples, /* Total number of samples per pixel */ +#endif +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + kernel_data_init(globals, + shader_data_sd, + shader_data_sd_DL_shadow, + P_sd, + P_sd_DL_shadow, + N_sd, + N_sd_DL_shadow, + Ng_sd, + Ng_sd_DL_shadow, + I_sd, + I_sd_DL_shadow, + shader_sd, + shader_sd_DL_shadow, + flag_sd, + flag_sd_DL_shadow, + prim_sd, + prim_sd_DL_shadow, + type_sd, + type_sd_DL_shadow, + u_sd, + u_sd_DL_shadow, + v_sd, + v_sd_DL_shadow, + object_sd, + object_sd_DL_shadow, + time_sd, + time_sd_DL_shadow, + ray_length_sd, + ray_length_sd_DL_shadow, + ray_depth_sd, + ray_depth_sd_DL_shadow, + transparent_depth_sd, + transparent_depth_sd_DL_shadow, + + /* Ray differentials. */ + dP_sd, + dP_sd_DL_shadow, + dI_sd, + dI_sd_DL_shadow, + du_sd, + du_sd_DL_shadow, + dv_sd, + dv_sd_DL_shadow, + + /* Dp/Du */ + dPdu_sd, + dPdu_sd_DL_shadow, + dPdv_sd, + dPdv_sd_DL_shadow, + + /* Object motion. */ + ob_tfm_sd, + ob_tfm_sd_DL_shadow, + ob_itfm_sd, + ob_itfm_sd_DL_shadow, + + closure_sd, + closure_sd_DL_shadow, + num_closure_sd, + num_closure_sd_DL_shadow, + randb_closure_sd, + randb_closure_sd_DL_shadow, + ray_P_sd, + ray_P_sd_DL_shadow, + ray_dP_sd, + ray_dP_sd_DL_shadow, + data, + per_sample_output_buffers, + rng_state, + rng_coop, + throughput_coop, + L_transparent_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + ray_state, + +#define KERNEL_TEX(type, ttype, name) name, +#include "../../kernel_textures.h" + + start_sample, sx, sy, sw, sh, offset, stride, + rng_state_offset_x, + rng_state_offset_y, + rng_state_stride, + Queue_data, + Queue_index, + queuesize, + use_queues_flag, + work_array, +#ifdef __WORK_STEALING__ + work_pool_wgs, + num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl new file mode 100644 index 00000000000..6ec75013b3a --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -0,0 +1,90 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_direct_lighting.h" + +__kernel void kernel_ocl_path_trace_direct_lighting( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for direct lighting */ + ccl_global char *shader_DL, /* Required for direct lighting */ + ccl_global uint *rng_coop, /* Required for direct lighting */ + ccl_global PathState *PathState_coop, /* Required for direct lighting */ + ccl_global int *ISLamp_coop, /* Required for direct lighting */ + ccl_global Ray *LightRay_coop, /* Required for direct lighting */ + ccl_global BsdfEval *BSDFEval_coop, /* Required for direct lighting */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize) /* Size (capacity) of each queue */ +{ + ccl_local unsigned int local_queue_atomics; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_queue_atomics = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + char enqueue_flag = 0; + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + enqueue_flag = kernel_direct_lighting(globals, + data, + shader_data, + shader_DL, + rng_coop, + PathState_coop, + ISLamp_coop, + LightRay_coop, + BSDFEval_coop, + ray_state, + ray_index); + +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + +#ifdef __EMISSION__ + /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_SHADOW_RAY_CAST_DL_RAYS, + enqueue_flag, + queuesize, + &local_queue_atomics, + Queue_data, + Queue_index); +#endif +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl new file mode 100644 index 00000000000..ae5f5cd1b3b --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -0,0 +1,124 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_holdout_emission_blurring_pathtermination_ao.h" + +__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required throughout the kernel except probabilistic path termination and AO */ + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */ + ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */ + ccl_global float *L_transparent_coop, /* Required for handling holdout material */ + PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */ + ccl_global PathState *PathState_coop, /* Required throughout the kernel and AO */ + Intersection *Intersection_coop, /* Required for indirect primitive emission */ + ccl_global float3 *AOAlpha_coop, /* Required for AO */ + ccl_global float3 *AOBSDF_coop, /* Required for AO */ + ccl_global Ray *AOLightRay_coop, /* Required for AO */ + int sw, int sh, int sx, int sy, int stride, + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global unsigned int *work_array, /* Denotes the work that each ray belongs to */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ +#ifdef __WORK_STEALING__ + unsigned int start_sample, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + ccl_local unsigned int local_queue_atomics_bg; + ccl_local unsigned int local_queue_atomics_ao; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_queue_atomics_bg = 0; + local_queue_atomics_ao = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + char enqueue_flag = 0; + char enqueue_flag_AO_SHADOW_RAY_CAST = 0; + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif /* __COMPUTE_DEVICE_GPU__ */ + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + kernel_holdout_emission_blurring_pathtermination_ao( + globals, + data, + shader_data, + per_sample_output_buffers, + rng_coop, + throughput_coop, + L_transparent_coop, + PathRadiance_coop, + PathState_coop, + Intersection_coop, + AOAlpha_coop, + AOBSDF_coop, + AOLightRay_coop, + sw, sh, sx, sy, stride, + ray_state, + work_array, +#ifdef __WORK_STEALING__ + start_sample, +#endif + parallel_samples, + ray_index, + &enqueue_flag, + &enqueue_flag_AO_SHADOW_RAY_CAST); +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + + /* Enqueue RAY_UPDATE_BUFFER rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + queuesize, + &local_queue_atomics_bg, + Queue_data, + Queue_index); + +#ifdef __AO__ + /* Enqueue to-shadow-ray-cast rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_SHADOW_RAY_CAST_AO_RAYS, + enqueue_flag_AO_SHADOW_RAY_CAST, + queuesize, + &local_queue_atomics_ao, + Queue_data, + Queue_index); +#endif +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl new file mode 100644 index 00000000000..1bc7808d834 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -0,0 +1,84 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_lamp_emission.h" + +__kernel void kernel_ocl_path_trace_lamp_emission( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for lamp emission */ + ccl_global float3 *throughput_coop, /* Required for lamp emission */ + PathRadiance *PathRadiance_coop, /* Required for lamp emission */ + ccl_global Ray *Ray_coop, /* Required for lamp emission */ + ccl_global PathState *PathState_coop, /* Required for lamp emission */ + Intersection *Intersection_coop, /* Required for lamp emission */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int sw, int sh, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* Size (capacity) of queues */ + ccl_global char *use_queues_flag, /* Used to decide if this kernel should use + * queues to fetch ray index + */ + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + int x = get_global_id(0); + int y = get_global_id(1); + + /* We will empty this queue in this kernel. */ + if(get_global_id(0) == 0 && get_global_id(1) == 0) { + Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + } + /* Fetch use_queues_flag. */ + ccl_local char local_use_queues_flag; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_use_queues_flag = use_queues_flag[0]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int ray_index; + if(local_use_queues_flag) { + int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(thread_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 1); + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } else { + if(x < (sw * parallel_samples) && y < sh){ + ray_index = x + y * (sw * parallel_samples); + } else { + return; + } + } + + kernel_lamp_emission(globals, + data, + shader_data, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + sw, sh, + use_queues_flag, + parallel_samples, + ray_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl new file mode 100644 index 00000000000..dcf4db40411 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -0,0 +1,115 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_next_iteration_setup.h" + +__kernel void kernel_ocl_path_trace_next_iteration_setup( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for setting up ray for next iteration */ + ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */ + ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */ + PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */ + ccl_global Ray *Ray_coop, /* Required for setting up ray for next iteration */ + ccl_global PathState *PathState_coop, /* Required for setting up ray for next iteration */ + ccl_global Ray *LightRay_dl_coop, /* Required for radiance update - direct lighting */ + ccl_global int *ISLamp_coop, /* Required for radiance update - direct lighting */ + ccl_global BsdfEval *BSDFEval_coop, /* Required for radiance update - direct lighting */ + ccl_global Ray *LightRay_ao_coop, /* Required for radiance update - AO */ + ccl_global float3 *AOBSDF_coop, /* Required for radiance update - AO */ + ccl_global float3 *AOAlpha_coop, /* Required for radiance update - AO */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + ccl_global char *use_queues_flag) /* flag to decide if scene_intersect kernel should + * use queues to fetch ray index */ +{ + ccl_local unsigned int local_queue_atomics; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_queue_atomics = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if(get_global_id(0) == 0 && get_global_id(1) == 0) { + /* If we are here, then it means that scene-intersect kernel + * has already been executed atleast once. From the next time, + * scene-intersect kernel may operate on queues to fetch ray index + */ + use_queues_flag[0] = 1; + + /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and + * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the + * previous kernel. + */ + Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0; + Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0; + } + + char enqueue_flag = 0; + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + enqueue_flag = kernel_next_iteration_setup(globals, + data, + shader_data, + rng_coop, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + LightRay_dl_coop, + ISLamp_coop, + BSDFEval_coop, + LightRay_ao_coop, + AOBSDF_coop, + AOAlpha_coop, + ray_state, + use_queues_flag, + ray_index); +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + + /* Enqueue RAY_UPDATE_BUFFER rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + queuesize, + &local_queue_atomics, + Queue_data, + Queue_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl new file mode 100644 index 00000000000..3156dc255fb --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -0,0 +1,106 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../../kernel_compat_opencl.h" +#include "../../kernel_math.h" +#include "../../kernel_types.h" +#include "../../kernel_globals.h" +#include "../../kernel_queues.h" + +/* + * The kernel "kernel_queue_enqueue" enqueues rays of + * different ray state into their appropriate Queues; + * 1. Rays that have been determined to hit the background from the + * "kernel_scene_intersect" kernel + * are enqueued in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; + * 2. Rays that have been determined to be actively participating in path-iteration will be enqueued into QUEUE_ACTIVE_AND_REGENERATED_RAYS. + * + * The input and output of the kernel is as follows, + * + * ray_state -------------------------------------------|--- kernel_queue_enqueue --|--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| | + * queuesize -------------------------------------------| | + * + * Note on Queues : + * State of queues during the first time this kernel is called : + * At entry, + * Both QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty. + * At exit, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays + * QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_HIT_BACKGROUND rays. + * + * State of queue during other times this kernel is called : + * At entry, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be empty. + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will contain RAY_TO_REGENERATE and RAY_UPDATE_BUFFER rays. + * At exit, + * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE rays. + * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays. + */ +__kernel void kernel_ocl_path_trace_queue_enqueue( + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int queuesize) /* Size (capacity) of each queue */ +{ + /* We have only 2 cases (Hit/Not-Hit) */ + ccl_local unsigned int local_queue_atomics[2]; + + int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0); + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + + if(lidx < 2 ) { + local_queue_atomics[lidx] = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int queue_number = -1; + + if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { + queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; + } + else if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS; + } + + unsigned int my_lqidx; + if(queue_number != -1) { + my_lqidx = get_local_queue_index(queue_number, local_queue_atomics); + } + barrier(CLK_LOCAL_MEM_FENCE); + + if(lidx == 0) { + local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = + get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS, + local_queue_atomics, + Queue_index); + local_queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = + get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + local_queue_atomics, + Queue_index); + } + barrier(CLK_LOCAL_MEM_FENCE); + + unsigned int my_gqidx; + if(queue_number != -1) { + my_gqidx = get_global_queue_index(queue_number, + queuesize, + my_lqidx, + local_queue_atomics); + Queue_data[my_gqidx] = ray_index; + } +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl new file mode 100644 index 00000000000..e5fad7bce50 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl @@ -0,0 +1,82 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_scene_intersect.h" + +__kernel void kernel_ocl_path_trace_scene_intersect( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global uint *rng_coop, + ccl_global Ray *Ray_coop, /* Required for scene_intersect */ + ccl_global PathState *PathState_coop, /* Required for scene_intersect */ + Intersection *Intersection_coop, /* Required for scene_intersect */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int sw, int sh, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* Size (capacity) of queues */ + ccl_global char *use_queues_flag, /* used to decide if this kernel should use + * queues to fetch ray index */ +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + int x = get_global_id(0); + int y = get_global_id(1); + + /* Fetch use_queues_flag */ + ccl_local char local_use_queues_flag; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_use_queues_flag = use_queues_flag[0]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int ray_index; + if(local_use_queues_flag) { + int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(thread_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 0); + + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } else { + if(x < (sw * parallel_samples) && y < sh){ + ray_index = x + y * (sw * parallel_samples); + } else { + return; + } + } + + kernel_scene_intersect(globals, + data, + rng_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + sw, sh, + use_queues_flag, +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples, + ray_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl new file mode 100644 index 00000000000..b9f616e6bdf --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -0,0 +1,69 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_shader_eval.h" + +__kernel void kernel_ocl_path_trace_shader_eval( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Output ShaderData structure to be filled */ + ccl_global uint *rng_coop, /* Required for rbsdf calculation */ + ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */ + ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */ + Intersection *Intersection_coop, /* Required for setting up shader from ray */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize) /* Size (capacity) of each queue */ +{ + /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */ + ccl_local unsigned int local_queue_atomics; + if(get_local_id(0) == 0 && get_local_id(1) == 0) { + local_queue_atomics = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + ray_index = get_ray_index(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + Queue_data, + queuesize, + 0); + + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + + char enqueue_flag = (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0; + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + queuesize, + &local_queue_atomics, + Queue_data, + Queue_index); + + /* Continue on with shader evaluation. */ + kernel_shader_eval(globals, + data, + shader_data, + rng_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + ray_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl new file mode 100644 index 00000000000..03886c0a030 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl @@ -0,0 +1,83 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_shadow_blocked.h" + +__kernel void kernel_ocl_path_trace_shadow_blocked( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_shadow, /* Required for shadow blocked */ + ccl_global PathState *PathState_coop, /* Required for shadow blocked */ + ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */ + ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */ + Intersection *Intersection_coop_AO, + Intersection *Intersection_coop_DL, + ccl_global char *ray_state, + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + int total_num_rays) +{ +#if 0 + /* We will make the Queue_index entries '0' in the next kernel. */ + if(get_global_id(0) == 0 && get_global_id(1) == 0) { + /* We empty this queue here */ + Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0; + Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0; + } +#endif + + int lidx = get_local_id(1) * get_local_id(0) + get_local_id(0); + + ccl_local unsigned int ao_queue_length; + ccl_local unsigned int dl_queue_length; + if(lidx == 0) { + ao_queue_length = Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; + dl_queue_length = Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + /* flag determining if the current ray is to process shadow ray for AO or DL */ + char shadow_blocked_type = -1; + + int ray_index = QUEUE_EMPTY_SLOT; + int thread_index = get_global_id(1) * get_global_size(0) + get_global_id(0); + if(thread_index < ao_queue_length + dl_queue_length) { + if(thread_index < ao_queue_length) { + ray_index = get_ray_index(thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, Queue_data, queuesize, 1); + shadow_blocked_type = RAY_SHADOW_RAY_CAST_AO; + } else { + ray_index = get_ray_index(thread_index - ao_queue_length, QUEUE_SHADOW_RAY_CAST_DL_RAYS, Queue_data, queuesize, 1); + shadow_blocked_type = RAY_SHADOW_RAY_CAST_DL; + } + } + + if(ray_index == QUEUE_EMPTY_SLOT) + return; + + kernel_shadow_blocked(globals, + data, + shader_shadow, + PathState_coop, + LightRay_dl_coop, + LightRay_ao_coop, + Intersection_coop_AO, + Intersection_coop_DL, + ray_state, + total_num_rays, + shadow_blocked_type, + ray_index); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl new file mode 100644 index 00000000000..88a1ed830af --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl @@ -0,0 +1,38 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_sum_all_radiance.h" + +__kernel void kernel_ocl_path_trace_sum_all_radiance( + ccl_constant KernelData *data, /* To get pass_stride to offet into buffer */ + ccl_global float *buffer, /* Output buffer of RenderTile */ + ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */ + int parallel_samples, int sw, int sh, int stride, + int buffer_offset_x, + int buffer_offset_y, + int buffer_stride, + int start_sample) +{ + kernel_sum_all_radiance(data, + buffer, + per_sample_output_buffer, + parallel_samples, + sw, sh, stride, + buffer_offset_x, + buffer_offset_y, + buffer_stride, + start_sample); +} |