diff options
author | Hristo Gueorguiev <prem.nirved@gmail.com> | 2017-03-08 19:39:40 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-03-09 19:09:37 +0300 |
commit | 06c051363b509f7c3c40a803b87739fe0e2a8576 (patch) | |
tree | 83f6023d3927c98175082bf44f3f6623afc856b1 /intern/cycles/kernel/split | |
parent | e8b5a5bf5b63ef1c8980f8da95be32cad4d2cf0e (diff) |
Cycles: split kernel_shadow_blocked to AO & DL parts
Reduces memory allocation for split kernel.
This allows for faster rendering due to bigger global size,
especially when GPU memory is limited.
Performance results:
R9 290 total render time
Before After Change
BMW 4:37 4:34 -1.1 %
Classroom 14:43 14:30 -1.5 %
Fishy Cat 11:20 11:04 -2.4 %
Koro 12:11 12:04 -1.0 %
Pabellon Barcelona 22:01 20:44 -5.8 %
Pabellon Barcelona(*) 15:32 15:09 -2.5 %
(*) without glossy connected to volume
Diffstat (limited to 'intern/cycles/kernel/split')
-rw-r--r-- | intern/cycles/kernel/split/kernel_shadow_blocked_ao.h (renamed from intern/cycles/kernel/split/kernel_shadow_blocked.h) | 42 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_shadow_blocked_dl.h | 91 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_split_data_types.h | 2 |
3 files changed, 102 insertions, 33 deletions
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h index 52f7002acb3..e153c16bd68 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h @@ -36,42 +36,28 @@ CCL_NAMESPACE_BEGIN * * Note on sd_shadow : sd_shadow is neither input nor output to this kernel. sd_shadow is filled and consumed in this kernel itself. * Note on queues : - * The kernel fetches from QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS queues. We will empty - * these queues this kernel. + * The kernel fetches from QUEUE_SHADOW_RAY_CAST_AO_RAYS queue. We will empty this queues in this kernel. * State of queues when this kernel is called : * state of queues QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be same * before and after this kernel call. - * QUEUE_SHADOW_RAY_CAST_AO_RAYS & QUEUE_SHADOW_RAY_CAST_DL_RAYS will be filled with rays marked with flags RAY_SHADOW_RAY_CAST_AO - * and RAY_SHADOW_RAY_CAST_DL respectively, during kernel entry. - * QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty at kernel exit. + * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flags RAY_SHADOW_RAY_CAST_AO during kernel entry. + * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be empty at kernel exit. 
*/ -ccl_device void kernel_shadow_blocked(KernelGlobals *kg) +ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg) { int lidx = ccl_local_id(1) * ccl_local_id(0) + ccl_local_id(0); ccl_local unsigned int ao_queue_length; - ccl_local unsigned int dl_queue_length; if(lidx == 0) { ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; - dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; } ccl_barrier(CCL_LOCAL_MEM_FENCE); - /* flag determining if the current ray is to process shadow ray for AO or DL */ - char shadow_blocked_type = -1; - int ray_index = QUEUE_EMPTY_SLOT; int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(thread_index < ao_queue_length + dl_queue_length) { - if(thread_index < ao_queue_length) { - ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, - kernel_split_state.queue_data, kernel_split_params.queue_size, 1); - shadow_blocked_type = RAY_SHADOW_RAY_CAST_AO; - } else { - ray_index = get_ray_index(kg, thread_index - ao_queue_length, QUEUE_SHADOW_RAY_CAST_DL_RAYS, - kernel_split_state.queue_data, kernel_split_params.queue_size, 1); - shadow_blocked_type = RAY_SHADOW_RAY_CAST_DL; - } + if(thread_index < ao_queue_length) { + ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, + kernel_split_state.queue_data, kernel_split_params.queue_size, 1); } if(ray_index == QUEUE_EMPTY_SLOT) @@ -80,22 +66,14 @@ ccl_device void kernel_shadow_blocked(KernelGlobals *kg) /* Flag determining if we need to update L. 
*/ char update_path_radiance = 0; - if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) || - IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) - { + if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) { ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - ccl_global Ray *light_ray_dl_global = &kernel_split_state.light_ray[ray_index]; - ccl_global Ray *light_ray_ao_global = &kernel_split_state.ao_light_ray[ray_index]; - - ccl_global Ray *light_ray_global = - shadow_blocked_type == RAY_SHADOW_RAY_CAST_AO - ? light_ray_ao_global - : light_ray_dl_global; + ccl_global Ray *light_ray_global = &kernel_split_state.ao_light_ray[ray_index]; float3 shadow; Ray ray = *light_ray_global; update_path_radiance = !(shadow_blocked(kg, - &kernel_split_state.sd_DL_shadow[thread_index], + &kernel_split_state.sd_DL_shadow[ray_index], state, &ray, &shadow)); diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h new file mode 100644 index 00000000000..cfd8d78c2de --- /dev/null +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h @@ -0,0 +1,91 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +/* Note on kernel_shadow_blocked kernel. + * This is the ninth kernel in the ray tracing logic. 
This is the eighth + * of the path iteration kernels. This kernel takes care of "shadow ray cast" + * logic of the direct lighting and AO part of ray tracing. + * + * The input and output are as follows, + * + * PathState_coop ----------------------------------|--- kernel_shadow_blocked --| + * LightRay_dl_coop --------------------------------| |--- LightRay_dl_coop + * LightRay_ao_coop --------------------------------| |--- LightRay_ao_coop + * ray_state ---------------------------------------| |--- ray_state + * Queue_data(QUEUE_SHADOW_RAY_CAST_AO_RAYS & | |--- Queue_data (QUEUE_SHADOW_RAY_CAST_AO_RAYS & QUEUE_SHADOW_RAY_CAST_AO_RAYS) + QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| | + * Queue_index(QUEUE_SHADOW_RAY_CAST_AO_RAYS& + QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| | + * kg (globals) ------------------------------------| | + * queuesize ---------------------------------------| | + * + * Note on sd_shadow : sd_shadow is neither input nor output to this kernel. sd_shadow is filled and consumed in this kernel itself. + * Note on queues : + * The kernel fetches from QUEUE_SHADOW_RAY_CAST_DL_RAYS queue. We will empty this queue in this kernel. + * State of queues when this kernel is called : + * state of queues QUEUE_ACTIVE_AND_REGENERATED_RAYS and QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be same + * before and after this kernel call. + * QUEUE_SHADOW_RAY_CAST_DL_RAYS will be filled with rays marked with flags RAY_SHADOW_RAY_CAST_DL, during kernel entry. + * QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty at kernel exit. 
+ */ +ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) +{ + int lidx = ccl_local_id(1) * ccl_local_id(0) + ccl_local_id(0); + + ccl_local unsigned int dl_queue_length; + if(lidx == 0) { + dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = QUEUE_EMPTY_SLOT; + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if(thread_index < dl_queue_length) { + ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_DL_RAYS, + kernel_split_state.queue_data, kernel_split_params.queue_size, 1); + } + + if(ray_index == QUEUE_EMPTY_SLOT) + return; + + /* Flag determining if we need to update L. */ + char update_path_radiance = 0; + + if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL)) { + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + ccl_global Ray *light_ray_global = &kernel_split_state.light_ray[ray_index]; + + float3 shadow; + Ray ray = *light_ray_global; + update_path_radiance = !(shadow_blocked(kg, + &kernel_split_state.sd_DL_shadow[ray_index], + state, + &ray, + &shadow)); + + *light_ray_global = ray; + /* We use light_ray_global's P and t to store shadow and + * update_path_radiance. + */ + light_ray_global->P = shadow; + light_ray_global->t = update_path_radiance; + } +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index b39ed4995dc..365d78c9f99 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -78,7 +78,7 @@ typedef struct SplitParams { SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? 
*/ \ SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ - SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 2) \ + SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \ SPLIT_DATA_DEBUG_ENTRIES \ /* struct that holds pointers to data in the shared state buffer */ |