diff options
author | Hristo Gueorguiev <prem.nirved@gmail.com> | 2017-03-08 19:39:40 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-03-09 19:09:37 +0300 |
commit | 06c051363b509f7c3c40a803b87739fe0e2a8576 (patch) | |
tree | 83f6023d3927c98175082bf44f3f6623afc856b1 /intern/cycles/device/device_split_kernel.cpp | |
parent | e8b5a5bf5b63ef1c8980f8da95be32cad4d2cf0e (diff) |
Cycles: split kernel_shadow_blocked to AO & DL parts
Reduces memory allocation for split kernel.
This allows for faster rendering due to bigger global size,
specially when GPU memory is limited.
Perfromance results:
R9 290 total render time
Before After Change
BMW 4:37 4:34 -1.1 %
Classroom 14:43 14:30 -1.5 %
Fishy Cat 11:20 11:04 -2.4 %
Koro 12:11 12:04 -1.0 %
Pabellon Barcelona 22:01 20:44 -5.8 %
Pabellon Barcelona(*) 15:32 15:09 -2.5 %
(*) without glossy connected to volume
Diffstat (limited to 'intern/cycles/device/device_split_kernel.cpp')
-rw-r--r-- | intern/cycles/device/device_split_kernel.cpp | 18 |
1 files changed, 8 insertions, 10 deletions
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index 6ab0b3c5777..10a642ed4d0 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -42,7 +42,8 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device) kernel_holdout_emission_blurring_pathtermination_ao = NULL; kernel_subsurface_scatter = NULL; kernel_direct_lighting = NULL; - kernel_shadow_blocked = NULL; + kernel_shadow_blocked_ao = NULL; + kernel_shadow_blocked_dl = NULL; kernel_next_iteration_setup = NULL; kernel_indirect_subsurface = NULL; kernel_buffer_update = NULL; @@ -66,7 +67,8 @@ DeviceSplitKernel::~DeviceSplitKernel() delete kernel_holdout_emission_blurring_pathtermination_ao; delete kernel_subsurface_scatter; delete kernel_direct_lighting; - delete kernel_shadow_blocked; + delete kernel_shadow_blocked_ao; + delete kernel_shadow_blocked_dl; delete kernel_next_iteration_setup; delete kernel_indirect_subsurface; delete kernel_buffer_update; @@ -90,7 +92,8 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao); LOAD_KERNEL(subsurface_scatter); LOAD_KERNEL(direct_lighting); - LOAD_KERNEL(shadow_blocked); + LOAD_KERNEL(shadow_blocked_ao); + LOAD_KERNEL(shadow_blocked_dl); LOAD_KERNEL(next_iteration_setup); LOAD_KERNEL(indirect_subsurface); LOAD_KERNEL(buffer_update); @@ -222,12 +225,6 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, bool activeRaysAvailable = true; while(activeRaysAvailable) { - /* Twice the global work size of other kernels for - * ckPathTraceKernel_shadow_blocked_direct_lighting. */ - size_t global_size_shadow_blocked[2]; - global_size_shadow_blocked[0] = global_size[0] * 2; - global_size_shadow_blocked[1] = global_size[1]; - /* Do path-iteration in host [Enqueue Path-iteration kernels. */ for(int PathIter = 0; PathIter < 16; PathIter++) { ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size); @@ -239,7 +236,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size); ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size); ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size); - ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size_shadow_blocked, local_size); + ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size); ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size); ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size); ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size); |