diff options
Diffstat (limited to 'intern/cycles/kernel/kernel_work_stealing.h')
-rw-r--r-- | intern/cycles/kernel/kernel_work_stealing.h | 34 |
1 files changed, 17 insertions, 17 deletions
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h index 0c11158e8da..0c2d9379b63 100644 --- a/intern/cycles/kernel/kernel_work_stealing.h +++ b/intern/cycles/kernel/kernel_work_stealing.h @@ -27,29 +27,28 @@ CCL_NAMESPACE_BEGIN # pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable #endif +#ifdef __SPLIT_KERNEL__ /* Returns true if there is work */ ccl_device bool get_next_work(KernelGlobals *kg, - uint thread_index, + ccl_global uint *work_pools, + uint total_work_size, + uint ray_index, ccl_private uint *global_work_index) { - uint total_work_size = kernel_split_params.w - * kernel_split_params.h - * kernel_split_params.num_samples; - /* With a small amount of work there may be more threads than work due to * rounding up of global size, stop such threads immediately. */ - if(thread_index >= total_work_size) { + if(ray_index >= total_work_size) { return false; } /* Increase atomic work index counter in pool. */ - uint pool = thread_index / WORK_POOL_SIZE; - uint work_index = atomic_fetch_and_inc_uint32(&kernel_split_params.work_pools[pool]); + uint pool = ray_index / WORK_POOL_SIZE; + uint work_index = atomic_fetch_and_inc_uint32(&work_pools[pool]); /* Map per-pool work index to a global work index. */ uint global_size = ccl_global_size(0) * ccl_global_size(1); kernel_assert(global_size % WORK_POOL_SIZE == 0); - kernel_assert(thread_index < global_size); + kernel_assert(ray_index < global_size); *global_work_index = (work_index / WORK_POOL_SIZE) * global_size + (pool * WORK_POOL_SIZE) @@ -58,23 +57,24 @@ ccl_device bool get_next_work(KernelGlobals *kg, /* Test if all work for this pool is done. */ return (*global_work_index < total_work_size); } +#endif -/* Map global work index to pixel X/Y and sample. */ -ccl_device_inline void get_work_pixel(KernelGlobals *kg, +/* Map global work index to tile, pixel X/Y and sample. */ +ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile, uint global_work_index, ccl_private uint *x, ccl_private uint *y, ccl_private uint *sample) { - uint tile_pixels = kernel_split_params.w * kernel_split_params.h; + uint tile_pixels = tile->w * tile->h; uint sample_offset = global_work_index / tile_pixels; uint pixel_offset = global_work_index - sample_offset * tile_pixels; - uint y_offset = pixel_offset / kernel_split_params.w; - uint x_offset = pixel_offset - y_offset * kernel_split_params.w; + uint y_offset = pixel_offset / tile->w; + uint x_offset = pixel_offset - y_offset * tile->w; - *x = kernel_split_params.x + x_offset; - *y = kernel_split_params.y + y_offset; - *sample = kernel_split_params.start_sample + sample_offset; + *x = tile->x + x_offset; + *y = tile->y + y_offset; + *sample = tile->start_sample + sample_offset; } CCL_NAMESPACE_END |