Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/kernel_work_stealing.h')
-rw-r--r--intern/cycles/kernel/kernel_work_stealing.h87
1 files changed, 11 insertions, 76 deletions
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index d1602744f1d..fab0915c38e 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -14,8 +14,7 @@
* limitations under the License.
*/
-#ifndef __KERNEL_WORK_STEALING_H__
-#define __KERNEL_WORK_STEALING_H__
+#pragma once
CCL_NAMESPACE_BEGIN
@@ -24,21 +23,24 @@ CCL_NAMESPACE_BEGIN
*/
/* Map global work index to tile, pixel X/Y and sample. */
-ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
+ccl_device_inline void get_work_pixel(ccl_global const KernelWorkTile *tile,
uint global_work_index,
ccl_private uint *x,
ccl_private uint *y,
ccl_private uint *sample)
{
-#ifdef __KERNEL_CUDA__
- /* Keeping threads for the same pixel together improves performance on CUDA. */
- uint sample_offset = global_work_index % tile->num_samples;
- uint pixel_offset = global_work_index / tile->num_samples;
-#else /* __KERNEL_CUDA__ */
+#if 0
+ /* Keep threads for the same sample together. */
uint tile_pixels = tile->w * tile->h;
uint sample_offset = global_work_index / tile_pixels;
uint pixel_offset = global_work_index - sample_offset * tile_pixels;
-#endif /* __KERNEL_CUDA__ */
+#else
+ /* Keeping threads for the same pixel together.
+ * Appears to improve performance by a few % on CUDA and OptiX. */
+ uint sample_offset = global_work_index % tile->num_samples;
+ uint pixel_offset = global_work_index / tile->num_samples;
+#endif
+
uint y_offset = pixel_offset / tile->w;
uint x_offset = pixel_offset - y_offset * tile->w;
@@ -47,71 +49,4 @@ ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
*sample = tile->start_sample + sample_offset;
}
-#ifdef __KERNEL_OPENCL__
-# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
-#endif
-
-#ifdef __SPLIT_KERNEL__
-/* Returns true if there is work */
-ccl_device bool get_next_work_item(KernelGlobals *kg,
- ccl_global uint *work_pools,
- uint total_work_size,
- uint ray_index,
- ccl_private uint *global_work_index)
-{
- /* With a small amount of work there may be more threads than work due to
- * rounding up of global size, stop such threads immediately. */
- if (ray_index >= total_work_size) {
- return false;
- }
-
- /* Increase atomic work index counter in pool. */
- uint pool = ray_index / WORK_POOL_SIZE;
- uint work_index = atomic_fetch_and_inc_uint32(&work_pools[pool]);
-
- /* Map per-pool work index to a global work index. */
- uint global_size = ccl_global_size(0) * ccl_global_size(1);
- kernel_assert(global_size % WORK_POOL_SIZE == 0);
- kernel_assert(ray_index < global_size);
-
- *global_work_index = (work_index / WORK_POOL_SIZE) * global_size + (pool * WORK_POOL_SIZE) +
- (work_index % WORK_POOL_SIZE);
-
- /* Test if all work for this pool is done. */
- return (*global_work_index < total_work_size);
-}
-
-ccl_device bool get_next_work(KernelGlobals *kg,
- ccl_global uint *work_pools,
- uint total_work_size,
- uint ray_index,
- ccl_private uint *global_work_index)
-{
- bool got_work = false;
- if (kernel_data.film.pass_adaptive_aux_buffer) {
- do {
- got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
- if (got_work) {
- ccl_global WorkTile *tile = &kernel_split_params.tile;
- uint x, y, sample;
- get_work_pixel(tile, *global_work_index, &x, &y, &sample);
- uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
- ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
- ccl_global float4 *aux = (ccl_global float4 *)(buffer +
- kernel_data.film.pass_adaptive_aux_buffer);
- if ((*aux).w == 0.0f) {
- break;
- }
- }
- } while (got_work);
- }
- else {
- got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
- }
- return got_work;
-}
-#endif
-
CCL_NAMESPACE_END
-
-#endif /* __KERNEL_WORK_STEALING_H__ */