Diffstat (limited to 'intern/cycles/kernel/kernel_work_stealing.h')
-rw-r--r--  intern/cycles/kernel/kernel_work_stealing.h  195
1 file changed, 42 insertions(+), 153 deletions(-)
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 7d559b1aa31..0c2d9379b63 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -17,177 +17,66 @@
#ifndef __KERNEL_WORK_STEALING_H__
#define __KERNEL_WORK_STEALING_H__
+CCL_NAMESPACE_BEGIN
+
/*
* Utility functions for work stealing
*/
-#ifdef __WORK_STEALING__
-
#ifdef __KERNEL_OPENCL__
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
-uint get_group_id_with_ray_index(uint ray_index,
- uint tile_dim_x,
- uint tile_dim_y,
- uint parallel_samples,
- int dim)
+#ifdef __SPLIT_KERNEL__
+/* Returns true if there is work */
+ccl_device bool get_next_work(KernelGlobals *kg,
+ ccl_global uint *work_pools,
+ uint total_work_size,
+ uint ray_index,
+ ccl_private uint *global_work_index)
{
- if(dim == 0) {
- uint x_span = ray_index % (tile_dim_x * parallel_samples);
- return x_span / get_local_size(0);
+ /* With a small amount of work there may be more threads than work due to
+ * rounding up of global size, stop such threads immediately. */
+ if(ray_index >= total_work_size) {
+ return false;
}
- else /*if(dim == 1)*/ {
- kernel_assert(dim == 1);
- uint y_span = ray_index / (tile_dim_x * parallel_samples);
- return y_span / get_local_size(1);
- }
-}
-
-uint get_total_work(uint tile_dim_x,
- uint tile_dim_y,
- uint grp_idx,
- uint grp_idy,
- uint num_samples)
-{
- uint threads_within_tile_border_x =
- (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
- : get_local_size(0);
- uint threads_within_tile_border_y =
- (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
- : get_local_size(1);
-
- threads_within_tile_border_x =
- (threads_within_tile_border_x == 0) ? get_local_size(0)
- : threads_within_tile_border_x;
- threads_within_tile_border_y =
- (threads_within_tile_border_y == 0) ? get_local_size(1)
- : threads_within_tile_border_y;
-
- return threads_within_tile_border_x *
- threads_within_tile_border_y *
- num_samples;
-}
-/* Returns 0 in case there is no next work available */
-/* Returns 1 in case work assigned is valid */
-int get_next_work(ccl_global uint *work_pool,
- ccl_private uint *my_work,
- uint tile_dim_x,
- uint tile_dim_y,
- uint num_samples,
- uint parallel_samples,
- uint ray_index)
-{
- uint grp_idx = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 0);
- uint grp_idy = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 1);
- uint total_work = get_total_work(tile_dim_x,
- tile_dim_y,
- grp_idx,
- grp_idy,
- num_samples);
- uint group_index = grp_idy * get_num_groups(0) + grp_idx;
- *my_work = atomic_inc(&work_pool[group_index]);
- return (*my_work < total_work) ? 1 : 0;
-}
+ /* Increase atomic work index counter in pool. */
+ uint pool = ray_index / WORK_POOL_SIZE;
+ uint work_index = atomic_fetch_and_inc_uint32(&work_pools[pool]);
-/* This function assumes that the passed my_work is valid. */
-/* Decode sample number w.r.t. assigned my_work. */
-uint get_my_sample(uint my_work,
- uint tile_dim_x,
- uint tile_dim_y,
- uint parallel_samples,
- uint ray_index)
-{
- uint grp_idx = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 0);
- uint grp_idy = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 1);
- uint threads_within_tile_border_x =
- (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
- : get_local_size(0);
- uint threads_within_tile_border_y =
- (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
- : get_local_size(1);
+ /* Map per-pool work index to a global work index. */
+ uint global_size = ccl_global_size(0) * ccl_global_size(1);
+ kernel_assert(global_size % WORK_POOL_SIZE == 0);
+ kernel_assert(ray_index < global_size);
- threads_within_tile_border_x =
- (threads_within_tile_border_x == 0) ? get_local_size(0)
- : threads_within_tile_border_x;
- threads_within_tile_border_y =
- (threads_within_tile_border_y == 0) ? get_local_size(1)
- : threads_within_tile_border_y;
+ *global_work_index = (work_index / WORK_POOL_SIZE) * global_size
+ + (pool * WORK_POOL_SIZE)
+ + (work_index % WORK_POOL_SIZE);
- return my_work /
- (threads_within_tile_border_x * threads_within_tile_border_y);
+ /* Test if all work for this pool is done. */
+ return (*global_work_index < total_work_size);
}
+#endif
-/* Decode pixel and tile position w.r.t. assigned my_work. */
-void get_pixel_tile_position(ccl_private uint *pixel_x,
- ccl_private uint *pixel_y,
- ccl_private uint *tile_x,
- ccl_private uint *tile_y,
- uint my_work,
- uint tile_dim_x,
- uint tile_dim_y,
- uint tile_offset_x,
- uint tile_offset_y,
- uint parallel_samples,
- uint ray_index)
+/* Map global work index to tile, pixel X/Y and sample. */
+ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
+ uint global_work_index,
+ ccl_private uint *x,
+ ccl_private uint *y,
+ ccl_private uint *sample)
{
- uint grp_idx = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 0);
- uint grp_idy = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 1);
- uint threads_within_tile_border_x =
- (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
- : get_local_size(0);
- uint threads_within_tile_border_y =
- (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
- : get_local_size(1);
-
- threads_within_tile_border_x =
- (threads_within_tile_border_x == 0) ? get_local_size(0)
- : threads_within_tile_border_x;
- threads_within_tile_border_y =
- (threads_within_tile_border_y == 0) ? get_local_size(1)
- : threads_within_tile_border_y;
-
- uint total_associated_pixels =
- threads_within_tile_border_x * threads_within_tile_border_y;
- uint work_group_pixel_index = my_work % total_associated_pixels;
- uint work_group_pixel_x =
- work_group_pixel_index % threads_within_tile_border_x;
- uint work_group_pixel_y =
- work_group_pixel_index / threads_within_tile_border_x;
-
- *pixel_x =
- tile_offset_x + (grp_idx * get_local_size(0)) + work_group_pixel_x;
- *pixel_y =
- tile_offset_y + (grp_idy * get_local_size(1)) + work_group_pixel_y;
- *tile_x = *pixel_x - tile_offset_x;
- *tile_y = *pixel_y - tile_offset_y;
+ uint tile_pixels = tile->w * tile->h;
+ uint sample_offset = global_work_index / tile_pixels;
+ uint pixel_offset = global_work_index - sample_offset * tile_pixels;
+ uint y_offset = pixel_offset / tile->w;
+ uint x_offset = pixel_offset - y_offset * tile->w;
+
+ *x = tile->x + x_offset;
+ *y = tile->y + y_offset;
+ *sample = tile->start_sample + sample_offset;
}
-#endif /* __WORK_STEALING__ */
+CCL_NAMESPACE_END
#endif /* __KERNEL_WORK_STEALING_H__ */
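
The interesting step in the new get_next_work() is the mapping from a pool-local atomic counter to a global work index. Below is a minimal host-side sketch of that arithmetic, useful for checking the indexing on paper; WORK_POOL_SIZE = 64 and the 16x16 global size are illustrative assumptions chosen for the example, not values taken from this header.

#include <stdio.h>

#define WORK_POOL_SIZE 64u  /* assumed value for illustration */

static unsigned int map_to_global(unsigned int pool,
                                  unsigned int work_index,
                                  unsigned int global_size)
{
  /* Each stripe of global_size items gives every pool WORK_POOL_SIZE slots;
   * work_index selects the stripe and the slot within the pool's block,
   * mirroring the expression computed in get_next_work() above. */
  return (work_index / WORK_POOL_SIZE) * global_size +
         pool * WORK_POOL_SIZE +
         work_index % WORK_POOL_SIZE;
}

int main(void)
{
  /* Stand-in for ccl_global_size(0) * ccl_global_size(1). */
  const unsigned int global_size = 16 * 16;

  /* Pool 2 claims its 131st item (work_index == 130): stripe 130/64 = 2,
   * slot 130%64 = 2, so 2*256 + 2*64 + 2 = 642. */
  printf("%u\n", map_to_global(2u, 130u, global_size));
  return 0;
}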
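
Likewise, get_work_pixel() decodes a global work index into a sample pass plus a pixel position inside the tile. A small sketch with a made-up 8x4 tile, again compiled on the host rather than in the kernel; the struct is a simplified stand-in for the kernel's WorkTile.

#include <stdio.h>

/* Simplified stand-in for the kernel's WorkTile. */
typedef struct {
  unsigned int x, y, w, h;
  unsigned int start_sample;
} Tile;

static void work_pixel(const Tile *tile, unsigned int global_work_index,
                       unsigned int *x, unsigned int *y, unsigned int *sample)
{
  /* Same decomposition as get_work_pixel() above: sample pass first,
   * then row and column within the tile. */
  unsigned int tile_pixels = tile->w * tile->h;
  unsigned int sample_offset = global_work_index / tile_pixels;
  unsigned int pixel_offset = global_work_index - sample_offset * tile_pixels;
  unsigned int y_offset = pixel_offset / tile->w;
  unsigned int x_offset = pixel_offset - y_offset * tile->w;

  *x = tile->x + x_offset;
  *y = tile->y + y_offset;
  *sample = tile->start_sample + sample_offset;
}

int main(void)
{
  Tile tile = {32, 64, 8, 4, 10};  /* 8x4 tile at (32, 64), starting at sample 10 */
  unsigned int x, y, sample;

  /* Work item 45 = one full 32-pixel pass plus 13 pixels into the second pass:
   * expect sample 11 at tile-local pixel (5, 1), i.e. absolute (37, 65). */
  work_pixel(&tile, 45u, &x, &y, &sample);
  printf("x=%u y=%u sample=%u\n", x, y, sample);
  return 0;
}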