Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Julian Eisel <eiseljulian@gmail.com>, 2017-04-04 22:39:57 +0300
committer: Julian Eisel <eiseljulian@gmail.com>, 2017-04-04 22:39:57 +0300
commit: 7576ad3d043ac5d15e0c5a68e65339904441b5e7 (patch)
tree: bb990cce1eec04d45ab57e8a42af2669f9d7522f /intern/cycles/kernel/kernel_work_stealing.h
parent: 10b24eabbab0193f6944cdf3bec7b386c75d5445 (diff)
parent: db0f67f46454fd0bfeb886d3e61227b65fbc6ac1 (diff)
Merge branch 'blender2.8' into transform-manipulators (branch: transform-manipulators)
Conflicts: intern/gawain/gawain/immediate.h intern/gawain/src/immediate.c source/blender/editors/physics/physics_ops.c source/blender/editors/screen/glutil.c source/blender/editors/space_view3d/space_view3d.c source/blender/editors/space_view3d/view3d_draw.c source/blender/editors/space_view3d/view3d_edit.c source/blender/editors/space_view3d/view3d_ops.c source/blender/editors/transform/transform_manipulator.c
Diffstat (limited to 'intern/cycles/kernel/kernel_work_stealing.h')
-rw-r--r-- intern/cycles/kernel/kernel_work_stealing.h 211
1 files changed, 68 insertions, 143 deletions
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 7d559b1aa31..28fc5ce1c30 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -17,177 +17,102 @@
#ifndef __KERNEL_WORK_STEALING_H__
#define __KERNEL_WORK_STEALING_H__
+CCL_NAMESPACE_BEGIN
+
/*
* Utility functions for work stealing
*/
-#ifdef __WORK_STEALING__
-
#ifdef __KERNEL_OPENCL__
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
-uint get_group_id_with_ray_index(uint ray_index,
- uint tile_dim_x,
- uint tile_dim_y,
- uint parallel_samples,
- int dim)
+ccl_device_inline uint kernel_total_work_size(KernelGlobals *kg) /* Total number of work items: w * h * num_samples, all read from kernel_split_params. */
+{
+ return kernel_split_params.w * kernel_split_params.h * kernel_split_params.num_samples; /* NOTE(review): result is returned as uint; whether this product can exceed 32 bits is not visible here — confirm. */
+}
+
+ccl_device_inline uint kernel_num_work_pools(KernelGlobals *kg) /* Number of work pools: one per WORK_POOL_SIZE entries of the 2D global size. */
+{
+ return ccl_global_size(0) * ccl_global_size(1) / WORK_POOL_SIZE; /* Product of the two global sizes divided by WORK_POOL_SIZE; presumably a truncating integer division, so a partial trailing pool would not be counted — confirm. */
+}
+
+ccl_device_inline uint work_pool_from_ray_index(KernelGlobals *kg, uint ray_index) /* Index of the work pool that `ray_index` draws its work from. */
+{
+ return ray_index / WORK_POOL_SIZE; /* Each run of WORK_POOL_SIZE consecutive ray indices maps to the same pool. */
+}
+
+ccl_device_inline uint work_pool_work_size(KernelGlobals *kg, uint work_pool) /* Number of work items assigned to pool `work_pool`; 0 when the pool has none. */
{
- if(dim == 0) {
- uint x_span = ray_index % (tile_dim_x * parallel_samples);
- return x_span / get_local_size(0);
+ uint total_work_size = kernel_total_work_size(kg);
+ uint num_pools = kernel_num_work_pools(kg);
+
+ if(work_pool >= num_pools || work_pool * WORK_POOL_SIZE >= total_work_size) { /* Pool index out of range, or the total work ends before this pool's first chunk. Also taken whenever num_pools == 0, so the divisions below never see a zero pool count. */
+ return 0;
+ }
+
+ uint work_size = (total_work_size / (num_pools * WORK_POOL_SIZE)) * WORK_POOL_SIZE; /* Whole rounds in which every pool receives a full WORK_POOL_SIZE chunk. */
+
+ uint remainder = (total_work_size % (num_pools * WORK_POOL_SIZE)); /* Work left over after those whole rounds. */
+ if(work_pool < remainder / WORK_POOL_SIZE) { /* The leftover still covers one more full chunk for this pool. */
+ work_size += WORK_POOL_SIZE;
}
- else /*if(dim == 1)*/ {
- kernel_assert(dim == 1);
- uint y_span = ray_index / (tile_dim_x * parallel_samples);
- return y_span / get_local_size(1);
+ else if(work_pool == remainder / WORK_POOL_SIZE) { /* This pool receives the final, partial chunk (which may be empty). */
+ work_size += remainder % WORK_POOL_SIZE;
}
+
+ return work_size;
}
-uint get_total_work(uint tile_dim_x,
- uint tile_dim_y,
- uint grp_idx,
- uint grp_idy,
- uint num_samples)
+ccl_device_inline uint get_global_work_index(KernelGlobals *kg, uint work_index, uint ray_index) /* Converts a pool-local `work_index` into an index over the whole work range; pools are interleaved in chunks of WORK_POOL_SIZE. */
{
- uint threads_within_tile_border_x =
- (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
- : get_local_size(0);
- uint threads_within_tile_border_y =
- (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
- : get_local_size(1);
-
- threads_within_tile_border_x =
- (threads_within_tile_border_x == 0) ? get_local_size(0)
- : threads_within_tile_border_x;
- threads_within_tile_border_y =
- (threads_within_tile_border_y == 0) ? get_local_size(1)
- : threads_within_tile_border_y;
-
- return threads_within_tile_border_x *
- threads_within_tile_border_y *
- num_samples;
+ uint num_pools = kernel_num_work_pools(kg);
+ uint pool = work_pool_from_ray_index(kg, ray_index); /* The pool is derived from the ray index, not from work_index. */
+
+ return (work_index / WORK_POOL_SIZE) * (num_pools * WORK_POOL_SIZE) /* Start of the round (one chunk per pool) this item falls in. */
+ + (pool * WORK_POOL_SIZE) /* Offset of this pool's chunk within that round. */
+ + (work_index % WORK_POOL_SIZE); /* Position inside the chunk. */
}
-/* Returns 0 in case there is no next work available */
-/* Returns 1 in case work assigned is valid */
-int get_next_work(ccl_global uint *work_pool,
- ccl_private uint *my_work,
- uint tile_dim_x,
- uint tile_dim_y,
- uint num_samples,
- uint parallel_samples,
- uint ray_index)
+/* Fetches the next work item from the pool that `ray_index` maps to and stores its pool-local index in `*work_index`. Returns true if there is work. */
+ccl_device bool get_next_work(KernelGlobals *kg, ccl_private uint *work_index, uint ray_index)
{
- uint grp_idx = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 0);
- uint grp_idy = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 1);
- uint total_work = get_total_work(tile_dim_x,
- tile_dim_y,
- grp_idx,
- grp_idy,
- num_samples);
- uint group_index = grp_idy * get_num_groups(0) + grp_idx;
- *my_work = atomic_inc(&work_pool[group_index]);
- return (*my_work < total_work) ? 1 : 0;
+ uint work_pool = work_pool_from_ray_index(kg, ray_index);
+ uint pool_size = work_pool_work_size(kg, work_pool);
+
+ if(pool_size == 0) { /* Empty pool: return without touching the pool counter or `*work_index`. */
+ return false;
+ }
+
+ *work_index = atomic_fetch_and_inc_uint32(&kernel_split_params.work_pools[work_pool]); /* NOTE(review): presumably yields the counter value from before the increment — confirm against the atomic helper's definition. */
+ return (*work_index < pool_size); /* `*work_index` has been written even when this is false; callers should only use it on true. */
}
-/* This function assumes that the passed my_work is valid. */
-/* Decode sample number w.r.t. assigned my_work. */
-uint get_my_sample(uint my_work,
- uint tile_dim_x,
- uint tile_dim_y,
- uint parallel_samples,
- uint ray_index)
+/* This function assumes that the passed `work_index` is valid. */
+/* Decode sample number w.r.t. assigned `work_index`. */
+ccl_device uint get_work_sample(KernelGlobals *kg, uint work_index, uint ray_index)
{
- uint grp_idx = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 0);
- uint grp_idy = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 1);
- uint threads_within_tile_border_x =
- (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
- : get_local_size(0);
- uint threads_within_tile_border_y =
- (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
- : get_local_size(1);
-
- threads_within_tile_border_x =
- (threads_within_tile_border_x == 0) ? get_local_size(0)
- : threads_within_tile_border_x;
- threads_within_tile_border_y =
- (threads_within_tile_border_y == 0) ? get_local_size(1)
- : threads_within_tile_border_y;
-
- return my_work /
- (threads_within_tile_border_x * threads_within_tile_border_y);
+ return get_global_work_index(kg, work_index, ray_index) / (kernel_split_params.w * kernel_split_params.h); /* Each run of w * h consecutive global indices belongs to one sample. */
}
-/* Decode pixel and tile position w.r.t. assigned my_work. */
-void get_pixel_tile_position(ccl_private uint *pixel_x,
+/* Decode pixel and tile position w.r.t. assigned `work_index`. */
+ccl_device void get_work_pixel_tile_position(KernelGlobals *kg,
+ ccl_private uint *pixel_x,
ccl_private uint *pixel_y,
ccl_private uint *tile_x,
ccl_private uint *tile_y,
- uint my_work,
- uint tile_dim_x,
- uint tile_dim_y,
- uint tile_offset_x,
- uint tile_offset_y,
- uint parallel_samples,
+ uint work_index,
uint ray_index)
{
- uint grp_idx = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 0);
- uint grp_idy = get_group_id_with_ray_index(ray_index,
- tile_dim_x,
- tile_dim_y,
- parallel_samples,
- 1);
- uint threads_within_tile_border_x =
- (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
- : get_local_size(0);
- uint threads_within_tile_border_y =
- (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
- : get_local_size(1);
-
- threads_within_tile_border_x =
- (threads_within_tile_border_x == 0) ? get_local_size(0)
- : threads_within_tile_border_x;
- threads_within_tile_border_y =
- (threads_within_tile_border_y == 0) ? get_local_size(1)
- : threads_within_tile_border_y;
-
- uint total_associated_pixels =
- threads_within_tile_border_x * threads_within_tile_border_y;
- uint work_group_pixel_index = my_work % total_associated_pixels;
- uint work_group_pixel_x =
- work_group_pixel_index % threads_within_tile_border_x;
- uint work_group_pixel_y =
- work_group_pixel_index / threads_within_tile_border_x;
-
- *pixel_x =
- tile_offset_x + (grp_idx * get_local_size(0)) + work_group_pixel_x;
- *pixel_y =
- tile_offset_y + (grp_idy * get_local_size(1)) + work_group_pixel_y;
- *tile_x = *pixel_x - tile_offset_x;
- *tile_y = *pixel_y - tile_offset_y;
+ uint pixel_index = get_global_work_index(kg, work_index, ray_index) % (kernel_split_params.w*kernel_split_params.h); /* Drop the sample part of the global index, leaving a position within one w * h block. */
+
+ *tile_x = pixel_index % kernel_split_params.w; /* Column within the tile: rows are w entries wide. */
+ *tile_y = pixel_index / kernel_split_params.w; /* Row within the tile. */
+
+ *pixel_x = *tile_x + kernel_split_params.x; /* Tile-relative position shifted by kernel_split_params.x. */
+ *pixel_y = *tile_y + kernel_split_params.y; /* Tile-relative position shifted by kernel_split_params.y. */
}
-#endif /* __WORK_STEALING__ */
+CCL_NAMESPACE_END
#endif /* __KERNEL_WORK_STEALING_H__ */