git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/kernel/kernel_work_stealing.h')
-rw-r--r--  intern/cycles/kernel/kernel_work_stealing.h  193
1 file changed, 193 insertions, 0 deletions
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
new file mode 100644
index 00000000000..9b83d972e97
--- /dev/null
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_WORK_STEALING_H__
+#define __KERNEL_WORK_STEALING_H__
+
+/*
+ * Utility functions for work stealing
+ */
+
+#ifdef __WORK_STEALING__
+
+#ifdef __KERNEL_OPENCL__
+#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
+#endif
+
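+/* Map a thread's linear ray index back to its 2D work-group id; dim 0
+ * selects the group id along x and dim 1 the group id along y. The tile's
+ * x extent is widened by parallel_samples, since parallel samples are laid
+ * out side by side along x.
+ */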
+uint get_group_id_with_ray_index(uint ray_index,
+                                 uint tile_dim_x,
+                                 uint tile_dim_y,
+                                 uint parallel_samples,
+                                 int dim)
+{
+  if(dim == 0) {
+    uint x_span = ray_index % (tile_dim_x * parallel_samples);
+    return x_span / get_local_size(0);
+  }
+  else /*if(dim == 1)*/ {
+    kernel_assert(dim == 1);
+    uint y_span = ray_index / (tile_dim_x * parallel_samples);
+    return y_span / get_local_size(1);
+  }
+}
+
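+/* Total number of work items for one work-group: the number of pixels the
+ * group actually covers (the last group in each dimension is clamped at
+ * the tile border) times the number of samples per pixel.
+ */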
+uint get_total_work(uint tile_dim_x,
+                    uint tile_dim_y,
+                    uint grp_idx,
+                    uint grp_idy,
+                    uint num_samples)
+{
+  uint threads_within_tile_border_x =
+    (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+                                          : get_local_size(0);
+  uint threads_within_tile_border_y =
+    (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+                                          : get_local_size(1);
+
+  threads_within_tile_border_x =
+    (threads_within_tile_border_x == 0) ? get_local_size(0)
+                                        : threads_within_tile_border_x;
+  threads_within_tile_border_y =
+    (threads_within_tile_border_y == 0) ? get_local_size(1)
+                                        : threads_within_tile_border_y;
+
+  return threads_within_tile_border_x *
+         threads_within_tile_border_y *
+         num_samples;
+}
+
+/* Grab the next work item from this work-group's pool.
+ * Returns 1 and writes a valid work item to *my_work if work is available,
+ * or 0 if the pool is exhausted.
+ */
+int get_next_work(ccl_global uint *work_pool,
+                  ccl_private uint *my_work,
+                  uint tile_dim_x,
+                  uint tile_dim_y,
+                  uint num_samples,
+                  uint parallel_samples,
+                  uint ray_index)
+{
+  uint grp_idx = get_group_id_with_ray_index(ray_index,
+                                             tile_dim_x,
+                                             tile_dim_y,
+                                             parallel_samples,
+                                             0);
+  uint grp_idy = get_group_id_with_ray_index(ray_index,
+                                             tile_dim_x,
+                                             tile_dim_y,
+                                             parallel_samples,
+                                             1);
+  uint total_work = get_total_work(tile_dim_x,
+                                   tile_dim_y,
+                                   grp_idx,
+                                   grp_idy,
+                                   num_samples);
+  uint group_index = grp_idy * get_num_groups(0) + grp_idx;
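+  /* Atomically claim the next unclaimed work item from the group's shared
+   * counter; every thread in the group draws from the same pool, which is
+   * what balances the work between them.
+   */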
+  *my_work = atomic_inc(&work_pool[group_index]);
+  return (*my_work < total_work) ? 1 : 0;
+}
+
+/* Decode the sample number from the assigned my_work value.
+ * Assumes the passed my_work is valid.
+ */
+uint get_my_sample(uint my_work,
+                   uint tile_dim_x,
+                   uint tile_dim_y,
+                   uint parallel_samples,
+                   uint ray_index)
+{
+  uint grp_idx = get_group_id_with_ray_index(ray_index,
+                                             tile_dim_x,
+                                             tile_dim_y,
+                                             parallel_samples,
+                                             0);
+  uint grp_idy = get_group_id_with_ray_index(ray_index,
+                                             tile_dim_x,
+                                             tile_dim_y,
+                                             parallel_samples,
+                                             1);
+  uint threads_within_tile_border_x =
+    (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+                                          : get_local_size(0);
+  uint threads_within_tile_border_y =
+    (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+                                          : get_local_size(1);
+
+  threads_within_tile_border_x =
+    (threads_within_tile_border_x == 0) ? get_local_size(0)
+                                        : threads_within_tile_border_x;
+  threads_within_tile_border_y =
+    (threads_within_tile_border_y == 0) ? get_local_size(1)
+                                        : threads_within_tile_border_y;
+
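+  /* One sample is consumed per full pass over the group's pixels, so the
+   * sample number is the work index divided by the group's pixel count.
+   */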
+  return my_work /
+         (threads_within_tile_border_x * threads_within_tile_border_y);
+}
+
+/* Decode the pixel and tile position from the assigned my_work value. */
+void get_pixel_tile_position(ccl_private uint *pixel_x,
+                             ccl_private uint *pixel_y,
+                             ccl_private uint *tile_x,
+                             ccl_private uint *tile_y,
+                             uint my_work,
+                             uint tile_dim_x,
+                             uint tile_dim_y,
+                             uint tile_offset_x,
+                             uint tile_offset_y,
+                             uint parallel_samples,
+                             uint ray_index)
+{
+  uint grp_idx = get_group_id_with_ray_index(ray_index,
+                                             tile_dim_x,
+                                             tile_dim_y,
+                                             parallel_samples,
+                                             0);
+  uint grp_idy = get_group_id_with_ray_index(ray_index,
+                                             tile_dim_x,
+                                             tile_dim_y,
+                                             parallel_samples,
+                                             1);
+  uint threads_within_tile_border_x =
+    (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+                                          : get_local_size(0);
+  uint threads_within_tile_border_y =
+    (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+                                          : get_local_size(1);
+
+  threads_within_tile_border_x =
+    (threads_within_tile_border_x == 0) ? get_local_size(0)
+                                        : threads_within_tile_border_x;
+  threads_within_tile_border_y =
+    (threads_within_tile_border_y == 0) ? get_local_size(1)
+                                        : threads_within_tile_border_y;
+
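+  /* The work index modulo the group's pixel count selects a pixel within
+   * the group; decode it back to 2D group-local coordinates.
+   */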
+  uint total_associated_pixels =
+    threads_within_tile_border_x * threads_within_tile_border_y;
+  uint work_group_pixel_index = my_work % total_associated_pixels;
+  uint work_group_pixel_x =
+    work_group_pixel_index % threads_within_tile_border_x;
+  uint work_group_pixel_y =
+    work_group_pixel_index / threads_within_tile_border_x;
+
+  *pixel_x =
+    tile_offset_x + (grp_idx * get_local_size(0)) + work_group_pixel_x;
+  *pixel_y =
+    tile_offset_y + (grp_idy * get_local_size(1)) + work_group_pixel_y;
+  *tile_x = *pixel_x - tile_offset_x;
+  *tile_y = *pixel_y - tile_offset_y;
+}
+
+#endif /* __WORK_STEALING__ */
+
+#endif /* __KERNEL_WORK_STEALING_H__ */
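
Taken together, these helpers give each OpenCL work-group its own counter in
work_pool: threads repeatedly claim (pixel, sample) work items from that
counter until the group's share is exhausted, so faster threads naturally
pick up more items. The following is a minimal sketch, not part of this
commit, of how a kernel could drive the pool. The kernel name is
hypothetical, and it assumes work_pool holds one zero-initialized uint per
work-group and that the global work size along x equals
tile_dim_x * parallel_samples (in Cycles' OpenCL build, ccl_global and
ccl_private map to __global and __private).

__kernel void demo_work_stealing(ccl_global uint *work_pool,
                                 uint tile_dim_x,
                                 uint tile_dim_y,
                                 uint tile_offset_x,
                                 uint tile_offset_y,
                                 uint num_samples,
                                 uint parallel_samples)
{
  /* Row-major thread index over the sample-expanded tile; this is the
   * layout get_group_id_with_ray_index() expects.
   */
  uint ray_index = get_global_id(1) * (tile_dim_x * parallel_samples) +
                   get_global_id(0);
  uint my_work, pixel_x, pixel_y, tile_x, tile_y;

  /* Keep claiming work items until this group's pool runs dry. */
  while(get_next_work(work_pool, &my_work,
                      tile_dim_x, tile_dim_y,
                      num_samples, parallel_samples, ray_index))
  {
    uint sample = get_my_sample(my_work, tile_dim_x, tile_dim_y,
                                parallel_samples, ray_index);
    get_pixel_tile_position(&pixel_x, &pixel_y, &tile_x, &tile_y,
                            my_work, tile_dim_x, tile_dim_y,
                            tile_offset_x, tile_offset_y,
                            parallel_samples, ray_index);
    /* ... trace sample 'sample' for pixel (pixel_x, pixel_y) here ... */
  }
}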