Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/device/gpu/parallel_sorted_index.h')
-rw-r--r--intern/cycles/kernel/device/gpu/parallel_sorted_index.h26
1 files changed, 18 insertions, 8 deletions
diff --git a/intern/cycles/kernel/device/gpu/parallel_sorted_index.h b/intern/cycles/kernel/device/gpu/parallel_sorted_index.h
index 9bca1fad22f..c092e2a21ee 100644
--- a/intern/cycles/kernel/device/gpu/parallel_sorted_index.h
+++ b/intern/cycles/kernel/device/gpu/parallel_sorted_index.h
@@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN
*
* TODO: there may be ways to optimize this to avoid this many atomic ops? */
-#include "util/util_atomic.h"
+#include "util/atomic.h"
#ifdef __HIP__
# define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 1024
@@ -33,20 +33,30 @@ CCL_NAMESPACE_BEGIN
#endif
#define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)
-template<uint blocksize, typename GetKeyOp>
-__device__ void gpu_parallel_sorted_index_array(const uint num_states,
- int *indices,
- int *num_indices,
- int *key_prefix_sum,
+template<typename GetKeyOp>
+__device__ void gpu_parallel_sorted_index_array(const uint state_index,
+ const uint num_states,
+ const int num_states_limit,
+ ccl_global int *indices,
+ ccl_global int *num_indices,
+ ccl_global int *key_counter,
+ ccl_global int *key_prefix_sum,
GetKeyOp get_key_op)
{
- const uint state_index = ccl_gpu_block_idx_x * blocksize + ccl_gpu_thread_idx_x;
const int key = (state_index < num_states) ? get_key_op(state_index) :
GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY;
if (key != GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY) {
const uint index = atomic_fetch_and_add_uint32(&key_prefix_sum[key], 1);
- indices[index] = state_index;
+ if (index < num_states_limit) {
+ /* Assign state index. */
+ indices[index] = state_index;
+ }
+ else {
+ /* Can't process this state now, increase the counter again so that
+ * it will be handled in another iteration. */
+ atomic_fetch_and_add_uint32(&key_counter[key], 1);
+ }
}
}