diff options
Diffstat (limited to 'intern/cycles/kernel/device/gpu/parallel_prefix_sum.h')
-rw-r--r-- | intern/cycles/kernel/device/gpu/parallel_prefix_sum.h | 14 |
1 files changed, 9 insertions, 5 deletions
diff --git a/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h b/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h
index a1349e82efb..4bd002c27e4 100644
--- a/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h
+++ b/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h
@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN
  * This is used for an array the size of the number of shaders in the scene
  * which is not usually huge, so might not be a significant bottleneck. */
 
-#include "util/util_atomic.h"
+#include "util/atomic.h"
 
 #ifdef __HIP__
 # define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 1024
@@ -33,16 +33,20 @@ CCL_NAMESPACE_BEGIN
 # define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
 #endif
 
-template<uint blocksize> __device__ void gpu_parallel_prefix_sum(int *values, const int num_values)
+__device__ void gpu_parallel_prefix_sum(const int global_id,
+                                        ccl_global int *counter,
+                                        ccl_global int *prefix_sum,
+                                        const int num_values)
 {
-  if (!(ccl_gpu_block_idx_x == 0 && ccl_gpu_thread_idx_x == 0)) {
+  if (global_id != 0) {
     return;
   }
 
   int offset = 0;
   for (int i = 0; i < num_values; i++) {
-    const int new_offset = offset + values[i];
-    values[i] = offset;
+    const int new_offset = offset + counter[i];
+    prefix_sum[i] = offset;
+    counter[i] = 0;
     offset = new_offset;
   }
 }