diff options
Diffstat (limited to 'intern/cycles/kernel/device/gpu/parallel_prefix_sum.h')
-rw-r--r-- | intern/cycles/kernel/device/gpu/parallel_prefix_sum.h | 46 |
1 files changed, 46 insertions, 0 deletions
/*
 * Copyright 2021 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

CCL_NAMESPACE_BEGIN

/* NOTE(review): this header is included while CCL_NAMESPACE_BEGIN is open. Whether that
 * matters depends on how the macro is defined for GPU builds and on what the header
 * declares -- confirm before moving it. */
#include "util/util_atomic.h"

/* Block size constant for the prefix sum kernel. It is not referenced in this header;
 * presumably it is consumed by the code that launches the kernel. */
#define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512

/* Exclusive prefix sum, computed in place.
 *
 * On return values[i] holds the sum of the original values[0] .. values[i - 1], so
 * values[0] becomes 0. When num_values is zero or negative nothing is read or written.
 *
 * TODO: actually make this work in parallel. Despite the name, the scan below runs
 * serially: only the thread for which both ccl_gpu_block_idx_x and ccl_gpu_thread_idx_x
 * are zero does any work, and every other thread returns immediately. The blocksize
 * template parameter is currently unused.
 *
 * This is used for an array the size of the number of shaders in the scene, which is
 * not usually huge, so it might not be a significant bottleneck.
 *
 * No barrier or fence is issued here; presumably the results only become safe to read
 * from other threads after the kernel has completed -- verify against the caller. */
template<uint blocksize> __device__ void gpu_parallel_prefix_sum(int *values, const int num_values)
{
  /* Everything except the single designated thread bails out. */
  if (ccl_gpu_block_idx_x != 0 || ccl_gpu_thread_idx_x != 0) {
    return;
  }

  /* Sum of all original entries visited so far. */
  int running_total = 0;
  for (int index = 0; index < num_values; ++index) {
    /* Read the entry before overwriting it, since the scan is in place. */
    const int count = values[index];
    values[index] = running_total;
    running_total += count;
  }
}

CCL_NAMESPACE_END