Welcome to mirror list, hosted at ThFree Co, Russian Federation.

parallel_sorted_index.h « gpu « device « kernel « cycles « intern - git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 99e7ee25e0533f9e9831ce028c3b8c0086bbef90 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2021-2022 Blender Foundation */

#pragma once

CCL_NAMESPACE_BEGIN

/* Given an array of states, build an array of indices for which the states
 * are active and sorted by a given key. The prefix sum of the number of active
 * states per key must have already been computed.
 *
 * TODO: there may be ways to optimize this to avoid this many atomic ops? */

#include "util/atomic.h"

#ifdef __HIP__
#  define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 1024
#else
#  define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
#endif
#define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)

template<typename GetKeyOp>
__device__ void gpu_parallel_sorted_index_array(const uint state_index,
                                                const uint num_states,
                                                const int num_states_limit,
                                                ccl_global int *indices,
                                                ccl_global int *num_indices,
                                                ccl_global int *key_counter,
                                                ccl_global int *key_prefix_sum,
                                                GetKeyOp get_key_op)
{
  const int key = (state_index < num_states) ? get_key_op(state_index) :
                                               GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY;

  if (key != GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY) {
    const uint index = atomic_fetch_and_add_uint32(&key_prefix_sum[key], 1);
    if (index < num_states_limit) {
      /* Assign state index. */
      indices[index] = state_index;
    }
    else {
      /* Can't process this state now, increase the counter again so that
       * it will be handled in another iteration. */
      atomic_fetch_and_add_uint32(&key_counter[key], 1);
    }
  }
}

CCL_NAMESPACE_END