blob: daf2016cd35681566792886689dbef9e13d768a2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
/**
* Sort the lights by their Z distance to the camera.
* Outputs ordered light buffer.
* One thread processes one Light entity.
*/
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
/* Work-group shared cache of Z distances, one slot per invocation of the work-group.
 * `main()` refills it chunk by chunk: each invocation stores one value at its own
 * `gl_LocalInvocationID.x`, then every invocation reads the populated slots. */
shared float zdists_cache[gl_WorkGroupSize.x];
/**
 * Rank-based sort: each invocation computes the output slot of its own light by counting how
 * many visible lights must come before it (smaller Z distance, or equal Z distance and smaller
 * index in `in_zdist_buf`), then copies the light into `out_light_buf` at that slot.
 *
 * The Z distances are streamed through `zdists_cache` in chunks of `gl_WorkGroupSize.x`
 * values so that every invocation of the work-group compares against the same cached chunk.
 */
void main()
{
  uint visible_count = light_cull_buf.visible_count;
  if (visible_count == 0u) {
    /* Nothing to sort. Without this guard `visible_count - 1` below wraps around and the
     * Z distance load is out of bounds. Every invocation reads the same field, so the whole
     * work-group leaves together before reaching any barrier (assuming nothing writes
     * `visible_count` during this dispatch). */
    return;
  }

  uint src_index = gl_GlobalInvocationID.x;
  bool valid_thread = true;

  if (src_index >= visible_count) {
    /* Do not return because we use barriers later on (which need uniform control flow).
     * Just process the same last item but avoid insertion. */
    src_index = visible_count - 1u;
    valid_thread = false;
  }

  float local_zdist = in_zdist_buf[src_index];

  /* Number of lights ordered before this one, i.e. its index in the sorted output. */
  int prefix_sum = 0;
  /* Iterate over the whole key buffer. */
  uint iter = divide_ceil_u(visible_count, gl_WorkGroupSize.x);
  for (uint i = 0u; i < iter; i++) {
    /* Index of the first item of the chunk processed during this iteration. */
    uint line_start = gl_WorkGroupSize.x * i;
    /* NOTE: This will load duplicated values, but they will be discarded. */
    uint index = min(line_start + gl_LocalInvocationID.x, visible_count - 1u);
    zdists_cache[gl_LocalInvocationID.x] = in_zdist_buf[index];

    /* Wait for the whole chunk to be written before anyone reads it. */
    barrier();

    /* Iterate over the cache line. Only the slots that map to real items are visited. */
    uint line_end = min(gl_WorkGroupSize.x, visible_count - line_start);
    for (uint j = 0u; j < line_end; j++) {
      if (zdists_cache[j] < local_zdist) {
        prefix_sum++;
      }
      else if (zdists_cache[j] == local_zdist) {
        /* Same depth, use index to order and avoid same prefix for 2 different lights. */
        if ((line_start + j) < src_index) {
          prefix_sum++;
        }
      }
    }

    /* Wait for every invocation to finish reading this chunk before the next iteration
     * overwrites the cache. Without it, a fast invocation could replace its slot while
     * slower ones are still scanning the current chunk. */
    barrier();
  }

  if (valid_thread) {
    /* Copy sorted light to render light buffer. */
    uint input_index = in_key_buf[src_index];
    out_light_buf[prefix_sum] = in_light_buf[input_index];
  }
}
|