1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
/**
 * Virtual shadow-mapping: Schedule phase for tilemaps.
 * This is the most complex part of the entire shadow pipeline.
 * This step reads each updated tilemap to see if any tile is both visible and in need of an
 * update. If that is the case, it computes the bounds of the tiles to update and writes them
 * to a texture to be read back by the CPU. This is a sync step that is the main performance
 * bottleneck of the pipeline.
 *
 * Unused tiles might be reallocated at this stage.
 *
 * For each unallocated tile it reserves a new page in the atlas. If the tile is to be
 * rendered, it also writes the tile copy coordinates required into another buffer.
 * This is also a slow part and should be improved in the future by moving the least amount of
 * tiles.
 */
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_shadow_page_ops_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_shadow_tilemap_lib.glsl)
/* Workgroup-shared bounds of the dirty tile region for the LOD currently being processed.
 * Updated with atomics by all participating threads, read back by thread (0, 0). */
shared ivec2 min_tile;
shared ivec2 max_tile;

/**
 * One thread per LOD 0 tile, one workgroup Z-slice per tilemap.
 * For every LOD (coarsest first), allocate pages for visible & used but unallocated tiles,
 * let LOD 0 inherit a page from the best valid coarser LOD, and write the per-LOD dirty
 * rectangle (rcti layout) for CPU read-back.
 */
void main()
{
  ShadowTileMapData tilemap_data = tilemaps_buf[gl_GlobalInvocationID.z];
  int tilemap_idx = tilemap_data.index;
  /* Cubeface tilemaps carry a LOD chain; other tilemaps only use LOD 0. */
  int lod_max = tilemap_data.is_cubeface ? SHADOW_TILEMAP_LOD : 0;
  /* Best (highest quality) usable LOD found for this thread's LOD 0 tile; 0 means none yet. */
  int lod_valid = 0;
  /* Atlas page of the tile at `lod_valid`. Only meaningful once `lod_valid != 0`. */
  uvec2 page_valid;
  /* Walk from coarsest LOD down to 0 so LOD 0 can fall back to a coarser tile's page. */
  for (int lod = lod_max; lod >= 0; lod--) {
    ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy) >> lod;
    uint stride = 1u << lod;
    /* We load the same data for each thread covering the same LOD tile, but we avoid
     * allocating the same tile twice. This is because we need uniform control flow for the
     * barriers to be valid. Only the top-left thread of each LOD tile is "valid". */
    bool valid_thread = (gl_GlobalInvocationID.xy % stride) == uvec2(0);
    ivec2 texel = shadow_tile_coord_in_atlas(tile_co, tilemap_idx, lod);
    ShadowTileData tile = shadow_tile_data_unpack(imageLoad(tilemaps_img, texel).x);
    if (valid_thread) {
      if (tile.is_visible && tile.is_used && !tile.is_allocated) {
        /* Reserve a free page from the atlas for this needed but unallocated tile. */
        shadow_page_free_buf_pop_last(tile);
        imageStore(tilemaps_img, texel, uvec4(shadow_tile_data_pack(tile)));
      }
    }
    /* Make the allocation above visible to every thread covering this LOD tile. */
    barrier();
    /* Save highest quality valid lod for this thread. */
    if (tile.is_visible && tile.is_used && lod > 0) {
      /* Reload the page in case there was an allocation in the valid thread. */
      page_valid = shadow_tile_data_unpack(imageLoad(tilemaps_img, texel).x).page;
      lod_valid = lod;
    }
    else if (lod == 0 && lod_valid != 0 && !tile.is_allocated) {
      /* LOD 0 has no page of its own: reference the valid coarser LOD's page instead. */
      tile.page = page_valid;
      tile.lod = lod_valid;
      imageStore(tilemaps_img, texel, uvec4(shadow_tile_data_pack(tile)));
    }
    /** Compute area to render and write to buffer for CPU to read. */
    {
      ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy);
      if (gl_GlobalInvocationID.xy == uvec2(0)) {
        /* Start with an inverted rectangle so any single dirty tile shrinks it correctly. */
        min_tile = ivec2(SHADOW_TILEMAP_RES - 1);
        max_tile = ivec2(0);
      }
      /* Makes initial value visible to other threads. */
      barrier();
      if (valid_thread && tile.do_update && tile.is_visible && tile.is_used) {
        atomicMin(min_tile.x, tile_co.x);
        atomicMin(min_tile.y, tile_co.y);
        atomicMax(max_tile.x, tile_co.x);
        atomicMax(max_tile.y, tile_co.y);
      }
      /* Makes final value visible to first threads. */
      barrier();
      if (gl_GlobalInvocationID.xy == uvec2(0)) {
        /* Exclusive max, as expected by the CPU-side rectangle. */
        max_tile += 1;
        /* Must match the rcti structure. */
        ivec4 out_data = ivec4(min_tile.x, max_tile.x, min_tile.y, max_tile.y);
        imageStore(tilemap_rects_img, ivec2(lod, gl_GlobalInvocationID.z), out_data);
      }
    }
  }
}
|