From 1b00ca35758dacf7ece7b95275ea3c41e53bec6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Tue, 23 Nov 2021 21:24:00 +0100 Subject: EEVEE: Light: Port light culling to GPU This removes the light count limit for the forward shaded object. This also provides a more efficient way of computing the culling directly on the GPU. Moreover, this avoids doing multiple lighting passes for high light counts in the deferred pipeline, improving performance. --- .../eevee/shaders/eevee_culling_debug_frag.glsl | 31 +++-- .../eevee/shaders/eevee_culling_iter_lib.glsl | 61 ++++----- .../engines/eevee/shaders/eevee_culling_lib.glsl | 32 +++-- .../eevee/shaders/eevee_culling_light_frag.glsl | 51 -------- .../eevee/shaders/eevee_culling_select_comp.glsl | 57 +++++++++ .../eevee/shaders/eevee_culling_sort_comp.glsl | 138 +++++++++++++++++++++ .../eevee/shaders/eevee_culling_tile_comp.glsl | 73 +++++++++++ .../eevee/shaders/eevee_deferred_direct_frag.glsl | 21 +++- .../eevee/shaders/eevee_deferred_volume_frag.glsl | 21 +++- .../eevee/shaders/eevee_light_eval_lib.glsl | 7 +- .../eevee/shaders/eevee_surface_forward_frag.glsl | 21 +++- 11 files changed, 383 insertions(+), 130 deletions(-) delete mode 100644 source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl create mode 100644 source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl create mode 100644 source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl create mode 100644 source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl (limited to 'source/blender/draw/engines/eevee/shaders') diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl index f559788145d..33734324445 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl @@ -2,25 +2,34 
@@ /** * Debug Shader outputing a gradient of orange - white - blue to mark culling hotspots. * Green pixels are error pixels that are missing lights from the culling pass (i.e: when culling - * pass is not conservative enough). This shader will only work on the last light batch so remove - * some lights from the scene you are debugging to have below CULLING_ITEM_BATCH lights. + * pass is not conservative enough). */ #pragma BLENDER_REQUIRE(common_view_lib.glsl) #pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) #pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl) -layout(std140) uniform lights_block +layout(std430, binding = 0) readonly restrict buffer lights_buf { - LightData lights[CULLING_ITEM_BATCH]; + LightData lights[]; }; -layout(std140) uniform lights_culling_block +layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf { - CullingData culling; + CullingZBin lights_zbins[]; +}; + +layout(std430, binding = 2) readonly restrict buffer lights_culling_buf +{ + CullingData light_culling; +}; + +layout(std430, binding = 3) readonly restrict buffer lights_tile_buf +{ + CullingWord lights_culling_words[]; }; -uniform usampler2D item_culling_tx; uniform sampler2D depth_tx; in vec4 uvcoordsvar; @@ -29,14 +38,14 @@ layout(location = 0) out vec4 out_debug_color; void main(void) { - float depth = textureLod(depth_tx, uvcoordsvar.xy, 0.0).r; + float depth = texelFetch(depth_tx, ivec2(gl_FragCoord.xy), 0).r; float vP_z = get_view_z_from_depth(depth); vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth); float lights_count = 0.0; uint lights_cull = 0u; - ITEM_FOREACH_BEGIN (culling, item_culling_tx, vP_z, l_idx) { + ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) { LightData light = lights[l_idx]; lights_cull |= 1u << l_idx; lights_count += 1.0; @@ -44,7 +53,7 @@ void main(void) ITEM_FOREACH_END uint lights_nocull = 0u; - ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) { + 
ITEM_FOREACH_BEGIN_NO_CULL (light_culling, l_idx) { LightData light = lights[l_idx]; if (distance(light._position, P) < light.influence_radius_max) { lights_nocull |= 1u << l_idx; @@ -57,6 +66,6 @@ void main(void) out_debug_color = vec4(0.0, 1.0, 0.0, 1.0); } else { - out_debug_color = vec4(heatmap_gradient(lights_count / 16.0), 1.0); + out_debug_color = vec4(heatmap_gradient(lights_count / 4.0), 1.0); } } \ No newline at end of file diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl index a0ea075db22..640ffb4a6a1 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl @@ -8,11 +8,14 @@ uint bit_field_mask(uint bit_width, uint bit_min) return ~mask << bit_min; } -uint zbin_mask(int word_index, int zbin_min, int zbin_max) +uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max) { - int local_min = clamp(zbin_min - word_index * 32, 0, 31); - int mask_width = clamp(zbin_max - zbin_min + 1, 0, 32); - return bit_field_mask(uint(mask_width), uint(local_min)); + uint word_start = word_index * 32u; + uint word_end = word_start + 31u; + uint local_min = max(zbin_min, word_start); + uint local_max = min(zbin_max, word_end); + uint mask_width = local_max - local_min + 1; + return bit_field_mask(mask_width, local_min - word_start); /* Bit offset inside this word: a shift of 32 or more is undefined in GLSL. */ } /* Waiting to implement extensions support. 
We need: @@ -28,39 +31,39 @@ uint zbin_mask(int word_index, int zbin_min, int zbin_max) # define subgroupBroadcastFirst(a) a #endif -#define ITEM_FOREACH_BEGIN(_culling, _tiles_tx, _linearz, _item_index) \ +#define ITEM_FOREACH_BEGIN(_culling, _zbins, _words, _linearz, _item_index) \ { \ - int zbin_index = culling_z_to_zbin(_culling, _linearz); \ - zbin_index = min(max(zbin_index, 0), int(CULLING_ZBIN_COUNT - 1)); \ - uint zbin_data = _culling.zbins[zbin_index / 4][zbin_index % 4]; \ - int min_index = int(zbin_data & uint(CULLING_ITEM_BATCH - 1)); \ - int max_index = int((zbin_data >> 16u) & uint(CULLING_ITEM_BATCH - 1)); \ - /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \ - min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \ - max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \ - int word_min = 0; \ - int word_max = max(0, CULLING_MAX_WORD - 1); \ - word_min = max(min_index / 32, word_min); \ - word_max = min(max_index / 32, word_max); \ - for (int word_index = word_min; word_index <= word_max; word_index++) { \ - /* TODO(fclem) Support bigger max_word with larger texture. 
*/ \ - ivec2 texel = ivec2(gl_FragCoord.xy) / _culling.tile_size; \ - uint word = texelFetch(_tiles_tx, texel, 0)[word_index]; \ - uint mask = zbin_mask(word_index, min_index, max_index); \ - word &= mask; \ + uint batch_count = divide_ceil_u(_culling.visible_count, CULLING_BATCH_SIZE); \ + uvec2 tile_co = uvec2(gl_FragCoord.xy) / _culling.tile_size; \ + uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \ + _culling.tile_word_len; \ + for (uint batch = 0; batch < batch_count; batch++) { \ + int zbin_index = culling_z_to_zbin(_culling, _linearz); \ + zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \ + uint zbin_data = _zbins[zbin_index + batch * CULLING_ZBIN_COUNT]; \ + uint min_index = zbin_data & 0xFFFFu; \ + uint max_index = zbin_data >> 16u; \ /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \ - word = subgroupBroadcastFirst(subgroupOr(word)); \ - /* TODO(fclem) Replace by findLSB on supported hardware. */ \ - for (uint i = 0u; word != 0u; word = word >> 1u, i++) { \ - if ((word & 1u) != 0u) { \ - int _item_index = word_index * 32 + int(i); + min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \ + max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \ + uint word_min = min_index / 32u; \ + uint word_max = max_index / 32u; \ + for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \ + uint word = _words[tile_word_offset + word_idx]; \ + word &= zbin_mask(word_idx, min_index, max_index); \ + /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \ + word = subgroupBroadcastFirst(subgroupOr(word)); \ + while (word != 0u) { \ + uint bit_index = uint(findLSB(word)); \ + word &= ~1u << bit_index; \ + uint _item_index = word_idx * 32u + bit_index; /* No culling. Iterate over all items. 
*/ #define ITEM_FOREACH_BEGIN_NO_CULL(_culling, _item_index) \ { \ { \ { \ - for (uint _item_index = 0u; _item_index < _culling.items_count; _item_index++) { + for (uint _item_index = 0u; _item_index < _culling.visible_count; _item_index++) { #define ITEM_FOREACH_END \ } \ diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl index f128b89e864..27a39817140 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl @@ -7,11 +7,6 @@ /** \name Intersection Tests * \{ */ -struct Sphere { - vec3 position; - float radius; -}; - struct Cone { vec3 direction; float angle_cos; @@ -39,12 +34,12 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone) * by Eric Zhang * https://lxjk.github.io/2018/03/25/Improve-Tile-based-Light-Culling-with-Spherical-sliced-Cone.html */ - float sphere_distance = length(sphere.position); + float sphere_distance = length(sphere.center); float sphere_sin = saturate(sphere.radius / sphere_distance); float sphere_cos = sqrt(1.0 - sphere_sin * sphere_sin); float cone_aperture_sin = sqrt(1.0 - cone.angle_cos * cone.angle_cos); - float cone_sphere_center_cos = dot(sphere.position / sphere_distance, cone.direction); + float cone_sphere_center_cos = dot(sphere.center / sphere_distance, cone.direction); /* cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B). */ float cone_sphere_angle_sum_cos = (sphere.radius > sphere_distance) ? 
-1.0 : @@ -58,22 +53,22 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone) bool culling_sphere_cylinder_isect(Sphere sphere, Cylinder cylinder) { - float distance_squared = len_squared(sphere.position.xy - cylinder.center.xy); + float distance_squared = len_squared(sphere.center.xy - cylinder.center.xy); return (distance_squared < sqr(cylinder.radius + sphere.radius)); } bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum) { - if (dot(vec4(sphere.position, 1.0), frustum.planes[0]) > sphere.radius) { + if (dot(vec4(sphere.center, 1.0), frustum.planes[0]) > sphere.radius) { return false; } - if (dot(vec4(sphere.position, 1.0), frustum.planes[1]) > sphere.radius) { + if (dot(vec4(sphere.center, 1.0), frustum.planes[1]) > sphere.radius) { return false; } - if (dot(vec4(sphere.position, 1.0), frustum.planes[2]) > sphere.radius) { + if (dot(vec4(sphere.center, 1.0), frustum.planes[2]) > sphere.radius) { return false; } - if (dot(vec4(sphere.position, 1.0), frustum.planes[3]) > sphere.radius) { + if (dot(vec4(sphere.center, 1.0), frustum.planes[3]) > sphere.radius) { return false; } return true; @@ -82,7 +77,7 @@ bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum) bool culling_sphere_tile_isect(Sphere sphere, CullingTile tile) { /* Culling in view space for precision and simplicity. */ - sphere.position = transform_point(ViewMatrix, sphere.position); + sphere.center = transform_point(ViewMatrix, sphere.center); bool isect; /* Test tile intersection using bounding cone or bounding cylinder. * This has less false positive cases when the sphere is large. */ @@ -148,14 +143,15 @@ vec2 tile_to_ndc(CullingData culling, vec2 tile_co, vec2 offset) return tile_co * culling.tile_to_uv_fac * 2.0 - 1.0; } -CullingTile culling_tile_get(CullingData culling) +CullingTile culling_tile_get(CullingData culling, uvec2 tile_co) { + vec2 ftile = vec2(tile_co); /* Culling frustum corners for this tile. 
*/ vec3 corners[8]; - corners[0].xy = corners[4].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, 0.5)); - corners[1].xy = corners[5].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, -0.5)); - corners[2].xy = corners[6].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, -0.5)); - corners[3].xy = corners[7].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, 0.5)); + corners[0].xy = corners[4].xy = tile_to_ndc(culling, ftile, vec2(1, 1)); + corners[1].xy = corners[5].xy = tile_to_ndc(culling, ftile, vec2(1, 0)); + corners[2].xy = corners[6].xy = tile_to_ndc(culling, ftile, vec2(0, 0)); + corners[3].xy = corners[7].xy = tile_to_ndc(culling, ftile, vec2(0, 1)); /* The corners depth only matter for precision. Use a mix of not so close to clip plane to * avoid small float imprecision if near clip is low. */ corners[0].z = corners[1].z = corners[2].z = corners[3].z = -0.5; diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl deleted file mode 100644 index c81a94b35f3..00000000000 --- a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl +++ /dev/null @@ -1,51 +0,0 @@ - -/** - * 2D Culling pass for lights. - * We iterate over all items and check if they intersect with the tile frustum. - */ - -#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl) -#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl) - -layout(std140) uniform lights_block -{ - LightData lights[CULLING_ITEM_BATCH]; -}; - -layout(std140) uniform lights_culling_block -{ - CullingData culling; -}; - -in vec4 uvcoordsvar; - -layout(location = 0) out uvec4 out_items_bits; - -void main(void) -{ - CullingTile tile = culling_tile_get(culling); - - out_items_bits = uvec4(0); - ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) { - LightData light = lights[l_idx]; - - bool intersect_tile = true; - switch (light.type) { - case LIGHT_SPOT: - /* TODO cone culling. 
*/ - case LIGHT_RECT: - case LIGHT_ELLIPSE: - case LIGHT_POINT: - Sphere sphere = Sphere(light._position, light.influence_radius_max); - intersect_tile = culling_sphere_tile_isect(sphere, tile); - break; - default: - break; - } - - if (intersect_tile) { - out_items_bits[l_idx / 32u] |= 1u << (l_idx % 32u); - } - } - ITEM_FOREACH_END -} \ No newline at end of file diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl new file mode 100644 index 00000000000..138e54b8bae --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl @@ -0,0 +1,57 @@ + +/** + * Select the visible items inside the active view and put them inside the sorting buffer. + */ + +#pragma BLENDER_REQUIRE(common_debug_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersection_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_shader_shared.hh) + +layout(local_size_x = CULLING_ITEM_BATCH) in; + +layout(std430, binding = 0) readonly restrict buffer lights_buf +{ + LightData lights[]; +}; + +layout(std430, binding = 1) restrict buffer culling_buf +{ + CullingData culling; +}; + +layout(std430, binding = 2) restrict buffer key_buf +{ + uint keys[]; +}; + +void main() +{ + uint l_idx = gl_GlobalInvocationID.x; + if (l_idx >= culling.items_count) { + return; + } + + LightData light = lights[l_idx]; + + Sphere sphere; + switch (light.type) { + case LIGHT_SUN: + sphere = Sphere(cameraPos, ViewFar * 2.0); + break; + case LIGHT_SPOT: + /* TODO cone culling. 
*/ + case LIGHT_RECT: + case LIGHT_ELLIPSE: + case LIGHT_POINT: + sphere = Sphere(light._position, light.influence_radius_max); + break; + } + + if (intersect_view(sphere)) { + uint index = atomicAdd(culling.visible_count, 1); + keys[index] = l_idx; + } +} diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl new file mode 100644 index 00000000000..dfd2c80a45a --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl @@ -0,0 +1,138 @@ + +/** + * Sort the lights by their Z distance to the camera. + * Outputs ordered light buffer and associated zbins. + * We split the work in batches of CULLING_BATCH_SIZE and iterate to cover all zbins. + * One thread processes one light entity. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_shader_shared.hh) + +layout(local_size_x = CULLING_BATCH_SIZE) in; + +layout(std430, binding = 0) readonly restrict buffer lights_buf +{ + LightData lights[]; +}; + +layout(std430, binding = 1) restrict buffer culling_buf +{ + CullingData culling; +}; + +layout(std430, binding = 2) readonly restrict buffer key_buf +{ + uint keys[]; +}; + +layout(std430, binding = 3) writeonly restrict buffer out_zbins_buf +{ + CullingZBin out_zbins[]; +}; + +layout(std430, binding = 4) writeonly restrict buffer out_items_buf +{ + LightData out_lights[]; +}; + +void main() +{ + uint src_index = gl_GlobalInvocationID.x; + bool valid_thread = true; + + if (src_index >= culling.visible_count) { + /* Do not return because we use barriers later on (which need uniform control flow). + * Just process the same last item but avoid insertion. 
*/ + src_index = culling.visible_count - 1; + valid_thread = false; + } + + uint key = keys[src_index]; + LightData light = lights[key]; + + if (!culling.enable_specular) { + light.specular_power = 0.0; + } + + int index = 0; + int contenders = 0; + + /* TODO(fclem): Sun lights are polluting the zbins for no reason. Better bypass culling. */ + vec3 lP = (light.type == LIGHT_SUN) ? cameraPos : light._position; + float radius = (light.type == LIGHT_SUN) ? ViewFar * 2.0 : light.influence_radius_max; + float z_dist = dot(cameraForward, lP) - dot(cameraForward, cameraPos); + + int z_min = clamp(culling_z_to_zbin(culling, z_dist + radius), 0, CULLING_ZBIN_COUNT - 1); + int z_max = clamp(culling_z_to_zbin(culling, z_dist - radius), 0, CULLING_ZBIN_COUNT - 1); + + if (!valid_thread) { + /* Do not register invalid threads. */ + z_max = z_min - 1; + } + + /* Fits the limit of 32KB. */ + shared int zbin_max[CULLING_ZBIN_COUNT]; + shared int zbin_min[CULLING_ZBIN_COUNT]; + /* Compilers do not release shared memory from early declaration. + * So we are forced to reuse the same variables in another form. */ +#define z_dists zbin_max +#define contender_table zbin_min + + /** + * Find how many values are before the local value. + * This finds the first possible destination index. + */ + z_dists[gl_LocalInvocationID.x] = floatBitsToInt(z_dist); + barrier(); + + const uint i_start = gl_WorkGroupID.x * CULLING_BATCH_SIZE; + uint i_max = min(CULLING_BATCH_SIZE, culling.visible_count - i_start); + for (uint i = 0; i < i_max; i++) { + float ref = intBitsToFloat(z_dists[i]); + if (ref > z_dist) { + index++; + } + else if (ref == z_dist) { + contenders++; + } + } + + atomicExchange(contender_table[index], contenders); + barrier(); + + if (valid_thread) { + /** + * For each clashing index (where two lights have exactly the same z distance) + * we use an atomic counter to know how much to offset from the disputed index. 
+ */ + index += atomicAdd(contender_table[index], -1) - 1; + index += int(i_start); + out_lights[index] = light; + } + + const uint iter = uint(CULLING_ZBIN_COUNT / CULLING_BATCH_SIZE); + const uint zbin_local = gl_LocalInvocationID.x * iter; + const uint zbin_global = gl_WorkGroupID.x * CULLING_ZBIN_COUNT + zbin_local; + + for (uint i = 0u, l = zbin_local; i < iter; i++, l++) { + zbin_max[l] = 0x0000; + zbin_min[l] = 0xFFFF; + } + barrier(); + + /* Register to Z bins. */ + for (int z = z_min; z <= z_max; z++) { + atomicMin(zbin_min[z], index); + atomicMax(zbin_max[z], index); + } + barrier(); + + /* Write result to zbins buffer. */ + for (uint i = 0u, g = zbin_global, l = zbin_local; i < iter; i++, g++, l++) { + /* Pack min & max into 1 uint. */ + out_zbins[g] = (uint(zbin_max[l]) << 16u) | uint(zbin_min[l]); + } +} diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl new file mode 100644 index 00000000000..913e094980e --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl @@ -0,0 +1,73 @@ + +/** + * 2D Culling pass for lights. + * We iterate over all items and check if they intersect with the tile frustum. + * Dispatch one thread per word. 
+ */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_shader_shared.hh) +#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl) + +layout(local_size_x = 1024) in; + +layout(std430, binding = 0) readonly restrict buffer lights_buf +{ + LightData lights[]; +}; + +layout(std430, binding = 1) readonly restrict buffer culling_buf +{ + CullingData culling; +}; + +layout(std430, binding = 2) writeonly restrict buffer culling_tile_buf +{ + CullingWord culling_words[]; +}; + +void main(void) +{ + uint word_idx = gl_GlobalInvocationID.x % culling.tile_word_len; + uint tile_idx = gl_GlobalInvocationID.x / culling.tile_word_len; + uvec2 tile_co = uvec2(tile_idx % culling.tile_x_len, tile_idx / culling.tile_x_len); + + if (tile_co.y >= culling.tile_y_len) { + return; + } + + /* TODO(fclem): We could stop the tile at the HiZ depth. */ + CullingTile tile = culling_tile_get(culling, tile_co); + + uint l_idx = word_idx * 32u; + uint l_end = min(l_idx + 32u, culling.visible_count); + uint word = 0u; + + for (; l_idx < l_end; l_idx++) { + LightData light = lights[l_idx]; + + bool intersect_tile = true; /* Conservative default when no case below matches the light type. */ + switch (light.type) { + case LIGHT_SUN: + intersect_tile = true; + break; + case LIGHT_SPOT: + /* TODO cone culling. 
*/ + case LIGHT_RECT: + case LIGHT_ELLIPSE: + case LIGHT_POINT: + Sphere sphere = Sphere(light._position, light.influence_radius_max); + intersect_tile = culling_sphere_tile_isect(sphere, tile); + break; + } + + if (intersect_tile) { + word |= 1u << (l_idx & 0x1Fu); + } + } + + culling_words[gl_GlobalInvocationID.x] = word; +} \ No newline at end of file diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl index 942f75961e9..14e38d6f1d6 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl @@ -17,19 +17,29 @@ layout(std140) uniform sampling_block SamplingData sampling; }; -layout(std140) uniform lights_block +layout(std430, binding = 0) readonly restrict buffer lights_buf { - LightData lights[CULLING_ITEM_BATCH]; + LightData lights[]; }; -layout(std140) uniform lights_culling_block +layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf +{ + CullingZBin lights_zbins[]; +}; + +layout(std430, binding = 2) readonly restrict buffer lights_culling_buf { CullingData light_culling; }; -layout(std140) uniform shadows_block +layout(std430, binding = 3) readonly restrict buffer lights_tile_buf +{ + CullingWord lights_culling_words[]; +}; + +layout(std430, binding = 4) readonly restrict buffer shadows_buf { - ShadowData shadows[CULLING_ITEM_BATCH]; + ShadowData shadows[]; }; layout(std140) uniform grids_block @@ -55,7 +65,6 @@ uniform sampler2D transmit_data_tx; uniform sampler2D reflect_color_tx; uniform sampler2D reflect_normal_tx; uniform sampler1D sss_transmittance_tx; -uniform usampler2D lights_culling_tx; uniform sampler2DArray utility_tx; uniform sampler2D shadow_atlas_tx; uniform usampler2D shadow_tilemaps_tx; diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl 
b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl index 677881abd71..068db3e78fd 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl @@ -15,24 +15,33 @@ #pragma BLENDER_REQUIRE(eevee_volume_eval_lib.glsl) #pragma BLENDER_REQUIRE(eevee_shader_shared.hh) -layout(std140) uniform lights_block +layout(std430, binding = 0) readonly restrict buffer lights_buf { - LightData lights[CULLING_ITEM_BATCH]; + LightData lights[]; }; -layout(std140) uniform lights_culling_block +layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf +{ + CullingZBin lights_zbins[]; +}; + +layout(std430, binding = 2) readonly restrict buffer lights_culling_buf { CullingData light_culling; }; -layout(std140) uniform shadows_block +layout(std430, binding = 3) readonly restrict buffer lights_tile_buf +{ + CullingWord lights_culling_words[]; +}; + +layout(std430, binding = 4) readonly restrict buffer shadows_buf { - ShadowData shadows[CULLING_ITEM_BATCH]; + ShadowData shadows[]; }; uniform sampler2D transparency_data_tx; uniform usampler2D volume_data_tx; -uniform usampler2D lights_culling_tx; uniform sampler2DArray utility_tx; uniform sampler2DShadow shadow_atlas_tx; uniform usampler2D shadow_tilemaps_tx; diff --git a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl index 448e5b54886..d3d5f859174 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl @@ -4,9 +4,10 @@ * A prototype needs to be declared before main in order to use it. 
* * The resources expected to be defined are: - * - light_culling - * - lights_culling_tx * - lights + * - lights_zbins + * - light_culling + * - lights_culling_words * - shadows * - shadow_atlas_tx * - shadow_tilemaps_tx @@ -33,7 +34,7 @@ void light_eval(ClosureDiffuse diffuse, vec4 ltc_mat = utility_tx_sample(uv, UTIL_LTC_MAT_LAYER); float ltc_mag = utility_tx_sample(uv, UTIL_LTC_MAG_LAYER).x; - ITEM_FOREACH_BEGIN (light_culling, lights_culling_tx, vP_z, l_idx) { + ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) { LightData light = lights[l_idx]; vec3 L; float dist; diff --git a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl index 9723d24544c..152bfbeacec 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl @@ -25,19 +25,29 @@ layout(std140) uniform sampling_block SamplingData sampling; }; -layout(std140) uniform lights_block +layout(std430, binding = 0) readonly restrict buffer lights_buf { - LightData lights[CULLING_ITEM_BATCH]; + LightData lights[]; }; -layout(std140) uniform lights_culling_block +layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf +{ + CullingZBin lights_zbins[]; +}; + +layout(std430, binding = 2) readonly restrict buffer lights_culling_buf { CullingData light_culling; }; -layout(std140) uniform shadows_block +layout(std430, binding = 3) readonly restrict buffer lights_tile_buf +{ + CullingWord lights_culling_words[]; +}; + +layout(std430, binding = 4) readonly restrict buffer shadows_buf { - ShadowData shadows[CULLING_ITEM_BATCH]; + ShadowData shadows[]; }; layout(std140) uniform grids_block @@ -75,7 +85,6 @@ layout(std140) uniform hiz_block HiZData hiz; }; -uniform usampler2D lights_culling_tx; uniform sampler2DArray utility_tx; uniform sampler2D shadow_atlas_tx; uniform 
usampler2D shadow_tilemaps_tx; -- cgit v1.2.3