Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClément Foucault <foucault.clem@gmail.com>2021-11-23 23:24:00 +0300
committerClément Foucault <foucault.clem@gmail.com>2021-12-04 00:41:37 +0300
commit1b00ca35758dacf7ece7b95275ea3c41e53bec6c (patch)
treeb587d29f39a3cffd5c88ab86ceea674a1aaeb1b4 /source/blender/draw/engines/eevee/shaders
parent68b0195bf3e27ee687eb87e37257891a3a7f4e78 (diff)
EEVEE: Light: Port light culling to GPU
This removes the light count limit for forward shaded objects. This also provides a more efficient way of computing the culling directly on the GPU. Moreover, this avoids doing multiple lighting passes for high light counts in the deferred pipeline, improving performance.
Diffstat (limited to 'source/blender/draw/engines/eevee/shaders')
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl31
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl61
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl32
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl51
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl57
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl138
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl73
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl21
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl21
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl7
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl21
11 files changed, 383 insertions, 130 deletions
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
index f559788145d..33734324445 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
@@ -2,25 +2,34 @@
/**
* Debug Shader outputting a gradient of orange - white - blue to mark culling hotspots.
* Green pixels are error pixels that are missing lights from the culling pass (i.e: when culling
- * pass is not conservative enough). This shader will only work on the last light batch so remove
- * some lights from the scene you are debugging to have below CULLING_ITEM_BATCH lights.
+ * pass is not conservative enough).
*/
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
{
- CullingData culling;
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
+{
+ CullingData light_culling;
+};
+
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
};
-uniform usampler2D item_culling_tx;
uniform sampler2D depth_tx;
in vec4 uvcoordsvar;
@@ -29,14 +38,14 @@ layout(location = 0) out vec4 out_debug_color;
void main(void)
{
- float depth = textureLod(depth_tx, uvcoordsvar.xy, 0.0).r;
+ float depth = texelFetch(depth_tx, ivec2(gl_FragCoord.xy), 0).r;
float vP_z = get_view_z_from_depth(depth);
vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth);
float lights_count = 0.0;
uint lights_cull = 0u;
- ITEM_FOREACH_BEGIN (culling, item_culling_tx, vP_z, l_idx) {
+ ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) {
LightData light = lights[l_idx];
lights_cull |= 1u << l_idx;
lights_count += 1.0;
@@ -44,7 +53,7 @@ void main(void)
ITEM_FOREACH_END
uint lights_nocull = 0u;
- ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) {
+ ITEM_FOREACH_BEGIN_NO_CULL (light_culling, l_idx) {
LightData light = lights[l_idx];
if (distance(light._position, P) < light.influence_radius_max) {
lights_nocull |= 1u << l_idx;
@@ -57,6 +66,6 @@ void main(void)
out_debug_color = vec4(0.0, 1.0, 0.0, 1.0);
}
else {
- out_debug_color = vec4(heatmap_gradient(lights_count / 16.0), 1.0);
+ out_debug_color = vec4(heatmap_gradient(lights_count / 4.0), 1.0);
}
} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
index a0ea075db22..640ffb4a6a1 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
@@ -8,11 +8,14 @@ uint bit_field_mask(uint bit_width, uint bit_min)
return ~mask << bit_min;
}
-uint zbin_mask(int word_index, int zbin_min, int zbin_max)
+uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max)
{
- int local_min = clamp(zbin_min - word_index * 32, 0, 31);
- int mask_width = clamp(zbin_max - zbin_min + 1, 0, 32);
- return bit_field_mask(uint(mask_width), uint(local_min));
+ uint word_start = word_index * 32u;
+ uint word_end = word_start + 31u;
+ uint local_min = max(zbin_min, word_start);
+ uint local_max = min(zbin_max, word_end);
+ uint mask_width = local_max - local_min + 1;
+ return bit_field_mask(mask_width, local_min);
}
/* Waiting to implement extensions support. We need:
@@ -28,39 +31,39 @@ uint zbin_mask(int word_index, int zbin_min, int zbin_max)
# define subgroupBroadcastFirst(a) a
#endif
-#define ITEM_FOREACH_BEGIN(_culling, _tiles_tx, _linearz, _item_index) \
+#define ITEM_FOREACH_BEGIN(_culling, _zbins, _words, _linearz, _item_index) \
{ \
- int zbin_index = culling_z_to_zbin(_culling, _linearz); \
- zbin_index = min(max(zbin_index, 0), int(CULLING_ZBIN_COUNT - 1)); \
- uint zbin_data = _culling.zbins[zbin_index / 4][zbin_index % 4]; \
- int min_index = int(zbin_data & uint(CULLING_ITEM_BATCH - 1)); \
- int max_index = int((zbin_data >> 16u) & uint(CULLING_ITEM_BATCH - 1)); \
- /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
- min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
- max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
- int word_min = 0; \
- int word_max = max(0, CULLING_MAX_WORD - 1); \
- word_min = max(min_index / 32, word_min); \
- word_max = min(max_index / 32, word_max); \
- for (int word_index = word_min; word_index <= word_max; word_index++) { \
- /* TODO(fclem) Support bigger max_word with larger texture. */ \
- ivec2 texel = ivec2(gl_FragCoord.xy) / _culling.tile_size; \
- uint word = texelFetch(_tiles_tx, texel, 0)[word_index]; \
- uint mask = zbin_mask(word_index, min_index, max_index); \
- word &= mask; \
+ uint batch_count = divide_ceil_u(_culling.visible_count, CULLING_BATCH_SIZE); \
+ uvec2 tile_co = uvec2(gl_FragCoord.xy) / _culling.tile_size; \
+ uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \
+ _culling.tile_word_len; \
+ for (uint batch = 0; batch < batch_count; batch++) { \
+ int zbin_index = culling_z_to_zbin(_culling, _linearz); \
+ zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \
+ uint zbin_data = _zbins[zbin_index + batch * CULLING_ZBIN_COUNT]; \
+ uint min_index = zbin_data & 0xFFFFu; \
+ uint max_index = zbin_data >> 16u; \
/* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
- word = subgroupBroadcastFirst(subgroupOr(word)); \
- /* TODO(fclem) Replace by findLSB on supported hardware. */ \
- for (uint i = 0u; word != 0u; word = word >> 1u, i++) { \
- if ((word & 1u) != 0u) { \
- int _item_index = word_index * 32 + int(i);
+ min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
+ max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
+ uint word_min = min_index / 32u; \
+ uint word_max = max_index / 32u; \
+ for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \
+ uint word = _words[tile_word_offset + word_idx]; \
+ word &= zbin_mask(word_idx, min_index, max_index); \
+ /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
+ word = subgroupBroadcastFirst(subgroupOr(word)); \
+ while (word != 0u) { \
+ uint bit_index = uint(findLSB(word)); \
+ word &= ~1u << bit_index; \
+ uint _item_index = word_idx * 32u + bit_index;
/* No culling. Iterate over all items. */
#define ITEM_FOREACH_BEGIN_NO_CULL(_culling, _item_index) \
{ \
{ \
{ \
- for (uint _item_index = 0u; _item_index < _culling.items_count; _item_index++) {
+ for (uint _item_index = 0u; _item_index < _culling.visible_count; _item_index++) {
#define ITEM_FOREACH_END \
} \
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
index f128b89e864..27a39817140 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
@@ -7,11 +7,6 @@
/** \name Intersection Tests
* \{ */
-struct Sphere {
- vec3 position;
- float radius;
-};
-
struct Cone {
vec3 direction;
float angle_cos;
@@ -39,12 +34,12 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone)
* by Eric Zhang
* https://lxjk.github.io/2018/03/25/Improve-Tile-based-Light-Culling-with-Spherical-sliced-Cone.html
*/
- float sphere_distance = length(sphere.position);
+ float sphere_distance = length(sphere.center);
float sphere_sin = saturate(sphere.radius / sphere_distance);
float sphere_cos = sqrt(1.0 - sphere_sin * sphere_sin);
float cone_aperture_sin = sqrt(1.0 - cone.angle_cos * cone.angle_cos);
- float cone_sphere_center_cos = dot(sphere.position / sphere_distance, cone.direction);
+ float cone_sphere_center_cos = dot(sphere.center / sphere_distance, cone.direction);
/* cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B). */
float cone_sphere_angle_sum_cos = (sphere.radius > sphere_distance) ?
-1.0 :
@@ -58,22 +53,22 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone)
bool culling_sphere_cylinder_isect(Sphere sphere, Cylinder cylinder)
{
- float distance_squared = len_squared(sphere.position.xy - cylinder.center.xy);
+ float distance_squared = len_squared(sphere.center.xy - cylinder.center.xy);
return (distance_squared < sqr(cylinder.radius + sphere.radius));
}
bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum)
{
- if (dot(vec4(sphere.position, 1.0), frustum.planes[0]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[0]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[1]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[1]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[2]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[2]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[3]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[3]) > sphere.radius) {
return false;
}
return true;
@@ -82,7 +77,7 @@ bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum)
bool culling_sphere_tile_isect(Sphere sphere, CullingTile tile)
{
/* Culling in view space for precision and simplicity. */
- sphere.position = transform_point(ViewMatrix, sphere.position);
+ sphere.center = transform_point(ViewMatrix, sphere.center);
bool isect;
/* Test tile intersection using bounding cone or bounding cylinder.
* This has less false positive cases when the sphere is large. */
@@ -148,14 +143,15 @@ vec2 tile_to_ndc(CullingData culling, vec2 tile_co, vec2 offset)
return tile_co * culling.tile_to_uv_fac * 2.0 - 1.0;
}
-CullingTile culling_tile_get(CullingData culling)
+CullingTile culling_tile_get(CullingData culling, uvec2 tile_co)
{
+ vec2 ftile = vec2(tile_co);
/* Culling frustum corners for this tile. */
vec3 corners[8];
- corners[0].xy = corners[4].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, 0.5));
- corners[1].xy = corners[5].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, -0.5));
- corners[2].xy = corners[6].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, -0.5));
- corners[3].xy = corners[7].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, 0.5));
+ corners[0].xy = corners[4].xy = tile_to_ndc(culling, ftile, vec2(1, 1));
+ corners[1].xy = corners[5].xy = tile_to_ndc(culling, ftile, vec2(1, 0));
+ corners[2].xy = corners[6].xy = tile_to_ndc(culling, ftile, vec2(0, 0));
+ corners[3].xy = corners[7].xy = tile_to_ndc(culling, ftile, vec2(0, 1));
/* The corners depth only matter for precision. Use a mix of not so close to clip plane to
* avoid small float imprecision if near clip is low. */
corners[0].z = corners[1].z = corners[2].z = corners[3].z = -0.5;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl
deleted file mode 100644
index c81a94b35f3..00000000000
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl
+++ /dev/null
@@ -1,51 +0,0 @@
-
-/**
- * 2D Culling pass for lights.
- * We iterate over all items and check if they intersect with the tile frustum.
- */
-
-#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl)
-#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
-
-layout(std140) uniform lights_block
-{
- LightData lights[CULLING_ITEM_BATCH];
-};
-
-layout(std140) uniform lights_culling_block
-{
- CullingData culling;
-};
-
-in vec4 uvcoordsvar;
-
-layout(location = 0) out uvec4 out_items_bits;
-
-void main(void)
-{
- CullingTile tile = culling_tile_get(culling);
-
- out_items_bits = uvec4(0);
- ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) {
- LightData light = lights[l_idx];
-
- bool intersect_tile = true;
- switch (light.type) {
- case LIGHT_SPOT:
- /* TODO cone culling. */
- case LIGHT_RECT:
- case LIGHT_ELLIPSE:
- case LIGHT_POINT:
- Sphere sphere = Sphere(light._position, light.influence_radius_max);
- intersect_tile = culling_sphere_tile_isect(sphere, tile);
- break;
- default:
- break;
- }
-
- if (intersect_tile) {
- out_items_bits[l_idx / 32u] |= 1u << (l_idx % 32u);
- }
- }
- ITEM_FOREACH_END
-} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl
new file mode 100644
index 00000000000..138e54b8bae
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl
@@ -0,0 +1,57 @@
+
+/**
+ * Select the visible items inside the active view and put them inside the sorting buffer.
+ */
+
+#pragma BLENDER_REQUIRE(common_debug_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(common_intersection_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+
+layout(local_size_x = CULLING_ITEM_BATCH) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) restrict buffer key_buf
+{
+ uint keys[];
+};
+
+void main()
+{
+ uint l_idx = gl_GlobalInvocationID.x;
+ if (l_idx >= culling.items_count) {
+ return;
+ }
+
+ LightData light = lights[l_idx];
+
+ Sphere sphere;
+ switch (light.type) {
+ case LIGHT_SUN:
+ sphere = Sphere(cameraPos, ViewFar * 2.0);
+ break;
+ case LIGHT_SPOT:
+ /* TODO cone culling. */
+ case LIGHT_RECT:
+ case LIGHT_ELLIPSE:
+ case LIGHT_POINT:
+ sphere = Sphere(light._position, light.influence_radius_max);
+ break;
+ }
+
+ if (intersect_view(sphere)) {
+ uint index = atomicAdd(culling.visible_count, 1);
+ keys[index] = l_idx;
+ }
+}
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl
new file mode 100644
index 00000000000..dfd2c80a45a
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl
@@ -0,0 +1,138 @@
+
+/**
+ * Sort the lights by their Z distance to the camera.
+ * Outputs ordered light buffer and associated zbins.
+ * We split the work in groups of CULLING_BATCH_SIZE threads and iterate to cover all zbins.
+ * One thread processes one Light entity.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+
+layout(local_size_x = CULLING_BATCH_SIZE) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) readonly restrict buffer key_buf
+{
+ uint keys[];
+};
+
+layout(std430, binding = 3) writeonly restrict buffer out_zbins_buf
+{
+ CullingZBin out_zbins[];
+};
+
+layout(std430, binding = 4) writeonly restrict buffer out_items_buf
+{
+ LightData out_lights[];
+};
+
+void main()
+{
+ uint src_index = gl_GlobalInvocationID.x;
+ bool valid_thread = true;
+
+ if (src_index >= culling.visible_count) {
+ /* Do not return because we use barriers later on (which need uniform control flow).
+ * Just process the same last item but avoid insertion. */
+ src_index = culling.visible_count - 1;
+ valid_thread = false;
+ }
+
+ uint key = keys[src_index];
+ LightData light = lights[key];
+
+ if (!culling.enable_specular) {
+ light.specular_power = 0.0;
+ }
+
+ int index = 0;
+ int contenders = 0;
+
+ /* TODO(fclem): Sun lights are polluting the zbins for no reason. Better bypass culling. */
+ vec3 lP = (light.type == LIGHT_SUN) ? cameraPos : light._position;
+ float radius = (light.type == LIGHT_SUN) ? ViewFar * 2.0 : light.influence_radius_max;
+ float z_dist = dot(cameraForward, lP) - dot(cameraForward, cameraPos);
+
+ int z_min = clamp(culling_z_to_zbin(culling, z_dist + radius), 0, CULLING_ZBIN_COUNT - 1);
+ int z_max = clamp(culling_z_to_zbin(culling, z_dist - radius), 0, CULLING_ZBIN_COUNT - 1);
+
+ if (!valid_thread) {
+ /* Do not register invalid threads. */
+ z_max = z_min - 1;
+ }
+
+ /* Fits the limit of 32KB. */
+ shared int zbin_max[CULLING_ZBIN_COUNT];
+ shared int zbin_min[CULLING_ZBIN_COUNT];
+ /* Compilers do not release shared memory from early declaration.
+ * So we are forced to reuse the same variables in another form. */
+#define z_dists zbin_max
+#define contender_table zbin_min
+
+ /**
+ * Find how many values are before the local value.
+ * This finds the first possible destination index.
+ */
+ z_dists[gl_LocalInvocationID.x] = floatBitsToInt(z_dist);
+ barrier();
+
+ const uint i_start = gl_WorkGroupID.x * CULLING_BATCH_SIZE;
+ uint i_max = min(CULLING_BATCH_SIZE, culling.visible_count - i_start);
+ for (uint i = 0; i < i_max; i++) {
+ float ref = intBitsToFloat(z_dists[i]);
+ if (ref > z_dist) {
+ index++;
+ }
+ else if (ref == z_dist) {
+ contenders++;
+ }
+ }
+
+ atomicExchange(contender_table[index], contenders);
+ barrier();
+
+ if (valid_thread) {
+ /**
+ * For each clashing index (where two lights have exactly the same z distances)
+ * we use an atomic counter to know how much to offset from the disputed index.
+ */
+ index += atomicAdd(contender_table[index], -1) - 1;
+ index += int(i_start);
+ out_lights[index] = light;
+ }
+
+ const uint iter = uint(CULLING_ZBIN_COUNT / CULLING_BATCH_SIZE);
+ const uint zbin_local = gl_LocalInvocationID.x * iter;
+ const uint zbin_global = gl_WorkGroupID.x * CULLING_ZBIN_COUNT + zbin_local;
+
+ for (uint i = 0u, l = zbin_local; i < iter; i++, l++) {
+ zbin_max[l] = 0x0000;
+ zbin_min[l] = 0xFFFF;
+ }
+ barrier();
+
+ /* Register to Z bins. */
+ for (int z = z_min; z <= z_max; z++) {
+ atomicMin(zbin_min[z], index);
+ atomicMax(zbin_max[z], index);
+ }
+ barrier();
+
+ /* Write result to zbins buffer. */
+ for (uint i = 0u, g = zbin_global, l = zbin_local; i < iter; i++, g++, l++) {
+ /* Pack min & max into 1 uint. */
+ out_zbins[g] = (uint(zbin_max[l]) << 16u) | uint(zbin_min[l]);
+ }
+}
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl
new file mode 100644
index 00000000000..913e094980e
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl
@@ -0,0 +1,73 @@
+
+/**
+ * 2D Culling pass for lights.
+ * We iterate over all items and check if they intersect with the tile frustum.
+ * Dispatch one thread per word.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
+
+layout(local_size_x = 1024) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) readonly restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) writeonly restrict buffer culling_tile_buf
+{
+ CullingWord culling_words[];
+};
+
+void main(void)
+{
+ uint word_idx = gl_GlobalInvocationID.x % culling.tile_word_len;
+ uint tile_idx = gl_GlobalInvocationID.x / culling.tile_word_len;
+ uvec2 tile_co = uvec2(tile_idx % culling.tile_x_len, tile_idx / culling.tile_x_len);
+
+ if (tile_co.y >= culling.tile_y_len) {
+ return;
+ }
+
+ /* TODO(fclem): We could stop the tile at the HiZ depth. */
+ CullingTile tile = culling_tile_get(culling, tile_co);
+
+ uint l_idx = word_idx * 32u;
+ uint l_end = min(l_idx + 32u, culling.visible_count);
+ uint word = 0u;
+
+ for (; l_idx < l_end; l_idx++) {
+ LightData light = lights[l_idx];
+
+ bool intersect_tile;
+ switch (light.type) {
+ case LIGHT_SUN:
+ intersect_tile = true;
+ break;
+ case LIGHT_SPOT:
+ /* TODO cone culling. */
+ case LIGHT_RECT:
+ case LIGHT_ELLIPSE:
+ case LIGHT_POINT:
+ Sphere sphere = Sphere(light._position, light.influence_radius_max);
+ intersect_tile = culling_sphere_tile_isect(sphere, tile);
+ break;
+ }
+
+ if (intersect_tile) {
+ word |= 1u << (l_idx & 0x1Fu);
+ }
+ }
+
+ culling_words[gl_GlobalInvocationID.x] = word;
+} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
index 942f75961e9..14e38d6f1d6 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
@@ -17,19 +17,29 @@ layout(std140) uniform sampling_block
SamplingData sampling;
};
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
layout(std140) uniform grids_block
@@ -55,7 +65,6 @@ uniform sampler2D transmit_data_tx;
uniform sampler2D reflect_color_tx;
uniform sampler2D reflect_normal_tx;
uniform sampler1D sss_transmittance_tx;
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2D shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
index 677881abd71..068db3e78fd 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
@@ -15,24 +15,33 @@
#pragma BLENDER_REQUIRE(eevee_volume_eval_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
uniform sampler2D transparency_data_tx;
uniform usampler2D volume_data_tx;
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2DShadow shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
index 448e5b54886..d3d5f859174 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
@@ -4,9 +4,10 @@
* A prototype needs to be declared before main in order to use it.
*
* The resources expected to be defined are:
- * - light_culling
- * - lights_culling_tx
* - lights
+ * - lights_zbins
+ * - light_culling
+ * - lights_culling_words
* - shadows
* - shadow_atlas_tx
* - shadow_tilemaps_tx
@@ -33,7 +34,7 @@ void light_eval(ClosureDiffuse diffuse,
vec4 ltc_mat = utility_tx_sample(uv, UTIL_LTC_MAT_LAYER);
float ltc_mag = utility_tx_sample(uv, UTIL_LTC_MAG_LAYER).x;
- ITEM_FOREACH_BEGIN (light_culling, lights_culling_tx, vP_z, l_idx) {
+ ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) {
LightData light = lights[l_idx];
vec3 L;
float dist;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
index 9723d24544c..152bfbeacec 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
@@ -25,19 +25,29 @@ layout(std140) uniform sampling_block
SamplingData sampling;
};
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
layout(std140) uniform grids_block
@@ -75,7 +85,6 @@ layout(std140) uniform hiz_block
HiZData hiz;
};
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2D shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;