Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'source/blender/draw/engines/eevee/shaders')
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl31
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl61
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl32
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl51
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl57
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl138
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl73
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl21
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl21
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl7
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl21
11 files changed, 383 insertions, 130 deletions
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
index f559788145d..33734324445 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
@@ -2,25 +2,34 @@
/**
* Debug Shader outputting a gradient of orange - white - blue to mark culling hotspots.
* Green pixels are error pixels that are missing lights from the culling pass (i.e: when culling
- * pass is not conservative enough). This shader will only work on the last light batch so remove
- * some lights from the scene you are debugging to have below CULLING_ITEM_BATCH lights.
+ * pass is not conservative enough).
*/
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
{
- CullingData culling;
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
+{
+ CullingData light_culling;
+};
+
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
};
-uniform usampler2D item_culling_tx;
uniform sampler2D depth_tx;
in vec4 uvcoordsvar;
@@ -29,14 +38,14 @@ layout(location = 0) out vec4 out_debug_color;
void main(void)
{
- float depth = textureLod(depth_tx, uvcoordsvar.xy, 0.0).r;
+ float depth = texelFetch(depth_tx, ivec2(gl_FragCoord.xy), 0).r;
float vP_z = get_view_z_from_depth(depth);
vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth);
float lights_count = 0.0;
uint lights_cull = 0u;
- ITEM_FOREACH_BEGIN (culling, item_culling_tx, vP_z, l_idx) {
+ ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) {
LightData light = lights[l_idx];
lights_cull |= 1u << l_idx;
lights_count += 1.0;
@@ -44,7 +53,7 @@ void main(void)
ITEM_FOREACH_END
uint lights_nocull = 0u;
- ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) {
+ ITEM_FOREACH_BEGIN_NO_CULL (light_culling, l_idx) {
LightData light = lights[l_idx];
if (distance(light._position, P) < light.influence_radius_max) {
lights_nocull |= 1u << l_idx;
@@ -57,6 +66,6 @@ void main(void)
out_debug_color = vec4(0.0, 1.0, 0.0, 1.0);
}
else {
- out_debug_color = vec4(heatmap_gradient(lights_count / 16.0), 1.0);
+ out_debug_color = vec4(heatmap_gradient(lights_count / 4.0), 1.0);
}
} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
index a0ea075db22..640ffb4a6a1 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
@@ -8,11 +8,14 @@ uint bit_field_mask(uint bit_width, uint bit_min)
return ~mask << bit_min;
}
-uint zbin_mask(int word_index, int zbin_min, int zbin_max)
+uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max) /* Mask of the bits of 32-bit word `word_index` that cover items [zbin_min..zbin_max]. */
{
- int local_min = clamp(zbin_min - word_index * 32, 0, 31);
- int mask_width = clamp(zbin_max - zbin_min + 1, 0, 32);
- return bit_field_mask(uint(mask_width), uint(local_min));
+ uint word_start = word_index * 32u; /* First item index stored in this word. */
+ uint word_end = word_start + 31u; /* Last item index stored in this word. */
+ uint local_min = max(zbin_min, word_start); /* Range clipped to this word, still in absolute item indices. */
+ uint local_max = min(zbin_max, word_end);
+ uint mask_width = local_max - local_min + 1u; /* Assumes the range overlaps this word (callers iterate word_min..word_max), otherwise this wraps. */
+ return bit_field_mask(mask_width, local_min - word_start); /* Shift must be word-relative: shifting a uint by >= 32 is undefined in GLSL. */
}
/* Waiting to implement extensions support. We need:
@@ -28,39 +31,39 @@ uint zbin_mask(int word_index, int zbin_min, int zbin_max)
# define subgroupBroadcastFirst(a) a
#endif
-#define ITEM_FOREACH_BEGIN(_culling, _tiles_tx, _linearz, _item_index) \
+#define ITEM_FOREACH_BEGIN(_culling, _zbins, _words, _linearz, _item_index) \
{ \
- int zbin_index = culling_z_to_zbin(_culling, _linearz); \
- zbin_index = min(max(zbin_index, 0), int(CULLING_ZBIN_COUNT - 1)); \
- uint zbin_data = _culling.zbins[zbin_index / 4][zbin_index % 4]; \
- int min_index = int(zbin_data & uint(CULLING_ITEM_BATCH - 1)); \
- int max_index = int((zbin_data >> 16u) & uint(CULLING_ITEM_BATCH - 1)); \
- /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
- min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
- max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
- int word_min = 0; \
- int word_max = max(0, CULLING_MAX_WORD - 1); \
- word_min = max(min_index / 32, word_min); \
- word_max = min(max_index / 32, word_max); \
- for (int word_index = word_min; word_index <= word_max; word_index++) { \
- /* TODO(fclem) Support bigger max_word with larger texture. */ \
- ivec2 texel = ivec2(gl_FragCoord.xy) / _culling.tile_size; \
- uint word = texelFetch(_tiles_tx, texel, 0)[word_index]; \
- uint mask = zbin_mask(word_index, min_index, max_index); \
- word &= mask; \
+ uint batch_count = divide_ceil_u(_culling.visible_count, CULLING_BATCH_SIZE); /* Each batch has its own CULLING_ZBIN_COUNT zbins. */ \
+ uvec2 tile_co = uvec2(gl_FragCoord.xy) / _culling.tile_size; /* Screen tile containing this fragment. */ \
+ uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \
+ _culling.tile_word_len; /* Index of the first word of that tile in `_words`. */ \
+ for (uint batch = 0; batch < batch_count; batch++) { \
+ int zbin_index = culling_z_to_zbin(_culling, _linearz); \
+ zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \
+ uint zbin_data = _zbins[zbin_index + batch * CULLING_ZBIN_COUNT]; \
+ uint min_index = zbin_data & 0xFFFFu; /* Low 16 bits: first item index of the bin. */ \
+ uint max_index = zbin_data >> 16u; /* High 16 bits: last item index of the bin. */ \
/* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
- word = subgroupBroadcastFirst(subgroupOr(word)); \
- /* TODO(fclem) Replace by findLSB on supported hardware. */ \
- for (uint i = 0u; word != 0u; word = word >> 1u, i++) { \
- if ((word & 1u) != 0u) { \
- int _item_index = word_index * 32 + int(i);
+ min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
+ max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
+ uint word_min = min_index / 32u; \
+ uint word_max = max_index / 32u; \
+ for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \
+ uint word = _words[tile_word_offset + word_idx]; /* 32 item bits of this tile. */ \
+ word &= zbin_mask(word_idx, min_index, max_index); /* Keep only the items inside the zbin range. */ \
+ /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
+ word = subgroupBroadcastFirst(subgroupOr(word)); \
+ while (word != 0u) { \
+ uint bit_index = uint(findLSB(word)); /* Lowest set bit is the next item to visit. */ \
+ word &= ~1u << bit_index; /* Clears that bit; the bits below it are already zero. */ \
+ uint _item_index = word_idx * 32u + bit_index;
/* No culling. Iterate over all items. */
#define ITEM_FOREACH_BEGIN_NO_CULL(_culling, _item_index) \
{ \
{ \
{ \
- for (uint _item_index = 0u; _item_index < _culling.items_count; _item_index++) {
+ for (uint _item_index = 0u; _item_index < _culling.visible_count; _item_index++) {
#define ITEM_FOREACH_END \
} \
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
index f128b89e864..27a39817140 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
@@ -7,11 +7,6 @@
/** \name Intersection Tests
* \{ */
-struct Sphere {
- vec3 position;
- float radius;
-};
-
struct Cone {
vec3 direction;
float angle_cos;
@@ -39,12 +34,12 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone)
* by Eric Zhang
* https://lxjk.github.io/2018/03/25/Improve-Tile-based-Light-Culling-with-Spherical-sliced-Cone.html
*/
- float sphere_distance = length(sphere.position);
+ float sphere_distance = length(sphere.center);
float sphere_sin = saturate(sphere.radius / sphere_distance);
float sphere_cos = sqrt(1.0 - sphere_sin * sphere_sin);
float cone_aperture_sin = sqrt(1.0 - cone.angle_cos * cone.angle_cos);
- float cone_sphere_center_cos = dot(sphere.position / sphere_distance, cone.direction);
+ float cone_sphere_center_cos = dot(sphere.center / sphere_distance, cone.direction);
/* cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B). */
float cone_sphere_angle_sum_cos = (sphere.radius > sphere_distance) ?
-1.0 :
@@ -58,22 +53,22 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone)
bool culling_sphere_cylinder_isect(Sphere sphere, Cylinder cylinder)
{
- float distance_squared = len_squared(sphere.position.xy - cylinder.center.xy);
+ float distance_squared = len_squared(sphere.center.xy - cylinder.center.xy); /* Only XY is compared; Z is ignored by this test. */
return (distance_squared < sqr(cylinder.radius + sphere.radius));
}
bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum)
{
- if (dot(vec4(sphere.position, 1.0), frustum.planes[0]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[0]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[1]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[1]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[2]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[2]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[3]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[3]) > sphere.radius) {
return false;
}
return true;
@@ -82,7 +77,7 @@ bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum)
bool culling_sphere_tile_isect(Sphere sphere, CullingTile tile)
{
/* Culling in view space for precision and simplicity. */
- sphere.position = transform_point(ViewMatrix, sphere.position);
+ sphere.center = transform_point(ViewMatrix, sphere.center);
bool isect;
/* Test tile intersection using bounding cone or bounding cylinder.
* This has less false positive cases when the sphere is large. */
@@ -148,14 +143,15 @@ vec2 tile_to_ndc(CullingData culling, vec2 tile_co, vec2 offset)
return tile_co * culling.tile_to_uv_fac * 2.0 - 1.0;
}
-CullingTile culling_tile_get(CullingData culling)
+CullingTile culling_tile_get(CullingData culling, uvec2 tile_co)
{
+ vec2 ftile = vec2(tile_co);
/* Culling frustum corners for this tile. */
vec3 corners[8];
- corners[0].xy = corners[4].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, 0.5));
- corners[1].xy = corners[5].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, -0.5));
- corners[2].xy = corners[6].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, -0.5));
- corners[3].xy = corners[7].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, 0.5));
+ corners[0].xy = corners[4].xy = tile_to_ndc(culling, ftile, vec2(1, 1));
+ corners[1].xy = corners[5].xy = tile_to_ndc(culling, ftile, vec2(1, 0));
+ corners[2].xy = corners[6].xy = tile_to_ndc(culling, ftile, vec2(0, 0));
+ corners[3].xy = corners[7].xy = tile_to_ndc(culling, ftile, vec2(0, 1));
/* The corners depth only matter for precision. Use a mix of not so close to clip plane to
* avoid small float imprecision if near clip is low. */
corners[0].z = corners[1].z = corners[2].z = corners[3].z = -0.5;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl
deleted file mode 100644
index c81a94b35f3..00000000000
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl
+++ /dev/null
@@ -1,51 +0,0 @@
-
-/**
- * 2D Culling pass for lights.
- * We iterate over all items and check if they intersect with the tile frustum.
- */
-
-#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl)
-#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
-
-layout(std140) uniform lights_block
-{
- LightData lights[CULLING_ITEM_BATCH];
-};
-
-layout(std140) uniform lights_culling_block
-{
- CullingData culling;
-};
-
-in vec4 uvcoordsvar;
-
-layout(location = 0) out uvec4 out_items_bits;
-
-void main(void)
-{
- CullingTile tile = culling_tile_get(culling);
-
- out_items_bits = uvec4(0);
- ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) {
- LightData light = lights[l_idx];
-
- bool intersect_tile = true;
- switch (light.type) {
- case LIGHT_SPOT:
- /* TODO cone culling. */
- case LIGHT_RECT:
- case LIGHT_ELLIPSE:
- case LIGHT_POINT:
- Sphere sphere = Sphere(light._position, light.influence_radius_max);
- intersect_tile = culling_sphere_tile_isect(sphere, tile);
- break;
- default:
- break;
- }
-
- if (intersect_tile) {
- out_items_bits[l_idx / 32u] |= 1u << (l_idx % 32u);
- }
- }
- ITEM_FOREACH_END
-} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl
new file mode 100644
index 00000000000..138e54b8bae
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl
@@ -0,0 +1,57 @@
+
+/**
+ * Select the visible items inside the active view and put them inside the sorting buffer.
+ */
+
+#pragma BLENDER_REQUIRE(common_debug_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(common_intersection_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+
+layout(local_size_x = CULLING_ITEM_BATCH) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) restrict buffer key_buf
+{
+ uint keys[];
+};
+
+void main() /* One invocation per input light: append the index of every light that intersects the view to `keys`. */
+{
+ uint l_idx = gl_GlobalInvocationID.x;
+ if (l_idx >= culling.items_count) { /* Surplus invocations past the item count have nothing to do. */
+ return;
+ }
+
+ LightData light = lights[l_idx];
+
+ Sphere sphere; /* NOTE(review): stays unset if `light.type` matches no case below (there is no `default`) - confirm every type is listed. */
+ switch (light.type) {
+ case LIGHT_SUN:
+ sphere = Sphere(cameraPos, ViewFar * 2.0); /* Suns use a sphere centered on the camera with radius 2 * ViewFar. */
+ break;
+ case LIGHT_SPOT:
+ /* TODO cone culling. Until then spot lights fall through to the sphere used by the cases below. */
+ case LIGHT_RECT:
+ case LIGHT_ELLIPSE:
+ case LIGHT_POINT:
+ sphere = Sphere(light._position, light.influence_radius_max);
+ break;
+ }
+
+ if (intersect_view(sphere)) {
+ uint index = atomicAdd(culling.visible_count, 1); /* atomicAdd returns the counter before the increment, used here as this light's slot. */
+ keys[index] = l_idx;
+ }
+}
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl
new file mode 100644
index 00000000000..dfd2c80a45a
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl
@@ -0,0 +1,138 @@
+
+/**
+ * Sort the lights by their Z distance to the camera.
+ * Outputs ordered light buffer and associated zbins.
+ * We split the work in CULLING_BATCH_SIZE and iterate to cover all zbins.
+ * One thread processes one Light entity.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+
+layout(local_size_x = CULLING_BATCH_SIZE) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) readonly restrict buffer key_buf
+{
+ uint keys[];
+};
+
+layout(std430, binding = 3) writeonly restrict buffer out_zbins_buf
+{
+ CullingZBin out_zbins[];
+};
+
+layout(std430, binding = 4) writeonly restrict buffer out_items_buf
+{
+ LightData out_lights[];
+};
+
+void main() /* One invocation per visible light: write it to its sorted slot in `out_lights`, then build this work-group's zbins. */
+{
+ uint src_index = gl_GlobalInvocationID.x;
+ bool valid_thread = true;
+
+ if (src_index >= culling.visible_count) {
+ /* Do not return because we use barriers later on (which need uniform control flow).
+ * Just process the same last item but avoid insertion. */
+ src_index = culling.visible_count - 1; /* NOTE(review): wraps if visible_count is an unsigned 0 - confirm the dispatch is skipped when nothing is visible. */
+ valid_thread = false;
+ }
+
+ uint key = keys[src_index]; /* Index of the light inside `lights`. */
+ LightData light = lights[key];
+
+ if (!culling.enable_specular) {
+ light.specular_power = 0.0; /* Applied to the local copy that gets written to `out_lights`. */
+ }
+
+ int index = 0; /* Counts batch entries with a strictly greater z_dist. */
+ int contenders = 0; /* Counts batch entries with exactly the same z_dist. */
+
+ /* TODO(fclem): Sun lights are polluting the zbins for no reason. Better bypass culling. */
+ vec3 lP = (light.type == LIGHT_SUN) ? cameraPos : light._position;
+ float radius = (light.type == LIGHT_SUN) ? ViewFar * 2.0 : light.influence_radius_max;
+ float z_dist = dot(cameraForward, lP) - dot(cameraForward, cameraPos); /* Offset from the camera projected on cameraForward. */
+
+ int z_min = clamp(culling_z_to_zbin(culling, z_dist + radius), 0, CULLING_ZBIN_COUNT - 1); /* NOTE(review): presumably culling_z_to_zbin() maps larger z to lower bins, so that z_min <= z_max - confirm. */
+ int z_max = clamp(culling_z_to_zbin(culling, z_dist - radius), 0, CULLING_ZBIN_COUNT - 1);
+
+ if (!valid_thread) {
+ /* Do not register invalid threads: the empty range makes the registration loop below run zero times. */
+ z_max = z_min - 1;
+ }
+
+ /* Fits the limit of 32KB. */
+ shared int zbin_max[CULLING_ZBIN_COUNT];
+ shared int zbin_min[CULLING_ZBIN_COUNT];
+ /* Compilers do not release shared memory from early declaration.
+ * So we are forced to reuse the same variables in another form. */
+#define z_dists zbin_max
+#define contender_table zbin_min
+
+ /**
+ * Find how many values are before the local value.
+ * This finds the first possible destination index.
+ */
+ z_dists[gl_LocalInvocationID.x] = floatBitsToInt(z_dist); /* Float bits stored as int because the shared array is int. */
+ barrier();
+
+ const uint i_start = gl_WorkGroupID.x * CULLING_BATCH_SIZE; /* Global index of the first item of this work-group. */
+ uint i_max = min(CULLING_BATCH_SIZE, culling.visible_count - i_start); /* Only compare against entries backed by a visible item. */
+ for (uint i = 0; i < i_max; i++) {
+ float ref = intBitsToFloat(z_dists[i]);
+ if (ref > z_dist) {
+ index++;
+ }
+ else if (ref == z_dist) {
+ contenders++;
+ }
+ }
+
+ atomicExchange(contender_table[index], contenders); /* Publish how many items dispute this slot. */
+ barrier();
+
+ if (valid_thread) {
+ /**
+ * For each clashing index (where two lights have exactly the same z distances)
+ * we use an atomic counter to know how much to offset from the disputed index.
+ */
+ index += atomicAdd(contender_table[index], -1) - 1; /* atomicAdd returns the count before the decrement. */
+ index += int(i_start); /* From batch-local slot to global output index. */
+ out_lights[index] = light;
+ }
+
+ const uint iter = uint(CULLING_ZBIN_COUNT / CULLING_BATCH_SIZE); /* Number of zbins each invocation resets and writes out. */
+ const uint zbin_local = gl_LocalInvocationID.x * iter;
+ const uint zbin_global = gl_WorkGroupID.x * CULLING_ZBIN_COUNT + zbin_local; /* Each work-group owns CULLING_ZBIN_COUNT entries of out_zbins. */
+
+ for (uint i = 0u, l = zbin_local; i < iter; i++, l++) { /* NOTE(review): no barrier separates this reset from the atomicAdd on contender_table (alias of zbin_min) above - confirm a slower thread cannot read a reset entry. */
+ zbin_max[l] = 0x0000; /* Start value for the atomicMax below. */
+ zbin_min[l] = 0xFFFF; /* Start value for the atomicMin below. */
+ }
+ barrier();
+
+ /* Register to Z bins. */
+ for (int z = z_min; z <= z_max; z++) {
+ atomicMin(zbin_min[z], index);
+ atomicMax(zbin_max[z], index);
+ }
+ barrier();
+
+ /* Write result to zbins buffer. */
+ for (uint i = 0u, g = zbin_global, l = zbin_local; i < iter; i++, g++, l++) {
+ /* Pack min & max into 1 uint: max in the high 16 bits, min in the low 16 bits. */
+ out_zbins[g] = (uint(zbin_max[l]) << 16u) | uint(zbin_min[l]);
+ }
+}
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl
new file mode 100644
index 00000000000..913e094980e
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl
@@ -0,0 +1,73 @@
+
+/**
+ * 2D Culling pass for lights.
+ * We iterate over all items and check if they intersect with the tile frustum.
+ * Dispatch one thread per word.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
+
+layout(local_size_x = 1024) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) readonly restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) writeonly restrict buffer culling_tile_buf
+{
+ CullingWord culling_words[];
+};
+
+void main(void) /* One invocation per (tile, word) pair: compute one 32-bit word of light intersection bits for one tile. */
+{
+ uint word_idx = gl_GlobalInvocationID.x % culling.tile_word_len; /* Word inside the tile. */
+ uint tile_idx = gl_GlobalInvocationID.x / culling.tile_word_len; /* Linear tile index. */
+ uvec2 tile_co = uvec2(tile_idx % culling.tile_x_len, tile_idx / culling.tile_x_len);
+
+ if (tile_co.y >= culling.tile_y_len) { /* Surplus invocations past the last tile row write nothing. */
+ return;
+ }
+
+ /* TODO(fclem): We could stop the tile at the HiZ depth. */
+ CullingTile tile = culling_tile_get(culling, tile_co);
+
+ uint l_idx = word_idx * 32u; /* First light covered by this word. */
+ uint l_end = min(l_idx + 32u, culling.visible_count); /* Do not read past the visible lights. */
+ uint word = 0u;
+
+ for (; l_idx < l_end; l_idx++) {
+ LightData light = lights[l_idx];
+
+ bool intersect_tile = true; /* Conservative default so a type not listed below is never read uninitialized nor culled. */
+ switch (light.type) {
+ case LIGHT_SUN:
+ intersect_tile = true;
+ break;
+ case LIGHT_SPOT:
+ /* TODO cone culling. Until then spot lights fall through to the sphere test below. */
+ case LIGHT_RECT:
+ case LIGHT_ELLIPSE:
+ case LIGHT_POINT:
+ Sphere sphere = Sphere(light._position, light.influence_radius_max);
+ intersect_tile = culling_sphere_tile_isect(sphere, tile);
+ break;
+ }
+
+ if (intersect_tile) {
+ word |= 1u << (l_idx & 0x1Fu); /* Bit position is the light index modulo 32. */
+ }
+ }
+
+ culling_words[gl_GlobalInvocationID.x] = word;
+} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
index 942f75961e9..14e38d6f1d6 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
@@ -17,19 +17,29 @@ layout(std140) uniform sampling_block
SamplingData sampling;
};
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
layout(std140) uniform grids_block
@@ -55,7 +65,6 @@ uniform sampler2D transmit_data_tx;
uniform sampler2D reflect_color_tx;
uniform sampler2D reflect_normal_tx;
uniform sampler1D sss_transmittance_tx;
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2D shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
index 677881abd71..068db3e78fd 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
@@ -15,24 +15,33 @@
#pragma BLENDER_REQUIRE(eevee_volume_eval_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
uniform sampler2D transparency_data_tx;
uniform usampler2D volume_data_tx;
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2DShadow shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
index 448e5b54886..d3d5f859174 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
@@ -4,9 +4,10 @@
* A prototype needs to be declared before main in order to use it.
*
* The resources expected to be defined are:
- * - light_culling
- * - lights_culling_tx
* - lights
+ * - lights_zbins
+ * - light_culling
+ * - lights_culling_words
* - shadows
* - shadow_atlas_tx
* - shadow_tilemaps_tx
@@ -33,7 +34,7 @@ void light_eval(ClosureDiffuse diffuse,
vec4 ltc_mat = utility_tx_sample(uv, UTIL_LTC_MAT_LAYER);
float ltc_mag = utility_tx_sample(uv, UTIL_LTC_MAG_LAYER).x;
- ITEM_FOREACH_BEGIN (light_culling, lights_culling_tx, vP_z, l_idx) {
+ ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) {
LightData light = lights[l_idx];
vec3 L;
float dist;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
index 9723d24544c..152bfbeacec 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
@@ -25,19 +25,29 @@ layout(std140) uniform sampling_block
SamplingData sampling;
};
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
layout(std140) uniform grids_block
@@ -75,7 +85,6 @@ layout(std140) uniform hiz_block
HiZData hiz;
};
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2D shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;