1 files changed, 72 insertions, 0 deletions
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl
new file mode 100644
index 00000000000..22a5f98e6c3
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl
@@ -0,0 +1,72 @@
+
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+
+/* Mask of the bits of 32-bit word `word_index` that fall in the item range [zbin_min..zbin_max]. */
+uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max)
+{
+  uint word_start = word_index * 32u;
+  uint local_min = max(zbin_min, word_start); /* Inclusive range, clipped to this word's items. */
+  uint local_max = min(zbin_max, word_start + 31u);
+  /* Word-relative start bit (0..31): assumes bit_field_mask() shifts by it, >= 32 is undefined. */
+  return bit_field_mask(local_max - local_min + 1u, local_min - word_start);
+}
+
+int culling_z_to_zbin(float scale, float bias, float z)
+{
+  return int(bias + scale * z); /* Affine remap of `z`; int() drops the fractional part. */
+}
+
+/* Subgroup operations are not available yet. Supporting them requires either:
+ * - GL_KHR_shader_subgroup_ballot and GL_KHR_shader_subgroup_arithmetic,
+ * or
+ * - Vulkan 1.1.
+ * Until then these identity fallbacks make every invocation keep its own value.
+ */
+#if 1
+#  define subgroupMin(a) (a)
+#  define subgroupMax(a) (a)
+#  define subgroupOr(a) (a)
+#  define subgroupBroadcastFirst(a) (a)
+#endif
+
+/* Loop `_index` over [_culling.local_lights_len.._culling.items_count). End: LIGHT_FOREACH_END. */
+#define LIGHT_FOREACH_BEGIN_DIRECTIONAL(_culling, _index) \
+  { { /* Two plain scopes + the loop body: the three braces LIGHT_FOREACH_END closes. */ \
+      for (uint _index = _culling.local_lights_len; _index < _culling.items_count; ++_index) {
+
+/* Visit every item whose bit is set in the tile words of `_pixel`, limited to the item range
+ * stored in the Z-bin of `_linearz`. Declares `uint _item_index`. End with LIGHT_FOREACH_END. */
+#define LIGHT_FOREACH_BEGIN_LOCAL(_culling, _zbins, _words, _pixel, _linearz, _item_index) \
+  { \
+    uvec2 tile_co = uvec2((_pixel) / _culling.tile_size); \
+    uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \
+                            _culling.tile_word_len; \
+    int zbin_index = culling_z_to_zbin(_culling.zbin_scale, _culling.zbin_bias, _linearz); \
+    zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \
+    uint zbin_data = _zbins[zbin_index]; \
+    uint min_index = zbin_data & 0xFFFFu; /* First item of the bin: low 16 bits. */ \
+    uint max_index = zbin_data >> 16u;    /* Last item (inclusive): high 16 bits. */ \
+    /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
+    min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
+    max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
+    uint word_min = min_index >> 5u; /* Same as divide by 32 but avoids integer division. */ \
+    uint word_max = max_index >> 5u; \
+    for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \
+      uint word = _words[tile_word_offset + word_idx]; \
+      word &= zbin_mask(word_idx, min_index, max_index); /* Keep only items of this Z-bin. */ \
+      word = subgroupBroadcastFirst(subgroupOr(word)); /* Subgroup-uniform, as above. */ \
+      int bit_index; \
+      while ((bit_index = findLSB(word)) != -1) { \
+        word &= ~(1u << uint(bit_index)); /* Consume the bit that was just found. */ \
+        uint _item_index = word_idx * 32u + uint(bit_index);
+
+/* No culling: loop `_item_index` over every item in [0.._culling.visible_count).
+ * End with LIGHT_FOREACH_END. */
+#define LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(_culling, _item_index) \
+  { { /* Two plain scopes + the loop body: the three braces LIGHT_FOREACH_END closes. */ \
+      for (uint _item_index = 0; _item_index < _culling.visible_count; ++_item_index) {
+
+/* Terminates any LIGHT_FOREACH_BEGIN_* block: each variant leaves exactly three scopes open. */
+#define LIGHT_FOREACH_END \
+  } /* Innermost loop body. */ \
+  } } /* Enclosing loop or plain scope, then the outermost scope. */