diff options
Diffstat (limited to 'source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl')
-rw-r--r-- | source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl | 72 |
1 files changed, 72 insertions, 0 deletions
/**
 * File: source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl
 *
 * Macros used to loop over the item indices described by a culling data structure, plus the two
 * small helpers these macros rely on.
 *
 * Every `LIGHT_FOREACH_BEGIN_*` macro opens exactly three scopes and must be closed by one
 * `LIGHT_FOREACH_END`.
 */

#pragma BLENDER_REQUIRE(common_math_lib.glsl)

/**
 * Clamp the inclusive item range [zbin_min..zbin_max] to the range covered by the 32-bit word
 * `word_index` (items [word_index * 32 .. word_index * 32 + 31]) and forward the resulting
 * width and start to `bit_field_mask()`.
 *
 * The two ranges must overlap, otherwise `local_max - local_min` wraps around. The iteration
 * macro below only calls this for words inside [zbin_min / 32 .. zbin_max / 32].
 */
uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max)
{
  uint word_start = word_index * 32u;
  uint word_end = word_start + 31u;
  uint local_min = max(zbin_min, word_start);
  uint local_max = min(zbin_max, word_end);
  uint mask_width = local_max - local_min + 1u;
  /* NOTE(review): `local_min` is an absolute item index and is >= 32 as soon as
   * `word_index > 0`. If `bit_field_mask()` shifts by this value directly, the shift amount is
   * out of range according to the GLSL specification. Confirm against `common_math_lib.glsl`. */
  return bit_field_mask(mask_width, local_min);
}

/**
 * Map `z` to a Z-bin index using the affine transform `z * scale + bias`.
 * The float to int conversion drops the fractional part. The result is not clamped.
 */
int culling_z_to_zbin(float scale, float bias, float z)
{
  return int(z * scale + bias);
}

/* Waiting to implement extensions support. We need:
 * - GL_KHR_shader_subgroup_ballot
 * - GL_KHR_shader_subgroup_arithmetic
 * or
 * - Vulkan 1.1
 *
 * Until then, the subgroup operations are replaced by pass-through macros. */
#if 1
#  define subgroupMin(a) (a)
#  define subgroupMax(a) (a)
#  define subgroupOr(a) (a)
#  define subgroupBroadcastFirst(a) (a)
#endif

/**
 * Iterate `_index` over [_culling.local_lights_len .. _culling.items_count - 1].
 * Declares `_index` as a `uint` loop variable.
 */
#define LIGHT_FOREACH_BEGIN_DIRECTIONAL(_culling, _index) \
  { \
    { \
      for (uint _index = _culling.local_lights_len; _index < _culling.items_count; _index++) {

/**
 * Iterate `_item_index` over the items whose bit is set in the tile words of the tile
 * containing `_pixel`, restricted to the index range stored in the Z-bin of `_linearz`.
 *
 * - `_culling`: Structure providing `tile_size`, `tile_x_len`, `tile_word_len`, `zbin_scale` and
 *   `zbin_bias`.
 * - `_zbins`: Array of packed ranges. Low 16 bits are the minimum index, high 16 bits are the
 *   maximum index (both inclusive).
 * - `_words`: Array of 32-bit words, `tile_word_len` words per tile, one bit per item.
 * - `_pixel`: Pixel coordinate, divided by `tile_size` to get the tile coordinate.
 * - `_linearz`: Depth value passed to `culling_z_to_zbin()`.
 * - `_item_index`: Name of the `uint` variable declared for the loop body.
 *
 * The local variables declared here are visible inside the loop body.
 */
#define LIGHT_FOREACH_BEGIN_LOCAL(_culling, _zbins, _words, _pixel, _linearz, _item_index) \
  { \
    uvec2 tile_co = uvec2((_pixel) / _culling.tile_size); \
    uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \
                            _culling.tile_word_len; \
    int zbin_index = culling_z_to_zbin(_culling.zbin_scale, _culling.zbin_bias, _linearz); \
    zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \
    uint zbin_data = _zbins[zbin_index]; \
    uint min_index = zbin_data & 0xFFFFu; \
    uint max_index = zbin_data >> 16u; \
    /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
    min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
    max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
    /* Same as divide by 32 but avoid integer division. */ \
    uint word_min = min_index >> 5u; \
    uint word_max = max_index >> 5u; \
    for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \
      uint word = _words[tile_word_offset + word_idx]; \
      word &= zbin_mask(word_idx, min_index, max_index); \
      /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
      word = subgroupBroadcastFirst(subgroupOr(word)); \
      int bit_index; \
      /* `findLSB()` returns -1 once every bit has been consumed. */ \
      while ((bit_index = findLSB(word)) != -1) { \
        /* Clear the bit that is about to be processed. */ \
        word &= ~(1u << uint(bit_index)); \
        uint _item_index = word_idx * 32u + uint(bit_index);

/* No culling. Iterate over all items. */
#define LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(_culling, _item_index) \
  { \
    { \
      for (uint _item_index = 0u; _item_index < _culling.visible_count; _item_index++) {

/* Close the three scopes opened by any of the `LIGHT_FOREACH_BEGIN_*` macros. */
#define LIGHT_FOREACH_END \
  } \
  } \
  }