Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Clément Foucault <foucault.clem@gmail.com> 2021-11-23 23:24:00 +0300
committer Clément Foucault <foucault.clem@gmail.com> 2021-12-04 00:41:37 +0300
commit 1b00ca35758dacf7ece7b95275ea3c41e53bec6c (patch)
tree b587d29f39a3cffd5c88ab86ceea674a1aaeb1b4 /source/blender/draw/engines
parent 68b0195bf3e27ee687eb87e37257891a3a7f4e78 (diff)
EEVEE: Light: Port light culling to GPU
This removes the light count limit for the forward shaded object. This also provides a more efficient way of computing the culling directly on the GPU. Moreover, this avoids doing multiple lighting passes for high light counts in the deferred pipeline, improving performance.
Diffstat (limited to 'source/blender/draw/engines')
-rw-r--r-- source/blender/draw/engines/eevee/eevee_culling.cc | 68
-rw-r--r-- source/blender/draw/engines/eevee/eevee_culling.hh | 338
-rw-r--r-- source/blender/draw/engines/eevee/eevee_instance.cc | 3
-rw-r--r-- source/blender/draw/engines/eevee/eevee_instance.hh | 2
-rw-r--r-- source/blender/draw/engines/eevee/eevee_light.cc | 221
-rw-r--r-- source/blender/draw/engines/eevee/eevee_light.hh | 105
-rw-r--r-- source/blender/draw/engines/eevee/eevee_lookdev.cc | 8
-rw-r--r-- source/blender/draw/engines/eevee/eevee_shader.cc | 8
-rw-r--r-- source/blender/draw/engines/eevee/eevee_shader.hh | 4
-rw-r--r-- source/blender/draw/engines/eevee/eevee_shader_shared.hh | 147
-rw-r--r-- source/blender/draw/engines/eevee/eevee_shading.cc | 67
-rw-r--r-- source/blender/draw/engines/eevee/eevee_shading.hh | 17
-rw-r--r-- source/blender/draw/engines/eevee/eevee_shadow.cc | 32
-rw-r--r-- source/blender/draw/engines/eevee/eevee_view.cc | 14
-rw-r--r-- source/blender/draw/engines/eevee/eevee_view.hh | 1
-rw-r--r-- source/blender/draw/engines/eevee/eevee_wrapper.hh | 141
-rw-r--r-- source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl | 31
-rw-r--r-- source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl | 61
-rw-r--r-- source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl | 32
-rw-r--r-- source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl | 51
-rw-r--r-- source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl | 57
-rw-r--r-- source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl | 138
-rw-r--r-- source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl | 73
-rw-r--r-- source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl | 21
-rw-r--r-- source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl | 21
-rw-r--r-- source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl | 7
-rw-r--r-- source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl | 21
27 files changed, 820 insertions, 869 deletions
diff --git a/source/blender/draw/engines/eevee/eevee_culling.cc b/source/blender/draw/engines/eevee/eevee_culling.cc
deleted file mode 100644
index c54c7fa9320..00000000000
--- a/source/blender/draw/engines/eevee/eevee_culling.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright 2021, Blender Foundation.
- */
-
-/** \file
- * \ingroup eevee
- *
- * A culling object is a data structure that contains fine grained culling
- * of entities against in the whole view frustum. The Culling structure contains the
- * final entity list since it has to have a special order.
- *
- * Follows the principles of Tiled Culling + Z binning from:
- * "Improved Culling for Tiled and Clustered Rendering"
- * by Michal Drobot
- * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf
- */
-
-#include "eevee_instance.hh"
-
-#include "eevee_culling.hh"
-
-namespace blender::eevee {
-
-/* -------------------------------------------------------------------- */
-/** \name CullingDebugPass
- * \{ */
-
-void CullingDebugPass::sync(void)
-{
- LightModule &lights = inst_.lights;
-
- debug_ps_ = DRW_pass_create("CullingDebug", DRW_STATE_WRITE_COLOR);
-
- GPUShader *sh = inst_.shaders.static_shader_get(CULLING_DEBUG);
- DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &input_depth_tx_);
- DRW_shgroup_call_procedural_triangles(grp, nullptr, 1);
-}
-
-void CullingDebugPass::render(GPUTexture *input_depth_tx)
-{
- input_depth_tx_ = input_depth_tx;
-
- inst_.lights.bind_batch(0);
-
- DRW_draw_pass(debug_ps_);
-}
-
-/** \} */
-
-} // namespace blender::eevee
\ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/eevee_culling.hh b/source/blender/draw/engines/eevee/eevee_culling.hh
deleted file mode 100644
index 976c30a1efb..00000000000
--- a/source/blender/draw/engines/eevee/eevee_culling.hh
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright 2021, Blender Foundation.
- */
-
-/** \file
- * \ingroup eevee
- *
- * A culling object is a data structure that contains fine grained culling
- * of entities against in the whole view frustum. The Culling structure contains the
- * final entity list since it has to have a special order.
- *
- * Follows the principles of Tiled Culling + Z binning from:
- * "Improved Culling for Tiled and Clustered Rendering"
- * by Michal Drobot
- * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf
- */
-
-#pragma once
-
-#include "DRW_render.h"
-
-#include "BLI_vector.hh"
-
-#include "eevee_shader_shared.hh"
-
-namespace blender::eevee {
-
-class Instance;
-
-/* -------------------------------------------------------------------- */
-/** \name CullingBatch
- * \{ */
-
-/**
- * Do not use directly. Use Culling object instead.
- */
-template<
- /* Type of data contained per culling batch. */
- typename Tdata>
-class CullingBatch {
- public:
- /** Z ordered items. */
- Tdata item_data;
-
- private:
- /* Items to order in Z. */
- struct ItemHandle {
- /** Index inside item_source_. */
- uint32_t source_index;
- /** Signed Z distance along camera Z axis. */
- float z_dist;
- /** Item radius. */
- float radius;
- };
-
- /** Compact handle list to order without moving source. */
- Vector<ItemHandle, CULLING_ITEM_BATCH> item_handles_;
- /** Z bins. */
- CullingDataBuf culling_data_;
- /** Tile texture and framebuffer handling the 2D culling. */
- eevee::Texture tiles_tx_ = Texture("culling_tx_");
- eevee::Framebuffer tiles_fb_;
-
- public:
- CullingBatch(){};
- ~CullingBatch(){};
-
- void init(const ivec2 &extent)
- {
- item_handles_.clear();
-
- uint tile_size = 8;
-
- uint res[2] = {divide_ceil_u(extent.x, tile_size), divide_ceil_u(extent.y, tile_size)};
-
- tiles_tx_.ensure(UNPACK2(res), 1, GPU_RGBA32UI);
-
- culling_data_.tile_size = tile_size;
- for (int i = 0; i < 2; i++) {
- culling_data_.tile_to_uv_fac[i] = tile_size / (float)extent[i];
- }
-
- // tiles_tx_.ensure(1, 1, 1, GPU_RGBA32UI);
- // uvec4 no_2D_culling = {UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX};
- // GPU_texture_update(tiles_tx_, GPU_DATA_UINT, no_2D_culling);
-
- tiles_fb_.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(tiles_tx_));
- }
-
- void set_empty(void)
- {
- init_min_max();
- culling_data_.push_update();
- }
-
- void insert(int32_t index, float z_dist, float radius)
- {
- ItemHandle handle = {(uint32_t)index, z_dist, radius};
- item_handles_.append(handle);
- }
-
- template<typename DataAppendF, typename CullingF>
- void finalize(float near_z,
- float far_z,
- const DataAppendF &data_append,
- const CullingF &draw_culling)
- {
- culling_data_.zbin_scale = -CULLING_ZBIN_COUNT / fabsf(far_z - near_z);
- culling_data_.zbin_bias = -near_z * culling_data_.zbin_scale;
-
- /* Order items by Z distance to the camera. */
- auto sort = [](const ItemHandle &a, const ItemHandle &b) { return a.z_dist > b.z_dist; };
- std::sort(item_handles_.begin(), item_handles_.end(), sort);
-
- init_min_max();
- /* Fill the GPU data buffer. */
- for (auto item_idx : item_handles_.index_range()) {
- ItemHandle &handle = item_handles_[item_idx];
- data_append(item_data, item_idx, handle.source_index);
- /* Register to Z bins. */
- int z_min = max_ii(culling_z_to_zbin(culling_data_, handle.z_dist + handle.radius), 0);
- int z_max = min_ii(culling_z_to_zbin(culling_data_, handle.z_dist - handle.radius),
- CULLING_ZBIN_COUNT - 1);
- for (auto z : IndexRange(z_min, z_max - z_min + 1)) {
- BLI_assert(z >= 0 && z < CULLING_ZBIN_COUNT);
- uint16_t(&zbin_minmax)[2] = ((uint16_t(*)[2])culling_data_.zbins)[z];
- if (item_idx < zbin_minmax[0]) {
- zbin_minmax[0] = (uint16_t)item_idx;
- }
- if (item_idx > zbin_minmax[1]) {
- zbin_minmax[1] = (uint16_t)item_idx;
- }
- }
- }
- /* Set item count for no-cull iterator. */
- culling_data_.items_count = item_handles_.size();
- /* Upload data to GPU. */
- culling_data_.push_update();
-
- GPU_framebuffer_bind(tiles_fb_);
-
- draw_culling(item_data, culling_data_);
- }
-
- /**
- * Getters
- **/
- bool is_full(void)
- {
- return item_handles_.size() == CULLING_ITEM_BATCH;
- }
- const GPUUniformBuf *culling_ubo_get(void) const
- {
- return culling_data_.ubo_get();
- }
- uint items_count_get(void) const
- {
- return culling_data_.items_count;
- }
- GPUTexture *culling_texture_get(void) const
- {
- return tiles_tx_;
- }
-
- private:
- void init_min_max(void)
- {
- /* Init min-max for each bin. */
- for (auto i : IndexRange(CULLING_ZBIN_COUNT)) {
- uint16_t *zbin_minmax = (uint16_t *)culling_data_.zbins;
- zbin_minmax[i * 2 + 0] = CULLING_ITEM_BATCH - 1;
- zbin_minmax[i * 2 + 1] = 0;
- }
- culling_data_.items_count = 0;
- }
-};
-
-/** \} */
-
-/* -------------------------------------------------------------------- */
-/** \name Culling
- * \{ */
-
-template</* Type of data contained per culling batch. */
- typename Tdata,
- /* True if items can be added in multiple batches. */
- bool is_extendable = false>
-class Culling {
- private:
- using CullingBatchType = CullingBatch<Tdata>;
- /** Multiple culling batches containing at most CULLING_ITEM_BATCH items worth of data. */
- Vector<CullingBatchType *> batches_;
- /** Number of active batches. Allocated count may be higher. */
- int used_batch_count_;
- /** Pointer to the active batch being filled. */
- CullingBatchType *active_batch_;
- /** Used to get Z distance. */
- vec3 camera_z_axis_;
- float camera_z_offset_;
- /** View for which the culling is computed. */
- const DRWView *view_;
- /** View resolution. */
- ivec2 extent_ = ivec2(0);
-
- public:
- Culling(){};
- ~Culling()
- {
- for (CullingBatchType *batch : batches_) {
- delete batch;
- }
- }
-
- void set_view(const DRWView *view, const ivec2 extent)
- {
- view_ = view;
- extent_ = extent;
-
- float viewinv[4][4];
- DRW_view_viewmat_get(view, viewinv, true);
-
- camera_z_axis_ = viewinv[2];
- camera_z_offset_ = -vec3::dot(camera_z_axis_, viewinv[3]);
-
- if (batches_.size() == 0) {
- batches_.append(new CullingBatchType());
- }
-
- used_batch_count_ = 1;
- active_batch_ = batches_[0];
- active_batch_->init(extent_);
- }
-
- /* Cull every items. Do not reset the batches to avoid freeing the vectors' memory. */
- void set_empty(void)
- {
- if (extent_.x == 0) {
- extent_ = ivec2(1);
- }
-
- if (batches_.size() == 0) {
- batches_.append(new CullingBatchType());
-
- active_batch_ = batches_[0];
- active_batch_->init(extent_);
- }
-
- active_batch_ = batches_[0];
- active_batch_->set_empty();
- }
-
- /* Returns true if we cannot add any more items.
- * In this case, the caller is expected to not try to insert another item. */
- bool insert(int32_t index, BoundSphere &bsphere)
- {
- if (!DRW_culling_sphere_test(view_, &bsphere)) {
- return false;
- }
-
- if (active_batch_->is_full()) {
- BLI_assert(is_extendable);
- /* TODO(fclem) degrow vector of batches. */
- if (batches_.size() < (used_batch_count_ + 1)) {
- batches_.append(new CullingBatchType());
- }
- active_batch_ = batches_[used_batch_count_];
- active_batch_->init(extent_);
- used_batch_count_++;
- }
-
- float z_dist = vec3::dot(bsphere.center, camera_z_axis_) + camera_z_offset_;
- active_batch_->insert(index, z_dist, bsphere.radius);
-
- return active_batch_->is_full();
- }
-
- template<typename DataAppendF, typename CullingF>
- void finalize(const DataAppendF &data_append, const CullingF &draw_culling)
- {
- float near_z = DRW_view_near_distance_get(view_);
- float far_z = DRW_view_far_distance_get(view_);
-
- for (auto i : IndexRange(used_batch_count_)) {
- batches_[i]->finalize(near_z, far_z, data_append, draw_culling);
- }
- }
-
- /**
- * Getters
- **/
- const CullingBatchType *operator[](int64_t index) const
- {
- return batches_[index];
- }
- IndexRange index_range(void) const
- {
- return IndexRange(used_batch_count_);
- }
-};
-
-/** \} */
-
-/* -------------------------------------------------------------------- */
-/** \name CullingDebugPass
- * \{ */
-
-class CullingDebugPass {
- private:
- Instance &inst_;
-
- GPUTexture *input_depth_tx_ = nullptr;
-
- DRWPass *debug_ps_ = nullptr;
-
- public:
- CullingDebugPass(Instance &inst) : inst_(inst){};
-
- void sync(void);
- void render(GPUTexture *input_depth_tx);
-};
-
-/** \} */
-
-} // namespace blender::eevee
\ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/eevee_instance.cc b/source/blender/draw/engines/eevee/eevee_instance.cc
index 63c9c690180..f614d4b4a97 100644
--- a/source/blender/draw/engines/eevee/eevee_instance.cc
+++ b/source/blender/draw/engines/eevee/eevee_instance.cc
@@ -22,6 +22,7 @@
* An instance contains all structures needed to do a complete render.
*/
+#include "BKE_global.h"
#include "BKE_object.h"
#include "BLI_rect.h"
#include "DEG_depsgraph_query.h"
@@ -63,6 +64,8 @@ void Instance::init(const ivec2 &output_res,
rv3d = rv3d_;
baking_probe = light_probe_;
+ debug_mode = (eDebugMode)G.debug_value;
+
update_eval_members();
rcti render_border = output_crop(output_res, output_rect);
diff --git a/source/blender/draw/engines/eevee/eevee_instance.hh b/source/blender/draw/engines/eevee/eevee_instance.hh
index 09548ca504d..3079f931231 100644
--- a/source/blender/draw/engines/eevee/eevee_instance.hh
+++ b/source/blender/draw/engines/eevee/eevee_instance.hh
@@ -99,6 +99,8 @@ class Instance {
/** Can be null. Used to exclude objects during baking. */
const struct LightProbe *baking_probe = nullptr;
+ eDebugMode debug_mode = SHADOW_DEBUG_NONE;
+
/* Info string displayed at the top of the render / viewport. */
char info[64];
diff --git a/source/blender/draw/engines/eevee/eevee_light.cc b/source/blender/draw/engines/eevee/eevee_light.cc
index b80954e2195..d73d650d78a 100644
--- a/source/blender/draw/engines/eevee/eevee_light.cc
+++ b/source/blender/draw/engines/eevee/eevee_light.cc
@@ -284,10 +284,10 @@ void LightModule::sync_light(const Object *ob, ObjectHandle &handle)
void LightModule::end_sync(void)
{
- lights_refs_.clear();
-
Vector<ObjectKey, 0> deleted_keys;
+ light_refs_.clear();
+
/* Detect light deletion. */
for (auto item : lights_.items()) {
Light &light = item.value;
@@ -297,7 +297,7 @@ void LightModule::end_sync(void)
}
else {
light.used = false;
- lights_refs_.append(&light);
+ light_refs_.append(&light);
}
}
@@ -308,110 +308,171 @@ void LightModule::end_sync(void)
lights_.remove(key);
}
+ if (light_refs_.size() > CULLING_MAX_ITEM) {
+ /* TODO(fclem) Print error to user. */
+ light_refs_.resize(CULLING_MAX_ITEM);
+ }
+
+ batch_len_ = divide_ceil_u(max_ii(light_refs_.size(), 1), CULLING_BATCH_SIZE);
+ lights_data.resize(batch_len_ * CULLING_BATCH_SIZE);
+ shadows_data.resize(batch_len_ * CULLING_BATCH_SIZE);
+ culling_key_buf.resize(batch_len_ * CULLING_BATCH_SIZE);
+ culling_light_buf.resize(batch_len_ * CULLING_BATCH_SIZE);
+ culling_zbin_buf.resize(batch_len_ * CULLING_ZBIN_COUNT);
+ culling_data.items_count = light_refs_.size();
+ culling_data.tile_word_len = divide_ceil_u(max_ii(culling_data.items_count, 1), 32);
+
/* Call shadows.end_sync after light pruning to avoid packing deleted shadows. */
inst_.shadows.end_sync();
+
+ for (auto l_idx : light_refs_.index_range()) {
+ Light &light = *light_refs_[l_idx];
+ lights_data[l_idx] = light;
+ lights_data[l_idx].shadow_id = LIGHT_NO_SHADOW;
+
+ if (light.shadow_id != LIGHT_NO_SHADOW) {
+ if (light.type == LIGHT_SUN) {
+ shadows_data[l_idx] = this->inst_.shadows.directionals[light.shadow_id];
+ }
+ else {
+ shadows_data[l_idx] = this->inst_.shadows.punctuals[light.shadow_id];
+ }
+ }
+ }
+
+ lights_data.push_update();
+ shadows_data.push_update();
+
+ {
+ culling_ps_ = DRW_pass_create("CullingLight", (DRWState)0);
+
+ uint lights_len = light_refs_.size();
+ uint batch_len = divide_ceil_u(lights_len, CULLING_BATCH_SIZE);
+
+ if (batch_len > 0) {
+ /* NOTE: We reference the buffers that may be resized or updated later. */
+ {
+ GPUShader *sh = inst_.shaders.static_shader_get(CULLING_SELECT);
+ DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_);
+ DRW_shgroup_vertex_buffer(grp, "lights_buf", lights_data);
+ DRW_shgroup_vertex_buffer_ref(grp, "culling_buf", &culling_data);
+ DRW_shgroup_vertex_buffer(grp, "key_buf", culling_key_buf);
+ DRW_shgroup_call_compute(grp, batch_len, 1, 1);
+ DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
+ }
+ {
+ GPUShader *sh = inst_.shaders.static_shader_get(CULLING_SORT);
+ DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_);
+ DRW_shgroup_vertex_buffer(grp, "lights_buf", lights_data);
+ DRW_shgroup_vertex_buffer_ref(grp, "culling_buf", &culling_data);
+ DRW_shgroup_vertex_buffer(grp, "key_buf", culling_key_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "out_zbins_buf", &culling_zbin_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "out_items_buf", &culling_light_buf);
+ DRW_shgroup_call_compute(grp, batch_len, 1, 1);
+ DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
+ }
+ {
+ GPUShader *sh = inst_.shaders.static_shader_get(CULLING_TILE);
+ DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_);
+ DRW_shgroup_vertex_buffer(grp, "lights_buf", culling_light_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "culling_buf", &culling_data);
+ DRW_shgroup_vertex_buffer_ref(grp, "culling_tile_buf", &culling_tile_buf);
+ DRW_shgroup_call_compute_ref(grp, culling_tile_dispatch_size_);
+ DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
+ }
+ }
+ }
+
+ debug_end_sync();
}
-/* Compute acceleration structure for the given view. If extent is 0, bind no lights. */
-void LightModule::set_view(const DRWView *view, const ivec2 extent, bool enable_specular)
+void LightModule::debug_end_sync(void)
{
- if (extent.x == 0) {
- culling_.set_empty();
+ if (inst_.debug_mode != eDebugMode::DEBUG_LIGHT_CULLING) {
+ debug_draw_ps_ = nullptr;
return;
}
- culling_.set_view(view, extent);
-
- for (auto light_id : lights_refs_.index_range()) {
- Light &light = *lights_refs_[light_id];
+ debug_draw_ps_ = DRW_pass_create("CullingDebug", DRW_STATE_WRITE_COLOR);
- BoundSphere bsphere;
- if (light.type == LIGHT_SUN) {
- /* Make sun lights cover the whole frustum. */
- float viewinv[4][4];
- DRW_view_viewmat_get(view, viewinv, true);
- copy_v3_v3(bsphere.center, viewinv[3]);
- bsphere.radius = fabsf(DRW_view_far_distance_get(view));
- }
- else {
- /* TODO(fclem) fit cones better. */
- copy_v3_v3(bsphere.center, light._position);
- bsphere.radius = light.influence_radius_max;
- }
+ GPUShader *sh = inst_.shaders.static_shader_get(CULLING_DEBUG);
+ DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_draw_ps_);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_buf", &culling_light_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_culling_buf", &culling_data);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_zbins_buf", &culling_zbin_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_tile_buf", &culling_tile_buf);
+ DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &input_depth_tx_);
+ DRW_shgroup_call_procedural_triangles(grp, nullptr, 1);
+}
- culling_.insert(light_id, bsphere);
- }
+/* Compute acceleration structure for the given view. If extent is 0, bind no lights. */
+void LightModule::set_view(const DRWView *view, const ivec2 extent, bool enable_specular)
+{
+ const bool no_lights = (extent.x == 0);
- DRW_view_set_active(view);
+ /* Target 1bit per pixel. */
+ uint tile_size = 1u << log2_ceil_u(ceil(sqrtf(culling_data.tile_word_len * 32)));
- /* This is only called if the light is visible under this view. */
- auto data_copy = [&](LightBatch &light_batch, uint32_t dst_index, uint32_t src_index) {
- Light &light = *this->lights_refs_[src_index];
- LightData &dst = light_batch.lights_data[dst_index];
+ int3 tiles_extent;
+ tiles_extent.x = divide_ceil_u(extent.x, tile_size);
+ tiles_extent.y = divide_ceil_u(extent.y, tile_size);
+ tiles_extent.z = batch_len_;
- dst = light;
- if (!enable_specular) {
- dst.specular_power = 0.0f;
- }
+ float far_z = DRW_view_far_distance_get(view);
+ float near_z = DRW_view_near_distance_get(view);
- if (light.shadow_id != LIGHT_NO_SHADOW) {
- ShadowData &shadow_dst = light_batch.shadows_data[dst_index];
- if (light.type == LIGHT_SUN) {
- shadow_dst = this->inst_.shadows.directionals[light.shadow_id];
- }
- else {
- shadow_dst = this->inst_.shadows.punctuals[light.shadow_id];
- }
- }
- };
+ culling_data.zbin_scale = -CULLING_ZBIN_COUNT / fabsf(far_z - near_z);
+ culling_data.zbin_bias = -near_z * culling_data.zbin_scale;
+ culling_data.tile_size = tile_size;
+ culling_data.tile_x_len = tiles_extent.x;
+ culling_data.tile_y_len = tiles_extent.y;
+ culling_data.tile_to_uv_fac = tile_size / float2(UNPACK2(extent));
- /* Called for each batch. Do 2D gpu culling. */
- auto culling_func = [&](LightBatch &light_batch, CullingDataBuf &culling_data) {
- LightDataBuf &lights_data = light_batch.lights_data;
- ShadowDataBuf &shadows_data = light_batch.shadows_data;
- lights_data.push_update();
- shadows_data.push_update();
+ culling_data.enable_specular = enable_specular;
+ culling_data.items_count = no_lights ? 0 : light_refs_.size();
+ culling_data.visible_count = 0;
+ culling_data.push_update();
- this->inst_.shading_passes.light_culling.render(lights_data.ubo_get(), culling_data.ubo_get());
- };
+ if (no_lights) {
+ return;
+ }
- culling_.finalize(data_copy, culling_func);
+ uint word_count = tiles_extent.x * tiles_extent.y * tiles_extent.z * culling_data.tile_word_len;
- inst_.shadows.update_visible(view);
-}
+ /* TODO(fclem) Only resize once per redraw. */
+ culling_tile_buf.resize(word_count);
-void LightModule::bind_batch(int batch_index)
-{
- active_batch_ = batch_index;
- auto &batch = *culling_[batch_index];
- active_lights_ubo_ = batch.item_data.lights_data.ubo_get();
- active_shadows_ubo_ = batch.item_data.shadows_data.ubo_get();
- active_culling_ubo_ = batch.culling_ubo_get();
- active_culling_tx_ = batch.culling_texture_get();
-}
+ culling_tile_dispatch_size_.x = divide_ceil_u(word_count, 1024);
+ culling_tile_dispatch_size_.y = 1;
+ culling_tile_dispatch_size_.z = 1;
-/** \} */
+ DRW_view_set_active(view);
+ DRW_draw_pass(culling_ps_);
-/* -------------------------------------------------------------------- */
-/** \name CullingPass
- * \{ */
+ inst_.shadows.update_visible(view);
+}
-void CullingLightPass::sync(void)
+void LightModule::debug_draw(GPUFrameBuffer *view_fb, HiZBuffer &hiz)
{
- culling_ps_ = DRW_pass_create("CullingLight", DRW_STATE_WRITE_COLOR);
+ if (debug_draw_ps_ == nullptr) {
+ return;
+ }
+ input_depth_tx_ = hiz.texture_get();
- GPUShader *sh = inst_.shaders.static_shader_get(CULLING_LIGHT);
- DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", &lights_ubo_);
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", &culling_ubo_);
- DRW_shgroup_call_procedural_triangles(grp, nullptr, 1);
+ GPU_framebuffer_bind(view_fb);
+ DRW_draw_pass(debug_draw_ps_);
}
-void CullingLightPass::render(const GPUUniformBuf *lights_ubo, const GPUUniformBuf *culling_ubo)
+void LightModule::shgroup_resources(DRWShadingGroup *grp)
{
- lights_ubo_ = lights_ubo;
- culling_ubo_ = culling_ubo;
- DRW_draw_pass(culling_ps_);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_buf", &culling_light_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_culling_buf", &culling_data);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_zbins_buf", &culling_zbin_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_tile_buf", &culling_tile_buf);
+
+ DRW_shgroup_vertex_buffer_ref(grp, "shadows_buf", &shadows_data);
+ DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", inst_.shadows.atlas_tx_get());
+ DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", inst_.shadows.tilemap_tx_get());
}
/** \} */
diff --git a/source/blender/draw/engines/eevee/eevee_light.hh b/source/blender/draw/engines/eevee/eevee_light.hh
index 36bacf9ac8a..254d9231eef 100644
--- a/source/blender/draw/engines/eevee/eevee_light.hh
+++ b/source/blender/draw/engines/eevee/eevee_light.hh
@@ -29,7 +29,6 @@
#include "DNA_light_types.h"
#include "eevee_camera.hh"
-#include "eevee_culling.hh"
#include "eevee_id_map.hh"
#include "eevee_sampling.hh"
#include "eevee_shader.hh"
@@ -72,27 +71,6 @@ struct Light : public LightData {
/** \} */
/* -------------------------------------------------------------------- */
-/** \name CullingPass
- * \{ */
-
-class CullingLightPass {
- private:
- Instance &inst_;
-
- DRWPass *culling_ps_ = nullptr;
- const GPUUniformBuf *lights_ubo_ = nullptr;
- const GPUUniformBuf *culling_ubo_ = nullptr;
-
- public:
- CullingLightPass(Instance &inst) : inst_(inst){};
-
- void sync(void);
- void render(const GPUUniformBuf *lights_ubo, const GPUUniformBuf *culling_ubo);
-};
-
-/** \} */
-
-/* -------------------------------------------------------------------- */
/** \name LightModule
* \{ */
@@ -102,30 +80,47 @@ class CullingLightPass {
class LightModule {
friend ShadowModule;
+ public:
+ /** Scene lights data. */
+ LightDataBuf lights_data;
+ /** Shadow data. TODO(fclem): merge with lights_data. */
+ ShadowDataBuf shadows_data;
+ /** Culling infos. */
+ CullingDataBuf culling_data;
+ /** Key buffer containing only visible lights indices. */
+ CullingKeyBuf culling_key_buf;
+ /** LightData buffer used for rendering. Ordered by the culling phase. */
+ CullingLightBuf culling_light_buf;
+ /** Zbins containing min and max light index for each Z bin. */
+ CullingZbinBuf culling_zbin_buf;
+ /** Bitmap of lights touching each tiles. Using one layer for each culling batch. */
+ CullingTileBuf culling_tile_buf;
+
private:
Instance &inst_;
/** Map of light objects. This is used to track light deletion. */
Map<ObjectKey, Light> lights_;
- /** References to data in lights_ for easy indexing. */
- Vector<Light *> lights_refs_;
- /** Batches of lights alongside their culling data. */
- struct LightBatch {
- LightDataBuf lights_data;
- ShadowDataBuf shadows_data;
- };
- Culling<LightBatch, true> culling_;
- /** Active data pointers used for rendering. */
- const GPUUniformBuf *active_lights_ubo_;
- const GPUUniformBuf *active_shadows_ubo_;
- const GPUUniformBuf *active_culling_ubo_;
- GPUTexture *active_culling_tx_;
- int active_batch_ = 0;
+
+ Vector<Light *> light_refs_;
+
+ /** Follows the principles of Tiled Culling + Z binning from:
+ * "Improved Culling for Tiled and Clustered Rendering"
+ * by Michal Drobot
+ * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf */
+ DRWPass *culling_ps_ = nullptr;
+ int3 culling_tile_dispatch_size_ = int3(1);
+ /* Number of batches of lights that are separately processed. */
+ int batch_len_ = 1;
float light_threshold_;
+ /** Debug Culling visualization. */
+ DRWPass *debug_draw_ps_ = nullptr;
+ GPUTexture *input_depth_tx_ = nullptr;
+
public:
- LightModule(Instance &inst) : inst_(inst), culling_(){};
+ LightModule(Instance &inst) : inst_(inst){};
~LightModule(){};
void begin_sync(void);
@@ -134,40 +129,10 @@ class LightModule {
void set_view(const DRWView *view, const ivec2 extent, bool enable_specular = true);
- void bind_batch(int range_id);
+ void shgroup_resources(DRWShadingGroup *grp);
- /**
- * Getters
- **/
- const GPUUniformBuf **lights_ubo_ref_get(void)
- {
- return &active_lights_ubo_;
- }
- const GPUUniformBuf **shadows_ubo_ref_get(void)
- {
- return &active_shadows_ubo_;
- }
- const GPUUniformBuf **culling_ubo_ref_get(void)
- {
- return &active_culling_ubo_;
- }
- /** Returns the active Span of lights that passed the culling test. */
- Span<LightData> lights_get(void) const
- {
- const auto &batch = *culling_[active_batch_];
- Span<LightData> span = batch.item_data.lights_data;
- return span.take_front(batch.items_count_get());
- }
- GPUTexture **culling_tx_ref_get(void)
- {
- return &active_culling_tx_;
- }
- /* Return a range iterator to loop over all lights.
- * In practice, we render with light in waves of LIGHT_MAX lights at a time. */
- IndexRange index_range(void) const
- {
- return culling_.index_range();
- }
+ void debug_end_sync(void);
+ void debug_draw(GPUFrameBuffer *view_fb, HiZBuffer &hiz);
};
/** \} */
diff --git a/source/blender/draw/engines/eevee/eevee_lookdev.cc b/source/blender/draw/engines/eevee/eevee_lookdev.cc
index 84c77130b05..e5633ac47ee 100644
--- a/source/blender/draw/engines/eevee/eevee_lookdev.cc
+++ b/source/blender/draw/engines/eevee/eevee_lookdev.cc
@@ -302,18 +302,13 @@ void LookDev::sync_overlay(void)
GPUMaterial *gpumat = inst_.shaders.material_shader_get(
mat, mat->nodetree, MAT_PIPE_FORWARD, MAT_GEOM_LOOKDEV, false);
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, overlay_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get());
DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get());
DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get());
DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
- DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
- DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", inst_.shadows.atlas_tx_get());
offset.x -= sphere_size_ + sphere_margin;
@@ -340,7 +335,6 @@ void LookDev::render_overlay(GPUFrameBuffer *fb)
inst_.lightprobes.set_view(active_view, ivec2(0));
inst_.lights.set_view(active_view, ivec2(0));
- inst_.lights.bind_batch(0);
/* Create subview for correct shading. Sub because we don not care about culling. */
const CameraData &cam = inst_.camera.data_get();
diff --git a/source/blender/draw/engines/eevee/eevee_shader.cc b/source/blender/draw/engines/eevee/eevee_shader.cc
index f802303036f..d1d6e50d5d8 100644
--- a/source/blender/draw/engines/eevee/eevee_shader.cc
+++ b/source/blender/draw/engines/eevee/eevee_shader.cc
@@ -50,7 +50,9 @@ extern char datatoc_eevee_cubemap_lib_glsl[];
extern char datatoc_eevee_culling_debug_frag_glsl[];
extern char datatoc_eevee_culling_iter_lib_glsl[];
extern char datatoc_eevee_culling_lib_glsl[];
-extern char datatoc_eevee_culling_light_frag_glsl[];
+extern char datatoc_eevee_culling_select_comp_glsl[];
+extern char datatoc_eevee_culling_sort_comp_glsl[];
+extern char datatoc_eevee_culling_tile_comp_glsl[];
extern char datatoc_eevee_deferred_direct_frag_glsl[];
extern char datatoc_eevee_deferred_holdout_frag_glsl[];
extern char datatoc_eevee_deferred_transparent_frag_glsl[];
@@ -236,7 +238,9 @@ ShaderModule::ShaderModule()
#define SHADER_FULLSCREEN(enum_, frag_) SHADER_FULLSCREEN_DEFINES(enum_, frag_, nullptr)
SHADER_FULLSCREEN(CULLING_DEBUG, eevee_culling_debug_frag);
- SHADER_FULLSCREEN(CULLING_LIGHT, eevee_culling_light_frag);
+ SHADER_COMPUTE(CULLING_SELECT, eevee_culling_select_comp, nullptr);
+ SHADER_COMPUTE(CULLING_SORT, eevee_culling_sort_comp, nullptr);
+ SHADER_COMPUTE(CULLING_TILE, eevee_culling_tile_comp, nullptr);
SHADER_FULLSCREEN(FILM_FILTER, eevee_film_filter_frag);
SHADER_FULLSCREEN(FILM_RESOLVE, eevee_film_resolve_frag);
SHADER_FULLSCREEN(FILM_RESOLVE_DEPTH, eevee_film_resolve_depth_frag);
diff --git a/source/blender/draw/engines/eevee/eevee_shader.hh b/source/blender/draw/engines/eevee/eevee_shader.hh
index f9d4fe2785d..e42e49e35c3 100644
--- a/source/blender/draw/engines/eevee/eevee_shader.hh
+++ b/source/blender/draw/engines/eevee/eevee_shader.hh
@@ -40,7 +40,9 @@ namespace blender::eevee {
/* Keep alphabetical order and clean prefix. */
enum eShaderType {
CULLING_DEBUG = 0,
- CULLING_LIGHT,
+ CULLING_SELECT,
+ CULLING_SORT,
+ CULLING_TILE,
DEFERRED_EVAL_DIRECT,
DEFERRED_EVAL_HOLDOUT,
diff --git a/source/blender/draw/engines/eevee/eevee_shader_shared.hh b/source/blender/draw/engines/eevee/eevee_shader_shared.hh
index 4675d9cc882..6801d4cbd59 100644
--- a/source/blender/draw/engines/eevee/eevee_shader_shared.hh
+++ b/source/blender/draw/engines/eevee/eevee_shader_shared.hh
@@ -178,6 +178,52 @@ BLI_STATIC_ASSERT_ALIGN(CameraData, 16)
/** \name Film
* \{ */
+enum eDebugMode : uint32_t {
+ /* TODO(fclem) Rename shadow cases. */
+ SHADOW_DEBUG_NONE = 0u,
+ /**
+ * Gradient showing light evaluation hotspots.
+ */
+ DEBUG_LIGHT_CULLING = 4u,
+ /**
+ * Tilemaps to screen. Is also present in other modes.
+ * - Black pixels, no pages allocated.
+ * - Green pixels, pages cached.
+ * - Red pixels, pages allocated.
+ */
+ SHADOW_DEBUG_TILEMAPS = 5u,
+ /**
+ * Random color per pages. Validates page density allocation and sampling.
+ */
+ SHADOW_DEBUG_PAGES = 6u,
+ /**
+ * Outputs random color per tilemap (or tilemap level). Validates tilemaps coverage.
+ * Black means not covered by any tilemaps LOD of the shadow.
+ */
+ SHADOW_DEBUG_LOD = 7u,
+ /**
+ * Outputs white pixels for pages allocated and black pixels for unused pages.
+ * This needs SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED defined in order to work.
+ */
+ SHADOW_DEBUG_PAGE_ALLOCATION = 8u,
+ /**
+ * Outputs the tilemap atlas. Default tilemap is too big for the usual screen resolution.
+ * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option.
+ */
+ SHADOW_DEBUG_TILE_ALLOCATION = 9u,
+ /**
+ * Visualize linear depth stored in the atlas regions of the active light.
+ * This way, one can check if the rendering, the copying and the shadow sampling functions work.
+ */
+ SHADOW_DEBUG_SHADOW_DEPTH = 10u
+};
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Film
+ * \{ */
+
enum eFilmDataType : uint32_t {
/** Color is accumulated using the pixel filter. No negative values. */
FILM_DATA_COLOR = 0u,
@@ -341,34 +387,44 @@ BLI_STATIC_ASSERT_ALIGN(MotionBlurData, 16)
/** \name Cullings
* \{ */
-/* Number of items in a culling batch. Needs to be Power of 2. */
+/* TODO(fclem) Rename this. Only used by probes now. */
#define CULLING_ITEM_BATCH 128
+/* Number of items we can cull. Limited by how we store CullingZBin. */
+#define CULLING_MAX_ITEM 65536
+/* Number of items in a culling batch. Needs to be Power of 2. Must be <= 65536. */
+/* Current limiting factor is the sorting phase, which is single pass and only sorts within a
+ * threadgroup whose maximum size is 1024. */
+#define CULLING_BATCH_SIZE 1024
/* Maximum number of 32 bit uint stored per tile. */
-#define CULLING_MAX_WORD ((CULLING_ITEM_BATCH + 1) / 32)
-/* TODO(fclem) Support more than 4 words using layered texture for culling result. */
-#if CULLING_MAX_WORD > 4
-# error "CULLING_MAX_WORD is greater than supported maximum."
-#endif
-/* Fine grained subdivision in the Z direction. */
-#define CULLING_ZBIN_COUNT 4088
+#define CULLING_MAX_WORD (CULLING_BATCH_SIZE / 32)
+/* Fine grained subdivision in the Z direction (Must be multiple of CULLING_BATCH_SIZE). */
+#define CULLING_ZBIN_COUNT 4096
struct CullingData {
- /* Linearly distributed z-bins with encoded uint16_t min and max index. */
- /* NOTE: due to alignment restrictions of uint arrays, use uvec4. */
- uvec4 zbins[CULLING_ZBIN_COUNT / 4];
- /* Extent of one square tile in pixels. */
- int tile_size;
- /* Valid item count in the data array. */
- uint items_count;
- /* Scale and bias applied to linear Z to get zbin. */
+ /** Scale applied to tile pixel coordinates to get target UV coordinate. */
+ vec2 tile_to_uv_fac;
+ /** Scale and bias applied to linear Z to get zbin. */
float zbin_scale;
float zbin_bias;
- /* Scale applied to tile pixel coordinates to get target UV coordinate. */
- vec2 tile_to_uv_fac;
- vec2 _pad0;
+ /** Valid item count in the source data array. */
+ uint items_count;
+ /** Number of items that pass the first culling test. */
+ uint visible_count;
+ /** Will disable specular during light data copy. */
+ bool enable_specular;
+ /** Extent of one square tile in pixels. */
+ uint tile_size;
+ /** Number of tiles on the X/Y axis. */
+ uint tile_x_len;
+ uint tile_y_len;
+ /** Number of words per tile. Depends on the maximum number of lights. */
+ uint tile_word_len;
+ int _pad0;
};
BLI_STATIC_ASSERT_ALIGN(CullingData, 16)
-BLI_STATIC_ASSERT_SIZE(CullingData, UBO_MIN_MAX_SUPPORTED_SIZE)
+
+#define CullingZBin uint
+#define CullingWord uint
static inline int culling_z_to_zbin(CullingData data, float z)
{
@@ -542,41 +598,6 @@ struct ShadowTileMapData {
};
BLI_STATIC_ASSERT_ALIGN(ShadowTileMapData, 16)
-enum eShadowDebug : uint32_t {
- SHADOW_DEBUG_NONE = 0u,
- /**
- * Tilemaps to screen. Is also present in other modes.
- * - Black pixels, no pages allocated.
- * - Green pixels, pages cached.
- * - Red pixels, pages allocated.
- */
- SHADOW_DEBUG_TILEMAPS = 1u,
- /**
- * Random color per pages. Validates page density allocation and sampling.
- */
- SHADOW_DEBUG_PAGES = 2u,
- /**
- * Outputs random color per tilemap (or tilemap level). Validates tilemaps coverage.
- * Black means not covered by any tilemaps LOD of the shadow.
- */
- SHADOW_DEBUG_LOD = 3u,
- /**
- * Outputs white pixels for pages allocated and black pixels for unused pages.
- * This needs SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED defined in order to work.
- */
- SHADOW_DEBUG_PAGE_ALLOCATION = 4u,
- /**
- * Outputs the tilemap atlas. Default tilemap is too big for the usual screen resolution.
- * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option.
- */
- SHADOW_DEBUG_TILE_ALLOCATION = 5u,
- /**
- * Visualize linear depth stored in the atlas regions of the active light.
- * This way, one can check if the rendering, the copying and the shadow sampling functions works.
- */
- SHADOW_DEBUG_SHADOW_DEPTH = 6u
-};
-
/**
* Shadow data for debugging the active light shadow.
*/
@@ -584,7 +605,7 @@ struct ShadowDebugData {
LightData light;
ShadowData shadow;
vec3 camera_position;
- eShadowDebug type;
+ eDebugMode type;
int tilemap_data_index;
int _pad1;
int _pad2;
@@ -869,19 +890,23 @@ vec4 utility_tx_sample(vec2 uv, float layer);
#ifdef __cplusplus
using CameraDataBuf = StructBuffer<CameraData>;
using CubemapDataBuf = StructArrayBuffer<CubemapData, CULLING_ITEM_BATCH>;
-using CullingDataBuf = StructBuffer<CullingData>;
+using CullingDataBuf = StorageBuffer<CullingData>;
+using CullingKeyBuf = StorageArrayBuffer<uint, CULLING_BATCH_SIZE, true>;
+using CullingLightBuf = StorageArrayBuffer<LightData, CULLING_BATCH_SIZE, true>;
+using CullingTileBuf = StorageArrayBuffer<uint, 16 * 16 * CULLING_MAX_WORD, true>;
+using CullingZbinBuf = StorageArrayBuffer<uint, CULLING_ZBIN_COUNT, true>;
using DepthOfFieldDataBuf = StructBuffer<DepthOfFieldData>;
using GridDataBuf = StructArrayBuffer<GridData, GRID_MAX>;
using HiZDataBuf = StructBuffer<HiZData>;
-using LightDataBuf = StructArrayBuffer<LightData, CULLING_ITEM_BATCH>;
+using LightDataBuf = StorageArrayBuffer<LightData, CULLING_BATCH_SIZE>;
using LightProbeFilterDataBuf = StructBuffer<LightProbeFilterData>;
using LightProbeInfoDataBuf = StructBuffer<LightProbeInfoData>;
using RaytraceBufferDataBuf = StructBuffer<RaytraceBufferData>;
using RaytraceDataBuf = StructBuffer<RaytraceData>;
-using ShadowDataBuf = StructArrayBuffer<ShadowData, CULLING_ITEM_BATCH>;
-using ShadowTileMapDataBuf = StorageArrayBuffer<ShadowTileMapData, SHADOW_MAX_TILEMAP>;
-using ShadowPageHeapBuf = StorageArrayBuffer<ShadowPagePacked, SHADOW_MAX_PAGE, true>;
+using ShadowDataBuf = StorageArrayBuffer<ShadowData, CULLING_BATCH_SIZE>;
using ShadowDebugDataBuf = StructBuffer<ShadowDebugData>;
+using ShadowPageHeapBuf = StorageArrayBuffer<ShadowPagePacked, SHADOW_MAX_PAGE, true>;
+using ShadowTileMapDataBuf = StorageArrayBuffer<ShadowTileMapData, SHADOW_MAX_TILEMAP>;
using SubsurfaceDataBuf = StructBuffer<SubsurfaceData>;
using VelocityObjectBuf = StructBuffer<VelocityObjectData>;
diff --git a/source/blender/draw/engines/eevee/eevee_shading.cc b/source/blender/draw/engines/eevee/eevee_shading.cc
index 37501b561c8..3d802299a05 100644
--- a/source/blender/draw/engines/eevee/eevee_shading.cc
+++ b/source/blender/draw/engines/eevee/eevee_shading.cc
@@ -96,23 +96,17 @@ DRWShadingGroup *ForwardPass::material_opaque_add(::Material *blender_mat, GPUMa
{
DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? opaque_culled_ps_ : opaque_ps_;
LightModule &lights = inst_.lights;
- ShadowModule &shadows = inst_.shadows;
LightProbeModule &lightprobes = inst_.lightprobes;
eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get());
DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get());
DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get());
DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
- DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get());
- DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get());
/* TODO(fclem): Make this only needed if material uses it ... somehow. */
if (true) {
DRW_shgroup_uniform_texture_ref(
@@ -143,23 +137,17 @@ DRWShadingGroup *ForwardPass::material_transparent_add(::Material *blender_mat,
GPUMaterial *gpumat)
{
LightModule &lights = inst_.lights;
- ShadowModule &shadows = inst_.shadows;
LightProbeModule &lightprobes = inst_.lightprobes;
eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get());
DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get());
DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get());
DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
- DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get());
- DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get());
/* TODO(fclem): Make this only needed if material uses it ... somehow. */
if (true) {
DRW_shgroup_uniform_texture_ref(
@@ -224,9 +212,6 @@ void ForwardPass::render(GBuffer &gbuffer, HiZBuffer &hiz, GPUFrameBuffer *view_
GPU_framebuffer_bind(view_fb);
}
- /* Only one batch of light is supported. */
- inst_.lights.bind_batch(0);
-
DRW_draw_pass(prepass_ps_);
DRW_draw_pass(opaque_ps_);
@@ -305,10 +290,7 @@ void DeferredLayer::volume_add(Object *ob)
GPUShader *sh = inst_.shaders.static_shader_get(DEFERRED_VOLUME);
DRWShadingGroup *grp = DRW_shgroup_create(sh, volume_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_texture_ref(grp, "depth_max_tx", &deferred_pass.input_depth_behind_tx_);
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
DRW_shgroup_stencil_set(grp, CLOSURE_VOLUME | CLOSURE_TRANSPARENCY, 0xFF, 0xFF);
@@ -371,12 +353,8 @@ void DeferredLayer::render(GBuffer &gbuffer,
// gbuffer.copy_depth_behind();
// deferred_pass.input_depth_behind_tx_ = gbuffer.depth_behind_tx;
- for (auto index : inst_.lights.index_range()) {
- inst_.lights.bind_batch(index);
-
- gbuffer.bind_volume();
- DRW_draw_pass(volume_ps_);
- }
+ gbuffer.bind_volume();
+ DRW_draw_pass(volume_ps_);
}
if (use_holdout) {
@@ -404,22 +382,16 @@ void DeferredLayer::render(GBuffer &gbuffer,
rt_buffer.resolve(CLOSURE_REFRACTION, gbuffer);
}
- for (auto index : inst_.lights.index_range()) {
- inst_.lights.bind_batch(index);
-
- if (!no_volumes) {
- /* TODO(fclem) volume fb. */
- GPU_framebuffer_bind(view_fb);
- DRW_draw_pass(deferred_pass.eval_volume_homogeneous_ps_);
- }
-
- if (!no_surfaces) {
- gbuffer.bind_radiance();
- DRW_draw_pass(deferred_pass.eval_direct_ps_);
- }
+ if (!no_volumes) {
+ /* TODO(fclem) volume fb. */
+ GPU_framebuffer_bind(view_fb);
+ DRW_draw_pass(deferred_pass.eval_volume_homogeneous_ps_);
}
if (!no_surfaces) {
+ gbuffer.bind_radiance();
+ DRW_draw_pass(deferred_pass.eval_direct_ps_);
+
if (use_diffuse) {
rt_buffer.trace(CLOSURE_DIFFUSE, gbuffer, hiz_front, hiz_front);
rt_buffer.denoise(CLOSURE_DIFFUSE);
@@ -469,7 +441,6 @@ void DeferredPass::sync(void)
volumetric_layer_.sync();
LightModule &lights = inst_.lights;
- ShadowModule &shadows = inst_.shadows;
LightProbeModule &lightprobes = inst_.lightprobes;
eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
@@ -479,19 +450,14 @@ void DeferredPass::sync(void)
eval_direct_ps_ = DRW_pass_create("DeferredDirect", state);
GPUShader *sh = inst_.shaders.static_shader_get(DEFERRED_EVAL_DIRECT);
DRWShadingGroup *grp = DRW_shgroup_create(sh, eval_direct_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get());
DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get());
DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get());
DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
- DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get());
- DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get());
DRW_shgroup_uniform_texture_ref_ex(
grp, "emission_data_tx", &input_emission_data_tx_, no_interp);
DRW_shgroup_uniform_texture_ref_ex(
@@ -535,13 +501,8 @@ void DeferredPass::sync(void)
eval_volume_homogeneous_ps_ = DRW_pass_create("DeferredVolume", state);
GPUShader *sh = inst_.shaders.static_shader_get(DEFERRED_EVAL_VOLUME);
DRWShadingGroup *grp = DRW_shgroup_create(sh, eval_volume_homogeneous_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
- DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get());
- DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get());
DRW_shgroup_uniform_texture_ref_ex(
grp, "transparency_data_tx", &input_transparency_data_tx_, no_interp);
DRW_shgroup_uniform_texture_ref_ex(grp, "volume_data_tx", &input_volume_data_tx_, no_interp);
diff --git a/source/blender/draw/engines/eevee/eevee_shading.hh b/source/blender/draw/engines/eevee/eevee_shading.hh
index 8adbecf3dd5..b84739a7220 100644
--- a/source/blender/draw/engines/eevee/eevee_shading.hh
+++ b/source/blender/draw/engines/eevee/eevee_shading.hh
@@ -30,7 +30,6 @@
#include "eevee_lut.h"
-#include "eevee_culling.hh"
#include "eevee_gbuffer.hh"
#include "eevee_raytracing.hh"
#include "eevee_shadow.hh"
@@ -280,38 +279,24 @@ class UtilityTexture : public Texture {
*/
class ShadingPasses {
public:
- CullingLightPass light_culling;
-
BackgroundPass background;
DeferredPass deferred;
ForwardPass forward;
ShadowPass shadow;
VelocityPass velocity;
- CullingDebugPass debug_culling;
-
UtilityTexture utility_tx;
public:
ShadingPasses(Instance &inst)
- : light_culling(inst),
- background(inst),
- deferred(inst),
- forward(inst),
- shadow(inst),
- velocity(inst),
- debug_culling(inst){};
+ : background(inst), deferred(inst), forward(inst), shadow(inst), velocity(inst){};
void sync()
{
- light_culling.sync();
-
deferred.sync();
forward.sync();
shadow.sync();
velocity.sync();
-
- debug_culling.sync();
}
DRWShadingGroup *material_add(::Material *blender_mat,
diff --git a/source/blender/draw/engines/eevee/eevee_shadow.cc b/source/blender/draw/engines/eevee/eevee_shadow.cc
index 3c132bd9ac7..f869d9e643d 100644
--- a/source/blender/draw/engines/eevee/eevee_shadow.cc
+++ b/source/blender/draw/engines/eevee/eevee_shadow.cc
@@ -554,32 +554,16 @@ void ShadowModule::init(void)
inst_.sampling.reset();
}
- switch (G.debug_value) {
- case 4:
- debug_data_.type = SHADOW_DEBUG_TILEMAPS;
- break;
- case 5:
- debug_data_.type = SHADOW_DEBUG_LOD;
- break;
- case 6:
- debug_data_.type = SHADOW_DEBUG_PAGE_ALLOCATION;
#ifndef SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED
- BLI_assert_msg(0,
- "Error: EEVEE: SHADOW_DEBUG_PAGE_ALLOCATION used but "
- "SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED "
- "is not defined");
-#endif
- break;
- case 7:
- debug_data_.type = SHADOW_DEBUG_TILE_ALLOCATION;
- break;
- case 8:
- debug_data_.type = SHADOW_DEBUG_SHADOW_DEPTH;
- break;
- default:
- debug_data_.type = SHADOW_DEBUG_NONE;
- break;
+ if (inst_.debug_mode == SHADOW_DEBUG_PAGE_ALLOCATION) {
+ BLI_assert_msg(0,
+ "Error: EEVEE: SHADOW_DEBUG_PAGE_ALLOCATION used but "
+ "SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED "
+ "is not defined");
}
+#endif
+
+ debug_data_.type = inst_.debug_mode;
memset(views_, 0, sizeof(views_));
}
diff --git a/source/blender/draw/engines/eevee/eevee_view.cc b/source/blender/draw/engines/eevee/eevee_view.cc
index 7b4516dd727..e868bad0259 100644
--- a/source/blender/draw/engines/eevee/eevee_view.cc
+++ b/source/blender/draw/engines/eevee/eevee_view.cc
@@ -109,9 +109,6 @@ void ShadingView::sync(ivec2 render_extent_)
view_fb_.ensure(GPU_ATTACHMENT_TEXTURE(depth_tx_), GPU_ATTACHMENT_TEXTURE(combined_tx_));
- /* Reuse postfx_tx_. */
- debug_fb_.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(postfx_tx_));
-
gbuffer_.sync(depth_tx_, combined_tx_, owner);
}
}
@@ -150,6 +147,7 @@ void ShadingView::render(void)
inst_.shading_passes.forward.render(gbuffer_, hiz_front_, view_fb_);
+ inst_.lights.debug_draw(view_fb_, hiz_front_);
inst_.shadows.debug_draw(view_fb_, hiz_front_);
velocity_.render(depth_tx_);
@@ -160,15 +158,7 @@ void ShadingView::render(void)
GPUTexture *final_radiance_tx = render_post(combined_tx_);
- /* TODO(fclem) Have a special renderpass for this. */
- if (G.debug_value == 3) {
- GPU_framebuffer_bind(debug_fb_);
- inst_.shading_passes.debug_culling.render(depth_tx_);
-
- // inst_.render_passes.debug_culling->accumulate(debug_tx_, sub_view_);
- inst_.render_passes.combined->accumulate(postfx_tx_, sub_view_);
- }
- else if (inst_.render_passes.combined) {
+ if (inst_.render_passes.combined) {
inst_.render_passes.combined->accumulate(final_radiance_tx, sub_view_);
}
diff --git a/source/blender/draw/engines/eevee/eevee_view.hh b/source/blender/draw/engines/eevee/eevee_view.hh
index 44512c69ef4..4f1aae0d825 100644
--- a/source/blender/draw/engines/eevee/eevee_view.hh
+++ b/source/blender/draw/engines/eevee/eevee_view.hh
@@ -78,7 +78,6 @@ class ShadingView {
/** Owned resources. */
eevee::Framebuffer view_fb_;
- eevee::Framebuffer debug_fb_;
/** Draw resources. Not owned. */
GPUTexture *combined_tx_ = nullptr;
GPUTexture *depth_tx_ = nullptr;
diff --git a/source/blender/draw/engines/eevee/eevee_wrapper.hh b/source/blender/draw/engines/eevee/eevee_wrapper.hh
index 96007f0dd6d..d5daa247d1c 100644
--- a/source/blender/draw/engines/eevee/eevee_wrapper.hh
+++ b/source/blender/draw/engines/eevee/eevee_wrapper.hh
@@ -139,6 +139,8 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
T *data_ = nullptr;
/* Use vertex buffer for now. Until there is a complete GPUStorageBuf implementation. */
GPUVertBuf *ssbo_;
+ /* Currently allocated size, in number of elements of type T (not bytes). */
+ int64_t size;
#ifdef DEBUG
const char *name_ = typeid(T).name();
@@ -149,22 +151,36 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
public:
StorageArrayBuffer()
{
- BLI_assert(((sizeof(T) * len) % 16) == 0);
+ init(len);
+ }
+ ~StorageArrayBuffer()
+ {
+ GPU_vertbuf_discard(ssbo_);
+ }
+
+ void init(int64_t new_size)
+ {
+ size = new_size;
GPUVertFormat format = {0};
GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
GPUUsageType usage = device_only ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_DYNAMIC;
ssbo_ = GPU_vertbuf_create_with_format_ex(&format, usage);
- GPU_vertbuf_data_alloc(ssbo_, (sizeof(T) / 4) * len);
+ GPU_vertbuf_data_alloc(ssbo_, divide_ceil_u(sizeof(T) * size, 4));
if (!device_only) {
data_ = (T *)GPU_vertbuf_get_data(ssbo_);
GPU_vertbuf_use(ssbo_);
}
}
- ~StorageArrayBuffer()
+
+ void resize(int64_t new_size)
{
- GPU_vertbuf_discard(ssbo_);
+ BLI_assert(new_size > 0);
+ if (new_size != size) {
+ GPU_vertbuf_discard(ssbo_);
+ this->init(new_size);
+ }
}
void push_update(void)
@@ -179,6 +195,11 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
{
return ssbo_;
}
+ /* To be able to use it with DRW_shgroup_*_ref(). */
+ GPUVertBuf **operator&()
+ {
+ return &ssbo_;
+ }
/**
* Get the value at the given index. This invokes undefined behavior when the index is out of
@@ -188,7 +209,7 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
{
BLI_assert(!device_only);
BLI_assert(index >= 0);
- BLI_assert(index < len);
+ BLI_assert(index < size);
return data_[index];
}
@@ -196,7 +217,7 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
{
BLI_assert(!device_only);
BLI_assert(index >= 0);
- BLI_assert(index < len);
+ BLI_assert(index < size);
return data_[index];
}
@@ -247,6 +268,68 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
};
/** Simpler version where data is not an array. */
+template<
+ /** Type of the values stored in this storage buffer. */
+ typename T,
+ /** True if created on device and no host memory is allocated. */
+ bool device_only = false>
+class StorageBuffer : public T, NonMovable, NonCopyable {
+ private:
+ /* Use vertex buffer for now. Until there is a complete GPUStorageBuf implementation. */
+ GPUVertBuf *ssbo_;
+
+#ifdef DEBUG
+ const char *name_ = typeid(T).name();
+#else
+ constexpr static const char *name_ = "StorageBuffer";
+#endif
+
+ public:
+ StorageBuffer()
+ {
+ /* The vertex format holds a single float: allocate enough 4 byte vertices to store one T. */
+ GPUVertFormat format = {0};
+ GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+
+ GPUUsageType usage = device_only ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_DYNAMIC;
+ ssbo_ = GPU_vertbuf_create_with_format_ex(&format, usage);
+ GPU_vertbuf_data_alloc(ssbo_, divide_ceil_u(sizeof(T), 4));
+ if (!device_only) {
+ GPU_vertbuf_use(ssbo_);
+ }
+ }
+ ~StorageBuffer()
+ {
+ GPU_vertbuf_discard(ssbo_);
+ }
+
+ /* Copy the host-side T (base of this object) into the vertex buffer data.
+  * Only valid when `device_only` is false (asserted). */
+ void push_update(void)
+ {
+ BLI_assert(!device_only);
+ /* TODO(fclem): Avoid a full copy. */
+ T *data = (T *)GPU_vertbuf_get_data(ssbo_);
+ *data = *this;
+
+ GPU_vertbuf_use(ssbo_);
+ }
+
+ operator GPUVertBuf *() const
+ {
+ return ssbo_;
+ }
+ /* To be able to use it with DRW_shgroup_*_ref(). */
+ GPUVertBuf **operator&()
+ {
+ return &ssbo_;
+ }
+
+ /* Assign the host-side data from a plain T.
+  * Returns the injected class name: `StorageBuffer<T>` would name the `device_only = false`
+  * type and fail to compile when this operator is used on a `device_only = true` buffer. */
+ StorageBuffer &operator=(const T &other)
+ {
+ *static_cast<T *>(this) = other;
+ return *this;
+ }
+};
+
+/** Simpler version where data is not an array. */
template<typename T> class StructBuffer : public T, NonMovable, NonCopyable {
private:
GPUUniformBuf *ubo_;
@@ -366,6 +449,42 @@ class Texture {
}
/* Return true is a texture has been created. */
+ bool ensure(const char *name,
+ int w,
+ int h,
+ int d,
+ int mips,
+ eGPUTextureFormat format,
+ bool layered = false)
+ {
+
+ /* TODO(fclem) In the future, we need to check if mip_count did not change.
+ * For now it's ok as we always define all mip level.*/
+ if (tx_) {
+ int3 size = this->size();
+ BLI_assert(GPU_texture_array(tx_) == layered);
+ if (size != int3(w, h, d) || GPU_texture_format(tx_) != format) {
+ GPU_TEXTURE_FREE_SAFE(tx_);
+ }
+ }
+ if (tx_ == nullptr) {
+ if (layered) {
+ tx_ = GPU_texture_create_2d_array(name, w, h, d, mips, format, nullptr);
+ }
+ else {
+ tx_ = GPU_texture_create_3d(name, w, h, d, mips, format, GPU_DATA_FLOAT, nullptr);
+ }
+ if (mips > 1) {
+ /* TODO(fclem) Remove once we have immutable storage or when mips are
+ * generated on creation. */
+ GPU_texture_generate_mipmap(tx_);
+ }
+ return true;
+ }
+ return false;
+ }
+
+ /* Return true is a texture has been created. */
bool ensure(const char *name, int w, int h, int mips, eGPUTextureFormat format)
{
/* TODO(fclem) In the future, we need to check if mip_count did not change.
@@ -474,6 +593,10 @@ class Texture {
return &tx_;
}
+ bool is_valid(void) const
+ {
+ return !!tx_;
+ }
int width(void) const
{
return GPU_texture_width(tx_);
@@ -482,6 +605,12 @@ class Texture {
{
return GPU_texture_height(tx_);
}
+ int3 size(void) const
+ {
+ int3 size;
+ GPU_texture_get_mipmap_size(tx_, 0, size);
+ return size;
+ }
};
class Framebuffer {
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
index f559788145d..33734324445 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
@@ -2,25 +2,34 @@
/**
* Debug Shader outputing a gradient of orange - white - blue to mark culling hotspots.
* Green pixels are error pixels that are missing lights from the culling pass (i.e: when culling
- * pass is not conservative enough). This shader will only work on the last light batch so remove
- * some lights from the scene you are debugging to have below CULLING_ITEM_BATCH lights.
+ * pass is not conservative enough).
*/
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
{
- CullingData culling;
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
+{
+ CullingData light_culling;
+};
+
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
};
-uniform usampler2D item_culling_tx;
uniform sampler2D depth_tx;
in vec4 uvcoordsvar;
@@ -29,14 +38,14 @@ layout(location = 0) out vec4 out_debug_color;
void main(void)
{
- float depth = textureLod(depth_tx, uvcoordsvar.xy, 0.0).r;
+ float depth = texelFetch(depth_tx, ivec2(gl_FragCoord.xy), 0).r;
float vP_z = get_view_z_from_depth(depth);
vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth);
float lights_count = 0.0;
uint lights_cull = 0u;
- ITEM_FOREACH_BEGIN (culling, item_culling_tx, vP_z, l_idx) {
+ ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) {
LightData light = lights[l_idx];
lights_cull |= 1u << l_idx;
lights_count += 1.0;
@@ -44,7 +53,7 @@ void main(void)
ITEM_FOREACH_END
uint lights_nocull = 0u;
- ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) {
+ ITEM_FOREACH_BEGIN_NO_CULL (light_culling, l_idx) {
LightData light = lights[l_idx];
if (distance(light._position, P) < light.influence_radius_max) {
lights_nocull |= 1u << l_idx;
@@ -57,6 +66,6 @@ void main(void)
out_debug_color = vec4(0.0, 1.0, 0.0, 1.0);
}
else {
- out_debug_color = vec4(heatmap_gradient(lights_count / 16.0), 1.0);
+ out_debug_color = vec4(heatmap_gradient(lights_count / 4.0), 1.0);
}
} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
index a0ea075db22..640ffb4a6a1 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
@@ -8,11 +8,14 @@ uint bit_field_mask(uint bit_width, uint bit_min)
return ~mask << bit_min;
}
-uint zbin_mask(int word_index, int zbin_min, int zbin_max)
+uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max)
{
- int local_min = clamp(zbin_min - word_index * 32, 0, 31);
- int mask_width = clamp(zbin_max - zbin_min + 1, 0, 32);
- return bit_field_mask(uint(mask_width), uint(local_min));
+ uint word_start = word_index * 32u;
+ uint word_end = word_start + 31u;
+ uint local_min = max(zbin_min, word_start);
+ uint local_max = min(zbin_max, word_end);
+ uint mask_width = local_max - local_min + 1;
+ return bit_field_mask(mask_width, local_min);
}
/* Waiting to implement extensions support. We need:
@@ -28,39 +31,39 @@ uint zbin_mask(int word_index, int zbin_min, int zbin_max)
# define subgroupBroadcastFirst(a) a
#endif
-#define ITEM_FOREACH_BEGIN(_culling, _tiles_tx, _linearz, _item_index) \
+#define ITEM_FOREACH_BEGIN(_culling, _zbins, _words, _linearz, _item_index) \
{ \
- int zbin_index = culling_z_to_zbin(_culling, _linearz); \
- zbin_index = min(max(zbin_index, 0), int(CULLING_ZBIN_COUNT - 1)); \
- uint zbin_data = _culling.zbins[zbin_index / 4][zbin_index % 4]; \
- int min_index = int(zbin_data & uint(CULLING_ITEM_BATCH - 1)); \
- int max_index = int((zbin_data >> 16u) & uint(CULLING_ITEM_BATCH - 1)); \
- /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
- min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
- max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
- int word_min = 0; \
- int word_max = max(0, CULLING_MAX_WORD - 1); \
- word_min = max(min_index / 32, word_min); \
- word_max = min(max_index / 32, word_max); \
- for (int word_index = word_min; word_index <= word_max; word_index++) { \
- /* TODO(fclem) Support bigger max_word with larger texture. */ \
- ivec2 texel = ivec2(gl_FragCoord.xy) / _culling.tile_size; \
- uint word = texelFetch(_tiles_tx, texel, 0)[word_index]; \
- uint mask = zbin_mask(word_index, min_index, max_index); \
- word &= mask; \
+ uint batch_count = divide_ceil_u(_culling.visible_count, CULLING_BATCH_SIZE); \
+ uvec2 tile_co = uvec2(gl_FragCoord.xy) / _culling.tile_size; \
+ uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \
+ _culling.tile_word_len; \
+ for (uint batch = 0; batch < batch_count; batch++) { \
+ int zbin_index = culling_z_to_zbin(_culling, _linearz); \
+ zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \
+ uint zbin_data = _zbins[zbin_index + batch * CULLING_ZBIN_COUNT]; \
+ uint min_index = zbin_data & 0xFFFFu; \
+ uint max_index = zbin_data >> 16u; \
/* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
- word = subgroupBroadcastFirst(subgroupOr(word)); \
- /* TODO(fclem) Replace by findLSB on supported hardware. */ \
- for (uint i = 0u; word != 0u; word = word >> 1u, i++) { \
- if ((word & 1u) != 0u) { \
- int _item_index = word_index * 32 + int(i);
+ min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
+ max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
+ uint word_min = min_index / 32u; \
+ uint word_max = max_index / 32u; \
+ for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \
+ uint word = _words[tile_word_offset + word_idx]; \
+ word &= zbin_mask(word_idx, min_index, max_index); \
+ /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
+ word = subgroupBroadcastFirst(subgroupOr(word)); \
+ while (word != 0u) { \
+ uint bit_index = uint(findLSB(word)); \
+ word &= ~1u << bit_index; \
+ uint _item_index = word_idx * 32u + bit_index;
/* No culling. Iterate over all items. */
#define ITEM_FOREACH_BEGIN_NO_CULL(_culling, _item_index) \
{ \
{ \
{ \
- for (uint _item_index = 0u; _item_index < _culling.items_count; _item_index++) {
+ for (uint _item_index = 0u; _item_index < _culling.visible_count; _item_index++) {
#define ITEM_FOREACH_END \
} \
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
index f128b89e864..27a39817140 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
@@ -7,11 +7,6 @@
/** \name Intersection Tests
* \{ */
-struct Sphere {
- vec3 position;
- float radius;
-};
-
struct Cone {
vec3 direction;
float angle_cos;
@@ -39,12 +34,12 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone)
* by Eric Zhang
* https://lxjk.github.io/2018/03/25/Improve-Tile-based-Light-Culling-with-Spherical-sliced-Cone.html
*/
- float sphere_distance = length(sphere.position);
+ float sphere_distance = length(sphere.center);
float sphere_sin = saturate(sphere.radius / sphere_distance);
float sphere_cos = sqrt(1.0 - sphere_sin * sphere_sin);
float cone_aperture_sin = sqrt(1.0 - cone.angle_cos * cone.angle_cos);
- float cone_sphere_center_cos = dot(sphere.position / sphere_distance, cone.direction);
+ float cone_sphere_center_cos = dot(sphere.center / sphere_distance, cone.direction);
/* cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B). */
float cone_sphere_angle_sum_cos = (sphere.radius > sphere_distance) ?
-1.0 :
@@ -58,22 +53,22 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone)
bool culling_sphere_cylinder_isect(Sphere sphere, Cylinder cylinder)
{
- float distance_squared = len_squared(sphere.position.xy - cylinder.center.xy);
+ float distance_squared = len_squared(sphere.center.xy - cylinder.center.xy);
return (distance_squared < sqr(cylinder.radius + sphere.radius));
}
bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum)
{
- if (dot(vec4(sphere.position, 1.0), frustum.planes[0]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[0]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[1]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[1]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[2]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[2]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[3]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[3]) > sphere.radius) {
return false;
}
return true;
@@ -82,7 +77,7 @@ bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum)
bool culling_sphere_tile_isect(Sphere sphere, CullingTile tile)
{
/* Culling in view space for precision and simplicity. */
- sphere.position = transform_point(ViewMatrix, sphere.position);
+ sphere.center = transform_point(ViewMatrix, sphere.center);
bool isect;
/* Test tile intersection using bounding cone or bounding cylinder.
* This has less false positive cases when the sphere is large. */
@@ -148,14 +143,15 @@ vec2 tile_to_ndc(CullingData culling, vec2 tile_co, vec2 offset)
return tile_co * culling.tile_to_uv_fac * 2.0 - 1.0;
}
-CullingTile culling_tile_get(CullingData culling)
+CullingTile culling_tile_get(CullingData culling, uvec2 tile_co)
{
+ vec2 ftile = vec2(tile_co);
/* Culling frustum corners for this tile. */
vec3 corners[8];
- corners[0].xy = corners[4].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, 0.5));
- corners[1].xy = corners[5].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, -0.5));
- corners[2].xy = corners[6].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, -0.5));
- corners[3].xy = corners[7].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, 0.5));
+ corners[0].xy = corners[4].xy = tile_to_ndc(culling, ftile, vec2(1, 1));
+ corners[1].xy = corners[5].xy = tile_to_ndc(culling, ftile, vec2(1, 0));
+ corners[2].xy = corners[6].xy = tile_to_ndc(culling, ftile, vec2(0, 0));
+ corners[3].xy = corners[7].xy = tile_to_ndc(culling, ftile, vec2(0, 1));
/* The corners depth only matter for precision. Use a mix of not so close to clip plane to
* avoid small float imprecision if near clip is low. */
corners[0].z = corners[1].z = corners[2].z = corners[3].z = -0.5;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl
deleted file mode 100644
index c81a94b35f3..00000000000
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl
+++ /dev/null
@@ -1,51 +0,0 @@
-
-/**
- * 2D Culling pass for lights.
- * We iterate over all items and check if they intersect with the tile frustum.
- */
-
-#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl)
-#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
-
-layout(std140) uniform lights_block
-{
- LightData lights[CULLING_ITEM_BATCH];
-};
-
-layout(std140) uniform lights_culling_block
-{
- CullingData culling;
-};
-
-in vec4 uvcoordsvar;
-
-layout(location = 0) out uvec4 out_items_bits;
-
-void main(void)
-{
- CullingTile tile = culling_tile_get(culling);
-
- out_items_bits = uvec4(0);
- ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) {
- LightData light = lights[l_idx];
-
- bool intersect_tile = true;
- switch (light.type) {
- case LIGHT_SPOT:
- /* TODO cone culling. */
- case LIGHT_RECT:
- case LIGHT_ELLIPSE:
- case LIGHT_POINT:
- Sphere sphere = Sphere(light._position, light.influence_radius_max);
- intersect_tile = culling_sphere_tile_isect(sphere, tile);
- break;
- default:
- break;
- }
-
- if (intersect_tile) {
- out_items_bits[l_idx / 32u] |= 1u << (l_idx % 32u);
- }
- }
- ITEM_FOREACH_END
-} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl
new file mode 100644
index 00000000000..138e54b8bae
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl
@@ -0,0 +1,57 @@
+
+/**
+ * Select the visible items inside the active view and put them inside the sorting buffer.
+ */
+
+#pragma BLENDER_REQUIRE(common_debug_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(common_intersection_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+
+layout(local_size_x = CULLING_ITEM_BATCH) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) restrict buffer key_buf
+{
+ uint keys[];
+};
+/* Test one light against the view and append its index to `keys` when the test passes. */
+void main()
+{
+ uint l_idx = gl_GlobalInvocationID.x; /* Index into `lights` handled by this invocation. */
+ if (l_idx >= culling.items_count) {
+ return;
+ }
+ /* Fetch the light and build the bounding sphere passed to intersect_view(). */
+ LightData light = lights[l_idx];
+
+ Sphere sphere;
+ switch (light.type) {
+ case LIGHT_SUN:
+ sphere = Sphere(cameraPos, ViewFar * 2.0); /* Centered on the camera, radius 2 * ViewFar. */
+ break;
+ case LIGHT_SPOT:
+ /* TODO cone culling. For now spots fall through and use the same sphere as the others. */
+ case LIGHT_RECT:
+ case LIGHT_ELLIPSE:
+ case LIGHT_POINT:
+ sphere = Sphere(light._position, light.influence_radius_max);
+ break;
+ } /* NOTE(review): no default case, `sphere` is left unset for any other light type. */
+ /* atomicAdd returns the counter value before the increment, used here as the output slot. */
+ if (intersect_view(sphere)) {
+ uint index = atomicAdd(culling.visible_count, 1);
+ keys[index] = l_idx;
+ }
+}
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl
new file mode 100644
index 00000000000..dfd2c80a45a
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl
@@ -0,0 +1,138 @@
+
+/**
+ * Sort the lights by their Z distance to the camera.
+ * Outputs ordered light buffer and associated zbins.
+ * We split the work in batches of CULLING_BATCH_SIZE and iterate to cover all zbins.
+ * One thread processes one Light entity.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+
+layout(local_size_x = CULLING_BATCH_SIZE) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) readonly restrict buffer key_buf
+{
+ uint keys[];
+};
+
+layout(std430, binding = 3) writeonly restrict buffer out_zbins_buf
+{
+ CullingZBin out_zbins[];
+};
+
+layout(std430, binding = 4) writeonly restrict buffer out_items_buf
+{
+ LightData out_lights[];
+};
+/* Rank the visible lights of one batch by z_dist, copy them to `out_lights`, then build its zbins. */
+void main()
+{
+ uint src_index = gl_GlobalInvocationID.x; /* Slot of `keys` handled by this invocation. */
+ bool valid_thread = true;
+
+ if (src_index >= culling.visible_count) {
+ /* Do not return because we use barriers later on (which need uniform control flow).
+ * Just process the same last item but avoid insertion. */
+ src_index = culling.visible_count - 1; /* NOTE(review): presumably wraps if the count is 0. */
+ valid_thread = false;
+ }
+ /* `keys` holds indices into `lights`. */
+ uint key = keys[src_index];
+ LightData light = lights[key];
+ /* Only the local copy is modified; it is what gets written to `out_lights` below. */
+ if (!culling.enable_specular) {
+ light.specular_power = 0.0;
+ }
+ /* `index` counts batch items with a larger z_dist, `contenders` those with an equal z_dist. */
+ int index = 0;
+ int contenders = 0;
+
+ /* TODO(fclem): Sun lights are polluting the zbins for no reason. Better bypass culling. */
+ vec3 lP = (light.type == LIGHT_SUN) ? cameraPos : light._position;
+ float radius = (light.type == LIGHT_SUN) ? ViewFar * 2.0 : light.influence_radius_max;
+ float z_dist = dot(cameraForward, lP) - dot(cameraForward, cameraPos);
+ /* z_min is the zbin of (z_dist + radius), z_max the zbin of (z_dist - radius), both clamped. */
+ int z_min = clamp(culling_z_to_zbin(culling, z_dist + radius), 0, CULLING_ZBIN_COUNT - 1);
+ int z_max = clamp(culling_z_to_zbin(culling, z_dist - radius), 0, CULLING_ZBIN_COUNT - 1);
+
+ if (!valid_thread) {
+ /* Do not register invalid threads. */
+ z_max = z_min - 1;
+ }
+
+ /* Fits the limit of 32KB. */
+ shared int zbin_max[CULLING_ZBIN_COUNT];
+ shared int zbin_min[CULLING_ZBIN_COUNT];
+ /* Compilers do not release shared memory from early declaration.
+ * So we are forced to reuse the same variables in another form. */
+#define z_dists zbin_max
+#define contender_table zbin_min
+
+ /**
+ * Find how many values are before the local value.
+ * This finds the first possible destination index.
+ */
+ z_dists[gl_LocalInvocationID.x] = floatBitsToInt(z_dist);
+ barrier();
+ /* Only compare against the slots of this batch that fall below the visible count. */
+ const uint i_start = gl_WorkGroupID.x * CULLING_BATCH_SIZE;
+ uint i_max = min(CULLING_BATCH_SIZE, culling.visible_count - i_start);
+ for (uint i = 0; i < i_max; i++) {
+ float ref = intBitsToFloat(z_dists[i]);
+ if (ref > z_dist) {
+ index++;
+ }
+ else if (ref == z_dist) {
+ contenders++;
+ }
+ }
+ /* Publish the tie count at the candidate index before the atomic decrements below. */
+ atomicExchange(contender_table[index], contenders);
+ barrier();
+
+ if (valid_thread) {
+ /**
+ * For each clashing index (where two lights have exactly the same z distances)
+ * we use an atomic counter to know how much to offset from the disputed index.
+ */
+ index += atomicAdd(contender_table[index], -1) - 1;
+ index += int(i_start); /* From batch-local to global destination index. */
+ out_lights[index] = light;
+ }
+ /* Each thread owns `iter` consecutive zbins for the reset and write-out loops below. */
+ const uint iter = uint(CULLING_ZBIN_COUNT / CULLING_BATCH_SIZE);
+ const uint zbin_local = gl_LocalInvocationID.x * iter;
+ const uint zbin_global = gl_WorkGroupID.x * CULLING_ZBIN_COUNT + zbin_local;
+ /* Reset the shared tables (previously used for ranking) to an empty range: min > max. */
+ for (uint i = 0u, l = zbin_local; i < iter; i++, l++) {
+ zbin_max[l] = 0x0000;
+ zbin_min[l] = 0xFFFF;
+ }
+ barrier();
+
+ /* Register to Z bins. */
+ for (int z = z_min; z <= z_max; z++) {
+ atomicMin(zbin_min[z], index);
+ atomicMax(zbin_max[z], index);
+ }
+ barrier();
+
+ /* Write result to zbins buffer. */
+ for (uint i = 0u, g = zbin_global, l = zbin_local; i < iter; i++, g++, l++) {
+ /* Pack min & max into 1 uint. */
+ out_zbins[g] = (uint(zbin_max[l]) << 16u) | uint(zbin_min[l]); /* Max high, min low. */
+ }
+}
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl
new file mode 100644
index 00000000000..913e094980e
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl
@@ -0,0 +1,73 @@
+
+/**
+ * 2D Culling pass for lights.
+ * We iterate over all items and check if they intersect with the tile frustum.
+ * Dispatch one thread per word.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
+
+layout(local_size_x = 1024) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) readonly restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) writeonly restrict buffer culling_tile_buf
+{
+ CullingWord culling_words[];
+};
+/* Build one 32-bit word of a tile's light mask: a bit is set when its light intersects the tile. */
+void main(void)
+{
+ uint word_idx = gl_GlobalInvocationID.x % culling.tile_word_len;
+ uint tile_idx = gl_GlobalInvocationID.x / culling.tile_word_len;
+ uvec2 tile_co = uvec2(tile_idx % culling.tile_x_len, tile_idx / culling.tile_x_len);
+ /* Invocations that map past the last tile row write nothing. */
+ if (tile_co.y >= culling.tile_y_len) {
+ return;
+ }
+
+ /* TODO(fclem): We could stop the tile at the HiZ depth. */
+ CullingTile tile = culling_tile_get(culling, tile_co);
+ /* This word covers lights [word_idx * 32, word_idx * 32 + 32), limited to the visible count. */
+ uint l_idx = word_idx * 32u;
+ uint l_end = min(l_idx + 32u, culling.visible_count);
+ uint word = 0u;
+
+ for (; l_idx < l_end; l_idx++) {
+ LightData light = lights[l_idx];
+
+ bool intersect_tile;
+ switch (light.type) {
+ case LIGHT_SUN:
+ intersect_tile = true; /* Suns are flagged in every tile. */
+ break;
+ case LIGHT_SPOT:
+ /* TODO cone culling. */
+ case LIGHT_RECT:
+ case LIGHT_ELLIPSE:
+ case LIGHT_POINT:
+ Sphere sphere = Sphere(light._position, light.influence_radius_max);
+ intersect_tile = culling_sphere_tile_isect(sphere, tile);
+ break;
+ } /* NOTE(review): no default case, `intersect_tile` is unset for any other light type. */
+
+ if (intersect_tile) {
+ word |= 1u << (l_idx & 0x1Fu); /* Bit position is the light index modulo 32. */
+ }
+ }
+ /* The flat invocation index is also the index of the word in `culling_words`. */
+ culling_words[gl_GlobalInvocationID.x] = word;
+} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
index 942f75961e9..14e38d6f1d6 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
@@ -17,19 +17,29 @@ layout(std140) uniform sampling_block
SamplingData sampling;
};
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
layout(std140) uniform grids_block
@@ -55,7 +65,6 @@ uniform sampler2D transmit_data_tx;
uniform sampler2D reflect_color_tx;
uniform sampler2D reflect_normal_tx;
uniform sampler1D sss_transmittance_tx;
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2D shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
index 677881abd71..068db3e78fd 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
@@ -15,24 +15,33 @@
#pragma BLENDER_REQUIRE(eevee_volume_eval_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
uniform sampler2D transparency_data_tx;
uniform usampler2D volume_data_tx;
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2DShadow shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
index 448e5b54886..d3d5f859174 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
@@ -4,9 +4,10 @@
* A prototype needs to be declared before main in order to use it.
*
* The resources expected to be defined are:
- * - light_culling
- * - lights_culling_tx
* - lights
+ * - lights_zbins
+ * - light_culling
+ * - lights_culling_words
* - shadows
* - shadow_atlas_tx
* - shadow_tilemaps_tx
@@ -33,7 +34,7 @@ void light_eval(ClosureDiffuse diffuse,
vec4 ltc_mat = utility_tx_sample(uv, UTIL_LTC_MAT_LAYER);
float ltc_mag = utility_tx_sample(uv, UTIL_LTC_MAG_LAYER).x;
- ITEM_FOREACH_BEGIN (light_culling, lights_culling_tx, vP_z, l_idx) {
+ ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) {
LightData light = lights[l_idx];
vec3 L;
float dist;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
index 9723d24544c..152bfbeacec 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
@@ -25,19 +25,29 @@ layout(std140) uniform sampling_block
SamplingData sampling;
};
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
layout(std140) uniform grids_block
@@ -75,7 +85,6 @@ layout(std140) uniform hiz_block
HiZData hiz;
};
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2D shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;