Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClément Foucault <foucault.clem@gmail.com>2021-11-23 23:24:00 +0300
committerClément Foucault <foucault.clem@gmail.com>2021-12-04 00:41:37 +0300
commit1b00ca35758dacf7ece7b95275ea3c41e53bec6c (patch)
treeb587d29f39a3cffd5c88ab86ceea674a1aaeb1b4
parent68b0195bf3e27ee687eb87e37257891a3a7f4e78 (diff)
EEVEE: Light: Port light culling to GPU
This removes the light count limit for forward-shaded objects. It also provides a more efficient way of computing the culling, directly on the GPU. Moreover, it avoids doing multiple lighting passes for high light counts in the deferred pipeline, improving performance.
-rw-r--r--source/blender/draw/CMakeLists.txt5
-rw-r--r--source/blender/draw/engines/eevee/eevee_culling.cc68
-rw-r--r--source/blender/draw/engines/eevee/eevee_culling.hh338
-rw-r--r--source/blender/draw/engines/eevee/eevee_instance.cc3
-rw-r--r--source/blender/draw/engines/eevee/eevee_instance.hh2
-rw-r--r--source/blender/draw/engines/eevee/eevee_light.cc221
-rw-r--r--source/blender/draw/engines/eevee/eevee_light.hh105
-rw-r--r--source/blender/draw/engines/eevee/eevee_lookdev.cc8
-rw-r--r--source/blender/draw/engines/eevee/eevee_shader.cc8
-rw-r--r--source/blender/draw/engines/eevee/eevee_shader.hh4
-rw-r--r--source/blender/draw/engines/eevee/eevee_shader_shared.hh147
-rw-r--r--source/blender/draw/engines/eevee/eevee_shading.cc67
-rw-r--r--source/blender/draw/engines/eevee/eevee_shading.hh17
-rw-r--r--source/blender/draw/engines/eevee/eevee_shadow.cc32
-rw-r--r--source/blender/draw/engines/eevee/eevee_view.cc14
-rw-r--r--source/blender/draw/engines/eevee/eevee_view.hh1
-rw-r--r--source/blender/draw/engines/eevee/eevee_wrapper.hh141
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl31
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl61
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl32
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl51
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl57
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl138
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl73
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl21
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl21
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl7
-rw-r--r--source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl21
-rw-r--r--source/blender/draw/intern/shaders/common_intersection_lib.glsl29
-rw-r--r--source/blender/draw/intern/shaders/common_math_geom_lib.glsl11
-rw-r--r--source/blender/draw/intern/shaders/common_math_lib.glsl5
31 files changed, 860 insertions, 879 deletions
diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt
index 27fcf32915c..5d80c7bf36e 100644
--- a/source/blender/draw/CMakeLists.txt
+++ b/source/blender/draw/CMakeLists.txt
@@ -132,7 +132,6 @@ set(SRC
engines/image/image_engine.c
engines/image/image_shader.c
engines/eevee/eevee_camera.cc
- engines/eevee/eevee_culling.cc
engines/eevee/eevee_depth_of_field.cc
engines/eevee/eevee_engine.c
engines/eevee/eevee_engine.cc
@@ -270,7 +269,9 @@ data_to_c_simple(engines/eevee/shaders/eevee_cubemap_lib.glsl SRC)
data_to_c_simple(engines/eevee/shaders/eevee_culling_debug_frag.glsl SRC)
data_to_c_simple(engines/eevee/shaders/eevee_culling_iter_lib.glsl SRC)
data_to_c_simple(engines/eevee/shaders/eevee_culling_lib.glsl SRC)
-data_to_c_simple(engines/eevee/shaders/eevee_culling_light_frag.glsl SRC)
+data_to_c_simple(engines/eevee/shaders/eevee_culling_select_comp.glsl SRC)
+data_to_c_simple(engines/eevee/shaders/eevee_culling_sort_comp.glsl SRC)
+data_to_c_simple(engines/eevee/shaders/eevee_culling_tile_comp.glsl SRC)
data_to_c_simple(engines/eevee/shaders/eevee_deferred_direct_frag.glsl SRC)
data_to_c_simple(engines/eevee/shaders/eevee_deferred_holdout_frag.glsl SRC)
data_to_c_simple(engines/eevee/shaders/eevee_deferred_transparent_frag.glsl SRC)
diff --git a/source/blender/draw/engines/eevee/eevee_culling.cc b/source/blender/draw/engines/eevee/eevee_culling.cc
deleted file mode 100644
index c54c7fa9320..00000000000
--- a/source/blender/draw/engines/eevee/eevee_culling.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright 2021, Blender Foundation.
- */
-
-/** \file
- * \ingroup eevee
- *
- * A culling object is a data structure that contains fine grained culling
- * of entities against in the whole view frustum. The Culling structure contains the
- * final entity list since it has to have a special order.
- *
- * Follows the principles of Tiled Culling + Z binning from:
- * "Improved Culling for Tiled and Clustered Rendering"
- * by Michal Drobot
- * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf
- */
-
-#include "eevee_instance.hh"
-
-#include "eevee_culling.hh"
-
-namespace blender::eevee {
-
-/* -------------------------------------------------------------------- */
-/** \name CullingDebugPass
- * \{ */
-
-void CullingDebugPass::sync(void)
-{
- LightModule &lights = inst_.lights;
-
- debug_ps_ = DRW_pass_create("CullingDebug", DRW_STATE_WRITE_COLOR);
-
- GPUShader *sh = inst_.shaders.static_shader_get(CULLING_DEBUG);
- DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &input_depth_tx_);
- DRW_shgroup_call_procedural_triangles(grp, nullptr, 1);
-}
-
-void CullingDebugPass::render(GPUTexture *input_depth_tx)
-{
- input_depth_tx_ = input_depth_tx;
-
- inst_.lights.bind_batch(0);
-
- DRW_draw_pass(debug_ps_);
-}
-
-/** \} */
-
-} // namespace blender::eevee \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/eevee_culling.hh b/source/blender/draw/engines/eevee/eevee_culling.hh
deleted file mode 100644
index 976c30a1efb..00000000000
--- a/source/blender/draw/engines/eevee/eevee_culling.hh
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright 2021, Blender Foundation.
- */
-
-/** \file
- * \ingroup eevee
- *
- * A culling object is a data structure that contains fine grained culling
- * of entities against in the whole view frustum. The Culling structure contains the
- * final entity list since it has to have a special order.
- *
- * Follows the principles of Tiled Culling + Z binning from:
- * "Improved Culling for Tiled and Clustered Rendering"
- * by Michal Drobot
- * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf
- */
-
-#pragma once
-
-#include "DRW_render.h"
-
-#include "BLI_vector.hh"
-
-#include "eevee_shader_shared.hh"
-
-namespace blender::eevee {
-
-class Instance;
-
-/* -------------------------------------------------------------------- */
-/** \name CullingBatch
- * \{ */
-
-/**
- * Do not use directly. Use Culling object instead.
- */
-template<
- /* Type of data contained per culling batch. */
- typename Tdata>
-class CullingBatch {
- public:
- /** Z ordered items. */
- Tdata item_data;
-
- private:
- /* Items to order in Z. */
- struct ItemHandle {
- /** Index inside item_source_. */
- uint32_t source_index;
- /** Signed Z distance along camera Z axis. */
- float z_dist;
- /** Item radius. */
- float radius;
- };
-
- /** Compact handle list to order without moving source. */
- Vector<ItemHandle, CULLING_ITEM_BATCH> item_handles_;
- /** Z bins. */
- CullingDataBuf culling_data_;
- /** Tile texture and framebuffer handling the 2D culling. */
- eevee::Texture tiles_tx_ = Texture("culling_tx_");
- eevee::Framebuffer tiles_fb_;
-
- public:
- CullingBatch(){};
- ~CullingBatch(){};
-
- void init(const ivec2 &extent)
- {
- item_handles_.clear();
-
- uint tile_size = 8;
-
- uint res[2] = {divide_ceil_u(extent.x, tile_size), divide_ceil_u(extent.y, tile_size)};
-
- tiles_tx_.ensure(UNPACK2(res), 1, GPU_RGBA32UI);
-
- culling_data_.tile_size = tile_size;
- for (int i = 0; i < 2; i++) {
- culling_data_.tile_to_uv_fac[i] = tile_size / (float)extent[i];
- }
-
- // tiles_tx_.ensure(1, 1, 1, GPU_RGBA32UI);
- // uvec4 no_2D_culling = {UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX};
- // GPU_texture_update(tiles_tx_, GPU_DATA_UINT, no_2D_culling);
-
- tiles_fb_.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(tiles_tx_));
- }
-
- void set_empty(void)
- {
- init_min_max();
- culling_data_.push_update();
- }
-
- void insert(int32_t index, float z_dist, float radius)
- {
- ItemHandle handle = {(uint32_t)index, z_dist, radius};
- item_handles_.append(handle);
- }
-
- template<typename DataAppendF, typename CullingF>
- void finalize(float near_z,
- float far_z,
- const DataAppendF &data_append,
- const CullingF &draw_culling)
- {
- culling_data_.zbin_scale = -CULLING_ZBIN_COUNT / fabsf(far_z - near_z);
- culling_data_.zbin_bias = -near_z * culling_data_.zbin_scale;
-
- /* Order items by Z distance to the camera. */
- auto sort = [](const ItemHandle &a, const ItemHandle &b) { return a.z_dist > b.z_dist; };
- std::sort(item_handles_.begin(), item_handles_.end(), sort);
-
- init_min_max();
- /* Fill the GPU data buffer. */
- for (auto item_idx : item_handles_.index_range()) {
- ItemHandle &handle = item_handles_[item_idx];
- data_append(item_data, item_idx, handle.source_index);
- /* Register to Z bins. */
- int z_min = max_ii(culling_z_to_zbin(culling_data_, handle.z_dist + handle.radius), 0);
- int z_max = min_ii(culling_z_to_zbin(culling_data_, handle.z_dist - handle.radius),
- CULLING_ZBIN_COUNT - 1);
- for (auto z : IndexRange(z_min, z_max - z_min + 1)) {
- BLI_assert(z >= 0 && z < CULLING_ZBIN_COUNT);
- uint16_t(&zbin_minmax)[2] = ((uint16_t(*)[2])culling_data_.zbins)[z];
- if (item_idx < zbin_minmax[0]) {
- zbin_minmax[0] = (uint16_t)item_idx;
- }
- if (item_idx > zbin_minmax[1]) {
- zbin_minmax[1] = (uint16_t)item_idx;
- }
- }
- }
- /* Set item count for no-cull iterator. */
- culling_data_.items_count = item_handles_.size();
- /* Upload data to GPU. */
- culling_data_.push_update();
-
- GPU_framebuffer_bind(tiles_fb_);
-
- draw_culling(item_data, culling_data_);
- }
-
- /**
- * Getters
- **/
- bool is_full(void)
- {
- return item_handles_.size() == CULLING_ITEM_BATCH;
- }
- const GPUUniformBuf *culling_ubo_get(void) const
- {
- return culling_data_.ubo_get();
- }
- uint items_count_get(void) const
- {
- return culling_data_.items_count;
- }
- GPUTexture *culling_texture_get(void) const
- {
- return tiles_tx_;
- }
-
- private:
- void init_min_max(void)
- {
- /* Init min-max for each bin. */
- for (auto i : IndexRange(CULLING_ZBIN_COUNT)) {
- uint16_t *zbin_minmax = (uint16_t *)culling_data_.zbins;
- zbin_minmax[i * 2 + 0] = CULLING_ITEM_BATCH - 1;
- zbin_minmax[i * 2 + 1] = 0;
- }
- culling_data_.items_count = 0;
- }
-};
-
-/** \} */
-
-/* -------------------------------------------------------------------- */
-/** \name Culling
- * \{ */
-
-template</* Type of data contained per culling batch. */
- typename Tdata,
- /* True if items can be added in multiple batches. */
- bool is_extendable = false>
-class Culling {
- private:
- using CullingBatchType = CullingBatch<Tdata>;
- /** Multiple culling batches containing at most CULLING_ITEM_BATCH items worth of data. */
- Vector<CullingBatchType *> batches_;
- /** Number of active batches. Allocated count may be higher. */
- int used_batch_count_;
- /** Pointer to the active batch being filled. */
- CullingBatchType *active_batch_;
- /** Used to get Z distance. */
- vec3 camera_z_axis_;
- float camera_z_offset_;
- /** View for which the culling is computed. */
- const DRWView *view_;
- /** View resolution. */
- ivec2 extent_ = ivec2(0);
-
- public:
- Culling(){};
- ~Culling()
- {
- for (CullingBatchType *batch : batches_) {
- delete batch;
- }
- }
-
- void set_view(const DRWView *view, const ivec2 extent)
- {
- view_ = view;
- extent_ = extent;
-
- float viewinv[4][4];
- DRW_view_viewmat_get(view, viewinv, true);
-
- camera_z_axis_ = viewinv[2];
- camera_z_offset_ = -vec3::dot(camera_z_axis_, viewinv[3]);
-
- if (batches_.size() == 0) {
- batches_.append(new CullingBatchType());
- }
-
- used_batch_count_ = 1;
- active_batch_ = batches_[0];
- active_batch_->init(extent_);
- }
-
- /* Cull every items. Do not reset the batches to avoid freeing the vectors' memory. */
- void set_empty(void)
- {
- if (extent_.x == 0) {
- extent_ = ivec2(1);
- }
-
- if (batches_.size() == 0) {
- batches_.append(new CullingBatchType());
-
- active_batch_ = batches_[0];
- active_batch_->init(extent_);
- }
-
- active_batch_ = batches_[0];
- active_batch_->set_empty();
- }
-
- /* Returns true if we cannot add any more items.
- * In this case, the caller is expected to not try to insert another item. */
- bool insert(int32_t index, BoundSphere &bsphere)
- {
- if (!DRW_culling_sphere_test(view_, &bsphere)) {
- return false;
- }
-
- if (active_batch_->is_full()) {
- BLI_assert(is_extendable);
- /* TODO(fclem) degrow vector of batches. */
- if (batches_.size() < (used_batch_count_ + 1)) {
- batches_.append(new CullingBatchType());
- }
- active_batch_ = batches_[used_batch_count_];
- active_batch_->init(extent_);
- used_batch_count_++;
- }
-
- float z_dist = vec3::dot(bsphere.center, camera_z_axis_) + camera_z_offset_;
- active_batch_->insert(index, z_dist, bsphere.radius);
-
- return active_batch_->is_full();
- }
-
- template<typename DataAppendF, typename CullingF>
- void finalize(const DataAppendF &data_append, const CullingF &draw_culling)
- {
- float near_z = DRW_view_near_distance_get(view_);
- float far_z = DRW_view_far_distance_get(view_);
-
- for (auto i : IndexRange(used_batch_count_)) {
- batches_[i]->finalize(near_z, far_z, data_append, draw_culling);
- }
- }
-
- /**
- * Getters
- **/
- const CullingBatchType *operator[](int64_t index) const
- {
- return batches_[index];
- }
- IndexRange index_range(void) const
- {
- return IndexRange(used_batch_count_);
- }
-};
-
-/** \} */
-
-/* -------------------------------------------------------------------- */
-/** \name CullingDebugPass
- * \{ */
-
-class CullingDebugPass {
- private:
- Instance &inst_;
-
- GPUTexture *input_depth_tx_ = nullptr;
-
- DRWPass *debug_ps_ = nullptr;
-
- public:
- CullingDebugPass(Instance &inst) : inst_(inst){};
-
- void sync(void);
- void render(GPUTexture *input_depth_tx);
-};
-
-/** \} */
-
-} // namespace blender::eevee \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/eevee_instance.cc b/source/blender/draw/engines/eevee/eevee_instance.cc
index 63c9c690180..f614d4b4a97 100644
--- a/source/blender/draw/engines/eevee/eevee_instance.cc
+++ b/source/blender/draw/engines/eevee/eevee_instance.cc
@@ -22,6 +22,7 @@
* An instance contains all structures needed to do a complete render.
*/
+#include "BKE_global.h"
#include "BKE_object.h"
#include "BLI_rect.h"
#include "DEG_depsgraph_query.h"
@@ -63,6 +64,8 @@ void Instance::init(const ivec2 &output_res,
rv3d = rv3d_;
baking_probe = light_probe_;
+ debug_mode = (eDebugMode)G.debug_value;
+
update_eval_members();
rcti render_border = output_crop(output_res, output_rect);
diff --git a/source/blender/draw/engines/eevee/eevee_instance.hh b/source/blender/draw/engines/eevee/eevee_instance.hh
index 09548ca504d..3079f931231 100644
--- a/source/blender/draw/engines/eevee/eevee_instance.hh
+++ b/source/blender/draw/engines/eevee/eevee_instance.hh
@@ -99,6 +99,8 @@ class Instance {
/** Can be null. Used to exclude objects during baking. */
const struct LightProbe *baking_probe = nullptr;
+ eDebugMode debug_mode = SHADOW_DEBUG_NONE;
+
/* Info string displayed at the top of the render / viewport. */
char info[64];
diff --git a/source/blender/draw/engines/eevee/eevee_light.cc b/source/blender/draw/engines/eevee/eevee_light.cc
index b80954e2195..d73d650d78a 100644
--- a/source/blender/draw/engines/eevee/eevee_light.cc
+++ b/source/blender/draw/engines/eevee/eevee_light.cc
@@ -284,10 +284,10 @@ void LightModule::sync_light(const Object *ob, ObjectHandle &handle)
void LightModule::end_sync(void)
{
- lights_refs_.clear();
-
Vector<ObjectKey, 0> deleted_keys;
+ light_refs_.clear();
+
/* Detect light deletion. */
for (auto item : lights_.items()) {
Light &light = item.value;
@@ -297,7 +297,7 @@ void LightModule::end_sync(void)
}
else {
light.used = false;
- lights_refs_.append(&light);
+ light_refs_.append(&light);
}
}
@@ -308,110 +308,171 @@ void LightModule::end_sync(void)
lights_.remove(key);
}
+ if (light_refs_.size() > CULLING_MAX_ITEM) {
+ /* TODO(fclem) Print error to user. */
+ light_refs_.resize(CULLING_MAX_ITEM);
+ }
+
+ batch_len_ = divide_ceil_u(max_ii(light_refs_.size(), 1), CULLING_BATCH_SIZE);
+ lights_data.resize(batch_len_ * CULLING_BATCH_SIZE);
+ shadows_data.resize(batch_len_ * CULLING_BATCH_SIZE);
+ culling_key_buf.resize(batch_len_ * CULLING_BATCH_SIZE);
+ culling_light_buf.resize(batch_len_ * CULLING_BATCH_SIZE);
+ culling_zbin_buf.resize(batch_len_ * CULLING_ZBIN_COUNT);
+ culling_data.items_count = light_refs_.size();
+ culling_data.tile_word_len = divide_ceil_u(max_ii(culling_data.items_count, 1), 32);
+
/* Call shadows.end_sync after light pruning to avoid packing deleted shadows. */
inst_.shadows.end_sync();
+
+ for (auto l_idx : light_refs_.index_range()) {
+ Light &light = *light_refs_[l_idx];
+ lights_data[l_idx] = light;
+ lights_data[l_idx].shadow_id = LIGHT_NO_SHADOW;
+
+ if (light.shadow_id != LIGHT_NO_SHADOW) {
+ if (light.type == LIGHT_SUN) {
+ shadows_data[l_idx] = this->inst_.shadows.directionals[light.shadow_id];
+ }
+ else {
+ shadows_data[l_idx] = this->inst_.shadows.punctuals[light.shadow_id];
+ }
+ }
+ }
+
+ lights_data.push_update();
+ shadows_data.push_update();
+
+ {
+ culling_ps_ = DRW_pass_create("CullingLight", (DRWState)0);
+
+ uint lights_len = light_refs_.size();
+ uint batch_len = divide_ceil_u(lights_len, CULLING_BATCH_SIZE);
+
+ if (batch_len > 0) {
+ /* NOTE: We reference the buffers that may be resized or updated later. */
+ {
+ GPUShader *sh = inst_.shaders.static_shader_get(CULLING_SELECT);
+ DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_);
+ DRW_shgroup_vertex_buffer(grp, "lights_buf", lights_data);
+ DRW_shgroup_vertex_buffer_ref(grp, "culling_buf", &culling_data);
+ DRW_shgroup_vertex_buffer(grp, "key_buf", culling_key_buf);
+ DRW_shgroup_call_compute(grp, batch_len, 1, 1);
+ DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
+ }
+ {
+ GPUShader *sh = inst_.shaders.static_shader_get(CULLING_SORT);
+ DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_);
+ DRW_shgroup_vertex_buffer(grp, "lights_buf", lights_data);
+ DRW_shgroup_vertex_buffer_ref(grp, "culling_buf", &culling_data);
+ DRW_shgroup_vertex_buffer(grp, "key_buf", culling_key_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "out_zbins_buf", &culling_zbin_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "out_items_buf", &culling_light_buf);
+ DRW_shgroup_call_compute(grp, batch_len, 1, 1);
+ DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
+ }
+ {
+ GPUShader *sh = inst_.shaders.static_shader_get(CULLING_TILE);
+ DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_);
+ DRW_shgroup_vertex_buffer(grp, "lights_buf", culling_light_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "culling_buf", &culling_data);
+ DRW_shgroup_vertex_buffer_ref(grp, "culling_tile_buf", &culling_tile_buf);
+ DRW_shgroup_call_compute_ref(grp, culling_tile_dispatch_size_);
+ DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
+ }
+ }
+ }
+
+ debug_end_sync();
}
-/* Compute acceleration structure for the given view. If extent is 0, bind no lights. */
-void LightModule::set_view(const DRWView *view, const ivec2 extent, bool enable_specular)
+void LightModule::debug_end_sync(void)
{
- if (extent.x == 0) {
- culling_.set_empty();
+ if (inst_.debug_mode != eDebugMode::DEBUG_LIGHT_CULLING) {
+ debug_draw_ps_ = nullptr;
return;
}
- culling_.set_view(view, extent);
-
- for (auto light_id : lights_refs_.index_range()) {
- Light &light = *lights_refs_[light_id];
+ debug_draw_ps_ = DRW_pass_create("CullingDebug", DRW_STATE_WRITE_COLOR);
- BoundSphere bsphere;
- if (light.type == LIGHT_SUN) {
- /* Make sun lights cover the whole frustum. */
- float viewinv[4][4];
- DRW_view_viewmat_get(view, viewinv, true);
- copy_v3_v3(bsphere.center, viewinv[3]);
- bsphere.radius = fabsf(DRW_view_far_distance_get(view));
- }
- else {
- /* TODO(fclem) fit cones better. */
- copy_v3_v3(bsphere.center, light._position);
- bsphere.radius = light.influence_radius_max;
- }
+ GPUShader *sh = inst_.shaders.static_shader_get(CULLING_DEBUG);
+ DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_draw_ps_);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_buf", &culling_light_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_culling_buf", &culling_data);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_zbins_buf", &culling_zbin_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_tile_buf", &culling_tile_buf);
+ DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &input_depth_tx_);
+ DRW_shgroup_call_procedural_triangles(grp, nullptr, 1);
+}
- culling_.insert(light_id, bsphere);
- }
+/* Compute acceleration structure for the given view. If extent is 0, bind no lights. */
+void LightModule::set_view(const DRWView *view, const ivec2 extent, bool enable_specular)
+{
+ const bool no_lights = (extent.x == 0);
- DRW_view_set_active(view);
+ /* Target 1bit per pixel. */
+ uint tile_size = 1u << log2_ceil_u(ceil(sqrtf(culling_data.tile_word_len * 32)));
- /* This is only called if the light is visible under this view. */
- auto data_copy = [&](LightBatch &light_batch, uint32_t dst_index, uint32_t src_index) {
- Light &light = *this->lights_refs_[src_index];
- LightData &dst = light_batch.lights_data[dst_index];
+ int3 tiles_extent;
+ tiles_extent.x = divide_ceil_u(extent.x, tile_size);
+ tiles_extent.y = divide_ceil_u(extent.y, tile_size);
+ tiles_extent.z = batch_len_;
- dst = light;
- if (!enable_specular) {
- dst.specular_power = 0.0f;
- }
+ float far_z = DRW_view_far_distance_get(view);
+ float near_z = DRW_view_near_distance_get(view);
- if (light.shadow_id != LIGHT_NO_SHADOW) {
- ShadowData &shadow_dst = light_batch.shadows_data[dst_index];
- if (light.type == LIGHT_SUN) {
- shadow_dst = this->inst_.shadows.directionals[light.shadow_id];
- }
- else {
- shadow_dst = this->inst_.shadows.punctuals[light.shadow_id];
- }
- }
- };
+ culling_data.zbin_scale = -CULLING_ZBIN_COUNT / fabsf(far_z - near_z);
+ culling_data.zbin_bias = -near_z * culling_data.zbin_scale;
+ culling_data.tile_size = tile_size;
+ culling_data.tile_x_len = tiles_extent.x;
+ culling_data.tile_y_len = tiles_extent.y;
+ culling_data.tile_to_uv_fac = tile_size / float2(UNPACK2(extent));
- /* Called for each batch. Do 2D gpu culling. */
- auto culling_func = [&](LightBatch &light_batch, CullingDataBuf &culling_data) {
- LightDataBuf &lights_data = light_batch.lights_data;
- ShadowDataBuf &shadows_data = light_batch.shadows_data;
- lights_data.push_update();
- shadows_data.push_update();
+ culling_data.enable_specular = enable_specular;
+ culling_data.items_count = no_lights ? 0 : light_refs_.size();
+ culling_data.visible_count = 0;
+ culling_data.push_update();
- this->inst_.shading_passes.light_culling.render(lights_data.ubo_get(), culling_data.ubo_get());
- };
+ if (no_lights) {
+ return;
+ }
- culling_.finalize(data_copy, culling_func);
+ uint word_count = tiles_extent.x * tiles_extent.y * tiles_extent.z * culling_data.tile_word_len;
- inst_.shadows.update_visible(view);
-}
+ /* TODO(fclem) Only resize once per redraw. */
+ culling_tile_buf.resize(word_count);
-void LightModule::bind_batch(int batch_index)
-{
- active_batch_ = batch_index;
- auto &batch = *culling_[batch_index];
- active_lights_ubo_ = batch.item_data.lights_data.ubo_get();
- active_shadows_ubo_ = batch.item_data.shadows_data.ubo_get();
- active_culling_ubo_ = batch.culling_ubo_get();
- active_culling_tx_ = batch.culling_texture_get();
-}
+ culling_tile_dispatch_size_.x = divide_ceil_u(word_count, 1024);
+ culling_tile_dispatch_size_.y = 1;
+ culling_tile_dispatch_size_.z = 1;
-/** \} */
+ DRW_view_set_active(view);
+ DRW_draw_pass(culling_ps_);
-/* -------------------------------------------------------------------- */
-/** \name CullingPass
- * \{ */
+ inst_.shadows.update_visible(view);
+}
-void CullingLightPass::sync(void)
+void LightModule::debug_draw(GPUFrameBuffer *view_fb, HiZBuffer &hiz)
{
- culling_ps_ = DRW_pass_create("CullingLight", DRW_STATE_WRITE_COLOR);
+ if (debug_draw_ps_ == nullptr) {
+ return;
+ }
+ input_depth_tx_ = hiz.texture_get();
- GPUShader *sh = inst_.shaders.static_shader_get(CULLING_LIGHT);
- DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", &lights_ubo_);
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", &culling_ubo_);
- DRW_shgroup_call_procedural_triangles(grp, nullptr, 1);
+ GPU_framebuffer_bind(view_fb);
+ DRW_draw_pass(debug_draw_ps_);
}
-void CullingLightPass::render(const GPUUniformBuf *lights_ubo, const GPUUniformBuf *culling_ubo)
+void LightModule::shgroup_resources(DRWShadingGroup *grp)
{
- lights_ubo_ = lights_ubo;
- culling_ubo_ = culling_ubo;
- DRW_draw_pass(culling_ps_);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_buf", &culling_light_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_culling_buf", &culling_data);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_zbins_buf", &culling_zbin_buf);
+ DRW_shgroup_vertex_buffer_ref(grp, "lights_tile_buf", &culling_tile_buf);
+
+ DRW_shgroup_vertex_buffer_ref(grp, "shadows_buf", &shadows_data);
+ DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", inst_.shadows.atlas_tx_get());
+ DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", inst_.shadows.tilemap_tx_get());
}
/** \} */
diff --git a/source/blender/draw/engines/eevee/eevee_light.hh b/source/blender/draw/engines/eevee/eevee_light.hh
index 36bacf9ac8a..254d9231eef 100644
--- a/source/blender/draw/engines/eevee/eevee_light.hh
+++ b/source/blender/draw/engines/eevee/eevee_light.hh
@@ -29,7 +29,6 @@
#include "DNA_light_types.h"
#include "eevee_camera.hh"
-#include "eevee_culling.hh"
#include "eevee_id_map.hh"
#include "eevee_sampling.hh"
#include "eevee_shader.hh"
@@ -72,27 +71,6 @@ struct Light : public LightData {
/** \} */
/* -------------------------------------------------------------------- */
-/** \name CullingPass
- * \{ */
-
-class CullingLightPass {
- private:
- Instance &inst_;
-
- DRWPass *culling_ps_ = nullptr;
- const GPUUniformBuf *lights_ubo_ = nullptr;
- const GPUUniformBuf *culling_ubo_ = nullptr;
-
- public:
- CullingLightPass(Instance &inst) : inst_(inst){};
-
- void sync(void);
- void render(const GPUUniformBuf *lights_ubo, const GPUUniformBuf *culling_ubo);
-};
-
-/** \} */
-
-/* -------------------------------------------------------------------- */
/** \name LightModule
* \{ */
@@ -102,30 +80,47 @@ class CullingLightPass {
class LightModule {
friend ShadowModule;
+ public:
+ /** Scene lights data. */
+ LightDataBuf lights_data;
+ /** Shadow data. TODO(fclem): merge with lights_data. */
+ ShadowDataBuf shadows_data;
+ /** Culling info. */
+ CullingDataBuf culling_data;
+ /** Key buffer containing only visible lights indices. */
+ CullingKeyBuf culling_key_buf;
+ /** LightData buffer used for rendering. Ordered by the culling phase. */
+ CullingLightBuf culling_light_buf;
+ /** Zbins containing min and max light index for each Z bin. */
+ CullingZbinBuf culling_zbin_buf;
+ /** Bitmap of lights touching each tile. Using one layer for each culling batch. */
+ CullingTileBuf culling_tile_buf;
+
private:
Instance &inst_;
/** Map of light objects. This is used to track light deletion. */
Map<ObjectKey, Light> lights_;
- /** References to data in lights_ for easy indexing. */
- Vector<Light *> lights_refs_;
- /** Batches of lights alongside their culling data. */
- struct LightBatch {
- LightDataBuf lights_data;
- ShadowDataBuf shadows_data;
- };
- Culling<LightBatch, true> culling_;
- /** Active data pointers used for rendering. */
- const GPUUniformBuf *active_lights_ubo_;
- const GPUUniformBuf *active_shadows_ubo_;
- const GPUUniformBuf *active_culling_ubo_;
- GPUTexture *active_culling_tx_;
- int active_batch_ = 0;
+
+ Vector<Light *> light_refs_;
+
+ /** Follows the principles of Tiled Culling + Z binning from:
+ * "Improved Culling for Tiled and Clustered Rendering"
+ * by Michal Drobot
+ * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf */
+ DRWPass *culling_ps_ = nullptr;
+ int3 culling_tile_dispatch_size_ = int3(1);
+ /* Number of batches of lights that are separately processed. */
+ int batch_len_ = 1;
float light_threshold_;
+ /** Debug Culling visualization. */
+ DRWPass *debug_draw_ps_ = nullptr;
+ GPUTexture *input_depth_tx_ = nullptr;
+
public:
- LightModule(Instance &inst) : inst_(inst), culling_(){};
+ LightModule(Instance &inst) : inst_(inst){};
~LightModule(){};
void begin_sync(void);
@@ -134,40 +129,10 @@ class LightModule {
void set_view(const DRWView *view, const ivec2 extent, bool enable_specular = true);
- void bind_batch(int range_id);
+ void shgroup_resources(DRWShadingGroup *grp);
- /**
- * Getters
- **/
- const GPUUniformBuf **lights_ubo_ref_get(void)
- {
- return &active_lights_ubo_;
- }
- const GPUUniformBuf **shadows_ubo_ref_get(void)
- {
- return &active_shadows_ubo_;
- }
- const GPUUniformBuf **culling_ubo_ref_get(void)
- {
- return &active_culling_ubo_;
- }
- /** Returns the active Span of lights that passed the culling test. */
- Span<LightData> lights_get(void) const
- {
- const auto &batch = *culling_[active_batch_];
- Span<LightData> span = batch.item_data.lights_data;
- return span.take_front(batch.items_count_get());
- }
- GPUTexture **culling_tx_ref_get(void)
- {
- return &active_culling_tx_;
- }
- /* Return a range iterator to loop over all lights.
- * In practice, we render with light in waves of LIGHT_MAX lights at a time. */
- IndexRange index_range(void) const
- {
- return culling_.index_range();
- }
+ void debug_end_sync(void);
+ void debug_draw(GPUFrameBuffer *view_fb, HiZBuffer &hiz);
};
/** \} */
diff --git a/source/blender/draw/engines/eevee/eevee_lookdev.cc b/source/blender/draw/engines/eevee/eevee_lookdev.cc
index 84c77130b05..e5633ac47ee 100644
--- a/source/blender/draw/engines/eevee/eevee_lookdev.cc
+++ b/source/blender/draw/engines/eevee/eevee_lookdev.cc
@@ -302,18 +302,13 @@ void LookDev::sync_overlay(void)
GPUMaterial *gpumat = inst_.shaders.material_shader_get(
mat, mat->nodetree, MAT_PIPE_FORWARD, MAT_GEOM_LOOKDEV, false);
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, overlay_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get());
DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get());
DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get());
DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
- DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
- DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", inst_.shadows.atlas_tx_get());
offset.x -= sphere_size_ + sphere_margin;
@@ -340,7 +335,6 @@ void LookDev::render_overlay(GPUFrameBuffer *fb)
inst_.lightprobes.set_view(active_view, ivec2(0));
inst_.lights.set_view(active_view, ivec2(0));
- inst_.lights.bind_batch(0);
/* Create subview for correct shading. Sub because we do not care about culling. */
const CameraData &cam = inst_.camera.data_get();
diff --git a/source/blender/draw/engines/eevee/eevee_shader.cc b/source/blender/draw/engines/eevee/eevee_shader.cc
index f802303036f..d1d6e50d5d8 100644
--- a/source/blender/draw/engines/eevee/eevee_shader.cc
+++ b/source/blender/draw/engines/eevee/eevee_shader.cc
@@ -50,7 +50,9 @@ extern char datatoc_eevee_cubemap_lib_glsl[];
extern char datatoc_eevee_culling_debug_frag_glsl[];
extern char datatoc_eevee_culling_iter_lib_glsl[];
extern char datatoc_eevee_culling_lib_glsl[];
-extern char datatoc_eevee_culling_light_frag_glsl[];
+extern char datatoc_eevee_culling_select_comp_glsl[];
+extern char datatoc_eevee_culling_sort_comp_glsl[];
+extern char datatoc_eevee_culling_tile_comp_glsl[];
extern char datatoc_eevee_deferred_direct_frag_glsl[];
extern char datatoc_eevee_deferred_holdout_frag_glsl[];
extern char datatoc_eevee_deferred_transparent_frag_glsl[];
@@ -236,7 +238,9 @@ ShaderModule::ShaderModule()
#define SHADER_FULLSCREEN(enum_, frag_) SHADER_FULLSCREEN_DEFINES(enum_, frag_, nullptr)
SHADER_FULLSCREEN(CULLING_DEBUG, eevee_culling_debug_frag);
- SHADER_FULLSCREEN(CULLING_LIGHT, eevee_culling_light_frag);
+ SHADER_COMPUTE(CULLING_SELECT, eevee_culling_select_comp, nullptr);
+ SHADER_COMPUTE(CULLING_SORT, eevee_culling_sort_comp, nullptr);
+ SHADER_COMPUTE(CULLING_TILE, eevee_culling_tile_comp, nullptr);
SHADER_FULLSCREEN(FILM_FILTER, eevee_film_filter_frag);
SHADER_FULLSCREEN(FILM_RESOLVE, eevee_film_resolve_frag);
SHADER_FULLSCREEN(FILM_RESOLVE_DEPTH, eevee_film_resolve_depth_frag);
diff --git a/source/blender/draw/engines/eevee/eevee_shader.hh b/source/blender/draw/engines/eevee/eevee_shader.hh
index f9d4fe2785d..e42e49e35c3 100644
--- a/source/blender/draw/engines/eevee/eevee_shader.hh
+++ b/source/blender/draw/engines/eevee/eevee_shader.hh
@@ -40,7 +40,9 @@ namespace blender::eevee {
/* Keep alphabetical order and clean prefix. */
enum eShaderType {
CULLING_DEBUG = 0,
- CULLING_LIGHT,
+ CULLING_SELECT,
+ CULLING_SORT,
+ CULLING_TILE,
DEFERRED_EVAL_DIRECT,
DEFERRED_EVAL_HOLDOUT,
diff --git a/source/blender/draw/engines/eevee/eevee_shader_shared.hh b/source/blender/draw/engines/eevee/eevee_shader_shared.hh
index 4675d9cc882..6801d4cbd59 100644
--- a/source/blender/draw/engines/eevee/eevee_shader_shared.hh
+++ b/source/blender/draw/engines/eevee/eevee_shader_shared.hh
@@ -178,6 +178,52 @@ BLI_STATIC_ASSERT_ALIGN(CameraData, 16)
/** \name Debug Mode
* \{ */
+/**
+ * Selects which debug visualization is active. Gathers the light culling debug view and the
+ * shadow debug views in one enum; `ShadowDebugData::type` stores one of these values.
+ * Values 1u to 3u are not defined by this enum.
+ */
+enum eDebugMode : uint32_t {
+ /* TODO(fclem) Rename shadow cases. */
+ /** No debug visualization. */
+ SHADOW_DEBUG_NONE = 0u,
+ /**
+ * Gradient showing light evaluation hotspots.
+ */
+ DEBUG_LIGHT_CULLING = 4u,
+ /**
+ * Tilemaps to screen. Is also present in other modes.
+ * - Black pixels, no pages allocated.
+ * - Green pixels, pages cached.
+ * - Red pixels, pages allocated.
+ */
+ SHADOW_DEBUG_TILEMAPS = 5u,
+ /**
+ * Random color per pages. Validates page density allocation and sampling.
+ */
+ SHADOW_DEBUG_PAGES = 6u,
+ /**
+ * Outputs random color per tilemap (or tilemap level). Validates tilemaps coverage.
+ * Black means not covered by any tilemaps LOD of the shadow.
+ */
+ SHADOW_DEBUG_LOD = 7u,
+ /**
+ * Outputs white pixels for pages allocated and black pixels for unused pages.
+ * This needs SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED defined in order to work.
+ */
+ SHADOW_DEBUG_PAGE_ALLOCATION = 8u,
+ /**
+ * Outputs the tilemap atlas. Default tilemap is too big for the usual screen resolution.
+ * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option.
+ */
+ SHADOW_DEBUG_TILE_ALLOCATION = 9u,
+ /**
+ * Visualize linear depth stored in the atlas regions of the active light.
+ * This way, one can check if the rendering, the copying and the shadow sampling functions works.
+ */
+ SHADOW_DEBUG_SHADOW_DEPTH = 10u
+};
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Film
+ * \{ */
+
enum eFilmDataType : uint32_t {
/** Color is accumulated using the pixel filter. No negative values. */
FILM_DATA_COLOR = 0u,
@@ -341,34 +387,44 @@ BLI_STATIC_ASSERT_ALIGN(MotionBlurData, 16)
/** \name Cullings
* \{ */
-/* Number of items in a culling batch. Needs to be Power of 2. */
+/* TODO(fclem) Rename this. Only used by probes now. */
#define CULLING_ITEM_BATCH 128
+/* Number of items we can cull. Limited by how we store CullingZBin. */
+#define CULLING_MAX_ITEM 65536
+/* Number of items in a culling batch. Needs to be a power of 2. Must be <= 65536. */
+/* The current limiting factor is the sorting phase, which is single pass and only sorts
+ * within a thread-group whose maximum size is 1024. */
+#define CULLING_BATCH_SIZE 1024
/* Maximum number of 32 bit uint stored per tile. */
-#define CULLING_MAX_WORD ((CULLING_ITEM_BATCH + 1) / 32)
-/* TODO(fclem) Support more than 4 words using layered texture for culling result. */
-#if CULLING_MAX_WORD > 4
-# error "CULLING_MAX_WORD is greater than supported maximum."
-#endif
-/* Fine grained subdivision in the Z direction. */
-#define CULLING_ZBIN_COUNT 4088
+#define CULLING_MAX_WORD (CULLING_BATCH_SIZE / 32)
+/* Fine grained subdivision in the Z direction (Must be multiple of CULLING_BATCH_SIZE). */
+#define CULLING_ZBIN_COUNT 4096
struct CullingData {
- /* Linearly distributed z-bins with encoded uint16_t min and max index. */
- /* NOTE: due to alignment restrictions of uint arrays, use uvec4. */
- uvec4 zbins[CULLING_ZBIN_COUNT / 4];
- /* Extent of one square tile in pixels. */
- int tile_size;
- /* Valid item count in the data array. */
- uint items_count;
- /* Scale and bias applied to linear Z to get zbin. */
+ /** Scale applied to tile pixel coordinates to get target UV coordinate. */
+ vec2 tile_to_uv_fac;
+ /** Scale and bias applied to linear Z to get zbin. */
float zbin_scale;
float zbin_bias;
- /* Scale applied to tile pixel coordinates to get target UV coordinate. */
- vec2 tile_to_uv_fac;
- vec2 _pad0;
+ /** Valid item count in the source data array. */
+ uint items_count;
+ /** Number of items that pass the first culling test. */
+ uint visible_count;
+ /** Will disable specular during light data copy. */
+ bool enable_specular;
+ /** Extent of one square tile in pixels. */
+ uint tile_size;
+ /** Number of tiles on the X/Y axis. */
+ uint tile_x_len;
+ uint tile_y_len;
+ /** Number of words per tile. Depends on the maximum number of lights. */
+ uint tile_word_len;
+ int _pad0;
};
BLI_STATIC_ASSERT_ALIGN(CullingData, 16)
-BLI_STATIC_ASSERT_SIZE(CullingData, UBO_MIN_MAX_SUPPORTED_SIZE)
+
+#define CullingZBin uint
+#define CullingWord uint
static inline int culling_z_to_zbin(CullingData data, float z)
{
@@ -542,41 +598,6 @@ struct ShadowTileMapData {
};
BLI_STATIC_ASSERT_ALIGN(ShadowTileMapData, 16)
-enum eShadowDebug : uint32_t {
- SHADOW_DEBUG_NONE = 0u,
- /**
- * Tilemaps to screen. Is also present in other modes.
- * - Black pixels, no pages allocated.
- * - Green pixels, pages cached.
- * - Red pixels, pages allocated.
- */
- SHADOW_DEBUG_TILEMAPS = 1u,
- /**
- * Random color per pages. Validates page density allocation and sampling.
- */
- SHADOW_DEBUG_PAGES = 2u,
- /**
- * Outputs random color per tilemap (or tilemap level). Validates tilemaps coverage.
- * Black means not covered by any tilemaps LOD of the shadow.
- */
- SHADOW_DEBUG_LOD = 3u,
- /**
- * Outputs white pixels for pages allocated and black pixels for unused pages.
- * This needs SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED defined in order to work.
- */
- SHADOW_DEBUG_PAGE_ALLOCATION = 4u,
- /**
- * Outputs the tilemap atlas. Default tilemap is too big for the usual screen resolution.
- * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option.
- */
- SHADOW_DEBUG_TILE_ALLOCATION = 5u,
- /**
- * Visualize linear depth stored in the atlas regions of the active light.
- * This way, one can check if the rendering, the copying and the shadow sampling functions works.
- */
- SHADOW_DEBUG_SHADOW_DEPTH = 6u
-};
-
/**
* Shadow data for debugging the active light shadow.
*/
@@ -584,7 +605,7 @@ struct ShadowDebugData {
LightData light;
ShadowData shadow;
vec3 camera_position;
- eShadowDebug type;
+ eDebugMode type;
int tilemap_data_index;
int _pad1;
int _pad2;
@@ -869,19 +890,23 @@ vec4 utility_tx_sample(vec2 uv, float layer);
#ifdef __cplusplus
using CameraDataBuf = StructBuffer<CameraData>;
using CubemapDataBuf = StructArrayBuffer<CubemapData, CULLING_ITEM_BATCH>;
-using CullingDataBuf = StructBuffer<CullingData>;
+using CullingDataBuf = StorageBuffer<CullingData>;
+using CullingKeyBuf = StorageArrayBuffer<uint, CULLING_BATCH_SIZE, true>;
+using CullingLightBuf = StorageArrayBuffer<LightData, CULLING_BATCH_SIZE, true>;
+using CullingTileBuf = StorageArrayBuffer<uint, 16 * 16 * CULLING_MAX_WORD, true>;
+using CullingZbinBuf = StorageArrayBuffer<uint, CULLING_ZBIN_COUNT, true>;
using DepthOfFieldDataBuf = StructBuffer<DepthOfFieldData>;
using GridDataBuf = StructArrayBuffer<GridData, GRID_MAX>;
using HiZDataBuf = StructBuffer<HiZData>;
-using LightDataBuf = StructArrayBuffer<LightData, CULLING_ITEM_BATCH>;
+using LightDataBuf = StorageArrayBuffer<LightData, CULLING_BATCH_SIZE>;
using LightProbeFilterDataBuf = StructBuffer<LightProbeFilterData>;
using LightProbeInfoDataBuf = StructBuffer<LightProbeInfoData>;
using RaytraceBufferDataBuf = StructBuffer<RaytraceBufferData>;
using RaytraceDataBuf = StructBuffer<RaytraceData>;
-using ShadowDataBuf = StructArrayBuffer<ShadowData, CULLING_ITEM_BATCH>;
-using ShadowTileMapDataBuf = StorageArrayBuffer<ShadowTileMapData, SHADOW_MAX_TILEMAP>;
-using ShadowPageHeapBuf = StorageArrayBuffer<ShadowPagePacked, SHADOW_MAX_PAGE, true>;
+using ShadowDataBuf = StorageArrayBuffer<ShadowData, CULLING_BATCH_SIZE>;
using ShadowDebugDataBuf = StructBuffer<ShadowDebugData>;
+using ShadowPageHeapBuf = StorageArrayBuffer<ShadowPagePacked, SHADOW_MAX_PAGE, true>;
+using ShadowTileMapDataBuf = StorageArrayBuffer<ShadowTileMapData, SHADOW_MAX_TILEMAP>;
using SubsurfaceDataBuf = StructBuffer<SubsurfaceData>;
using VelocityObjectBuf = StructBuffer<VelocityObjectData>;
diff --git a/source/blender/draw/engines/eevee/eevee_shading.cc b/source/blender/draw/engines/eevee/eevee_shading.cc
index 37501b561c8..3d802299a05 100644
--- a/source/blender/draw/engines/eevee/eevee_shading.cc
+++ b/source/blender/draw/engines/eevee/eevee_shading.cc
@@ -96,23 +96,17 @@ DRWShadingGroup *ForwardPass::material_opaque_add(::Material *blender_mat, GPUMa
{
DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? opaque_culled_ps_ : opaque_ps_;
LightModule &lights = inst_.lights;
- ShadowModule &shadows = inst_.shadows;
LightProbeModule &lightprobes = inst_.lightprobes;
eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get());
DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get());
DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get());
DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
- DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get());
- DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get());
/* TODO(fclem): Make this only needed if material uses it ... somehow. */
if (true) {
DRW_shgroup_uniform_texture_ref(
@@ -143,23 +137,17 @@ DRWShadingGroup *ForwardPass::material_transparent_add(::Material *blender_mat,
GPUMaterial *gpumat)
{
LightModule &lights = inst_.lights;
- ShadowModule &shadows = inst_.shadows;
LightProbeModule &lightprobes = inst_.lightprobes;
eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get());
DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get());
DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get());
DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
- DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get());
- DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get());
/* TODO(fclem): Make this only needed if material uses it ... somehow. */
if (true) {
DRW_shgroup_uniform_texture_ref(
@@ -224,9 +212,6 @@ void ForwardPass::render(GBuffer &gbuffer, HiZBuffer &hiz, GPUFrameBuffer *view_
GPU_framebuffer_bind(view_fb);
}
- /* Only one batch of light is supported. */
- inst_.lights.bind_batch(0);
-
DRW_draw_pass(prepass_ps_);
DRW_draw_pass(opaque_ps_);
@@ -305,10 +290,7 @@ void DeferredLayer::volume_add(Object *ob)
GPUShader *sh = inst_.shaders.static_shader_get(DEFERRED_VOLUME);
DRWShadingGroup *grp = DRW_shgroup_create(sh, volume_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_texture_ref(grp, "depth_max_tx", &deferred_pass.input_depth_behind_tx_);
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
DRW_shgroup_stencil_set(grp, CLOSURE_VOLUME | CLOSURE_TRANSPARENCY, 0xFF, 0xFF);
@@ -371,12 +353,8 @@ void DeferredLayer::render(GBuffer &gbuffer,
// gbuffer.copy_depth_behind();
// deferred_pass.input_depth_behind_tx_ = gbuffer.depth_behind_tx;
- for (auto index : inst_.lights.index_range()) {
- inst_.lights.bind_batch(index);
-
- gbuffer.bind_volume();
- DRW_draw_pass(volume_ps_);
- }
+ gbuffer.bind_volume();
+ DRW_draw_pass(volume_ps_);
}
if (use_holdout) {
@@ -404,22 +382,16 @@ void DeferredLayer::render(GBuffer &gbuffer,
rt_buffer.resolve(CLOSURE_REFRACTION, gbuffer);
}
- for (auto index : inst_.lights.index_range()) {
- inst_.lights.bind_batch(index);
-
- if (!no_volumes) {
- /* TODO(fclem) volume fb. */
- GPU_framebuffer_bind(view_fb);
- DRW_draw_pass(deferred_pass.eval_volume_homogeneous_ps_);
- }
-
- if (!no_surfaces) {
- gbuffer.bind_radiance();
- DRW_draw_pass(deferred_pass.eval_direct_ps_);
- }
+ if (!no_volumes) {
+ /* TODO(fclem) volume fb. */
+ GPU_framebuffer_bind(view_fb);
+ DRW_draw_pass(deferred_pass.eval_volume_homogeneous_ps_);
}
if (!no_surfaces) {
+ gbuffer.bind_radiance();
+ DRW_draw_pass(deferred_pass.eval_direct_ps_);
+
if (use_diffuse) {
rt_buffer.trace(CLOSURE_DIFFUSE, gbuffer, hiz_front, hiz_front);
rt_buffer.denoise(CLOSURE_DIFFUSE);
@@ -469,7 +441,6 @@ void DeferredPass::sync(void)
volumetric_layer_.sync();
LightModule &lights = inst_.lights;
- ShadowModule &shadows = inst_.shadows;
LightProbeModule &lightprobes = inst_.lightprobes;
eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
@@ -479,19 +450,14 @@ void DeferredPass::sync(void)
eval_direct_ps_ = DRW_pass_create("DeferredDirect", state);
GPUShader *sh = inst_.shaders.static_shader_get(DEFERRED_EVAL_DIRECT);
DRWShadingGroup *grp = DRW_shgroup_create(sh, eval_direct_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get());
DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get());
DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get());
DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
- DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get());
- DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get());
DRW_shgroup_uniform_texture_ref_ex(
grp, "emission_data_tx", &input_emission_data_tx_, no_interp);
DRW_shgroup_uniform_texture_ref_ex(
@@ -535,13 +501,8 @@ void DeferredPass::sync(void)
eval_volume_homogeneous_ps_ = DRW_pass_create("DeferredVolume", state);
GPUShader *sh = inst_.shaders.static_shader_get(DEFERRED_EVAL_VOLUME);
DRWShadingGroup *grp = DRW_shgroup_create(sh, eval_volume_homogeneous_ps_);
- DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get());
- DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get());
- DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get());
+ lights.shgroup_resources(grp);
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx);
- DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get());
- DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get());
DRW_shgroup_uniform_texture_ref_ex(
grp, "transparency_data_tx", &input_transparency_data_tx_, no_interp);
DRW_shgroup_uniform_texture_ref_ex(grp, "volume_data_tx", &input_volume_data_tx_, no_interp);
diff --git a/source/blender/draw/engines/eevee/eevee_shading.hh b/source/blender/draw/engines/eevee/eevee_shading.hh
index 8adbecf3dd5..b84739a7220 100644
--- a/source/blender/draw/engines/eevee/eevee_shading.hh
+++ b/source/blender/draw/engines/eevee/eevee_shading.hh
@@ -30,7 +30,6 @@
#include "eevee_lut.h"
-#include "eevee_culling.hh"
#include "eevee_gbuffer.hh"
#include "eevee_raytracing.hh"
#include "eevee_shadow.hh"
@@ -280,38 +279,24 @@ class UtilityTexture : public Texture {
*/
class ShadingPasses {
public:
- CullingLightPass light_culling;
-
BackgroundPass background;
DeferredPass deferred;
ForwardPass forward;
ShadowPass shadow;
VelocityPass velocity;
- CullingDebugPass debug_culling;
-
UtilityTexture utility_tx;
public:
ShadingPasses(Instance &inst)
- : light_culling(inst),
- background(inst),
- deferred(inst),
- forward(inst),
- shadow(inst),
- velocity(inst),
- debug_culling(inst){};
+ : background(inst), deferred(inst), forward(inst), shadow(inst), velocity(inst){};
void sync()
{
- light_culling.sync();
-
deferred.sync();
forward.sync();
shadow.sync();
velocity.sync();
-
- debug_culling.sync();
}
DRWShadingGroup *material_add(::Material *blender_mat,
diff --git a/source/blender/draw/engines/eevee/eevee_shadow.cc b/source/blender/draw/engines/eevee/eevee_shadow.cc
index 3c132bd9ac7..f869d9e643d 100644
--- a/source/blender/draw/engines/eevee/eevee_shadow.cc
+++ b/source/blender/draw/engines/eevee/eevee_shadow.cc
@@ -554,32 +554,16 @@ void ShadowModule::init(void)
inst_.sampling.reset();
}
- switch (G.debug_value) {
- case 4:
- debug_data_.type = SHADOW_DEBUG_TILEMAPS;
- break;
- case 5:
- debug_data_.type = SHADOW_DEBUG_LOD;
- break;
- case 6:
- debug_data_.type = SHADOW_DEBUG_PAGE_ALLOCATION;
#ifndef SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED
- BLI_assert_msg(0,
- "Error: EEVEE: SHADOW_DEBUG_PAGE_ALLOCATION used but "
- "SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED "
- "is not defined");
-#endif
- break;
- case 7:
- debug_data_.type = SHADOW_DEBUG_TILE_ALLOCATION;
- break;
- case 8:
- debug_data_.type = SHADOW_DEBUG_SHADOW_DEPTH;
- break;
- default:
- debug_data_.type = SHADOW_DEBUG_NONE;
- break;
+ if (inst_.debug_mode == SHADOW_DEBUG_PAGE_ALLOCATION) {
+ BLI_assert_msg(0,
+ "Error: EEVEE: SHADOW_DEBUG_PAGE_ALLOCATION used but "
+ "SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED "
+ "is not defined");
}
+#endif
+
+ debug_data_.type = inst_.debug_mode;
memset(views_, 0, sizeof(views_));
}
diff --git a/source/blender/draw/engines/eevee/eevee_view.cc b/source/blender/draw/engines/eevee/eevee_view.cc
index 7b4516dd727..e868bad0259 100644
--- a/source/blender/draw/engines/eevee/eevee_view.cc
+++ b/source/blender/draw/engines/eevee/eevee_view.cc
@@ -109,9 +109,6 @@ void ShadingView::sync(ivec2 render_extent_)
view_fb_.ensure(GPU_ATTACHMENT_TEXTURE(depth_tx_), GPU_ATTACHMENT_TEXTURE(combined_tx_));
- /* Reuse postfx_tx_. */
- debug_fb_.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(postfx_tx_));
-
gbuffer_.sync(depth_tx_, combined_tx_, owner);
}
}
@@ -150,6 +147,7 @@ void ShadingView::render(void)
inst_.shading_passes.forward.render(gbuffer_, hiz_front_, view_fb_);
+ inst_.lights.debug_draw(view_fb_, hiz_front_);
inst_.shadows.debug_draw(view_fb_, hiz_front_);
velocity_.render(depth_tx_);
@@ -160,15 +158,7 @@ void ShadingView::render(void)
GPUTexture *final_radiance_tx = render_post(combined_tx_);
- /* TODO(fclem) Have a special renderpass for this. */
- if (G.debug_value == 3) {
- GPU_framebuffer_bind(debug_fb_);
- inst_.shading_passes.debug_culling.render(depth_tx_);
-
- // inst_.render_passes.debug_culling->accumulate(debug_tx_, sub_view_);
- inst_.render_passes.combined->accumulate(postfx_tx_, sub_view_);
- }
- else if (inst_.render_passes.combined) {
+ if (inst_.render_passes.combined) {
inst_.render_passes.combined->accumulate(final_radiance_tx, sub_view_);
}
diff --git a/source/blender/draw/engines/eevee/eevee_view.hh b/source/blender/draw/engines/eevee/eevee_view.hh
index 44512c69ef4..4f1aae0d825 100644
--- a/source/blender/draw/engines/eevee/eevee_view.hh
+++ b/source/blender/draw/engines/eevee/eevee_view.hh
@@ -78,7 +78,6 @@ class ShadingView {
/** Owned resources. */
eevee::Framebuffer view_fb_;
- eevee::Framebuffer debug_fb_;
/** Draw resources. Not owned. */
GPUTexture *combined_tx_ = nullptr;
GPUTexture *depth_tx_ = nullptr;
diff --git a/source/blender/draw/engines/eevee/eevee_wrapper.hh b/source/blender/draw/engines/eevee/eevee_wrapper.hh
index 96007f0dd6d..d5daa247d1c 100644
--- a/source/blender/draw/engines/eevee/eevee_wrapper.hh
+++ b/source/blender/draw/engines/eevee/eevee_wrapper.hh
@@ -139,6 +139,8 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
T *data_ = nullptr;
/* Use vertex buffer for now. Until there is a complete GPUStorageBuf implementation. */
GPUVertBuf *ssbo_;
+ /* Currently allocated size. */
+ int64_t size;
#ifdef DEBUG
const char *name_ = typeid(T).name();
@@ -149,22 +151,36 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
public:
StorageArrayBuffer()
{
- BLI_assert(((sizeof(T) * len) % 16) == 0);
+ init(len);
+ }
+ ~StorageArrayBuffer()
+ {
+ GPU_vertbuf_discard(ssbo_);
+ }
+
+ void init(int64_t new_size)
+ {
+ size = new_size;
GPUVertFormat format = {0};
GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
GPUUsageType usage = device_only ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_DYNAMIC;
ssbo_ = GPU_vertbuf_create_with_format_ex(&format, usage);
- GPU_vertbuf_data_alloc(ssbo_, (sizeof(T) / 4) * len);
+ GPU_vertbuf_data_alloc(ssbo_, divide_ceil_u(sizeof(T) * size, 4));
if (!device_only) {
data_ = (T *)GPU_vertbuf_get_data(ssbo_);
GPU_vertbuf_use(ssbo_);
}
}
- ~StorageArrayBuffer()
+
+ void resize(int64_t new_size)
{
- GPU_vertbuf_discard(ssbo_);
+ BLI_assert(new_size > 0);
+ if (new_size != size) {
+ GPU_vertbuf_discard(ssbo_);
+ this->init(new_size);
+ }
}
void push_update(void)
@@ -179,6 +195,11 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
{
return ssbo_;
}
+ /* To be able to use it with DRW_shgroup_*_ref(). */
+ GPUVertBuf **operator&()
+ {
+ return &ssbo_;
+ }
/**
* Get the value at the given index. This invokes undefined behavior when the index is out of
@@ -188,7 +209,7 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
{
BLI_assert(!device_only);
BLI_assert(index >= 0);
- BLI_assert(index < len);
+ BLI_assert(index < size);
return data_[index];
}
@@ -196,7 +217,7 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
{
BLI_assert(!device_only);
BLI_assert(index >= 0);
- BLI_assert(index < len);
+ BLI_assert(index < size);
return data_[index];
}
@@ -247,6 +268,68 @@ class StorageArrayBuffer : NonMovable, NonCopyable {
};
/** Simpler version where data is not an array. */
+template<
+    /** Type of the values stored in this storage buffer. */
+    typename T,
+    /** True if created on device and no host memory is allocated. */
+    bool device_only = false>
+class StorageBuffer : public T, NonMovable, NonCopyable {
+ private:
+  /* Use vertex buffer for now. Until there is a complete GPUStorageBuf implementation. */
+  GPUVertBuf *ssbo_;
+
+#ifdef DEBUG
+  const char *name_ = typeid(T).name();
+#else
+  constexpr static const char *name_ = "StorageBuffer";
+#endif
+
+ public:
+  /** Create the backing vertex buffer, sized to hold one `T` rounded up to whole 4 byte words. */
+  StorageBuffer()
+  {
+    GPUVertFormat format = {0};
+    /* One float per "vertex", so the vertex count is the buffer size in 4 byte units. */
+    GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
+
+    GPUUsageType usage = device_only ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_DYNAMIC;
+    ssbo_ = GPU_vertbuf_create_with_format_ex(&format, usage);
+    GPU_vertbuf_data_alloc(ssbo_, divide_ceil_u(sizeof(T), 4));
+    if (!device_only) {
+      GPU_vertbuf_use(ssbo_);
+    }
+  }
+  ~StorageBuffer()
+  {
+    GPU_vertbuf_discard(ssbo_);
+  }
+
+  /**
+   * Copy the host-side `T` sub-object into the vertex buffer data, then call
+   * `GPU_vertbuf_use()` on it. Only valid when `device_only` is false.
+   */
+  void push_update(void)
+  {
+    BLI_assert(!device_only);
+    /* TODO(fclem): Avoid a full copy. */
+    T *data = (T *)GPU_vertbuf_get_data(ssbo_);
+    *data = *this;
+
+    GPU_vertbuf_use(ssbo_);
+  }
+
+  operator GPUVertBuf *() const
+  {
+    return ssbo_;
+  }
+  /* To be able to use it with DRW_shgroup_*_ref(). */
+  GPUVertBuf **operator&()
+  {
+    return &ssbo_;
+  }
+
+  /**
+   * Assign `other` to the host-side `T` sub-object. Nothing is copied to the vertex buffer
+   * here, see `push_update()`.
+   *
+   * The return type is the injected class name: `StorageBuffer<T>` would always name
+   * `StorageBuffer<T, false>` and break this operator for `device_only = true`.
+   */
+  StorageBuffer &operator=(const T &other)
+  {
+    *static_cast<T *>(this) = other;
+    return *this;
+  }
+};
+
+/** Simpler version where data is not an array. */
template<typename T> class StructBuffer : public T, NonMovable, NonCopyable {
private:
GPUUniformBuf *ubo_;
@@ -366,6 +449,42 @@ class Texture {
}
/* Return true if a texture has been created. */
+  bool ensure(const char *name,
+              int w,
+              int h,
+              int d,
+              int mips,
+              eGPUTextureFormat format,
+              bool layered = false)
+  {
+    /* TODO(fclem) In the future, we need to check if mip_count did not change.
+     * For now it's ok as we always define all mip level. */
+    if (tx_ != nullptr) {
+      int3 current_extent = this->size();
+      BLI_assert(GPU_texture_array(tx_) == layered);
+      /* Free the existing texture when its extent or format no longer matches the request. */
+      const bool is_stale = (current_extent != int3(w, h, d)) ||
+                            (GPU_texture_format(tx_) != format);
+      if (is_stale) {
+        GPU_TEXTURE_FREE_SAFE(tx_);
+      }
+    }
+    if (tx_ != nullptr) {
+      /* The existing texture is kept: nothing was created. */
+      return false;
+    }
+
+    tx_ = layered ?
+              GPU_texture_create_2d_array(name, w, h, d, mips, format, nullptr) :
+              GPU_texture_create_3d(name, w, h, d, mips, format, GPU_DATA_FLOAT, nullptr);
+    if (mips > 1) {
+      /* TODO(fclem) Remove once we have immutable storage or when mips are
+       * generated on creation. */
+      GPU_texture_generate_mipmap(tx_);
+    }
+    return true;
+  }
+
+ /* Return true if a texture has been created. */
bool ensure(const char *name, int w, int h, int mips, eGPUTextureFormat format)
{
/* TODO(fclem) In the future, we need to check if mip_count did not change.
@@ -474,6 +593,10 @@ class Texture {
return &tx_;
}
+  /* Return true when a GPU texture is currently held by this wrapper. */
+  bool is_valid(void) const
+  {
+    return tx_ != nullptr;
+  }
int width(void) const
{
return GPU_texture_width(tx_);
@@ -482,6 +605,12 @@ class Texture {
{
return GPU_texture_height(tx_);
}
+  /**
+   * Return the extent of mip level 0 as reported by `GPU_texture_get_mipmap_size()`.
+   * The result starts zeroed so that any component the getter leaves untouched is 0 instead
+   * of indeterminate.
+   * NOTE(review): presumably the getter skips the depth component of non-3D, non-array
+   * textures; confirm against the GPU module.
+   */
+  int3 size(void) const
+  {
+    int3 extent(0);
+    GPU_texture_get_mipmap_size(tx_, 0, extent);
+    return extent;
+  }
};
class Framebuffer {
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
index f559788145d..33734324445 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl
@@ -2,25 +2,34 @@
/**
* Debug Shader outputting a gradient of orange - white - blue to mark culling hotspots.
* Green pixels are error pixels that are missing lights from the culling pass (i.e: when culling
- * pass is not conservative enough). This shader will only work on the last light batch so remove
- * some lights from the scene you are debugging to have below CULLING_ITEM_BATCH lights.
+ * pass is not conservative enough).
*/
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
{
- CullingData culling;
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
+{
+ CullingData light_culling;
+};
+
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
};
-uniform usampler2D item_culling_tx;
uniform sampler2D depth_tx;
in vec4 uvcoordsvar;
@@ -29,14 +38,14 @@ layout(location = 0) out vec4 out_debug_color;
void main(void)
{
- float depth = textureLod(depth_tx, uvcoordsvar.xy, 0.0).r;
+ float depth = texelFetch(depth_tx, ivec2(gl_FragCoord.xy), 0).r;
float vP_z = get_view_z_from_depth(depth);
vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth);
float lights_count = 0.0;
uint lights_cull = 0u;
- ITEM_FOREACH_BEGIN (culling, item_culling_tx, vP_z, l_idx) {
+ ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) {
LightData light = lights[l_idx];
lights_cull |= 1u << l_idx;
lights_count += 1.0;
@@ -44,7 +53,7 @@ void main(void)
ITEM_FOREACH_END
uint lights_nocull = 0u;
- ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) {
+ ITEM_FOREACH_BEGIN_NO_CULL (light_culling, l_idx) {
LightData light = lights[l_idx];
if (distance(light._position, P) < light.influence_radius_max) {
lights_nocull |= 1u << l_idx;
@@ -57,6 +66,6 @@ void main(void)
out_debug_color = vec4(0.0, 1.0, 0.0, 1.0);
}
else {
- out_debug_color = vec4(heatmap_gradient(lights_count / 16.0), 1.0);
+ out_debug_color = vec4(heatmap_gradient(lights_count / 4.0), 1.0);
}
} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
index a0ea075db22..640ffb4a6a1 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl
@@ -8,11 +8,14 @@ uint bit_field_mask(uint bit_width, uint bit_min)
return ~mask << bit_min;
}
-uint zbin_mask(int word_index, int zbin_min, int zbin_max)
+/**
+ * Return the bits of the 32-bit word `word_index` that lie inside the inclusive item index
+ * range [zbin_min..zbin_max]. Bit `b` of word `w` stands for item index `w * 32 + b`.
+ * The word is expected to overlap the range (the iteration macro only visits words between
+ * `zbin_min / 32` and `zbin_max / 32`); otherwise the width computation wraps around.
+ */
+uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max)
{
- int local_min = clamp(zbin_min - word_index * 32, 0, 31);
- int mask_width = clamp(zbin_max - zbin_min + 1, 0, 32);
- return bit_field_mask(uint(mask_width), uint(local_min));
+ uint word_start = word_index * 32u;
+ uint word_end = word_start + 31u;
+ /* Clip the requested range to the items covered by this word. */
+ uint local_min = max(zbin_min, word_start);
+ uint local_max = min(zbin_max, word_end);
+ uint mask_width = local_max - local_min + 1u;
+ /* The shift amount has to be relative to the start of the word: shifting a 32-bit value by
+ * 32 or more is undefined in GLSL, which is what the absolute index gives for any word > 0. */
+ return bit_field_mask(mask_width, local_min - word_start);
}
/* Waiting to implement extensions support. We need:
@@ -28,39 +31,39 @@ uint zbin_mask(int word_index, int zbin_min, int zbin_max)
# define subgroupBroadcastFirst(a) a
#endif
-#define ITEM_FOREACH_BEGIN(_culling, _tiles_tx, _linearz, _item_index) \
+#define ITEM_FOREACH_BEGIN(_culling, _zbins, _words, _linearz, _item_index) \
{ \
- int zbin_index = culling_z_to_zbin(_culling, _linearz); \
- zbin_index = min(max(zbin_index, 0), int(CULLING_ZBIN_COUNT - 1)); \
- uint zbin_data = _culling.zbins[zbin_index / 4][zbin_index % 4]; \
- int min_index = int(zbin_data & uint(CULLING_ITEM_BATCH - 1)); \
- int max_index = int((zbin_data >> 16u) & uint(CULLING_ITEM_BATCH - 1)); \
- /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
- min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
- max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
- int word_min = 0; \
- int word_max = max(0, CULLING_MAX_WORD - 1); \
- word_min = max(min_index / 32, word_min); \
- word_max = min(max_index / 32, word_max); \
- for (int word_index = word_min; word_index <= word_max; word_index++) { \
- /* TODO(fclem) Support bigger max_word with larger texture. */ \
- ivec2 texel = ivec2(gl_FragCoord.xy) / _culling.tile_size; \
- uint word = texelFetch(_tiles_tx, texel, 0)[word_index]; \
- uint mask = zbin_mask(word_index, min_index, max_index); \
- word &= mask; \
+ uint batch_count = divide_ceil_u(_culling.visible_count, CULLING_BATCH_SIZE); \
+ uvec2 tile_co = uvec2(gl_FragCoord.xy) / _culling.tile_size; \
+ uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \
+ _culling.tile_word_len; \
+ for (uint batch = 0; batch < batch_count; batch++) { \
+ int zbin_index = culling_z_to_zbin(_culling, _linearz); \
+ zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \
+ uint zbin_data = _zbins[zbin_index + batch * CULLING_ZBIN_COUNT]; \
+ uint min_index = zbin_data & 0xFFFFu; \
+ uint max_index = zbin_data >> 16u; \
/* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
- word = subgroupBroadcastFirst(subgroupOr(word)); \
- /* TODO(fclem) Replace by findLSB on supported hardware. */ \
- for (uint i = 0u; word != 0u; word = word >> 1u, i++) { \
- if ((word & 1u) != 0u) { \
- int _item_index = word_index * 32 + int(i);
+ min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
+ max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
+ uint word_min = min_index / 32u; \
+ uint word_max = max_index / 32u; \
+ for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \
+ uint word = _words[tile_word_offset + word_idx]; \
+ word &= zbin_mask(word_idx, min_index, max_index); \
+ /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
+ word = subgroupBroadcastFirst(subgroupOr(word)); \
+ while (word != 0u) { \
+ uint bit_index = uint(findLSB(word)); \
+ word &= ~1u << bit_index; \
+ uint _item_index = word_idx * 32u + bit_index;
/* No culling. Iterate over all items. */
#define ITEM_FOREACH_BEGIN_NO_CULL(_culling, _item_index) \
{ \
{ \
{ \
- for (uint _item_index = 0u; _item_index < _culling.items_count; _item_index++) {
+ for (uint _item_index = 0u; _item_index < _culling.visible_count; _item_index++) {
#define ITEM_FOREACH_END \
} \
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
index f128b89e864..27a39817140 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl
@@ -7,11 +7,6 @@
/** \name Intersection Tests
* \{ */
-struct Sphere {
- vec3 position;
- float radius;
-};
-
struct Cone {
vec3 direction;
float angle_cos;
@@ -39,12 +34,12 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone)
* by Eric Zhang
* https://lxjk.github.io/2018/03/25/Improve-Tile-based-Light-Culling-with-Spherical-sliced-Cone.html
*/
- float sphere_distance = length(sphere.position);
+ float sphere_distance = length(sphere.center);
float sphere_sin = saturate(sphere.radius / sphere_distance);
float sphere_cos = sqrt(1.0 - sphere_sin * sphere_sin);
float cone_aperture_sin = sqrt(1.0 - cone.angle_cos * cone.angle_cos);
- float cone_sphere_center_cos = dot(sphere.position / sphere_distance, cone.direction);
+ float cone_sphere_center_cos = dot(sphere.center / sphere_distance, cone.direction);
/* cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B). */
float cone_sphere_angle_sum_cos = (sphere.radius > sphere_distance) ?
-1.0 :
@@ -58,22 +53,22 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone)
bool culling_sphere_cylinder_isect(Sphere sphere, Cylinder cylinder)
{
- float distance_squared = len_squared(sphere.position.xy - cylinder.center.xy);
+ float distance_squared = len_squared(sphere.center.xy - cylinder.center.xy);
return (distance_squared < sqr(cylinder.radius + sphere.radius));
}
bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum)
{
- if (dot(vec4(sphere.position, 1.0), frustum.planes[0]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[0]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[1]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[1]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[2]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[2]) > sphere.radius) {
return false;
}
- if (dot(vec4(sphere.position, 1.0), frustum.planes[3]) > sphere.radius) {
+ if (dot(vec4(sphere.center, 1.0), frustum.planes[3]) > sphere.radius) {
return false;
}
return true;
@@ -82,7 +77,7 @@ bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum)
bool culling_sphere_tile_isect(Sphere sphere, CullingTile tile)
{
/* Culling in view space for precision and simplicity. */
- sphere.position = transform_point(ViewMatrix, sphere.position);
+ sphere.center = transform_point(ViewMatrix, sphere.center);
bool isect;
/* Test tile intersection using bounding cone or bounding cylinder.
* This has less false positive cases when the sphere is large. */
@@ -148,14 +143,15 @@ vec2 tile_to_ndc(CullingData culling, vec2 tile_co, vec2 offset)
return tile_co * culling.tile_to_uv_fac * 2.0 - 1.0;
}
-CullingTile culling_tile_get(CullingData culling)
+CullingTile culling_tile_get(CullingData culling, uvec2 tile_co)
{
+ vec2 ftile = vec2(tile_co);
/* Culling frustum corners for this tile. */
vec3 corners[8];
- corners[0].xy = corners[4].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, 0.5));
- corners[1].xy = corners[5].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, -0.5));
- corners[2].xy = corners[6].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, -0.5));
- corners[3].xy = corners[7].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, 0.5));
+ corners[0].xy = corners[4].xy = tile_to_ndc(culling, ftile, vec2(1, 1));
+ corners[1].xy = corners[5].xy = tile_to_ndc(culling, ftile, vec2(1, 0));
+ corners[2].xy = corners[6].xy = tile_to_ndc(culling, ftile, vec2(0, 0));
+ corners[3].xy = corners[7].xy = tile_to_ndc(culling, ftile, vec2(0, 1));
/* The corners depth only matter for precision. Use a mix of not so close to clip plane to
* avoid small float imprecision if near clip is low. */
corners[0].z = corners[1].z = corners[2].z = corners[3].z = -0.5;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl
deleted file mode 100644
index c81a94b35f3..00000000000
--- a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl
+++ /dev/null
@@ -1,51 +0,0 @@
-
-/**
- * 2D Culling pass for lights.
- * We iterate over all items and check if they intersect with the tile frustum.
- */
-
-#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl)
-#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
-
-layout(std140) uniform lights_block
-{
- LightData lights[CULLING_ITEM_BATCH];
-};
-
-layout(std140) uniform lights_culling_block
-{
- CullingData culling;
-};
-
-in vec4 uvcoordsvar;
-
-layout(location = 0) out uvec4 out_items_bits;
-
-void main(void)
-{
- CullingTile tile = culling_tile_get(culling);
-
- out_items_bits = uvec4(0);
- ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) {
- LightData light = lights[l_idx];
-
- bool intersect_tile = true;
- switch (light.type) {
- case LIGHT_SPOT:
- /* TODO cone culling. */
- case LIGHT_RECT:
- case LIGHT_ELLIPSE:
- case LIGHT_POINT:
- Sphere sphere = Sphere(light._position, light.influence_radius_max);
- intersect_tile = culling_sphere_tile_isect(sphere, tile);
- break;
- default:
- break;
- }
-
- if (intersect_tile) {
- out_items_bits[l_idx / 32u] |= 1u << (l_idx % 32u);
- }
- }
- ITEM_FOREACH_END
-} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl
new file mode 100644
index 00000000000..138e54b8bae
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl
@@ -0,0 +1,57 @@
+
+/**
+ * Select the visible items inside the active view and put them inside the sorting buffer.
+ */
+
+#pragma BLENDER_REQUIRE(common_debug_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(common_intersection_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+
+layout(local_size_x = CULLING_ITEM_BATCH) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) restrict buffer key_buf
+{
+ uint keys[];
+};
+
+/**
+ * One invocation per light. Lights whose bounding sphere intersects the view get their index
+ * appended to `keys`, using `culling.visible_count` as the atomic append counter.
+ */
+void main()
+{
+ uint l_idx = gl_GlobalInvocationID.x;
+ if (l_idx >= culling.items_count) {
+ return;
+ }
+
+ LightData light = lights[l_idx];
+
+ /* Start from a sphere centered on the camera with twice the far distance as radius, so that a
+ * light type not handled below is kept instead of being tested against an uninitialized
+ * sphere. */
+ Sphere sphere = Sphere(cameraPos, ViewFar * 2.0);
+ switch (light.type) {
+ case LIGHT_SUN:
+ /* Suns use the camera centered sphere set above. */
+ break;
+ case LIGHT_SPOT:
+ /* TODO cone culling. */
+ case LIGHT_RECT:
+ case LIGHT_ELLIPSE:
+ case LIGHT_POINT:
+ sphere = Sphere(light._position, light.influence_radius_max);
+ break;
+ default:
+ break;
+ }
+
+ if (intersect_view(sphere)) {
+ /* atomicAdd returns the counter value before the increment: a unique slot in `keys`. */
+ uint index = atomicAdd(culling.visible_count, 1);
+ keys[index] = l_idx;
+ }
+}
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl
new file mode 100644
index 00000000000..dfd2c80a45a
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl
@@ -0,0 +1,138 @@
+
+/**
+ * Sort the lights by their Z distance to the camera.
+ * Outputs ordered light buffer and associated zbins.
+ * We split the work in batches of CULLING_BATCH_SIZE and iterate to cover all zbins.
+ * One thread processes one light entity.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+
+layout(local_size_x = CULLING_BATCH_SIZE) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) readonly restrict buffer key_buf
+{
+ uint keys[];
+};
+
+layout(std430, binding = 3) writeonly restrict buffer out_zbins_buf
+{
+ CullingZBin out_zbins[];
+};
+
+layout(std430, binding = 4) writeonly restrict buffer out_items_buf
+{
+ LightData out_lights[];
+};
+
+/**
+ * One invocation per visible light, one work group per batch of CULLING_BATCH_SIZE lights.
+ * - Orders the lights of the batch by decreasing `z_dist` and writes them to `out_lights`.
+ * - Builds, for this batch, the packed min / max light index of every Z bin in `out_zbins`.
+ */
+void main()
+{
+ uint src_index = gl_GlobalInvocationID.x;
+ bool valid_thread = true;
+
+ if (src_index >= culling.visible_count) {
+ /* Do not return because we use barriers later on (which need uniform control flow).
+ * Just process the same last item but avoid insertion. */
+ /* NOTE(review): this wraps around if `visible_count` is 0. Presumably the dispatch is skipped
+ * in that case; confirm on the host side. */
+ src_index = culling.visible_count - 1;
+ valid_thread = false;
+ }
+
+ uint key = keys[src_index];
+ LightData light = lights[key];
+
+ if (!culling.enable_specular) {
+ light.specular_power = 0.0;
+ }
+
+ int index = 0;
+ int contenders = 0;
+
+ /* TODO(fclem): Sun lights are polluting the zbins for no reason. Better bypass culling. */
+ vec3 lP = (light.type == LIGHT_SUN) ? cameraPos : light._position;
+ float radius = (light.type == LIGHT_SUN) ? ViewFar * 2.0 : light.influence_radius_max;
+ float z_dist = dot(cameraForward, lP) - dot(cameraForward, cameraPos);
+
+ int z_min = clamp(culling_z_to_zbin(culling, z_dist + radius), 0, CULLING_ZBIN_COUNT - 1);
+ int z_max = clamp(culling_z_to_zbin(culling, z_dist - radius), 0, CULLING_ZBIN_COUNT - 1);
+
+ if (!valid_thread) {
+ /* Do not register invalid threads. */
+ z_max = z_min - 1;
+ }
+
+ /* Fits the limit of 32KB. */
+ /* NOTE(review): `shared` variables are declared at function scope here; the GLSL specification
+ * only describes them at global scope. Confirm this is accepted by all targeted compilers. */
+ shared int zbin_max[CULLING_ZBIN_COUNT];
+ shared int zbin_min[CULLING_ZBIN_COUNT];
+ /* Compilers do not release shared memory from early declaration.
+ * So we are forced to reuse the same variables in another form. */
+#define z_dists zbin_max
+#define contender_table zbin_min
+
+ /**
+ * Find how many values are before the local value.
+ * This finds the first possible destination index.
+ */
+ z_dists[gl_LocalInvocationID.x] = floatBitsToInt(z_dist);
+ barrier();
+
+ const uint i_start = gl_WorkGroupID.x * CULLING_BATCH_SIZE;
+ /* Only compare against the entries of this batch that map to a visible light. */
+ uint i_max = min(CULLING_BATCH_SIZE, culling.visible_count - i_start);
+ for (uint i = 0; i < i_max; i++) {
+ float ref = intBitsToFloat(z_dists[i]);
+ if (ref > z_dist) {
+ index++;
+ }
+ else if (ref == z_dist) {
+ contenders++;
+ }
+ }
+
+ /* Every invocation sharing this `z_dist` computed the same `index` and `contenders`, so they
+ * all store the same value. */
+ atomicExchange(contender_table[index], contenders);
+ barrier();
+
+ if (valid_thread) {
+ /**
+ * For each clashing index (where two lights have exactly the same z distances)
+ * we use an atomic counter to know how much to offset from the disputed index.
+ */
+ index += atomicAdd(contender_table[index], -1) - 1;
+ index += int(i_start);
+ out_lights[index] = light;
+ }
+ /* `contender_table` aliases `zbin_min`: wait until every invocation has read its counter
+ * before the bins are reset below, otherwise a faster invocation could overwrite a counter
+ * that has not been consumed yet. */
+ barrier();
+
+ /* Each invocation resets and later writes out `iter` consecutive bins. */
+ const uint iter = uint(CULLING_ZBIN_COUNT / CULLING_BATCH_SIZE);
+ const uint zbin_local = gl_LocalInvocationID.x * iter;
+ const uint zbin_global = gl_WorkGroupID.x * CULLING_ZBIN_COUNT + zbin_local;
+
+ for (uint i = 0u, l = zbin_local; i < iter; i++, l++) {
+ zbin_max[l] = 0x0000;
+ zbin_min[l] = 0xFFFF;
+ }
+ barrier();
+
+ /* Register to Z bins. */
+ for (int z = z_min; z <= z_max; z++) {
+ atomicMin(zbin_min[z], index);
+ atomicMax(zbin_max[z], index);
+ }
+ barrier();
+
+ /* Write result to zbins buffer. */
+ for (uint i = 0u, g = zbin_global, l = zbin_local; i < iter; i++, g++, l++) {
+ /* Pack min & max into 1 uint. */
+ out_zbins[g] = (uint(zbin_max[l]) << 16u) | uint(zbin_min[l]);
+ }
+}
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl
new file mode 100644
index 00000000000..913e094980e
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl
@@ -0,0 +1,73 @@
+
+/**
+ * 2D Culling pass for lights.
+ * We iterate over all items and check if they intersect with the tile frustum.
+ * Dispatch one thread per word.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
+#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl)
+
+layout(local_size_x = 1024) in;
+
+layout(std430, binding = 0) readonly restrict buffer lights_buf
+{
+ LightData lights[];
+};
+
+layout(std430, binding = 1) readonly restrict buffer culling_buf
+{
+ CullingData culling;
+};
+
+layout(std430, binding = 2) writeonly restrict buffer culling_tile_buf
+{
+ CullingWord culling_words[];
+};
+
+/**
+ * One invocation per (tile, word) pair. Tests the up to 32 lights covered by the word against
+ * the tile and stores one bit per intersecting light in `culling_words`.
+ */
+void main(void)
+{
+ /* The flat invocation index enumerates the words of a tile first, then the tiles row by row. */
+ uint word_idx = gl_GlobalInvocationID.x % culling.tile_word_len;
+ uint tile_idx = gl_GlobalInvocationID.x / culling.tile_word_len;
+ uvec2 tile_co = uvec2(tile_idx % culling.tile_x_len, tile_idx / culling.tile_x_len);
+
+ if (tile_co.y >= culling.tile_y_len) {
+ return;
+ }
+
+ /* TODO(fclem): We could stop the tile at the HiZ depth. */
+ CullingTile tile = culling_tile_get(culling, tile_co);
+
+ uint l_idx = word_idx * 32u;
+ uint l_end = min(l_idx + 32u, culling.visible_count);
+ uint word = 0u;
+
+ for (; l_idx < l_end; l_idx++) {
+ LightData light = lights[l_idx];
+
+ /* Default to "intersecting" so that a light type not listed below is kept rather than
+ * depending on an uninitialized value. */
+ bool intersect_tile = true;
+ switch (light.type) {
+ case LIGHT_SUN:
+ intersect_tile = true;
+ break;
+ case LIGHT_SPOT:
+ /* TODO cone culling. */
+ case LIGHT_RECT:
+ case LIGHT_ELLIPSE:
+ case LIGHT_POINT:
+ Sphere sphere = Sphere(light._position, light.influence_radius_max);
+ intersect_tile = culling_sphere_tile_isect(sphere, tile);
+ break;
+ default:
+ break;
+ }
+
+ if (intersect_tile) {
+ /* Bit position is the light index modulo 32. */
+ word |= 1u << (l_idx & 0x1Fu);
+ }
+ }
+
+ culling_words[gl_GlobalInvocationID.x] = word;
+} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
index 942f75961e9..14e38d6f1d6 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl
@@ -17,19 +17,29 @@ layout(std140) uniform sampling_block
SamplingData sampling;
};
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
layout(std140) uniform grids_block
@@ -55,7 +65,6 @@ uniform sampler2D transmit_data_tx;
uniform sampler2D reflect_color_tx;
uniform sampler2D reflect_normal_tx;
uniform sampler1D sss_transmittance_tx;
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2D shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
index 677881abd71..068db3e78fd 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl
@@ -15,24 +15,33 @@
#pragma BLENDER_REQUIRE(eevee_volume_eval_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_shader_shared.hh)
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
uniform sampler2D transparency_data_tx;
uniform usampler2D volume_data_tx;
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2DShadow shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
index 448e5b54886..d3d5f859174 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl
@@ -4,9 +4,10 @@
* A prototype needs to be declared before main in order to use it.
*
* The resources expected to be defined are:
- * - light_culling
- * - lights_culling_tx
* - lights
+ * - lights_zbins
+ * - light_culling
+ * - lights_culling_words
* - shadows
* - shadow_atlas_tx
* - shadow_tilemaps_tx
@@ -33,7 +34,7 @@ void light_eval(ClosureDiffuse diffuse,
vec4 ltc_mat = utility_tx_sample(uv, UTIL_LTC_MAT_LAYER);
float ltc_mag = utility_tx_sample(uv, UTIL_LTC_MAG_LAYER).x;
- ITEM_FOREACH_BEGIN (light_culling, lights_culling_tx, vP_z, l_idx) {
+ ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) {
LightData light = lights[l_idx];
vec3 L;
float dist;
diff --git a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
index 9723d24544c..152bfbeacec 100644
--- a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl
@@ -25,19 +25,29 @@ layout(std140) uniform sampling_block
SamplingData sampling;
};
-layout(std140) uniform lights_block
+layout(std430, binding = 0) readonly restrict buffer lights_buf
{
- LightData lights[CULLING_ITEM_BATCH];
+ LightData lights[];
};
-layout(std140) uniform lights_culling_block
+layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf
+{
+ CullingZBin lights_zbins[];
+};
+
+layout(std430, binding = 2) readonly restrict buffer lights_culling_buf
{
CullingData light_culling;
};
-layout(std140) uniform shadows_block
+layout(std430, binding = 3) readonly restrict buffer lights_tile_buf
+{
+ CullingWord lights_culling_words[];
+};
+
+layout(std430, binding = 4) readonly restrict buffer shadows_buf
{
- ShadowData shadows[CULLING_ITEM_BATCH];
+ ShadowData shadows[];
};
layout(std140) uniform grids_block
@@ -75,7 +85,6 @@ layout(std140) uniform hiz_block
HiZData hiz;
};
-uniform usampler2D lights_culling_tx;
uniform sampler2DArray utility_tx;
uniform sampler2D shadow_atlas_tx;
uniform usampler2D shadow_tilemaps_tx;
diff --git a/source/blender/draw/intern/shaders/common_intersection_lib.glsl b/source/blender/draw/intern/shaders/common_intersection_lib.glsl
index a2151ecf7a7..c8a97808247 100644
--- a/source/blender/draw/intern/shaders/common_intersection_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_intersection_lib.glsl
@@ -66,7 +66,6 @@ bool intersect_view(Pyramid pyramid)
*/
bool intersects = true;
-#if TEST_ENABLED
/* Do Pyramid vertices vs Frustum planes. */
for (int p = 0; p < 6 && intersects; ++p) {
bool is_any_vertex_on_positive_side = false;
@@ -79,9 +78,7 @@ bool intersect_view(Pyramid pyramid)
intersects = false;
}
}
-#endif
-#if TEST_ENABLED && FALSE_POSITIVE_REJECTION
if (intersects) {
vec4 pyramid_planes[5];
planes_setup(pyramid, pyramid_planes);
@@ -98,7 +95,6 @@ bool intersect_view(Pyramid pyramid)
}
}
}
-#endif
#if defined(DEBUG_DRAW) && defined(DEBUG_DRAW_ISECT)
drw_debug(pyramid, intersects ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1));
@@ -110,7 +106,6 @@ bool intersect_view(Box box)
{
bool intersects = true;
-#if TEST_ENABLED
/* Do Box vertices vs Frustum planes. */
for (int p = 0; p < 6 && intersects; ++p) {
bool is_any_vertex_on_positive_side = false;
@@ -123,9 +118,7 @@ bool intersect_view(Box box)
intersects = false;
}
}
-#endif
-#if TEST_ENABLED && FALSE_POSITIVE_REJECTION
if (intersects) {
vec4 box_planes[6];
planes_setup(box, box_planes);
@@ -142,11 +135,31 @@ bool intersect_view(Box box)
}
}
}
+
+#if defined(DEBUG_DRAW) && defined(DEBUG_DRAW_ISECT)
+ if (intersects) {
+ drw_debug(box, intersects ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1));
+ }
#endif
+ return intersects;
+}
+
+bool intersect_view(Sphere sph)
+{
+ bool intersects = true;
+
+ for (int p = 0; p < 6 && intersects; ++p) {
+ float dist_to_plane = dot(frustum_planes[p], vec4(sph.center, 1.0));
+ if (dist_to_plane < -sph.radius) {
+ intersects = false;
+ }
+ }
+
+ /* TODO reject false positive. */
#if defined(DEBUG_DRAW) && defined(DEBUG_DRAW_ISECT)
if (intersects) {
- drw_debug(box, vec4(0, 1, 0, 1));
+ drw_debug(sph, intersects ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1));
}
#endif
return intersects;
diff --git a/source/blender/draw/intern/shaders/common_math_geom_lib.glsl b/source/blender/draw/intern/shaders/common_math_geom_lib.glsl
index 7a58a82eb2b..6c6ee599168 100644
--- a/source/blender/draw/intern/shaders/common_math_geom_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_math_geom_lib.glsl
@@ -139,6 +139,11 @@ vec3 world_to_tangent(vec3 vector, vec3 N, vec3 T, vec3 B)
/** \name Shapes
* \{ */
+/* Sphere shape described by a center point and a radius. */
+struct Sphere {
+ vec3 center;
+ float radius;
+};
+
struct Box {
vec3 corners[8];
};
@@ -169,6 +174,12 @@ void drw_debug(Pyramid shape, vec4 color)
drw_debug_quad(shape.corners[1], shape.corners[2], shape.corners[3], shape.corners[4], color);
}
+/* Debug draw of a sphere: forwards its center, radius and the given color to
+ * drw_debug_point(). */
+void drw_debug(Sphere shape, vec4 color)
+{
+ /* TODO(fclem): Could be better. */
+ drw_debug_point(shape.center, shape.radius, color);
+}
+
#endif
/** \} */
diff --git a/source/blender/draw/intern/shaders/common_math_lib.glsl b/source/blender/draw/intern/shaders/common_math_lib.glsl
index f483d55ef97..44596dd5b8d 100644
--- a/source/blender/draw/intern/shaders/common_math_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_math_lib.glsl
@@ -122,6 +122,11 @@ void set_flag_from_test(inout int value, bool test, int flag) { if (test) { valu
#define in_range_exclusive(val, min_v, max_v) \
(all(greaterThan(val, min_v)) && all(lessThan(val, max_v)))
+/* Unsigned integer division of `visible_count` by `divisor`, rounded up. */
+uint divide_ceil_u(uint visible_count, uint divisor)
+{
+ /* Adding `divisor - 1` before the truncating division rounds any remainder up. */
+ uint rounding_bias = divisor - 1u;
+ return (visible_count + rounding_bias) / divisor;
+}
+
float distance_squared(vec2 a, vec2 b)
{
a -= b;