From 1b00ca35758dacf7ece7b95275ea3c41e53bec6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Tue, 23 Nov 2021 21:24:00 +0100 Subject: EEVEE: Light: Port light culling to GPU This removes the light count limit for the forward shaded object. This also provides a more efficient way of computing the culling directly on the GPU. Moreover, this avoids doing multiple lighting passes for high light counts in the deferred pipeline, improving performance. --- source/blender/draw/CMakeLists.txt | 5 +- source/blender/draw/engines/eevee/eevee_culling.cc | 68 ----- source/blender/draw/engines/eevee/eevee_culling.hh | 338 --------------------- .../blender/draw/engines/eevee/eevee_instance.cc | 3 + .../blender/draw/engines/eevee/eevee_instance.hh | 2 + source/blender/draw/engines/eevee/eevee_light.cc | 221 +++++++++----- source/blender/draw/engines/eevee/eevee_light.hh | 105 +++---- source/blender/draw/engines/eevee/eevee_lookdev.cc | 8 +- source/blender/draw/engines/eevee/eevee_shader.cc | 8 +- source/blender/draw/engines/eevee/eevee_shader.hh | 4 +- .../draw/engines/eevee/eevee_shader_shared.hh | 147 +++++---- source/blender/draw/engines/eevee/eevee_shading.cc | 67 +--- source/blender/draw/engines/eevee/eevee_shading.hh | 17 +- source/blender/draw/engines/eevee/eevee_shadow.cc | 32 +- source/blender/draw/engines/eevee/eevee_view.cc | 14 +- source/blender/draw/engines/eevee/eevee_view.hh | 1 - source/blender/draw/engines/eevee/eevee_wrapper.hh | 141 ++++++++- .../eevee/shaders/eevee_culling_debug_frag.glsl | 31 +- .../eevee/shaders/eevee_culling_iter_lib.glsl | 61 ++-- .../engines/eevee/shaders/eevee_culling_lib.glsl | 32 +- .../eevee/shaders/eevee_culling_light_frag.glsl | 51 ---- .../eevee/shaders/eevee_culling_select_comp.glsl | 57 ++++ .../eevee/shaders/eevee_culling_sort_comp.glsl | 138 +++++++++ .../eevee/shaders/eevee_culling_tile_comp.glsl | 73 +++++ .../eevee/shaders/eevee_deferred_direct_frag.glsl | 21 +- 
.../eevee/shaders/eevee_deferred_volume_frag.glsl | 21 +- .../eevee/shaders/eevee_light_eval_lib.glsl | 7 +- .../eevee/shaders/eevee_surface_forward_frag.glsl | 21 +- .../intern/shaders/common_intersection_lib.glsl | 29 +- .../draw/intern/shaders/common_math_geom_lib.glsl | 11 + .../draw/intern/shaders/common_math_lib.glsl | 5 + 31 files changed, 860 insertions(+), 879 deletions(-) delete mode 100644 source/blender/draw/engines/eevee/eevee_culling.cc delete mode 100644 source/blender/draw/engines/eevee/eevee_culling.hh delete mode 100644 source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl create mode 100644 source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl create mode 100644 source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl create mode 100644 source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt index 27fcf32915c..5d80c7bf36e 100644 --- a/source/blender/draw/CMakeLists.txt +++ b/source/blender/draw/CMakeLists.txt @@ -132,7 +132,6 @@ set(SRC engines/image/image_engine.c engines/image/image_shader.c engines/eevee/eevee_camera.cc - engines/eevee/eevee_culling.cc engines/eevee/eevee_depth_of_field.cc engines/eevee/eevee_engine.c engines/eevee/eevee_engine.cc @@ -270,7 +269,9 @@ data_to_c_simple(engines/eevee/shaders/eevee_cubemap_lib.glsl SRC) data_to_c_simple(engines/eevee/shaders/eevee_culling_debug_frag.glsl SRC) data_to_c_simple(engines/eevee/shaders/eevee_culling_iter_lib.glsl SRC) data_to_c_simple(engines/eevee/shaders/eevee_culling_lib.glsl SRC) -data_to_c_simple(engines/eevee/shaders/eevee_culling_light_frag.glsl SRC) +data_to_c_simple(engines/eevee/shaders/eevee_culling_select_comp.glsl SRC) +data_to_c_simple(engines/eevee/shaders/eevee_culling_sort_comp.glsl SRC) +data_to_c_simple(engines/eevee/shaders/eevee_culling_tile_comp.glsl SRC) 
data_to_c_simple(engines/eevee/shaders/eevee_deferred_direct_frag.glsl SRC) data_to_c_simple(engines/eevee/shaders/eevee_deferred_holdout_frag.glsl SRC) data_to_c_simple(engines/eevee/shaders/eevee_deferred_transparent_frag.glsl SRC) diff --git a/source/blender/draw/engines/eevee/eevee_culling.cc b/source/blender/draw/engines/eevee/eevee_culling.cc deleted file mode 100644 index c54c7fa9320..00000000000 --- a/source/blender/draw/engines/eevee/eevee_culling.cc +++ /dev/null @@ -1,68 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright 2021, Blender Foundation. - */ - -/** \file - * \ingroup eevee - * - * A culling object is a data structure that contains fine grained culling - * of entities against in the whole view frustum. The Culling structure contains the - * final entity list since it has to have a special order. 
- * - * Follows the principles of Tiled Culling + Z binning from: - * "Improved Culling for Tiled and Clustered Rendering" - * by Michal Drobot - * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf - */ - -#include "eevee_instance.hh" - -#include "eevee_culling.hh" - -namespace blender::eevee { - -/* -------------------------------------------------------------------- */ -/** \name CullingDebugPass - * \{ */ - -void CullingDebugPass::sync(void) -{ - LightModule &lights = inst_.lights; - - debug_ps_ = DRW_pass_create("CullingDebug", DRW_STATE_WRITE_COLOR); - - GPUShader *sh = inst_.shaders.static_shader_get(CULLING_DEBUG); - DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_ps_); - DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get()); - DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get()); - DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &input_depth_tx_); - DRW_shgroup_call_procedural_triangles(grp, nullptr, 1); -} - -void CullingDebugPass::render(GPUTexture *input_depth_tx) -{ - input_depth_tx_ = input_depth_tx; - - inst_.lights.bind_batch(0); - - DRW_draw_pass(debug_ps_); -} - -/** \} */ - -} // namespace blender::eevee \ No newline at end of file diff --git a/source/blender/draw/engines/eevee/eevee_culling.hh b/source/blender/draw/engines/eevee/eevee_culling.hh deleted file mode 100644 index 976c30a1efb..00000000000 --- a/source/blender/draw/engines/eevee/eevee_culling.hh +++ /dev/null @@ -1,338 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright 2021, Blender Foundation. - */ - -/** \file - * \ingroup eevee - * - * A culling object is a data structure that contains fine grained culling - * of entities against in the whole view frustum. The Culling structure contains the - * final entity list since it has to have a special order. - * - * Follows the principles of Tiled Culling + Z binning from: - * "Improved Culling for Tiled and Clustered Rendering" - * by Michal Drobot - * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf - */ - -#pragma once - -#include "DRW_render.h" - -#include "BLI_vector.hh" - -#include "eevee_shader_shared.hh" - -namespace blender::eevee { - -class Instance; - -/* -------------------------------------------------------------------- */ -/** \name CullingBatch - * \{ */ - -/** - * Do not use directly. Use Culling object instead. - */ -template< - /* Type of data contained per culling batch. */ - typename Tdata> -class CullingBatch { - public: - /** Z ordered items. */ - Tdata item_data; - - private: - /* Items to order in Z. */ - struct ItemHandle { - /** Index inside item_source_. */ - uint32_t source_index; - /** Signed Z distance along camera Z axis. */ - float z_dist; - /** Item radius. */ - float radius; - }; - - /** Compact handle list to order without moving source. */ - Vector item_handles_; - /** Z bins. */ - CullingDataBuf culling_data_; - /** Tile texture and framebuffer handling the 2D culling. 
*/ - eevee::Texture tiles_tx_ = Texture("culling_tx_"); - eevee::Framebuffer tiles_fb_; - - public: - CullingBatch(){}; - ~CullingBatch(){}; - - void init(const ivec2 &extent) - { - item_handles_.clear(); - - uint tile_size = 8; - - uint res[2] = {divide_ceil_u(extent.x, tile_size), divide_ceil_u(extent.y, tile_size)}; - - tiles_tx_.ensure(UNPACK2(res), 1, GPU_RGBA32UI); - - culling_data_.tile_size = tile_size; - for (int i = 0; i < 2; i++) { - culling_data_.tile_to_uv_fac[i] = tile_size / (float)extent[i]; - } - - // tiles_tx_.ensure(1, 1, 1, GPU_RGBA32UI); - // uvec4 no_2D_culling = {UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX}; - // GPU_texture_update(tiles_tx_, GPU_DATA_UINT, no_2D_culling); - - tiles_fb_.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(tiles_tx_)); - } - - void set_empty(void) - { - init_min_max(); - culling_data_.push_update(); - } - - void insert(int32_t index, float z_dist, float radius) - { - ItemHandle handle = {(uint32_t)index, z_dist, radius}; - item_handles_.append(handle); - } - - template - void finalize(float near_z, - float far_z, - const DataAppendF &data_append, - const CullingF &draw_culling) - { - culling_data_.zbin_scale = -CULLING_ZBIN_COUNT / fabsf(far_z - near_z); - culling_data_.zbin_bias = -near_z * culling_data_.zbin_scale; - - /* Order items by Z distance to the camera. */ - auto sort = [](const ItemHandle &a, const ItemHandle &b) { return a.z_dist > b.z_dist; }; - std::sort(item_handles_.begin(), item_handles_.end(), sort); - - init_min_max(); - /* Fill the GPU data buffer. */ - for (auto item_idx : item_handles_.index_range()) { - ItemHandle &handle = item_handles_[item_idx]; - data_append(item_data, item_idx, handle.source_index); - /* Register to Z bins. 
*/ - int z_min = max_ii(culling_z_to_zbin(culling_data_, handle.z_dist + handle.radius), 0); - int z_max = min_ii(culling_z_to_zbin(culling_data_, handle.z_dist - handle.radius), - CULLING_ZBIN_COUNT - 1); - for (auto z : IndexRange(z_min, z_max - z_min + 1)) { - BLI_assert(z >= 0 && z < CULLING_ZBIN_COUNT); - uint16_t(&zbin_minmax)[2] = ((uint16_t(*)[2])culling_data_.zbins)[z]; - if (item_idx < zbin_minmax[0]) { - zbin_minmax[0] = (uint16_t)item_idx; - } - if (item_idx > zbin_minmax[1]) { - zbin_minmax[1] = (uint16_t)item_idx; - } - } - } - /* Set item count for no-cull iterator. */ - culling_data_.items_count = item_handles_.size(); - /* Upload data to GPU. */ - culling_data_.push_update(); - - GPU_framebuffer_bind(tiles_fb_); - - draw_culling(item_data, culling_data_); - } - - /** - * Getters - **/ - bool is_full(void) - { - return item_handles_.size() == CULLING_ITEM_BATCH; - } - const GPUUniformBuf *culling_ubo_get(void) const - { - return culling_data_.ubo_get(); - } - uint items_count_get(void) const - { - return culling_data_.items_count; - } - GPUTexture *culling_texture_get(void) const - { - return tiles_tx_; - } - - private: - void init_min_max(void) - { - /* Init min-max for each bin. */ - for (auto i : IndexRange(CULLING_ZBIN_COUNT)) { - uint16_t *zbin_minmax = (uint16_t *)culling_data_.zbins; - zbin_minmax[i * 2 + 0] = CULLING_ITEM_BATCH - 1; - zbin_minmax[i * 2 + 1] = 0; - } - culling_data_.items_count = 0; - } -}; - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Culling - * \{ */ - -template -class Culling { - private: - using CullingBatchType = CullingBatch; - /** Multiple culling batches containing at most CULLING_ITEM_BATCH items worth of data. */ - Vector batches_; - /** Number of active batches. Allocated count may be higher. */ - int used_batch_count_; - /** Pointer to the active batch being filled. */ - CullingBatchType *active_batch_; - /** Used to get Z distance. 
*/ - vec3 camera_z_axis_; - float camera_z_offset_; - /** View for which the culling is computed. */ - const DRWView *view_; - /** View resolution. */ - ivec2 extent_ = ivec2(0); - - public: - Culling(){}; - ~Culling() - { - for (CullingBatchType *batch : batches_) { - delete batch; - } - } - - void set_view(const DRWView *view, const ivec2 extent) - { - view_ = view; - extent_ = extent; - - float viewinv[4][4]; - DRW_view_viewmat_get(view, viewinv, true); - - camera_z_axis_ = viewinv[2]; - camera_z_offset_ = -vec3::dot(camera_z_axis_, viewinv[3]); - - if (batches_.size() == 0) { - batches_.append(new CullingBatchType()); - } - - used_batch_count_ = 1; - active_batch_ = batches_[0]; - active_batch_->init(extent_); - } - - /* Cull every items. Do not reset the batches to avoid freeing the vectors' memory. */ - void set_empty(void) - { - if (extent_.x == 0) { - extent_ = ivec2(1); - } - - if (batches_.size() == 0) { - batches_.append(new CullingBatchType()); - - active_batch_ = batches_[0]; - active_batch_->init(extent_); - } - - active_batch_ = batches_[0]; - active_batch_->set_empty(); - } - - /* Returns true if we cannot add any more items. - * In this case, the caller is expected to not try to insert another item. */ - bool insert(int32_t index, BoundSphere &bsphere) - { - if (!DRW_culling_sphere_test(view_, &bsphere)) { - return false; - } - - if (active_batch_->is_full()) { - BLI_assert(is_extendable); - /* TODO(fclem) degrow vector of batches. 
*/ - if (batches_.size() < (used_batch_count_ + 1)) { - batches_.append(new CullingBatchType()); - } - active_batch_ = batches_[used_batch_count_]; - active_batch_->init(extent_); - used_batch_count_++; - } - - float z_dist = vec3::dot(bsphere.center, camera_z_axis_) + camera_z_offset_; - active_batch_->insert(index, z_dist, bsphere.radius); - - return active_batch_->is_full(); - } - - template - void finalize(const DataAppendF &data_append, const CullingF &draw_culling) - { - float near_z = DRW_view_near_distance_get(view_); - float far_z = DRW_view_far_distance_get(view_); - - for (auto i : IndexRange(used_batch_count_)) { - batches_[i]->finalize(near_z, far_z, data_append, draw_culling); - } - } - - /** - * Getters - **/ - const CullingBatchType *operator[](int64_t index) const - { - return batches_[index]; - } - IndexRange index_range(void) const - { - return IndexRange(used_batch_count_); - } -}; - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name CullingDebugPass - * \{ */ - -class CullingDebugPass { - private: - Instance &inst_; - - GPUTexture *input_depth_tx_ = nullptr; - - DRWPass *debug_ps_ = nullptr; - - public: - CullingDebugPass(Instance &inst) : inst_(inst){}; - - void sync(void); - void render(GPUTexture *input_depth_tx); -}; - -/** \} */ - -} // namespace blender::eevee \ No newline at end of file diff --git a/source/blender/draw/engines/eevee/eevee_instance.cc b/source/blender/draw/engines/eevee/eevee_instance.cc index 63c9c690180..f614d4b4a97 100644 --- a/source/blender/draw/engines/eevee/eevee_instance.cc +++ b/source/blender/draw/engines/eevee/eevee_instance.cc @@ -22,6 +22,7 @@ * An instance contains all structures needed to do a complete render. 
*/ +#include "BKE_global.h" #include "BKE_object.h" #include "BLI_rect.h" #include "DEG_depsgraph_query.h" @@ -63,6 +64,8 @@ void Instance::init(const ivec2 &output_res, rv3d = rv3d_; baking_probe = light_probe_; + debug_mode = (eDebugMode)G.debug_value; + update_eval_members(); rcti render_border = output_crop(output_res, output_rect); diff --git a/source/blender/draw/engines/eevee/eevee_instance.hh b/source/blender/draw/engines/eevee/eevee_instance.hh index 09548ca504d..3079f931231 100644 --- a/source/blender/draw/engines/eevee/eevee_instance.hh +++ b/source/blender/draw/engines/eevee/eevee_instance.hh @@ -99,6 +99,8 @@ class Instance { /** Can be null. Used to exclude objects during baking. */ const struct LightProbe *baking_probe = nullptr; + eDebugMode debug_mode = SHADOW_DEBUG_NONE; + /* Info string displayed at the top of the render / viewport. */ char info[64]; diff --git a/source/blender/draw/engines/eevee/eevee_light.cc b/source/blender/draw/engines/eevee/eevee_light.cc index b80954e2195..d73d650d78a 100644 --- a/source/blender/draw/engines/eevee/eevee_light.cc +++ b/source/blender/draw/engines/eevee/eevee_light.cc @@ -284,10 +284,10 @@ void LightModule::sync_light(const Object *ob, ObjectHandle &handle) void LightModule::end_sync(void) { - lights_refs_.clear(); - Vector deleted_keys; + light_refs_.clear(); + /* Detect light deletion. */ for (auto item : lights_.items()) { Light &light = item.value; @@ -297,7 +297,7 @@ void LightModule::end_sync(void) } else { light.used = false; - lights_refs_.append(&light); + light_refs_.append(&light); } } @@ -308,110 +308,171 @@ void LightModule::end_sync(void) lights_.remove(key); } + if (light_refs_.size() > CULLING_MAX_ITEM) { + /* TODO(fclem) Print error to user. 
*/ + light_refs_.resize(CULLING_MAX_ITEM); + } + + batch_len_ = divide_ceil_u(max_ii(light_refs_.size(), 1), CULLING_BATCH_SIZE); + lights_data.resize(batch_len_ * CULLING_BATCH_SIZE); + shadows_data.resize(batch_len_ * CULLING_BATCH_SIZE); + culling_key_buf.resize(batch_len_ * CULLING_BATCH_SIZE); + culling_light_buf.resize(batch_len_ * CULLING_BATCH_SIZE); + culling_zbin_buf.resize(batch_len_ * CULLING_ZBIN_COUNT); + culling_data.items_count = light_refs_.size(); + culling_data.tile_word_len = divide_ceil_u(max_ii(culling_data.items_count, 1), 32); + /* Call shadows.end_sync after light pruning to avoid packing deleted shadows. */ inst_.shadows.end_sync(); + + for (auto l_idx : light_refs_.index_range()) { + Light &light = *light_refs_[l_idx]; + lights_data[l_idx] = light; + lights_data[l_idx].shadow_id = LIGHT_NO_SHADOW; + + if (light.shadow_id != LIGHT_NO_SHADOW) { + if (light.type == LIGHT_SUN) { + shadows_data[l_idx] = this->inst_.shadows.directionals[light.shadow_id]; + } + else { + shadows_data[l_idx] = this->inst_.shadows.punctuals[light.shadow_id]; + } + } + } + + lights_data.push_update(); + shadows_data.push_update(); + + { + culling_ps_ = DRW_pass_create("CullingLight", (DRWState)0); + + uint lights_len = light_refs_.size(); + uint batch_len = divide_ceil_u(lights_len, CULLING_BATCH_SIZE); + + if (batch_len > 0) { + /* NOTE: We reference the buffers that may be resized or updated later. 
*/ + { + GPUShader *sh = inst_.shaders.static_shader_get(CULLING_SELECT); + DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_); + DRW_shgroup_vertex_buffer(grp, "lights_buf", lights_data); + DRW_shgroup_vertex_buffer_ref(grp, "culling_buf", &culling_data); + DRW_shgroup_vertex_buffer(grp, "key_buf", culling_key_buf); + DRW_shgroup_call_compute(grp, batch_len, 1, 1); + DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE); + } + { + GPUShader *sh = inst_.shaders.static_shader_get(CULLING_SORT); + DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_); + DRW_shgroup_vertex_buffer(grp, "lights_buf", lights_data); + DRW_shgroup_vertex_buffer_ref(grp, "culling_buf", &culling_data); + DRW_shgroup_vertex_buffer(grp, "key_buf", culling_key_buf); + DRW_shgroup_vertex_buffer_ref(grp, "out_zbins_buf", &culling_zbin_buf); + DRW_shgroup_vertex_buffer_ref(grp, "out_items_buf", &culling_light_buf); + DRW_shgroup_call_compute(grp, batch_len, 1, 1); + DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE); + } + { + GPUShader *sh = inst_.shaders.static_shader_get(CULLING_TILE); + DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_); + DRW_shgroup_vertex_buffer(grp, "lights_buf", culling_light_buf); + DRW_shgroup_vertex_buffer_ref(grp, "culling_buf", &culling_data); + DRW_shgroup_vertex_buffer_ref(grp, "culling_tile_buf", &culling_tile_buf); + DRW_shgroup_call_compute_ref(grp, culling_tile_dispatch_size_); + DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH); + } + } + } + + debug_end_sync(); } -/* Compute acceleration structure for the given view. If extent is 0, bind no lights. 
*/ -void LightModule::set_view(const DRWView *view, const ivec2 extent, bool enable_specular) +void LightModule::debug_end_sync(void) { - if (extent.x == 0) { - culling_.set_empty(); + if (inst_.debug_mode != eDebugMode::DEBUG_LIGHT_CULLING) { + debug_draw_ps_ = nullptr; return; } - culling_.set_view(view, extent); - - for (auto light_id : lights_refs_.index_range()) { - Light &light = *lights_refs_[light_id]; + debug_draw_ps_ = DRW_pass_create("CullingDebug", DRW_STATE_WRITE_COLOR); - BoundSphere bsphere; - if (light.type == LIGHT_SUN) { - /* Make sun lights cover the whole frustum. */ - float viewinv[4][4]; - DRW_view_viewmat_get(view, viewinv, true); - copy_v3_v3(bsphere.center, viewinv[3]); - bsphere.radius = fabsf(DRW_view_far_distance_get(view)); - } - else { - /* TODO(fclem) fit cones better. */ - copy_v3_v3(bsphere.center, light._position); - bsphere.radius = light.influence_radius_max; - } + GPUShader *sh = inst_.shaders.static_shader_get(CULLING_DEBUG); + DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_draw_ps_); + DRW_shgroup_vertex_buffer_ref(grp, "lights_buf", &culling_light_buf); + DRW_shgroup_vertex_buffer_ref(grp, "lights_culling_buf", &culling_data); + DRW_shgroup_vertex_buffer_ref(grp, "lights_zbins_buf", &culling_zbin_buf); + DRW_shgroup_vertex_buffer_ref(grp, "lights_tile_buf", &culling_tile_buf); + DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &input_depth_tx_); + DRW_shgroup_call_procedural_triangles(grp, nullptr, 1); +} - culling_.insert(light_id, bsphere); - } +/* Compute acceleration structure for the given view. If extent is 0, bind no lights. */ +void LightModule::set_view(const DRWView *view, const ivec2 extent, bool enable_specular) +{ + const bool no_lights = (extent.x == 0); - DRW_view_set_active(view); + /* Target 1bit per pixel. */ + uint tile_size = 1u << log2_ceil_u(ceil(sqrtf(culling_data.tile_word_len * 32))); - /* This is only called if the light is visible under this view. 
*/ - auto data_copy = [&](LightBatch &light_batch, uint32_t dst_index, uint32_t src_index) { - Light &light = *this->lights_refs_[src_index]; - LightData &dst = light_batch.lights_data[dst_index]; + int3 tiles_extent; + tiles_extent.x = divide_ceil_u(extent.x, tile_size); + tiles_extent.y = divide_ceil_u(extent.y, tile_size); + tiles_extent.z = batch_len_; - dst = light; - if (!enable_specular) { - dst.specular_power = 0.0f; - } + float far_z = DRW_view_far_distance_get(view); + float near_z = DRW_view_near_distance_get(view); - if (light.shadow_id != LIGHT_NO_SHADOW) { - ShadowData &shadow_dst = light_batch.shadows_data[dst_index]; - if (light.type == LIGHT_SUN) { - shadow_dst = this->inst_.shadows.directionals[light.shadow_id]; - } - else { - shadow_dst = this->inst_.shadows.punctuals[light.shadow_id]; - } - } - }; + culling_data.zbin_scale = -CULLING_ZBIN_COUNT / fabsf(far_z - near_z); + culling_data.zbin_bias = -near_z * culling_data.zbin_scale; + culling_data.tile_size = tile_size; + culling_data.tile_x_len = tiles_extent.x; + culling_data.tile_y_len = tiles_extent.y; + culling_data.tile_to_uv_fac = tile_size / float2(UNPACK2(extent)); - /* Called for each batch. Do 2D gpu culling. */ - auto culling_func = [&](LightBatch &light_batch, CullingDataBuf &culling_data) { - LightDataBuf &lights_data = light_batch.lights_data; - ShadowDataBuf &shadows_data = light_batch.shadows_data; - lights_data.push_update(); - shadows_data.push_update(); + culling_data.enable_specular = enable_specular; + culling_data.items_count = no_lights ? 
0 : light_refs_.size(); + culling_data.visible_count = 0; + culling_data.push_update(); - this->inst_.shading_passes.light_culling.render(lights_data.ubo_get(), culling_data.ubo_get()); - }; + if (no_lights) { + return; + } - culling_.finalize(data_copy, culling_func); + uint word_count = tiles_extent.x * tiles_extent.y * tiles_extent.z * culling_data.tile_word_len; - inst_.shadows.update_visible(view); -} + /* TODO(fclem) Only resize once per redraw. */ + culling_tile_buf.resize(word_count); -void LightModule::bind_batch(int batch_index) -{ - active_batch_ = batch_index; - auto &batch = *culling_[batch_index]; - active_lights_ubo_ = batch.item_data.lights_data.ubo_get(); - active_shadows_ubo_ = batch.item_data.shadows_data.ubo_get(); - active_culling_ubo_ = batch.culling_ubo_get(); - active_culling_tx_ = batch.culling_texture_get(); -} + culling_tile_dispatch_size_.x = divide_ceil_u(word_count, 1024); + culling_tile_dispatch_size_.y = 1; + culling_tile_dispatch_size_.z = 1; -/** \} */ + DRW_view_set_active(view); + DRW_draw_pass(culling_ps_); -/* -------------------------------------------------------------------- */ -/** \name CullingPass - * \{ */ + inst_.shadows.update_visible(view); +} -void CullingLightPass::sync(void) +void LightModule::debug_draw(GPUFrameBuffer *view_fb, HiZBuffer &hiz) { - culling_ps_ = DRW_pass_create("CullingLight", DRW_STATE_WRITE_COLOR); + if (debug_draw_ps_ == nullptr) { + return; + } + input_depth_tx_ = hiz.texture_get(); - GPUShader *sh = inst_.shaders.static_shader_get(CULLING_LIGHT); - DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_ps_); - DRW_shgroup_uniform_block_ref(grp, "lights_block", &lights_ubo_); - DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", &culling_ubo_); - DRW_shgroup_call_procedural_triangles(grp, nullptr, 1); + GPU_framebuffer_bind(view_fb); + DRW_draw_pass(debug_draw_ps_); } -void CullingLightPass::render(const GPUUniformBuf *lights_ubo, const GPUUniformBuf *culling_ubo) +void 
LightModule::shgroup_resources(DRWShadingGroup *grp) { - lights_ubo_ = lights_ubo; - culling_ubo_ = culling_ubo; - DRW_draw_pass(culling_ps_); + DRW_shgroup_vertex_buffer_ref(grp, "lights_buf", &culling_light_buf); + DRW_shgroup_vertex_buffer_ref(grp, "lights_culling_buf", &culling_data); + DRW_shgroup_vertex_buffer_ref(grp, "lights_zbins_buf", &culling_zbin_buf); + DRW_shgroup_vertex_buffer_ref(grp, "lights_tile_buf", &culling_tile_buf); + + DRW_shgroup_vertex_buffer_ref(grp, "shadows_buf", &shadows_data); + DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", inst_.shadows.atlas_tx_get()); + DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", inst_.shadows.tilemap_tx_get()); } /** \} */ diff --git a/source/blender/draw/engines/eevee/eevee_light.hh b/source/blender/draw/engines/eevee/eevee_light.hh index 36bacf9ac8a..254d9231eef 100644 --- a/source/blender/draw/engines/eevee/eevee_light.hh +++ b/source/blender/draw/engines/eevee/eevee_light.hh @@ -29,7 +29,6 @@ #include "DNA_light_types.h" #include "eevee_camera.hh" -#include "eevee_culling.hh" #include "eevee_id_map.hh" #include "eevee_sampling.hh" #include "eevee_shader.hh" @@ -71,27 +70,6 @@ struct Light : public LightData { /** \} */ -/* -------------------------------------------------------------------- */ -/** \name CullingPass - * \{ */ - -class CullingLightPass { - private: - Instance &inst_; - - DRWPass *culling_ps_ = nullptr; - const GPUUniformBuf *lights_ubo_ = nullptr; - const GPUUniformBuf *culling_ubo_ = nullptr; - - public: - CullingLightPass(Instance &inst) : inst_(inst){}; - - void sync(void); - void render(const GPUUniformBuf *lights_ubo, const GPUUniformBuf *culling_ubo); -}; - -/** \} */ - /* -------------------------------------------------------------------- */ /** \name LightModule * \{ */ @@ -102,30 +80,47 @@ class CullingLightPass { class LightModule { friend ShadowModule; + public: + /** Scene lights data. */ + LightDataBuf lights_data; + /** Shadow data. 
TODO(fclem): merge with lights_data. */ + ShadowDataBuf shadows_data; + /** Culling infos. */ + CullingDataBuf culling_data; + /** Key buffer containing only visible lights indices. */ + CullingKeyBuf culling_key_buf; + /** LightData buffer used for rendering. Ordered by the culling phase. */ + CullingLightBuf culling_light_buf; + /** Zbins containing min and max light index for each Z bin. */ + CullingZbinBuf culling_zbin_buf; + /** Bitmap of lights touching each tiles. Using one layer for each culling batch. */ + CullingTileBuf culling_tile_buf; + private: Instance &inst_; /** Map of light objects. This is used to track light deletion. */ Map lights_; - /** References to data in lights_ for easy indexing. */ - Vector lights_refs_; - /** Batches of lights alongside their culling data. */ - struct LightBatch { - LightDataBuf lights_data; - ShadowDataBuf shadows_data; - }; - Culling culling_; - /** Active data pointers used for rendering. */ - const GPUUniformBuf *active_lights_ubo_; - const GPUUniformBuf *active_shadows_ubo_; - const GPUUniformBuf *active_culling_ubo_; - GPUTexture *active_culling_tx_; - int active_batch_ = 0; + + Vector light_refs_; + + /** Follows the principles of Tiled Culling + Z binning from: + * "Improved Culling for Tiled and Clustered Rendering" + * by Michal Drobot + * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf */ + DRWPass *culling_ps_ = nullptr; + int3 culling_tile_dispatch_size_ = int3(1); + /* Number of batches of lights that are separately processed. */ + int batch_len_ = 1; float light_threshold_; + /** Debug Culling visualization. 
*/ + DRWPass *debug_draw_ps_ = nullptr; + GPUTexture *input_depth_tx_ = nullptr; + public: - LightModule(Instance &inst) : inst_(inst), culling_(){}; + LightModule(Instance &inst) : inst_(inst){}; ~LightModule(){}; void begin_sync(void); @@ -134,40 +129,10 @@ class LightModule { void set_view(const DRWView *view, const ivec2 extent, bool enable_specular = true); - void bind_batch(int range_id); + void shgroup_resources(DRWShadingGroup *grp); - /** - * Getters - **/ - const GPUUniformBuf **lights_ubo_ref_get(void) - { - return &active_lights_ubo_; - } - const GPUUniformBuf **shadows_ubo_ref_get(void) - { - return &active_shadows_ubo_; - } - const GPUUniformBuf **culling_ubo_ref_get(void) - { - return &active_culling_ubo_; - } - /** Returns the active Span of lights that passed the culling test. */ - Span lights_get(void) const - { - const auto &batch = *culling_[active_batch_]; - Span span = batch.item_data.lights_data; - return span.take_front(batch.items_count_get()); - } - GPUTexture **culling_tx_ref_get(void) - { - return &active_culling_tx_; - } - /* Return a range iterator to loop over all lights. - * In practice, we render with light in waves of LIGHT_MAX lights at a time. 
*/ - IndexRange index_range(void) const - { - return culling_.index_range(); - } + void debug_end_sync(void); + void debug_draw(GPUFrameBuffer *view_fb, HiZBuffer &hiz); }; /** \} */ diff --git a/source/blender/draw/engines/eevee/eevee_lookdev.cc b/source/blender/draw/engines/eevee/eevee_lookdev.cc index 84c77130b05..e5633ac47ee 100644 --- a/source/blender/draw/engines/eevee/eevee_lookdev.cc +++ b/source/blender/draw/engines/eevee/eevee_lookdev.cc @@ -302,18 +302,13 @@ void LookDev::sync_overlay(void) GPUMaterial *gpumat = inst_.shaders.material_shader_get( mat, mat->nodetree, MAT_PIPE_FORWARD, MAT_GEOM_LOOKDEV, false); DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, overlay_ps_); - DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get()); + lights.shgroup_resources(grp); DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get()); DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get()); DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get()); DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get()); DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get()); DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get()); - DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get()); - DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx); - DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", inst_.shadows.atlas_tx_get()); offset.x -= sphere_size_ + sphere_margin; @@ -340,7 +335,6 @@ void LookDev::render_overlay(GPUFrameBuffer *fb) inst_.lightprobes.set_view(active_view, ivec2(0)); inst_.lights.set_view(active_view, ivec2(0)); - inst_.lights.bind_batch(0); /* Create 
subview for correct shading. Sub because we don not care about culling. */ const CameraData &cam = inst_.camera.data_get(); diff --git a/source/blender/draw/engines/eevee/eevee_shader.cc b/source/blender/draw/engines/eevee/eevee_shader.cc index f802303036f..d1d6e50d5d8 100644 --- a/source/blender/draw/engines/eevee/eevee_shader.cc +++ b/source/blender/draw/engines/eevee/eevee_shader.cc @@ -50,7 +50,9 @@ extern char datatoc_eevee_cubemap_lib_glsl[]; extern char datatoc_eevee_culling_debug_frag_glsl[]; extern char datatoc_eevee_culling_iter_lib_glsl[]; extern char datatoc_eevee_culling_lib_glsl[]; -extern char datatoc_eevee_culling_light_frag_glsl[]; +extern char datatoc_eevee_culling_select_comp_glsl[]; +extern char datatoc_eevee_culling_sort_comp_glsl[]; +extern char datatoc_eevee_culling_tile_comp_glsl[]; extern char datatoc_eevee_deferred_direct_frag_glsl[]; extern char datatoc_eevee_deferred_holdout_frag_glsl[]; extern char datatoc_eevee_deferred_transparent_frag_glsl[]; @@ -236,7 +238,9 @@ ShaderModule::ShaderModule() #define SHADER_FULLSCREEN(enum_, frag_) SHADER_FULLSCREEN_DEFINES(enum_, frag_, nullptr) SHADER_FULLSCREEN(CULLING_DEBUG, eevee_culling_debug_frag); - SHADER_FULLSCREEN(CULLING_LIGHT, eevee_culling_light_frag); + SHADER_COMPUTE(CULLING_SELECT, eevee_culling_select_comp, nullptr); + SHADER_COMPUTE(CULLING_SORT, eevee_culling_sort_comp, nullptr); + SHADER_COMPUTE(CULLING_TILE, eevee_culling_tile_comp, nullptr); SHADER_FULLSCREEN(FILM_FILTER, eevee_film_filter_frag); SHADER_FULLSCREEN(FILM_RESOLVE, eevee_film_resolve_frag); SHADER_FULLSCREEN(FILM_RESOLVE_DEPTH, eevee_film_resolve_depth_frag); diff --git a/source/blender/draw/engines/eevee/eevee_shader.hh b/source/blender/draw/engines/eevee/eevee_shader.hh index f9d4fe2785d..e42e49e35c3 100644 --- a/source/blender/draw/engines/eevee/eevee_shader.hh +++ b/source/blender/draw/engines/eevee/eevee_shader.hh @@ -40,7 +40,9 @@ namespace blender::eevee { /* Keep alphabetical order and clean prefix. 
*/ enum eShaderType { CULLING_DEBUG = 0, - CULLING_LIGHT, + CULLING_SELECT, + CULLING_SORT, + CULLING_TILE, DEFERRED_EVAL_DIRECT, DEFERRED_EVAL_HOLDOUT, diff --git a/source/blender/draw/engines/eevee/eevee_shader_shared.hh b/source/blender/draw/engines/eevee/eevee_shader_shared.hh index 4675d9cc882..6801d4cbd59 100644 --- a/source/blender/draw/engines/eevee/eevee_shader_shared.hh +++ b/source/blender/draw/engines/eevee/eevee_shader_shared.hh @@ -174,6 +174,52 @@ BLI_STATIC_ASSERT_ALIGN(CameraData, 16) /** \} */ +/* -------------------------------------------------------------------- */ +/** \name Film + * \{ */ + +enum eDebugMode : uint32_t { + /* TODO(fclem) Rename shadow cases. */ + SHADOW_DEBUG_NONE = 0u, + /** + * Gradient showing light evaluation hotspots. + */ + DEBUG_LIGHT_CULLING = 4u, + /** + * Tilemaps to screen. Is also present in other modes. + * - Black pixels, no pages allocated. + * - Green pixels, pages cached. + * - Red pixels, pages allocated. + */ + SHADOW_DEBUG_TILEMAPS = 5u, + /** + * Random color per pages. Validates page density allocation and sampling. + */ + SHADOW_DEBUG_PAGES = 6u, + /** + * Outputs random color per tilemap (or tilemap level). Validates tilemaps coverage. + * Black means not covered by any tilemaps LOD of the shadow. + */ + SHADOW_DEBUG_LOD = 7u, + /** + * Outputs white pixels for pages allocated and black pixels for unused pages. + * This needs SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED defined in order to work. + */ + SHADOW_DEBUG_PAGE_ALLOCATION = 8u, + /** + * Outputs the tilemap atlas. Default tilemap is too big for the usual screen resolution. + * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option. + */ + SHADOW_DEBUG_TILE_ALLOCATION = 9u, + /** + * Visualize linear depth stored in the atlas regions of the active light. + * This way, one can check if the rendering, the copying and the shadow sampling functions works. 
+ */ + SHADOW_DEBUG_SHADOW_DEPTH = 10u +}; + +/** \} */ + /* -------------------------------------------------------------------- */ /** \name Film * \{ */ @@ -341,34 +387,44 @@ BLI_STATIC_ASSERT_ALIGN(MotionBlurData, 16) /** \name Cullings * \{ */ -/* Number of items in a culling batch. Needs to be Power of 2. */ +/* TODO(fclem) Rename this. Only used by probes now. */ #define CULLING_ITEM_BATCH 128 +/* Number of items we can cull. Limited by how we store CullingZBin. */ +#define CULLING_MAX_ITEM 65536 +/* Number of items in a culling batch. Needs to be Power of 2. Must be <= to 65536. */ +/* Current limiting factor is the sorting phase which is single pass and only sort within a + * threadgroup which maximum size is 1024. */ +#define CULLING_BATCH_SIZE 1024 /* Maximum number of 32 bit uint stored per tile. */ -#define CULLING_MAX_WORD ((CULLING_ITEM_BATCH + 1) / 32) -/* TODO(fclem) Support more than 4 words using layered texture for culling result. */ -#if CULLING_MAX_WORD > 4 -# error "CULLING_MAX_WORD is greater than supported maximum." -#endif -/* Fine grained subdivision in the Z direction. */ -#define CULLING_ZBIN_COUNT 4088 +#define CULLING_MAX_WORD (CULLING_BATCH_SIZE / 32) +/* Fine grained subdivision in the Z direction (Must be multiple of CULLING_BATCH_SIZE). */ +#define CULLING_ZBIN_COUNT 4096 struct CullingData { - /* Linearly distributed z-bins with encoded uint16_t min and max index. */ - /* NOTE: due to alignment restrictions of uint arrays, use uvec4. */ - uvec4 zbins[CULLING_ZBIN_COUNT / 4]; - /* Extent of one square tile in pixels. */ - int tile_size; - /* Valid item count in the data array. */ - uint items_count; - /* Scale and bias applied to linear Z to get zbin. */ + /** Scale applied to tile pixel coordinates to get target UV coordinate. */ + vec2 tile_to_uv_fac; + /** Scale and bias applied to linear Z to get zbin. */ float zbin_scale; float zbin_bias; - /* Scale applied to tile pixel coordinates to get target UV coordinate. 
*/ - vec2 tile_to_uv_fac; - vec2 _pad0; + /** Valid item count in the source data array. */ + uint items_count; + /** Number of items that passes the first culling test. */ + uint visible_count; + /** Will disable specular during light data copy.. */ + bool enable_specular; + /** Extent of one square tile in pixels. */ + uint tile_size; + /** Number of tiles on the X/Y axis. */ + uint tile_x_len; + uint tile_y_len; + /** Number of word per tile. Depends on the maximum number of lights. */ + uint tile_word_len; + int _pad0; }; BLI_STATIC_ASSERT_ALIGN(CullingData, 16) -BLI_STATIC_ASSERT_SIZE(CullingData, UBO_MIN_MAX_SUPPORTED_SIZE) + +#define CullingZBin uint +#define CullingWord uint static inline int culling_z_to_zbin(CullingData data, float z) { @@ -542,41 +598,6 @@ struct ShadowTileMapData { }; BLI_STATIC_ASSERT_ALIGN(ShadowTileMapData, 16) -enum eShadowDebug : uint32_t { - SHADOW_DEBUG_NONE = 0u, - /** - * Tilemaps to screen. Is also present in other modes. - * - Black pixels, no pages allocated. - * - Green pixels, pages cached. - * - Red pixels, pages allocated. - */ - SHADOW_DEBUG_TILEMAPS = 1u, - /** - * Random color per pages. Validates page density allocation and sampling. - */ - SHADOW_DEBUG_PAGES = 2u, - /** - * Outputs random color per tilemap (or tilemap level). Validates tilemaps coverage. - * Black means not covered by any tilemaps LOD of the shadow. - */ - SHADOW_DEBUG_LOD = 3u, - /** - * Outputs white pixels for pages allocated and black pixels for unused pages. - * This needs SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED defined in order to work. - */ - SHADOW_DEBUG_PAGE_ALLOCATION = 4u, - /** - * Outputs the tilemap atlas. Default tilemap is too big for the usual screen resolution. - * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option. - */ - SHADOW_DEBUG_TILE_ALLOCATION = 5u, - /** - * Visualize linear depth stored in the atlas regions of the active light. 
- * This way, one can check if the rendering, the copying and the shadow sampling functions works. - */ - SHADOW_DEBUG_SHADOW_DEPTH = 6u -}; - /** * Shadow data for debugging the active light shadow. */ @@ -584,7 +605,7 @@ struct ShadowDebugData { LightData light; ShadowData shadow; vec3 camera_position; - eShadowDebug type; + eDebugMode type; int tilemap_data_index; int _pad1; int _pad2; @@ -869,19 +890,23 @@ vec4 utility_tx_sample(vec2 uv, float layer); #ifdef __cplusplus using CameraDataBuf = StructBuffer; using CubemapDataBuf = StructArrayBuffer; -using CullingDataBuf = StructBuffer; +using CullingDataBuf = StorageBuffer; +using CullingKeyBuf = StorageArrayBuffer; +using CullingLightBuf = StorageArrayBuffer; +using CullingTileBuf = StorageArrayBuffer; +using CullingZbinBuf = StorageArrayBuffer; using DepthOfFieldDataBuf = StructBuffer; using GridDataBuf = StructArrayBuffer; using HiZDataBuf = StructBuffer; -using LightDataBuf = StructArrayBuffer; +using LightDataBuf = StorageArrayBuffer; using LightProbeFilterDataBuf = StructBuffer; using LightProbeInfoDataBuf = StructBuffer; using RaytraceBufferDataBuf = StructBuffer; using RaytraceDataBuf = StructBuffer; -using ShadowDataBuf = StructArrayBuffer; -using ShadowTileMapDataBuf = StorageArrayBuffer; -using ShadowPageHeapBuf = StorageArrayBuffer; +using ShadowDataBuf = StorageArrayBuffer; using ShadowDebugDataBuf = StructBuffer; +using ShadowPageHeapBuf = StorageArrayBuffer; +using ShadowTileMapDataBuf = StorageArrayBuffer; using SubsurfaceDataBuf = StructBuffer; using VelocityObjectBuf = StructBuffer; diff --git a/source/blender/draw/engines/eevee/eevee_shading.cc b/source/blender/draw/engines/eevee/eevee_shading.cc index 37501b561c8..3d802299a05 100644 --- a/source/blender/draw/engines/eevee/eevee_shading.cc +++ b/source/blender/draw/engines/eevee/eevee_shading.cc @@ -96,23 +96,17 @@ DRWShadingGroup *ForwardPass::material_opaque_add(::Material *blender_mat, GPUMa { DRWPass *pass = (blender_mat->blend_flag & 
MA_BL_CULL_BACKFACE) ? opaque_culled_ps_ : opaque_ps_; LightModule &lights = inst_.lights; - ShadowModule &shadows = inst_.shadows; LightProbeModule &lightprobes = inst_.lightprobes; eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT; DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass); - DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get()); + lights.shgroup_resources(grp); DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get()); DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get()); DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get()); DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get()); DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get()); DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get()); - DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get()); DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx); - DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get()); - DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get()); /* TODO(fclem): Make this only needed if material uses it ... somehow. 
*/ if (true) { DRW_shgroup_uniform_texture_ref( @@ -143,23 +137,17 @@ DRWShadingGroup *ForwardPass::material_transparent_add(::Material *blender_mat, GPUMaterial *gpumat) { LightModule &lights = inst_.lights; - ShadowModule &shadows = inst_.shadows; LightProbeModule &lightprobes = inst_.lightprobes; eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT; DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_); - DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get()); + lights.shgroup_resources(grp); DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get()); DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get()); DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get()); DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get()); DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get()); DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get()); - DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get()); DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx); - DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get()); - DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get()); /* TODO(fclem): Make this only needed if material uses it ... somehow. */ if (true) { DRW_shgroup_uniform_texture_ref( @@ -224,9 +212,6 @@ void ForwardPass::render(GBuffer &gbuffer, HiZBuffer &hiz, GPUFrameBuffer *view_ GPU_framebuffer_bind(view_fb); } - /* Only one batch of light is supported. 
*/ - inst_.lights.bind_batch(0); - DRW_draw_pass(prepass_ps_); DRW_draw_pass(opaque_ps_); @@ -305,10 +290,7 @@ void DeferredLayer::volume_add(Object *ob) GPUShader *sh = inst_.shaders.static_shader_get(DEFERRED_VOLUME); DRWShadingGroup *grp = DRW_shgroup_create(sh, volume_ps_); - DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get()); - DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get()); + lights.shgroup_resources(grp); DRW_shgroup_uniform_texture_ref(grp, "depth_max_tx", &deferred_pass.input_depth_behind_tx_); DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx); DRW_shgroup_stencil_set(grp, CLOSURE_VOLUME | CLOSURE_TRANSPARENCY, 0xFF, 0xFF); @@ -371,12 +353,8 @@ void DeferredLayer::render(GBuffer &gbuffer, // gbuffer.copy_depth_behind(); // deferred_pass.input_depth_behind_tx_ = gbuffer.depth_behind_tx; - for (auto index : inst_.lights.index_range()) { - inst_.lights.bind_batch(index); - - gbuffer.bind_volume(); - DRW_draw_pass(volume_ps_); - } + gbuffer.bind_volume(); + DRW_draw_pass(volume_ps_); } if (use_holdout) { @@ -404,22 +382,16 @@ void DeferredLayer::render(GBuffer &gbuffer, rt_buffer.resolve(CLOSURE_REFRACTION, gbuffer); } - for (auto index : inst_.lights.index_range()) { - inst_.lights.bind_batch(index); - - if (!no_volumes) { - /* TODO(fclem) volume fb. */ - GPU_framebuffer_bind(view_fb); - DRW_draw_pass(deferred_pass.eval_volume_homogeneous_ps_); - } - - if (!no_surfaces) { - gbuffer.bind_radiance(); - DRW_draw_pass(deferred_pass.eval_direct_ps_); - } + if (!no_volumes) { + /* TODO(fclem) volume fb. 
*/ + GPU_framebuffer_bind(view_fb); + DRW_draw_pass(deferred_pass.eval_volume_homogeneous_ps_); } if (!no_surfaces) { + gbuffer.bind_radiance(); + DRW_draw_pass(deferred_pass.eval_direct_ps_); + if (use_diffuse) { rt_buffer.trace(CLOSURE_DIFFUSE, gbuffer, hiz_front, hiz_front); rt_buffer.denoise(CLOSURE_DIFFUSE); @@ -469,7 +441,6 @@ void DeferredPass::sync(void) volumetric_layer_.sync(); LightModule &lights = inst_.lights; - ShadowModule &shadows = inst_.shadows; LightProbeModule &lightprobes = inst_.lightprobes; eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT; @@ -479,19 +450,14 @@ void DeferredPass::sync(void) eval_direct_ps_ = DRW_pass_create("DeferredDirect", state); GPUShader *sh = inst_.shaders.static_shader_get(DEFERRED_EVAL_DIRECT); DRWShadingGroup *grp = DRW_shgroup_create(sh, eval_direct_ps_); - DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get()); + lights.shgroup_resources(grp); DRW_shgroup_uniform_block(grp, "sampling_block", inst_.sampling.ubo_get()); DRW_shgroup_uniform_block(grp, "grids_block", lightprobes.grid_ubo_get()); DRW_shgroup_uniform_block(grp, "cubes_block", lightprobes.cube_ubo_get()); DRW_shgroup_uniform_block(grp, "lightprobes_info_block", lightprobes.info_ubo_get()); DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get()); DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get()); - DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get()); DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx); - DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get()); - DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get()); DRW_shgroup_uniform_texture_ref_ex( grp, "emission_data_tx", 
&input_emission_data_tx_, no_interp); DRW_shgroup_uniform_texture_ref_ex( @@ -535,13 +501,8 @@ void DeferredPass::sync(void) eval_volume_homogeneous_ps_ = DRW_pass_create("DeferredVolume", state); GPUShader *sh = inst_.shaders.static_shader_get(DEFERRED_EVAL_VOLUME); DRWShadingGroup *grp = DRW_shgroup_create(sh, eval_volume_homogeneous_ps_); - DRW_shgroup_uniform_block_ref(grp, "lights_block", lights.lights_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "shadows_block", lights.shadows_ubo_ref_get()); - DRW_shgroup_uniform_block_ref(grp, "lights_culling_block", lights.culling_ubo_ref_get()); - DRW_shgroup_uniform_texture_ref(grp, "lights_culling_tx", lights.culling_tx_ref_get()); + lights.shgroup_resources(grp); DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.shading_passes.utility_tx); - DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", shadows.atlas_tx_get()); - DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", shadows.tilemap_tx_get()); DRW_shgroup_uniform_texture_ref_ex( grp, "transparency_data_tx", &input_transparency_data_tx_, no_interp); DRW_shgroup_uniform_texture_ref_ex(grp, "volume_data_tx", &input_volume_data_tx_, no_interp); diff --git a/source/blender/draw/engines/eevee/eevee_shading.hh b/source/blender/draw/engines/eevee/eevee_shading.hh index 8adbecf3dd5..b84739a7220 100644 --- a/source/blender/draw/engines/eevee/eevee_shading.hh +++ b/source/blender/draw/engines/eevee/eevee_shading.hh @@ -30,7 +30,6 @@ #include "eevee_lut.h" -#include "eevee_culling.hh" #include "eevee_gbuffer.hh" #include "eevee_raytracing.hh" #include "eevee_shadow.hh" @@ -280,38 +279,24 @@ class UtilityTexture : public Texture { */ class ShadingPasses { public: - CullingLightPass light_culling; - BackgroundPass background; DeferredPass deferred; ForwardPass forward; ShadowPass shadow; VelocityPass velocity; - CullingDebugPass debug_culling; - UtilityTexture utility_tx; public: ShadingPasses(Instance &inst) - : light_culling(inst), - background(inst), - 
deferred(inst), - forward(inst), - shadow(inst), - velocity(inst), - debug_culling(inst){}; + : background(inst), deferred(inst), forward(inst), shadow(inst), velocity(inst){}; void sync() { - light_culling.sync(); - deferred.sync(); forward.sync(); shadow.sync(); velocity.sync(); - - debug_culling.sync(); } DRWShadingGroup *material_add(::Material *blender_mat, diff --git a/source/blender/draw/engines/eevee/eevee_shadow.cc b/source/blender/draw/engines/eevee/eevee_shadow.cc index 3c132bd9ac7..f869d9e643d 100644 --- a/source/blender/draw/engines/eevee/eevee_shadow.cc +++ b/source/blender/draw/engines/eevee/eevee_shadow.cc @@ -554,32 +554,16 @@ void ShadowModule::init(void) inst_.sampling.reset(); } - switch (G.debug_value) { - case 4: - debug_data_.type = SHADOW_DEBUG_TILEMAPS; - break; - case 5: - debug_data_.type = SHADOW_DEBUG_LOD; - break; - case 6: - debug_data_.type = SHADOW_DEBUG_PAGE_ALLOCATION; #ifndef SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED - BLI_assert_msg(0, - "Error: EEVEE: SHADOW_DEBUG_PAGE_ALLOCATION used but " - "SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED " - "is not defined"); -#endif - break; - case 7: - debug_data_.type = SHADOW_DEBUG_TILE_ALLOCATION; - break; - case 8: - debug_data_.type = SHADOW_DEBUG_SHADOW_DEPTH; - break; - default: - debug_data_.type = SHADOW_DEBUG_NONE; - break; + if (inst_.debug_mode == SHADOW_DEBUG_PAGE_ALLOCATION) { + BLI_assert_msg(0, + "Error: EEVEE: SHADOW_DEBUG_PAGE_ALLOCATION used but " + "SHADOW_DEBUG_PAGE_ALLOCATION_ENABLED " + "is not defined"); } +#endif + + debug_data_.type = inst_.debug_mode; memset(views_, 0, sizeof(views_)); } diff --git a/source/blender/draw/engines/eevee/eevee_view.cc b/source/blender/draw/engines/eevee/eevee_view.cc index 7b4516dd727..e868bad0259 100644 --- a/source/blender/draw/engines/eevee/eevee_view.cc +++ b/source/blender/draw/engines/eevee/eevee_view.cc @@ -109,9 +109,6 @@ void ShadingView::sync(ivec2 render_extent_) view_fb_.ensure(GPU_ATTACHMENT_TEXTURE(depth_tx_), 
GPU_ATTACHMENT_TEXTURE(combined_tx_)); - /* Reuse postfx_tx_. */ - debug_fb_.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(postfx_tx_)); - gbuffer_.sync(depth_tx_, combined_tx_, owner); } } @@ -150,6 +147,7 @@ void ShadingView::render(void) inst_.shading_passes.forward.render(gbuffer_, hiz_front_, view_fb_); + inst_.lights.debug_draw(view_fb_, hiz_front_); inst_.shadows.debug_draw(view_fb_, hiz_front_); velocity_.render(depth_tx_); @@ -160,15 +158,7 @@ void ShadingView::render(void) GPUTexture *final_radiance_tx = render_post(combined_tx_); - /* TODO(fclem) Have a special renderpass for this. */ - if (G.debug_value == 3) { - GPU_framebuffer_bind(debug_fb_); - inst_.shading_passes.debug_culling.render(depth_tx_); - - // inst_.render_passes.debug_culling->accumulate(debug_tx_, sub_view_); - inst_.render_passes.combined->accumulate(postfx_tx_, sub_view_); - } - else if (inst_.render_passes.combined) { + if (inst_.render_passes.combined) { inst_.render_passes.combined->accumulate(final_radiance_tx, sub_view_); } diff --git a/source/blender/draw/engines/eevee/eevee_view.hh b/source/blender/draw/engines/eevee/eevee_view.hh index 44512c69ef4..4f1aae0d825 100644 --- a/source/blender/draw/engines/eevee/eevee_view.hh +++ b/source/blender/draw/engines/eevee/eevee_view.hh @@ -78,7 +78,6 @@ class ShadingView { /** Owned resources. */ eevee::Framebuffer view_fb_; - eevee::Framebuffer debug_fb_; /** Draw resources. Not owned. */ GPUTexture *combined_tx_ = nullptr; GPUTexture *depth_tx_ = nullptr; diff --git a/source/blender/draw/engines/eevee/eevee_wrapper.hh b/source/blender/draw/engines/eevee/eevee_wrapper.hh index 96007f0dd6d..d5daa247d1c 100644 --- a/source/blender/draw/engines/eevee/eevee_wrapper.hh +++ b/source/blender/draw/engines/eevee/eevee_wrapper.hh @@ -139,6 +139,8 @@ class StorageArrayBuffer : NonMovable, NonCopyable { T *data_ = nullptr; /* Use vertex buffer for now. Until there is a complete GPUStorageBuf implementation. 
*/ GPUVertBuf *ssbo_; + /* Currently allocated size. */ + int64_t size; #ifdef DEBUG const char *name_ = typeid(T).name(); @@ -149,22 +151,36 @@ class StorageArrayBuffer : NonMovable, NonCopyable { public: StorageArrayBuffer() { - BLI_assert(((sizeof(T) * len) % 16) == 0); + init(len); + } + ~StorageArrayBuffer() + { + GPU_vertbuf_discard(ssbo_); + } + + void init(int64_t new_size) + { + size = new_size; GPUVertFormat format = {0}; GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); GPUUsageType usage = device_only ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_DYNAMIC; ssbo_ = GPU_vertbuf_create_with_format_ex(&format, usage); - GPU_vertbuf_data_alloc(ssbo_, (sizeof(T) / 4) * len); + GPU_vertbuf_data_alloc(ssbo_, divide_ceil_u(sizeof(T) * size, 4)); if (!device_only) { data_ = (T *)GPU_vertbuf_get_data(ssbo_); GPU_vertbuf_use(ssbo_); } } - ~StorageArrayBuffer() + + void resize(int64_t new_size) { - GPU_vertbuf_discard(ssbo_); + BLI_assert(new_size > 0); + if (new_size != size) { + GPU_vertbuf_discard(ssbo_); + this->init(new_size); + } } void push_update(void) @@ -179,6 +195,11 @@ class StorageArrayBuffer : NonMovable, NonCopyable { { return ssbo_; } + /* To be able to use it with DRW_shgroup_*_ref(). */ + GPUVertBuf **operator&() + { + return &ssbo_; + } /** * Get the value at the given index. This invokes undefined behavior when the index is out of @@ -188,7 +209,7 @@ class StorageArrayBuffer : NonMovable, NonCopyable { { BLI_assert(!device_only); BLI_assert(index >= 0); - BLI_assert(index < len); + BLI_assert(index < size); return data_[index]; } @@ -196,7 +217,7 @@ class StorageArrayBuffer : NonMovable, NonCopyable { { BLI_assert(!device_only); BLI_assert(index >= 0); - BLI_assert(index < len); + BLI_assert(index < size); return data_[index]; } @@ -246,6 +267,68 @@ class StorageArrayBuffer : NonMovable, NonCopyable { } }; +/** Simpler version where data is not an array. */ +template< + /** Type of the values stored in this uniform buffer. 
*/ + typename T, + /** True if created on device and no memory host memory is allocated. */ + bool device_only = false> +class StorageBuffer : public T, NonMovable, NonCopyable { + private: + /* Use vertex buffer for now. Until there is a complete GPUStorageBuf implementation. */ + GPUVertBuf *ssbo_; + +#ifdef DEBUG + const char *name_ = typeid(T).name(); +#else + constexpr static const char *name_ = "StorageBuffer"; +#endif + + public: + StorageBuffer() + { + GPUVertFormat format = {0}; + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + + GPUUsageType usage = device_only ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_DYNAMIC; + ssbo_ = GPU_vertbuf_create_with_format_ex(&format, usage); + GPU_vertbuf_data_alloc(ssbo_, divide_ceil_u(sizeof(T), 4)); + if (!device_only) { + GPU_vertbuf_use(ssbo_); + } + } + ~StorageBuffer() + { + GPU_vertbuf_discard(ssbo_); + } + + void push_update(void) + { + BLI_assert(!device_only); + /* TODO(fclem): Avoid a full copy. */ + T *data = (T *)GPU_vertbuf_get_data(ssbo_); + *data = *this; + + GPU_vertbuf_use(ssbo_); + } + + operator GPUVertBuf *() const + { + return ssbo_; + } + /* To be able to use it with DRW_shgroup_*_ref(). */ + GPUVertBuf **operator&() + { + return &ssbo_; + } + + StorageBuffer &operator=(const T &other) + { + *static_cast(this) = other; + return *this; + } +}; + /** Simpler version where data is not an array. */ template class StructBuffer : public T, NonMovable, NonCopyable { private: @@ -365,6 +448,42 @@ class Texture { tx_ = nullptr; } + /* Return true is a texture has been created. */ + bool ensure(const char *name, + int w, + int h, + int d, + int mips, + eGPUTextureFormat format, + bool layered = false) + { + + /* TODO(fclem) In the future, we need to check if mip_count did not change. 
+ * For now it's ok as we always define all mip level.*/ + if (tx_) { + int3 size = this->size(); + BLI_assert(GPU_texture_array(tx_) == layered); + if (size != int3(w, h, d) || GPU_texture_format(tx_) != format) { + GPU_TEXTURE_FREE_SAFE(tx_); + } + } + if (tx_ == nullptr) { + if (layered) { + tx_ = GPU_texture_create_2d_array(name, w, h, d, mips, format, nullptr); + } + else { + tx_ = GPU_texture_create_3d(name, w, h, d, mips, format, GPU_DATA_FLOAT, nullptr); + } + if (mips > 1) { + /* TODO(fclem) Remove once we have immutable storage or when mips are + * generated on creation. */ + GPU_texture_generate_mipmap(tx_); + } + return true; + } + return false; + } + /* Return true is a texture has been created. */ bool ensure(const char *name, int w, int h, int mips, eGPUTextureFormat format) { @@ -474,6 +593,10 @@ class Texture { return &tx_; } + bool is_valid(void) const + { + return !!tx_; + } int width(void) const { return GPU_texture_width(tx_); @@ -482,6 +605,12 @@ class Texture { { return GPU_texture_height(tx_); } + int3 size(void) const + { + int3 size; + GPU_texture_get_mipmap_size(tx_, 0, size); + return size; + } }; class Framebuffer { diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl index f559788145d..33734324445 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_debug_frag.glsl @@ -2,25 +2,34 @@ /** * Debug Shader outputing a gradient of orange - white - blue to mark culling hotspots. * Green pixels are error pixels that are missing lights from the culling pass (i.e: when culling - * pass is not conservative enough). This shader will only work on the last light batch so remove - * some lights from the scene you are debugging to have below CULLING_ITEM_BATCH lights. + * pass is not conservative enough). 
*/ #pragma BLENDER_REQUIRE(common_view_lib.glsl) #pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) #pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl) -layout(std140) uniform lights_block +layout(std430, binding = 0) readonly restrict buffer lights_buf { - LightData lights[CULLING_ITEM_BATCH]; + LightData lights[]; }; -layout(std140) uniform lights_culling_block +layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf { - CullingData culling; + CullingZBin lights_zbins[]; +}; + +layout(std430, binding = 2) readonly restrict buffer lights_culling_buf +{ + CullingData light_culling; +}; + +layout(std430, binding = 3) readonly restrict buffer lights_tile_buf +{ + CullingWord lights_culling_words[]; }; -uniform usampler2D item_culling_tx; uniform sampler2D depth_tx; in vec4 uvcoordsvar; @@ -29,14 +38,14 @@ layout(location = 0) out vec4 out_debug_color; void main(void) { - float depth = textureLod(depth_tx, uvcoordsvar.xy, 0.0).r; + float depth = texelFetch(depth_tx, ivec2(gl_FragCoord.xy), 0).r; float vP_z = get_view_z_from_depth(depth); vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth); float lights_count = 0.0; uint lights_cull = 0u; - ITEM_FOREACH_BEGIN (culling, item_culling_tx, vP_z, l_idx) { + ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) { LightData light = lights[l_idx]; lights_cull |= 1u << l_idx; lights_count += 1.0; @@ -44,7 +53,7 @@ void main(void) ITEM_FOREACH_END uint lights_nocull = 0u; - ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) { + ITEM_FOREACH_BEGIN_NO_CULL (light_culling, l_idx) { LightData light = lights[l_idx]; if (distance(light._position, P) < light.influence_radius_max) { lights_nocull |= 1u << l_idx; @@ -57,6 +66,6 @@ void main(void) out_debug_color = vec4(0.0, 1.0, 0.0, 1.0); } else { - out_debug_color = vec4(heatmap_gradient(lights_count / 16.0), 1.0); + out_debug_color = vec4(heatmap_gradient(lights_count / 4.0), 1.0); } } \ No newline 
at end of file diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl index a0ea075db22..640ffb4a6a1 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_iter_lib.glsl @@ -8,11 +8,14 @@ uint bit_field_mask(uint bit_width, uint bit_min) return ~mask << bit_min; } -uint zbin_mask(int word_index, int zbin_min, int zbin_max) +uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max) { - int local_min = clamp(zbin_min - word_index * 32, 0, 31); - int mask_width = clamp(zbin_max - zbin_min + 1, 0, 32); - return bit_field_mask(uint(mask_width), uint(local_min)); + uint word_start = word_index * 32u; + uint word_end = word_start + 31u; + uint local_min = max(zbin_min, word_start); + uint local_max = min(zbin_max, word_end); + uint mask_width = local_max - local_min + 1; + return bit_field_mask(mask_width, local_min); } /* Waiting to implement extensions support. We need: @@ -28,39 +31,39 @@ uint zbin_mask(int word_index, int zbin_min, int zbin_max) # define subgroupBroadcastFirst(a) a #endif -#define ITEM_FOREACH_BEGIN(_culling, _tiles_tx, _linearz, _item_index) \ +#define ITEM_FOREACH_BEGIN(_culling, _zbins, _words, _linearz, _item_index) \ { \ - int zbin_index = culling_z_to_zbin(_culling, _linearz); \ - zbin_index = min(max(zbin_index, 0), int(CULLING_ZBIN_COUNT - 1)); \ - uint zbin_data = _culling.zbins[zbin_index / 4][zbin_index % 4]; \ - int min_index = int(zbin_data & uint(CULLING_ITEM_BATCH - 1)); \ - int max_index = int((zbin_data >> 16u) & uint(CULLING_ITEM_BATCH - 1)); \ - /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. 
*/ \ - min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \ - max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \ - int word_min = 0; \ - int word_max = max(0, CULLING_MAX_WORD - 1); \ - word_min = max(min_index / 32, word_min); \ - word_max = min(max_index / 32, word_max); \ - for (int word_index = word_min; word_index <= word_max; word_index++) { \ - /* TODO(fclem) Support bigger max_word with larger texture. */ \ - ivec2 texel = ivec2(gl_FragCoord.xy) / _culling.tile_size; \ - uint word = texelFetch(_tiles_tx, texel, 0)[word_index]; \ - uint mask = zbin_mask(word_index, min_index, max_index); \ - word &= mask; \ + uint batch_count = divide_ceil_u(_culling.visible_count, CULLING_BATCH_SIZE); \ + uvec2 tile_co = uvec2(gl_FragCoord.xy) / _culling.tile_size; \ + uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \ + _culling.tile_word_len; \ + for (uint batch = 0; batch < batch_count; batch++) { \ + int zbin_index = culling_z_to_zbin(_culling, _linearz); \ + zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \ + uint zbin_data = _zbins[zbin_index + batch * CULLING_ZBIN_COUNT]; \ + uint min_index = zbin_data & 0xFFFFu; \ + uint max_index = zbin_data >> 16u; \ /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \ - word = subgroupBroadcastFirst(subgroupOr(word)); \ - /* TODO(fclem) Replace by findLSB on supported hardware. 
*/ \ - for (uint i = 0u; word != 0u; word = word >> 1u, i++) { \ - if ((word & 1u) != 0u) { \ - int _item_index = word_index * 32 + int(i); + min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \ + max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \ + uint word_min = min_index / 32u; \ + uint word_max = max_index / 32u; \ + for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \ + uint word = _words[tile_word_offset + word_idx]; \ + word &= zbin_mask(word_idx, min_index, max_index); \ + /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \ + word = subgroupBroadcastFirst(subgroupOr(word)); \ + while (word != 0u) { \ + uint bit_index = uint(findLSB(word)); \ + word &= ~1u << bit_index; \ + uint _item_index = word_idx * 32u + bit_index; /* No culling. Iterate over all items. */ #define ITEM_FOREACH_BEGIN_NO_CULL(_culling, _item_index) \ { \ { \ { \ - for (uint _item_index = 0u; _item_index < _culling.items_count; _item_index++) { + for (uint _item_index = 0u; _item_index < _culling.visible_count; _item_index++) { #define ITEM_FOREACH_END \ } \ diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl index f128b89e864..27a39817140 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_lib.glsl @@ -7,11 +7,6 @@ /** \name Intersection Tests * \{ */ -struct Sphere { - vec3 position; - float radius; -}; - struct Cone { vec3 direction; float angle_cos; @@ -39,12 +34,12 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone) * by Eric Zhang * https://lxjk.github.io/2018/03/25/Improve-Tile-based-Light-Culling-with-Spherical-sliced-Cone.html */ - float sphere_distance = length(sphere.position); + float sphere_distance = length(sphere.center); float sphere_sin = saturate(sphere.radius / sphere_distance); float sphere_cos = sqrt(1.0 - 
sphere_sin * sphere_sin); float cone_aperture_sin = sqrt(1.0 - cone.angle_cos * cone.angle_cos); - float cone_sphere_center_cos = dot(sphere.position / sphere_distance, cone.direction); + float cone_sphere_center_cos = dot(sphere.center / sphere_distance, cone.direction); /* cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B). */ float cone_sphere_angle_sum_cos = (sphere.radius > sphere_distance) ? -1.0 : @@ -58,22 +53,22 @@ bool culling_sphere_cone_isect(Sphere sphere, Cone cone) bool culling_sphere_cylinder_isect(Sphere sphere, Cylinder cylinder) { - float distance_squared = len_squared(sphere.position.xy - cylinder.center.xy); + float distance_squared = len_squared(sphere.center.xy - cylinder.center.xy); return (distance_squared < sqr(cylinder.radius + sphere.radius)); } bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum) { - if (dot(vec4(sphere.position, 1.0), frustum.planes[0]) > sphere.radius) { + if (dot(vec4(sphere.center, 1.0), frustum.planes[0]) > sphere.radius) { return false; } - if (dot(vec4(sphere.position, 1.0), frustum.planes[1]) > sphere.radius) { + if (dot(vec4(sphere.center, 1.0), frustum.planes[1]) > sphere.radius) { return false; } - if (dot(vec4(sphere.position, 1.0), frustum.planes[2]) > sphere.radius) { + if (dot(vec4(sphere.center, 1.0), frustum.planes[2]) > sphere.radius) { return false; } - if (dot(vec4(sphere.position, 1.0), frustum.planes[3]) > sphere.radius) { + if (dot(vec4(sphere.center, 1.0), frustum.planes[3]) > sphere.radius) { return false; } return true; @@ -82,7 +77,7 @@ bool culling_sphere_frustum_isect(Sphere sphere, Frustum frustum) bool culling_sphere_tile_isect(Sphere sphere, CullingTile tile) { /* Culling in view space for precision and simplicity. */ - sphere.position = transform_point(ViewMatrix, sphere.position); + sphere.center = transform_point(ViewMatrix, sphere.center); bool isect; /* Test tile intersection using bounding cone or bounding cylinder. 
* This has less false positive cases when the sphere is large. */ @@ -148,14 +143,15 @@ vec2 tile_to_ndc(CullingData culling, vec2 tile_co, vec2 offset) return tile_co * culling.tile_to_uv_fac * 2.0 - 1.0; } -CullingTile culling_tile_get(CullingData culling) +CullingTile culling_tile_get(CullingData culling, uvec2 tile_co) { + vec2 ftile = vec2(tile_co); /* Culling frustum corners for this tile. */ vec3 corners[8]; - corners[0].xy = corners[4].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, 0.5)); - corners[1].xy = corners[5].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(0.5, -0.5)); - corners[2].xy = corners[6].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, -0.5)); - corners[3].xy = corners[7].xy = tile_to_ndc(culling, gl_FragCoord.xy, vec2(-0.5, 0.5)); + corners[0].xy = corners[4].xy = tile_to_ndc(culling, ftile, vec2(1, 1)); + corners[1].xy = corners[5].xy = tile_to_ndc(culling, ftile, vec2(1, 0)); + corners[2].xy = corners[6].xy = tile_to_ndc(culling, ftile, vec2(0, 0)); + corners[3].xy = corners[7].xy = tile_to_ndc(culling, ftile, vec2(0, 1)); /* The corners depth only matter for precision. Use a mix of not so close to clip plane to * avoid small float imprecision if near clip is low. */ corners[0].z = corners[1].z = corners[2].z = corners[3].z = -0.5; diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl deleted file mode 100644 index c81a94b35f3..00000000000 --- a/source/blender/draw/engines/eevee/shaders/eevee_culling_light_frag.glsl +++ /dev/null @@ -1,51 +0,0 @@ - -/** - * 2D Culling pass for lights. - * We iterate over all items and check if they intersect with the tile frustum. 
- */ - -#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl) -#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl) - -layout(std140) uniform lights_block -{ - LightData lights[CULLING_ITEM_BATCH]; -}; - -layout(std140) uniform lights_culling_block -{ - CullingData culling; -}; - -in vec4 uvcoordsvar; - -layout(location = 0) out uvec4 out_items_bits; - -void main(void) -{ - CullingTile tile = culling_tile_get(culling); - - out_items_bits = uvec4(0); - ITEM_FOREACH_BEGIN_NO_CULL (culling, l_idx) { - LightData light = lights[l_idx]; - - bool intersect_tile = true; - switch (light.type) { - case LIGHT_SPOT: - /* TODO cone culling. */ - case LIGHT_RECT: - case LIGHT_ELLIPSE: - case LIGHT_POINT: - Sphere sphere = Sphere(light._position, light.influence_radius_max); - intersect_tile = culling_sphere_tile_isect(sphere, tile); - break; - default: - break; - } - - if (intersect_tile) { - out_items_bits[l_idx / 32u] |= 1u << (l_idx % 32u); - } - } - ITEM_FOREACH_END -} \ No newline at end of file diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl new file mode 100644 index 00000000000..138e54b8bae --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_select_comp.glsl @@ -0,0 +1,57 @@ + +/** + * Select the visible items inside the active view and put them inside the sorting buffer. 
+ */ + +#pragma BLENDER_REQUIRE(common_debug_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersection_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_shader_shared.hh) + +layout(local_size_x = CULLING_ITEM_BATCH) in; + +layout(std430, binding = 0) readonly restrict buffer lights_buf +{ + LightData lights[]; +}; + +layout(std430, binding = 1) restrict buffer culling_buf +{ + CullingData culling; +}; + +layout(std430, binding = 2) restrict buffer key_buf +{ + uint keys[]; +}; + +void main() +{ + uint l_idx = gl_GlobalInvocationID.x; + if (l_idx >= culling.items_count) { + return; + } + + LightData light = lights[l_idx]; + + Sphere sphere; + switch (light.type) { + case LIGHT_SUN: + sphere = Sphere(cameraPos, ViewFar * 2.0); + break; + case LIGHT_SPOT: + /* TODO cone culling. */ + case LIGHT_RECT: + case LIGHT_ELLIPSE: + case LIGHT_POINT: + sphere = Sphere(light._position, light.influence_radius_max); + break; + } + + if (intersect_view(sphere)) { + uint index = atomicAdd(culling.visible_count, 1); + keys[index] = l_idx; + } +} diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl new file mode 100644 index 00000000000..dfd2c80a45a --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_sort_comp.glsl @@ -0,0 +1,138 @@ + +/** + * Sort the lights by their Z distance to the camera. + * Outputs ordered light buffer and associated zbins. + * We split the work in CULLING_BATCH_SIZE and iterate to cover all zbins. + * One thread processes one Light entity.
+ */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_shader_shared.hh) + +layout(local_size_x = CULLING_BATCH_SIZE) in; + +layout(std430, binding = 0) readonly restrict buffer lights_buf +{ + LightData lights[]; +}; + +layout(std430, binding = 1) restrict buffer culling_buf +{ + CullingData culling; +}; + +layout(std430, binding = 2) readonly restrict buffer key_buf +{ + uint keys[]; +}; + +layout(std430, binding = 3) writeonly restrict buffer out_zbins_buf +{ + CullingZBin out_zbins[]; +}; + +layout(std430, binding = 4) writeonly restrict buffer out_items_buf +{ + LightData out_lights[]; +}; + +void main() +{ + uint src_index = gl_GlobalInvocationID.x; + bool valid_thread = true; + + if (src_index >= culling.visible_count) { + /* Do not return because we use barriers later on (which need uniform control flow). + * Just process the same last item but avoid insertion. */ + src_index = culling.visible_count - 1; + valid_thread = false; + } + + uint key = keys[src_index]; + LightData light = lights[key]; + + if (!culling.enable_specular) { + light.specular_power = 0.0; + } + + int index = 0; + int contenders = 0; + + /* TODO(fclem): Sun lights are polluting the zbins for no reason. Better bypass culling. */ + vec3 lP = (light.type == LIGHT_SUN) ? cameraPos : light._position; + float radius = (light.type == LIGHT_SUN) ? ViewFar * 2.0 : light.influence_radius_max; + float z_dist = dot(cameraForward, lP) - dot(cameraForward, cameraPos); + + int z_min = clamp(culling_z_to_zbin(culling, z_dist + radius), 0, CULLING_ZBIN_COUNT - 1); + int z_max = clamp(culling_z_to_zbin(culling, z_dist - radius), 0, CULLING_ZBIN_COUNT - 1); + + if (!valid_thread) { + /* Do not register invalid threads. */ + z_max = z_min - 1; + } + + /* Fits the limit of 32KB.
*/ + shared int zbin_max[CULLING_ZBIN_COUNT]; + shared int zbin_min[CULLING_ZBIN_COUNT]; + /* Compilers do not release shared memory from early declaration. + * So we are forced to reuse the same variables in another form. */ +#define z_dists zbin_max +#define contender_table zbin_min + + /** + * Find how many values are before the local value. + * This finds the first possible destination index. + */ + z_dists[gl_LocalInvocationID.x] = floatBitsToInt(z_dist); + barrier(); + + const uint i_start = gl_WorkGroupID.x * CULLING_BATCH_SIZE; + uint i_max = min(CULLING_BATCH_SIZE, culling.visible_count - i_start); + for (uint i = 0; i < i_max; i++) { + float ref = intBitsToFloat(z_dists[i]); + if (ref > z_dist) { + index++; + } + else if (ref == z_dist) { + contenders++; + } + } + + atomicExchange(contender_table[index], contenders); + barrier(); + + if (valid_thread) { + /** + * For each clashing index (where two lights have exactly the same z distances) + * we use an atomic counter to know how much to offset from the disputed index. + */ + index += atomicAdd(contender_table[index], -1) - 1; + index += int(i_start); + out_lights[index] = light; + } + + const uint iter = uint(CULLING_ZBIN_COUNT / CULLING_BATCH_SIZE); + const uint zbin_local = gl_LocalInvocationID.x * iter; + const uint zbin_global = gl_WorkGroupID.x * CULLING_ZBIN_COUNT + zbin_local; + + for (uint i = 0u, l = zbin_local; i < iter; i++, l++) { + zbin_max[l] = 0x0000; + zbin_min[l] = 0xFFFF; + } + barrier(); + + /* Register to Z bins. */ + for (int z = z_min; z <= z_max; z++) { + atomicMin(zbin_min[z], index); + atomicMax(zbin_max[z], index); + } + barrier(); + + /* Write result to zbins buffer. */ + for (uint i = 0u, g = zbin_global, l = zbin_local; i < iter; i++, g++, l++) { + /* Pack min & max into 1 uint. 
*/ + out_zbins[g] = (uint(zbin_max[l]) << 16u) | uint(zbin_min[l]); + } +} diff --git a/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl new file mode 100644 index 00000000000..913e094980e --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/eevee_culling_tile_comp.glsl @@ -0,0 +1,73 @@ + +/** + * 2D Culling pass for lights. + * We iterate over all items and check if they intersect with the tile frustum. + * Dispatch one thread per word. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_shader_shared.hh) +#pragma BLENDER_REQUIRE(eevee_culling_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_culling_iter_lib.glsl) + +layout(local_size_x = 1024) in; + +layout(std430, binding = 0) readonly restrict buffer lights_buf +{ + LightData lights[]; +}; + +layout(std430, binding = 1) readonly restrict buffer culling_buf +{ + CullingData culling; +}; + +layout(std430, binding = 2) writeonly restrict buffer culling_tile_buf +{ + CullingWord culling_words[]; +}; + +void main(void) +{ + uint word_idx = gl_GlobalInvocationID.x % culling.tile_word_len; + uint tile_idx = gl_GlobalInvocationID.x / culling.tile_word_len; + uvec2 tile_co = uvec2(tile_idx % culling.tile_x_len, tile_idx / culling.tile_x_len); + + if (tile_co.y >= culling.tile_y_len) { + return; + } + + /* TODO(fclem): We could stop the tile at the HiZ depth. */ + CullingTile tile = culling_tile_get(culling, tile_co); + + uint l_idx = word_idx * 32u; + uint l_end = min(l_idx + 32u, culling.visible_count); + uint word = 0u; + + for (; l_idx < l_end; l_idx++) { + LightData light = lights[l_idx]; + + bool intersect_tile; + switch (light.type) { + case LIGHT_SUN: + intersect_tile = true; + break; + case LIGHT_SPOT: + /* TODO cone culling. 
*/ + case LIGHT_RECT: + case LIGHT_ELLIPSE: + case LIGHT_POINT: + Sphere sphere = Sphere(light._position, light.influence_radius_max); + intersect_tile = culling_sphere_tile_isect(sphere, tile); + break; + } + + if (intersect_tile) { + word |= 1u << (l_idx & 0x1Fu); + } + } + + culling_words[gl_GlobalInvocationID.x] = word; +} \ No newline at end of file diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl index 942f75961e9..14e38d6f1d6 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_direct_frag.glsl @@ -17,19 +17,29 @@ layout(std140) uniform sampling_block SamplingData sampling; }; -layout(std140) uniform lights_block +layout(std430, binding = 0) readonly restrict buffer lights_buf { - LightData lights[CULLING_ITEM_BATCH]; + LightData lights[]; }; -layout(std140) uniform lights_culling_block +layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf +{ + CullingZBin lights_zbins[]; +}; + +layout(std430, binding = 2) readonly restrict buffer lights_culling_buf { CullingData light_culling; }; -layout(std140) uniform shadows_block +layout(std430, binding = 3) readonly restrict buffer lights_tile_buf +{ + CullingWord lights_culling_words[]; +}; + +layout(std430, binding = 4) readonly restrict buffer shadows_buf { - ShadowData shadows[CULLING_ITEM_BATCH]; + ShadowData shadows[]; }; layout(std140) uniform grids_block @@ -55,7 +65,6 @@ uniform sampler2D transmit_data_tx; uniform sampler2D reflect_color_tx; uniform sampler2D reflect_normal_tx; uniform sampler1D sss_transmittance_tx; -uniform usampler2D lights_culling_tx; uniform sampler2DArray utility_tx; uniform sampler2D shadow_atlas_tx; uniform usampler2D shadow_tilemaps_tx; diff --git a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl 
b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl index 677881abd71..068db3e78fd 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_deferred_volume_frag.glsl @@ -15,24 +15,33 @@ #pragma BLENDER_REQUIRE(eevee_volume_eval_lib.glsl) #pragma BLENDER_REQUIRE(eevee_shader_shared.hh) -layout(std140) uniform lights_block +layout(std430, binding = 0) readonly restrict buffer lights_buf { - LightData lights[CULLING_ITEM_BATCH]; + LightData lights[]; }; -layout(std140) uniform lights_culling_block +layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf +{ + CullingZBin lights_zbins[]; +}; + +layout(std430, binding = 2) readonly restrict buffer lights_culling_buf { CullingData light_culling; }; -layout(std140) uniform shadows_block +layout(std430, binding = 3) readonly restrict buffer lights_tile_buf +{ + CullingWord lights_culling_words[]; +}; + +layout(std430, binding = 4) readonly restrict buffer shadows_buf { - ShadowData shadows[CULLING_ITEM_BATCH]; + ShadowData shadows[]; }; uniform sampler2D transparency_data_tx; uniform usampler2D volume_data_tx; -uniform usampler2D lights_culling_tx; uniform sampler2DArray utility_tx; uniform sampler2DShadow shadow_atlas_tx; uniform usampler2D shadow_tilemaps_tx; diff --git a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl index 448e5b54886..d3d5f859174 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_light_eval_lib.glsl @@ -4,9 +4,10 @@ * A prototype needs to be declared before main in order to use it. 
* * The resources expected to be defined are: - * - light_culling - * - lights_culling_tx * - lights + * - lights_zbins + * - light_culling + * - lights_culling_words * - shadows * - shadow_atlas_tx * - shadow_tilemaps_tx @@ -33,7 +34,7 @@ void light_eval(ClosureDiffuse diffuse, vec4 ltc_mat = utility_tx_sample(uv, UTIL_LTC_MAT_LAYER); float ltc_mag = utility_tx_sample(uv, UTIL_LTC_MAG_LAYER).x; - ITEM_FOREACH_BEGIN (light_culling, lights_culling_tx, vP_z, l_idx) { + ITEM_FOREACH_BEGIN (light_culling, lights_zbins, lights_culling_words, vP_z, l_idx) { LightData light = lights[l_idx]; vec3 L; float dist; diff --git a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl index 9723d24544c..152bfbeacec 100644 --- a/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/eevee_surface_forward_frag.glsl @@ -25,19 +25,29 @@ layout(std140) uniform sampling_block SamplingData sampling; }; -layout(std140) uniform lights_block +layout(std430, binding = 0) readonly restrict buffer lights_buf { - LightData lights[CULLING_ITEM_BATCH]; + LightData lights[]; }; -layout(std140) uniform lights_culling_block +layout(std430, binding = 1) readonly restrict buffer lights_zbins_buf +{ + CullingZBin lights_zbins[]; +}; + +layout(std430, binding = 2) readonly restrict buffer lights_culling_buf { CullingData light_culling; }; -layout(std140) uniform shadows_block +layout(std430, binding = 3) readonly restrict buffer lights_tile_buf +{ + CullingWord lights_culling_words[]; +}; + +layout(std430, binding = 4) readonly restrict buffer shadows_buf { - ShadowData shadows[CULLING_ITEM_BATCH]; + ShadowData shadows[]; }; layout(std140) uniform grids_block @@ -75,7 +85,6 @@ layout(std140) uniform hiz_block HiZData hiz; }; -uniform usampler2D lights_culling_tx; uniform sampler2DArray utility_tx; uniform sampler2D shadow_atlas_tx; uniform 
usampler2D shadow_tilemaps_tx; diff --git a/source/blender/draw/intern/shaders/common_intersection_lib.glsl b/source/blender/draw/intern/shaders/common_intersection_lib.glsl index a2151ecf7a7..c8a97808247 100644 --- a/source/blender/draw/intern/shaders/common_intersection_lib.glsl +++ b/source/blender/draw/intern/shaders/common_intersection_lib.glsl @@ -66,7 +66,6 @@ bool intersect_view(Pyramid pyramid) */ bool intersects = true; -#if TEST_ENABLED /* Do Pyramid vertices vs Frustum planes. */ for (int p = 0; p < 6 && intersects; ++p) { bool is_any_vertex_on_positive_side = false; @@ -79,9 +78,7 @@ bool intersect_view(Pyramid pyramid) intersects = false; } } -#endif -#if TEST_ENABLED && FALSE_POSITIVE_REJECTION if (intersects) { vec4 pyramid_planes[5]; planes_setup(pyramid, pyramid_planes); @@ -98,7 +95,6 @@ bool intersect_view(Pyramid pyramid) } } } -#endif #if defined(DEBUG_DRAW) && defined(DEBUG_DRAW_ISECT) drw_debug(pyramid, intersects ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1)); @@ -110,7 +106,6 @@ bool intersect_view(Box box) { bool intersects = true; -#if TEST_ENABLED /* Do Box vertices vs Frustum planes. */ for (int p = 0; p < 6 && intersects; ++p) { bool is_any_vertex_on_positive_side = false; @@ -123,9 +118,7 @@ bool intersect_view(Box box) intersects = false; } } -#endif -#if TEST_ENABLED && FALSE_POSITIVE_REJECTION if (intersects) { vec4 box_planes[6]; planes_setup(box, box_planes); @@ -142,11 +135,31 @@ bool intersect_view(Box box) } } } + +#if defined(DEBUG_DRAW) && defined(DEBUG_DRAW_ISECT) + if (intersects) { + drw_debug(box, intersects ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1)); + } #endif + return intersects; +} + +bool intersect_view(Sphere sph) +{ + bool intersects = true; + + for (int p = 0; p < 6 && intersects; ++p) { + float dist_to_plane = dot(frustum_planes[p], vec4(sph.center, 1.0)); + if (dist_to_plane < -sph.radius) { + intersects = false; + } + } + + /* TODO reject false positive. 
*/ #if defined(DEBUG_DRAW) && defined(DEBUG_DRAW_ISECT) if (intersects) { - drw_debug(box, vec4(0, 1, 0, 1)); + drw_debug(sph, intersects ? vec4(0, 1, 0, 1) : vec4(1, 0, 0, 1)); } #endif return intersects; diff --git a/source/blender/draw/intern/shaders/common_math_geom_lib.glsl b/source/blender/draw/intern/shaders/common_math_geom_lib.glsl index 7a58a82eb2b..6c6ee599168 100644 --- a/source/blender/draw/intern/shaders/common_math_geom_lib.glsl +++ b/source/blender/draw/intern/shaders/common_math_geom_lib.glsl @@ -139,6 +139,11 @@ vec3 world_to_tangent(vec3 vector, vec3 N, vec3 T, vec3 B) /** \name Shapes * \{ */ +struct Sphere { + vec3 center; + float radius; +}; + struct Box { vec3 corners[8]; }; @@ -169,6 +174,12 @@ void drw_debug(Pyramid shape, vec4 color) drw_debug_quad(shape.corners[1], shape.corners[2], shape.corners[3], shape.corners[4], color); } +void drw_debug(Sphere shape, vec4 color) +{ + /* TODO(fclem): Could be better. */ + drw_debug_point(shape.center, shape.radius, color); +} + #endif /** \} */ diff --git a/source/blender/draw/intern/shaders/common_math_lib.glsl b/source/blender/draw/intern/shaders/common_math_lib.glsl index f483d55ef97..44596dd5b8d 100644 --- a/source/blender/draw/intern/shaders/common_math_lib.glsl +++ b/source/blender/draw/intern/shaders/common_math_lib.glsl @@ -122,6 +122,11 @@ void set_flag_from_test(inout int value, bool test, int flag) { if (test) { valu #define in_range_exclusive(val, min_v, max_v) \ (all(greaterThan(val, min_v)) && all(lessThan(val, max_v))) +uint divide_ceil_u(uint visible_count, uint divisor) +{ + return (visible_count + (divisor - 1)) / divisor; +} + float distance_squared(vec2 a, vec2 b) { a -= b; -- cgit v1.2.3