Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClément Foucault <foucault.clem@gmail.com>2022-08-15 19:08:34 +0300
committerClément Foucault <foucault.clem@gmail.com>2022-08-15 19:36:19 +0300
commitb43b62191cde60fee65f8ff1ad108b271f42295d (patch)
treec1262b5422c33f8fc21adbc4613c9fa22bc95528
parent3195a381200eb98e6add8b0504f701318186946d (diff)
EEVEE-Next: HiZ Buffer: New implementation
This new implementation does all downsampling in a single compute shader dispatch, removing a lot of complexity from the previous recursive downsampling. This is heavilly inspired by the Single-Pass-Downsampler from GPUOpen: https://github.com/GPUOpen-Effects/FidelityFX-SPD However I do not implement all the optimization bits as they require vulkan (GL_KHR_shader_subgroup) and is not as versatile (it is only for HiZ). Timers inside renderdoc report ~0.4ms of saving on a 2048*1024 render for the whole downsampling. Note that the previous implementation only processed 6 mips where the new one processes 8 mips. ``` EEVEE ~1.0ms EEVEE-Next ~0.6ms ``` Padding has been bumped to be of 128px for processing 8 mips. A new debug option has been added (debug value 2) to validate the HiZ.
-rw-r--r--source/blender/draw/CMakeLists.txt3
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_defines.hh10
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc102
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh81
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_instance.cc1
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_instance.hh3
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_light.cc6
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_pipeline.cc10
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_pipeline.hh1
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_shader.cc4
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_shader.hh3
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_shader_shared.hh41
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_view.cc6
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl24
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl121
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl12
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl2
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl2
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh31
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh8
-rw-r--r--source/blender/draw/intern/DRW_gpu_wrapper.hh5
-rw-r--r--source/blender/draw/intern/shaders/common_math_lib.glsl9
-rw-r--r--source/blender/gpu/CMakeLists.txt1
23 files changed, 449 insertions, 37 deletions
diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt
index 284787cb475..0a62263ce3b 100644
--- a/source/blender/draw/CMakeLists.txt
+++ b/source/blender/draw/CMakeLists.txt
@@ -141,6 +141,7 @@ set(SRC
engines/eevee_next/eevee_instance.cc
engines/eevee_next/eevee_light.cc
engines/eevee_next/eevee_material.cc
+ engines/eevee_next/eevee_hizbuffer.cc
engines/eevee_next/eevee_motion_blur.cc
engines/eevee_next/eevee_pipeline.cc
engines/eevee_next/eevee_renderbuffers.cc
@@ -391,6 +392,8 @@ set(GLSL_SRC
engines/eevee_next/shaders/eevee_geom_gpencil_vert.glsl
engines/eevee_next/shaders/eevee_geom_mesh_vert.glsl
engines/eevee_next/shaders/eevee_geom_world_vert.glsl
+ engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl
+ engines/eevee_next/shaders/eevee_hiz_update_comp.glsl
engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl
engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl
engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl
diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh
index 96c5095317d..67643471639 100644
--- a/source/blender/draw/engines/eevee_next/eevee_defines.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh
@@ -11,6 +11,11 @@
#pragma once
+/* Hierarchical Z down-sampling. */
+#define HIZ_MIP_COUNT 8
+/* NOTE: The shader is written to update 5 mipmaps using LDS. */
+#define HIZ_GROUP_SIZE 32
+
/* Avoid too much overhead caused by resizing the light buffers too many time. */
#define LIGHT_CHUNK 256
@@ -35,10 +40,7 @@
#define SHADOW_MAX_PAGE 4096
#define SHADOW_PAGE_PER_ROW 64
-#define HIZ_MIP_COUNT 6u
-/* Group size is 2x smaller because we simply copy the level 0. */
-#define HIZ_GROUP_SIZE 1u << (HIZ_MIP_COUNT - 2u)
-
+/* Ray-tracing. */
#define RAYTRACE_GROUP_SIZE 16
#define RAYTRACE_MAX_TILES (16384 / RAYTRACE_GROUP_SIZE) * (16384 / RAYTRACE_GROUP_SIZE)
diff --git a/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc
new file mode 100644
index 00000000000..e2022d74093
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2022 Blender Foundation.
+ */
+
+#include "BKE_global.h"
+
+#include "eevee_instance.hh"
+
+#include "eevee_hizbuffer.hh"
+
+namespace blender::eevee {
+
+/* -------------------------------------------------------------------- */
+/** \name Hierarchical-Z buffer
+ *
+ * \{ */
+
+void HiZBuffer::sync()
+{
+ RenderBuffers &render_buffers = inst_.render_buffers;
+
+ int2 render_extent = inst_.film.render_extent_get();
+ /* Padding to avoid complexity during down-sampling and screen tracing. */
+ int2 hiz_extent = math::ceil_to_multiple(render_extent, int2(1u << (HIZ_MIP_COUNT - 1)));
+ int2 dispatch_size = math::divide_ceil(hiz_extent, int2(HIZ_GROUP_SIZE));
+
+ hiz_tx_.ensure_2d(GPU_R32F, hiz_extent, nullptr, HIZ_MIP_COUNT);
+ hiz_tx_.ensure_mip_views();
+ GPU_texture_mipmap_mode(hiz_tx_, true, false);
+
+ data_.uv_scale = float2(render_extent) / float2(hiz_extent);
+ data_.push_update();
+
+ {
+ hiz_update_ps_ = DRW_pass_create("HizUpdate", DRW_STATE_NO_DRAW);
+ GPUShader *sh = inst_.shaders.static_shader_get(HIZ_UPDATE);
+ DRWShadingGroup *grp = DRW_shgroup_create(sh, hiz_update_ps_);
+ DRW_shgroup_storage_block(grp, "finished_tile_counter", atomic_tile_counter_);
+ DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, with_filter);
+ DRW_shgroup_uniform_image(grp, "out_mip_0", hiz_tx_.mip_view(0));
+ DRW_shgroup_uniform_image(grp, "out_mip_1", hiz_tx_.mip_view(1));
+ DRW_shgroup_uniform_image(grp, "out_mip_2", hiz_tx_.mip_view(2));
+ DRW_shgroup_uniform_image(grp, "out_mip_3", hiz_tx_.mip_view(3));
+ DRW_shgroup_uniform_image(grp, "out_mip_4", hiz_tx_.mip_view(4));
+ DRW_shgroup_uniform_image(grp, "out_mip_5", hiz_tx_.mip_view(5));
+ DRW_shgroup_uniform_image(grp, "out_mip_6", hiz_tx_.mip_view(6));
+ DRW_shgroup_uniform_image(grp, "out_mip_7", hiz_tx_.mip_view(7));
+ /* TODO(@fclem): There might be occasions where we might not want to
+ * copy mip 0 for performance reasons if there is no need for it. */
+ DRW_shgroup_uniform_bool_copy(grp, "update_mip_0", true);
+ DRW_shgroup_call_compute(grp, UNPACK2(dispatch_size), 1);
+ DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
+ }
+
+ if (inst_.debug_mode == eDebugMode::DEBUG_HIZ_VALIDATION) {
+ DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM;
+ debug_draw_ps_ = DRW_pass_create("HizUpdate.Debug", state);
+ GPUShader *sh = inst_.shaders.static_shader_get(HIZ_DEBUG);
+ DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_draw_ps_);
+ this->bind_resources(grp);
+ DRW_shgroup_call_procedural_triangles(grp, nullptr, 1);
+ }
+ else {
+ debug_draw_ps_ = nullptr;
+ }
+}
+
+void HiZBuffer::update()
+{
+ if (!is_dirty_) {
+ return;
+ }
+
+ /* Bind another framebuffer in order to avoid triggering the feedback loop check.
+ * This is safe because we only use compute shaders in this section of the code.
+ * Ideally the check should be smarter. */
+ GPUFrameBuffer *fb = GPU_framebuffer_active_get();
+ if (G.debug & G_DEBUG_GPU) {
+ GPU_framebuffer_restore();
+ }
+
+ DRW_draw_pass(hiz_update_ps_);
+
+ if (G.debug & G_DEBUG_GPU) {
+ GPU_framebuffer_bind(fb);
+ }
+}
+
+void HiZBuffer::debug_draw(GPUFrameBuffer *view_fb)
+{
+ if (debug_draw_ps_ == nullptr) {
+ return;
+ }
+ inst_.info = "Debug Mode: HiZ Validation";
+ inst_.hiz_buffer.update();
+ GPU_framebuffer_bind(view_fb);
+ DRW_draw_pass(debug_draw_ps_);
+}
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh
new file mode 100644
index 00000000000..039f7e4f16d
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * The Hierarchical-Z buffer is texture containing a copy of the depth buffer with mipmaps.
+ * Each mip contains the maximum depth of each 4 pixels on the upper level.
+ * The size of the texture is padded to avoid messing with the mipmap pixels alignments.
+ */
+
+#pragma once
+
+#include "DRW_render.h"
+
+#include "eevee_shader_shared.hh"
+
+namespace blender::eevee {
+
+class Instance;
+
+/* -------------------------------------------------------------------- */
+/** \name Hierarchical-Z buffer
+ * \{ */
+
+class HiZBuffer {
+ private:
+ Instance &inst_;
+
+ /** The texture containing the hiz mip chain. */
+ Texture hiz_tx_ = {"hiz_tx_"};
+ /**
+ * Atomic counter counting the number of tile that have finished down-sampling.
+ * The last one will process the last few mip level.
+ */
+ draw::StorageBuffer<uint4, true> atomic_tile_counter_ = {"atomic_tile_counter"};
+ /** Single pass recursive downsample. */
+ DRWPass *hiz_update_ps_ = nullptr;
+ /** Debug pass. */
+ DRWPass *debug_draw_ps_ = nullptr;
+ /** Dirty flag to check if the update is necessary. */
+ bool is_dirty_ = true;
+
+ HiZDataBuf data_;
+
+ public:
+ HiZBuffer(Instance &inst) : inst_(inst)
+ {
+ atomic_tile_counter_.clear_to_zero();
+ };
+
+ void sync();
+
+ /**
+ * Tag the buffer for update if needed.
+ */
+ void set_dirty()
+ {
+ is_dirty_ = true;
+ }
+
+ /**
+ * Update the content of the HiZ buffer with the depth render target.
+ * Noop if the buffer has not been tagged as dirty.
+ * Should be called before each passes that needs to read the hiz buffer.
+ */
+ void update();
+
+ void debug_draw(GPUFrameBuffer *view_fb);
+
+ void bind_resources(DRWShadingGroup *grp)
+ {
+ DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", &hiz_tx_);
+ DRW_shgroup_uniform_block_ref(grp, "hiz_buf", &data_);
+ }
+};
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc
index 57786adb657..6665b3a7c9b 100644
--- a/source/blender/draw/engines/eevee_next/eevee_instance.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc
@@ -106,6 +106,7 @@ void Instance::begin_sync()
depth_of_field.sync();
motion_blur.sync();
+ hiz_buffer.sync();
pipelines.sync();
main_view.sync();
world.sync();
diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.hh b/source/blender/draw/engines/eevee_next/eevee_instance.hh
index d52e4a8e43b..cc3d1c32fde 100644
--- a/source/blender/draw/engines/eevee_next/eevee_instance.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_instance.hh
@@ -18,6 +18,7 @@
#include "eevee_camera.hh"
#include "eevee_depth_of_field.hh"
#include "eevee_film.hh"
+#include "eevee_hizbuffer.hh"
#include "eevee_light.hh"
#include "eevee_material.hh"
#include "eevee_motion_blur.hh"
@@ -48,6 +49,7 @@ class Instance {
VelocityModule velocity;
MotionBlurModule motion_blur;
DepthOfField depth_of_field;
+ HiZBuffer hiz_buffer;
Sampling sampling;
Camera camera;
Film film;
@@ -88,6 +90,7 @@ class Instance {
velocity(*this),
motion_blur(*this),
depth_of_field(*this),
+ hiz_buffer(*this),
sampling(*this),
camera(*this),
film(*this),
diff --git a/source/blender/draw/engines/eevee_next/eevee_light.cc b/source/blender/draw/engines/eevee_next/eevee_light.cc
index dbbf481f3f4..5392816124b 100644
--- a/source/blender/draw/engines/eevee_next/eevee_light.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_light.cc
@@ -452,9 +452,11 @@ void LightModule::debug_pass_sync()
return;
}
- debug_draw_ps_ = DRW_pass_create("LightCulling.Debug", DRW_STATE_WRITE_COLOR);
+ DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM;
+ debug_draw_ps_ = DRW_pass_create("LightCulling.Debug", state);
GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_DEBUG);
DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_draw_ps_);
+ inst_.hiz_buffer.bind_resources(grp);
DRW_shgroup_storage_block_ref(grp, "light_buf", &culling_light_buf_);
DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
DRW_shgroup_storage_block_ref(grp, "light_zbin_buf", &culling_zbin_buf_);
@@ -490,6 +492,8 @@ void LightModule::debug_draw(GPUFrameBuffer *view_fb)
if (debug_draw_ps_ == nullptr) {
return;
}
+ inst_.info = "Debug Mode: Light Culling Validation";
+ inst_.hiz_buffer.update();
GPU_framebuffer_bind(view_fb);
DRW_draw_pass(debug_draw_ps_);
}
diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc
index 9185ce7904a..9260d71b887 100644
--- a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc
@@ -245,22 +245,22 @@ DRWShadingGroup *ForwardPipeline::prepass_transparent_add(::Material *blender_ma
void ForwardPipeline::render(const DRWView *view,
Framebuffer &prepass_fb,
Framebuffer &combined_fb,
- GPUTexture *depth_tx,
GPUTexture *UNUSED(combined_tx))
{
- UNUSED_VARS(view, depth_tx, prepass_fb, combined_fb);
- // HiZBuffer &hiz = inst_.hiz_front;
+ UNUSED_VARS(view);
DRW_stats_group_start("ForwardOpaque");
GPU_framebuffer_bind(prepass_fb);
DRW_draw_pass(prepass_ps_);
- // hiz.set_dirty();
+ if (!DRW_pass_is_empty(prepass_ps_)) {
+ inst_.hiz_buffer.set_dirty();
+ }
// if (inst_.raytracing.enabled()) {
// rt_buffer.radiance_copy(combined_tx);
- // hiz.update(depth_tx);
+ // inst_.hiz_buffer.update();
// }
// inst_.shadows.set_view(view, depth_tx);
diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.hh b/source/blender/draw/engines/eevee_next/eevee_pipeline.hh
index 3bdc718767b..ed6986b9b61 100644
--- a/source/blender/draw/engines/eevee_next/eevee_pipeline.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.hh
@@ -91,7 +91,6 @@ class ForwardPipeline {
void render(const DRWView *view,
Framebuffer &prepass_fb,
Framebuffer &combined_fb,
- GPUTexture *depth_tx,
GPUTexture *combined_tx);
};
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc
index a535d3407ac..0e49b195ea2 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc
@@ -82,6 +82,10 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
return "eevee_film_frag";
case FILM_COMP:
return "eevee_film_comp";
+ case HIZ_DEBUG:
+ return "eevee_hiz_debug";
+ case HIZ_UPDATE:
+ return "eevee_hiz_update";
case MOTION_BLUR_GATHER:
return "eevee_motion_blur_gather";
case MOTION_BLUR_TILE_DILATE:
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.hh b/source/blender/draw/engines/eevee_next/eevee_shader.hh
index 5b43a1abf43..9ef42c84373 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader.hh
@@ -47,6 +47,9 @@ enum eShaderType {
DOF_TILES_DILATE_MINMAX,
DOF_TILES_FLATTEN,
+ HIZ_UPDATE,
+ HIZ_DEBUG,
+
LIGHT_CULLING_DEBUG,
LIGHT_CULLING_SELECT,
LIGHT_CULLING_SORT,
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
index 885317fc673..bb25f6184d4 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
@@ -42,36 +42,40 @@ enum eDebugMode : uint32_t {
*/
DEBUG_LIGHT_CULLING = 1u,
/**
+ * Show incorrectly downsample tiles in red.
+ */
+ DEBUG_HIZ_VALIDATION = 2u,
+ /**
* Tilemaps to screen. Is also present in other modes.
* - Black pixels, no pages allocated.
* - Green pixels, pages cached.
* - Red pixels, pages allocated.
*/
- DEBUG_SHADOW_TILEMAPS = 2u,
+ DEBUG_SHADOW_TILEMAPS = 10u,
/**
* Random color per pages. Validates page density allocation and sampling.
*/
- DEBUG_SHADOW_PAGES = 3u,
+ DEBUG_SHADOW_PAGES = 11u,
/**
* Outputs random color per tilemap (or tilemap level). Validates tilemaps coverage.
* Black means not covered by any tilemaps LOD of the shadow.
*/
- DEBUG_SHADOW_LOD = 4u,
+ DEBUG_SHADOW_LOD = 12u,
/**
* Outputs white pixels for pages allocated and black pixels for unused pages.
* This needs DEBUG_SHADOW_PAGE_ALLOCATION_ENABLED defined in order to work.
*/
- DEBUG_SHADOW_PAGE_ALLOCATION = 5u,
+ DEBUG_SHADOW_PAGE_ALLOCATION = 13u,
/**
* Outputs the tilemap atlas. Default tilemap is too big for the usual screen resolution.
* Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option.
*/
- DEBUG_SHADOW_TILE_ALLOCATION = 6u,
+ DEBUG_SHADOW_TILE_ALLOCATION = 14u,
/**
* Visualize linear depth stored in the atlas regions of the active light.
* This way, one can check if the rendering, the copying and the shadow sampling functions works.
*/
- DEBUG_SHADOW_SHADOW_DEPTH = 7u
+ DEBUG_SHADOW_SHADOW_DEPTH = 15u
};
/** \} */
@@ -613,6 +617,20 @@ BLI_STATIC_ASSERT_ALIGN(LightData, 16)
/** \} */
/* -------------------------------------------------------------------- */
+/** \name Hierarchical-Z Buffer
+ * \{ */
+
+struct HiZData {
+ /** Scale factor to remove HiZBuffer padding. */
+ float2 uv_scale;
+
+ float2 _pad0;
+};
+BLI_STATIC_ASSERT_ALIGN(HiZData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name Ray-Tracing
* \{ */
@@ -699,16 +717,17 @@ float4 utility_tx_sample(sampler2DArray util_tx, float2 uv, float layer)
using AOVsInfoDataBuf = draw::StorageBuffer<AOVsInfoData>;
using CameraDataBuf = draw::UniformBuffer<CameraData>;
-using LightDataBuf = draw::StorageArrayBuffer<LightData, LIGHT_CHUNK>;
+using DepthOfFieldDataBuf = draw::UniformBuffer<DepthOfFieldData>;
+using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer<ScatterRect, 16, true>;
+using DrawIndirectBuf = draw::StorageBuffer<DrawCommand, true>;
+using FilmDataBuf = draw::UniformBuffer<FilmData>;
+using HiZDataBuf = draw::UniformBuffer<HiZData>;
using LightCullingDataBuf = draw::StorageBuffer<LightCullingData>;
using LightCullingKeyBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>;
using LightCullingTileBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>;
using LightCullingZbinBuf = draw::StorageArrayBuffer<uint, CULLING_ZBIN_COUNT, true>;
using LightCullingZdistBuf = draw::StorageArrayBuffer<float, LIGHT_CHUNK, true>;
-using DepthOfFieldDataBuf = draw::UniformBuffer<DepthOfFieldData>;
-using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer<ScatterRect, 16, true>;
-using DrawIndirectBuf = draw::StorageBuffer<DrawCommand, true>;
-using FilmDataBuf = draw::UniformBuffer<FilmData>;
+using LightDataBuf = draw::StorageArrayBuffer<LightData, LIGHT_CHUNK>;
using MotionBlurDataBuf = draw::UniformBuffer<MotionBlurData>;
using MotionBlurTileIndirectionBuf = draw::StorageBuffer<MotionBlurTileIndirection, true>;
using SamplingDataBuf = draw::StorageBuffer<SamplingData>;
diff --git a/source/blender/draw/engines/eevee_next/eevee_view.cc b/source/blender/draw/engines/eevee_next/eevee_view.cc
index b7154465a70..44067aff9ca 100644
--- a/source/blender/draw/engines/eevee_next/eevee_view.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_view.cc
@@ -102,6 +102,8 @@ void ShadingView::render()
update_view();
+ inst_.hiz_buffer.set_dirty();
+
DRW_stats_group_start(name_);
DRW_view_set_active(render_view_);
@@ -128,10 +130,10 @@ void ShadingView::render()
// inst_.lookdev.render_overlay(view_fb_);
- inst_.pipelines.forward.render(
- render_view_, prepass_fb_, combined_fb_, rbufs.depth_tx, rbufs.combined_tx);
+ inst_.pipelines.forward.render(render_view_, prepass_fb_, combined_fb_, rbufs.combined_tx);
inst_.lights.debug_draw(combined_fb_);
+ inst_.hiz_buffer.debug_draw(combined_fb_);
GPUTexture *combined_final_tx = render_postfx(rbufs.combined_tx);
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl
new file mode 100644
index 00000000000..e93d0f472fa
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl
@@ -0,0 +1,24 @@
+
+/**
+ * Debug hiz down sampling pass.
+ * Output red if above any max pixels, blue otherwise.
+ */
+
+void main()
+{
+ ivec2 texel = ivec2(gl_FragCoord.xy);
+
+ float depth0 = texelFetch(hiz_tx, texel, 0).r;
+
+ vec4 color = vec4(0.1, 0.1, 1.0, 1.0);
+ for (int i = 1; i < HIZ_MIP_COUNT; i++) {
+ ivec2 lvl_texel = texel / ivec2(uvec2(1) << uint(i));
+ lvl_texel = min(lvl_texel, textureSize(hiz_tx, i) - 1);
+ if (texelFetch(hiz_tx, lvl_texel, i).r < depth0) {
+ color = vec4(1.0, 0.1, 0.1, 1.0);
+ break;
+ }
+ }
+ out_debug_color_add = vec4(color.rgb, 0.0) * 0.2;
+ out_debug_color_mul = color;
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl
new file mode 100644
index 00000000000..597bc73e2ad
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl
@@ -0,0 +1,121 @@
+
+/**
+ * Shader that down-sample depth buffer, creating a Hierarchical-Z buffer.
+ * Saves max value of each 2x2 texel in the mipmap above the one we are
+ * rendering to. Adapted from
+ * http://rastergrid.com/blog/2010/10/hierarchical-z-map-based-occlusion-culling/
+ *
+ * Major simplification has been made since we pad the buffer to always be
+ * bigger than input to avoid mipmapping misalignement.
+ *
+ * Start by copying the base level by quad loading the depth.
+ * Then each thread compute it's local depth for level 1.
+ * After that we use shared variables to do inter thread comunication and
+ * downsample to max level.
+ */
+
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+
+shared float local_depths[gl_WorkGroupSize.y][gl_WorkGroupSize.x];
+
+/* Load values from the previous lod level. */
+vec4 load_local_depths(ivec2 pixel)
+{
+ pixel *= 2;
+ return vec4(local_depths[pixel.y + 1][pixel.x + 0],
+ local_depths[pixel.y + 1][pixel.x + 1],
+ local_depths[pixel.y + 0][pixel.x + 1],
+ local_depths[pixel.y + 0][pixel.x + 0]);
+}
+
+void store_local_depth(ivec2 pixel, float depth)
+{
+ local_depths[pixel.y][pixel.x] = depth;
+}
+
+void main()
+{
+ ivec2 local_px = ivec2(gl_LocalInvocationID.xy);
+ /* Bottom left corner of the kernel. */
+ ivec2 kernel_origin = ivec2(gl_WorkGroupSize.xy * gl_WorkGroupID.xy);
+
+ /* Copy level 0. */
+ ivec2 src_px = ivec2(kernel_origin + local_px) * 2;
+ vec2 samp_co = (vec2(src_px) + 0.5) / vec2(textureSize(depth_tx, 0));
+ vec4 samp = textureGather(depth_tx, samp_co);
+
+ if (update_mip_0) {
+ imageStore(out_mip_0, src_px + ivec2(0, 1), samp.xxxx);
+ imageStore(out_mip_0, src_px + ivec2(1, 1), samp.yyyy);
+ imageStore(out_mip_0, src_px + ivec2(1, 0), samp.zzzz);
+ imageStore(out_mip_0, src_px + ivec2(0, 0), samp.wwww);
+ }
+
+ /* Level 1. (No load) */
+ float max_depth = max_v4(samp);
+ ivec2 dst_px = ivec2(kernel_origin + local_px);
+ imageStore(out_mip_1, dst_px, vec4(max_depth));
+ store_local_depth(local_px, max_depth);
+
+ /* Level 2-5. */
+ bool active_thread;
+ int mask_shift = 1;
+
+#define downsample_level(out_mip__, lod_) \
+ active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \
+ barrier(); /* Wait for previous writes to finish. */ \
+ if (active_thread) { \
+ max_depth = max_v4(load_local_depths(local_px)); \
+ dst_px = ivec2((kernel_origin >> mask_shift) + local_px); \
+ imageStore(out_mip__, dst_px, vec4(max_depth)); \
+ } \
+ barrier(); /* Wait for previous reads to finish. */ \
+ if (active_thread) { \
+ store_local_depth(local_px, max_depth); \
+ } \
+ mask_shift++;
+
+ downsample_level(out_mip_2, 2);
+ downsample_level(out_mip_3, 3);
+ downsample_level(out_mip_4, 4);
+ downsample_level(out_mip_5, 5);
+
+ /* Since we pad the destination texture, the mip size is equal to the dispatch size. */
+ uint tile_count = uint(imageSize(out_mip_5).x * imageSize(out_mip_5).y);
+ /* Let the last tile handle the remaining LOD. */
+ bool last_tile = atomicAdd(finished_tile_counter, 1u) + 1u < tile_count;
+ if (last_tile == false) {
+ return;
+ }
+ finished_tile_counter = 0u;
+
+ ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u));
+ ivec2 image_border = imageSize(out_mip_5) - 1;
+ for (int y = 0; y < iter.y; y++) {
+ for (int x = 0; x < iter.x; x++) {
+ /* Load result of the other work groups. */
+ kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y);
+ src_px = ivec2(kernel_origin + local_px) * 2;
+ vec4 samp;
+ samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x;
+ samp.y = imageLoad(out_mip_5, min(src_px + ivec2(1, 1), image_border)).x;
+ samp.z = imageLoad(out_mip_5, min(src_px + ivec2(1, 0), image_border)).x;
+ samp.w = imageLoad(out_mip_5, min(src_px + ivec2(0, 0), image_border)).x;
+ /* Level 6. */
+ float max_depth = max_v4(samp);
+ ivec2 dst_px = ivec2(kernel_origin + local_px);
+ imageStore(out_mip_6, dst_px, vec4(max_depth));
+ store_local_depth(local_px, max_depth);
+
+ mask_shift = 1;
+
+ /* Level 7. */
+ downsample_level(out_mip_7, 7);
+
+ /* Limited by OpenGL maximum of 8 image slot. */
+ // downsample_level(out_mip_8, 8);
+ // downsample_level(out_mip_9, 9);
+ // downsample_level(out_mip_10, 10);
+ }
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl
index 321c99f7952..5c50a2252bd 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl
@@ -14,7 +14,7 @@ void main()
{
ivec2 texel = ivec2(gl_FragCoord.xy);
- float depth = texelFetch(depth_tx, texel, 0).r;
+ float depth = texelFetch(hiz_tx, texel, 0).r;
float vP_z = get_view_z_from_depth(depth);
vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth);
@@ -42,11 +42,13 @@ void main()
}
LIGHT_FOREACH_END
+ vec4 color = vec4(heatmap_gradient(light_count / 4.0), 1.0);
+
if ((light_cull & light_nocull) != light_nocull) {
/* ERROR. Some lights were culled incorrectly. */
- out_debug_color = vec4(0.0, 1.0, 0.0, 1.0);
- }
- else {
- out_debug_color = vec4(heatmap_gradient(light_count / 4.0), 1.0);
+ color = vec4(0.0, 1.0, 0.0, 1.0);
}
+
+ out_debug_color_add = vec4(color.rgb, 0.0) * 0.2;
+ out_debug_color_mul = color;
} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl
index daf2016cd35..e98b170cd4c 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl
@@ -25,7 +25,7 @@ void main()
int prefix_sum = 0;
/* Iterate over the whole key buffer. */
- uint iter = divide_ceil_u(light_cull_buf.visible_count, gl_WorkGroupSize.x);
+ uint iter = divide_ceil(light_cull_buf.visible_count, gl_WorkGroupSize.x);
for (uint i = 0u; i < iter; i++) {
uint index = gl_WorkGroupSize.x * i + gl_LocalInvocationID.x;
/* NOTE: This will load duplicated values, but they will be discarded. */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl
index d96f191fb77..ae20153f26c 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl
@@ -25,7 +25,7 @@ void main()
}
barrier();
- uint light_iter = divide_ceil_u(light_cull_buf.visible_count, gl_WorkGroupSize.x);
+ uint light_iter = divide_ceil(light_cull_buf.visible_count, gl_WorkGroupSize.x);
for (uint i = 0u; i < light_iter; i++) {
uint index = i * gl_WorkGroupSize.x + gl_LocalInvocationID.x;
if (index >= light_cull_buf.visible_count) {
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh
new file mode 100644
index 00000000000..5e32631a8f8
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "eevee_defines.hh"
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_CREATE_INFO(eevee_hiz_data)
+ .sampler(15, ImageType::FLOAT_2D, "hiz_tx")
+ .uniform_buf(5, "HiZData", "hiz_buf");
+
+GPU_SHADER_CREATE_INFO(eevee_hiz_update)
+ .do_static_compilation(true)
+ .local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE)
+ .storage_buf(0, Qualifier::READ_WRITE, "uint", "finished_tile_counter")
+ .sampler(0, ImageType::DEPTH_2D, "depth_tx")
+ .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_0")
+ .image(1, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_1")
+ .image(2, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_2")
+ .image(3, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_3")
+ .image(4, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_4")
+ .image(5, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "out_mip_5")
+ .image(6, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_6")
+ .image(7, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_7")
+ .push_constant(Type::BOOL, "update_mip_0")
+ .compute_source("eevee_hiz_update_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_hiz_debug)
+ .do_static_compilation(true)
+ .fragment_out(0, Type::VEC4, "out_debug_color_add", DualBlend::SRC_0)
+ .fragment_out(0, Type::VEC4, "out_debug_color_mul", DualBlend::SRC_1)
+ .fragment_source("eevee_hiz_debug_frag.glsl")
+ .additional_info("eevee_shared", "eevee_hiz_data", "draw_fullscreen");
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh
index 56fda25ed13..c54f05719d3 100644
--- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh
@@ -67,10 +67,10 @@ GPU_SHADER_CREATE_INFO(eevee_light_culling_tile)
GPU_SHADER_CREATE_INFO(eevee_light_culling_debug)
.do_static_compilation(true)
- .sampler(0, ImageType::DEPTH_2D, "depth_tx")
- .fragment_out(0, Type::VEC4, "out_debug_color")
- .additional_info("eevee_shared", "draw_view")
+ .fragment_out(0, Type::VEC4, "out_debug_color_add", DualBlend::SRC_0)
+ .fragment_out(0, Type::VEC4, "out_debug_color_mul", DualBlend::SRC_1)
.fragment_source("eevee_light_culling_debug_frag.glsl")
- .additional_info("draw_fullscreen", "eevee_light_data");
+ .additional_info(
+ "eevee_shared", "draw_view", "draw_fullscreen", "eevee_light_data", "eevee_hiz_data");
/** \} */
diff --git a/source/blender/draw/intern/DRW_gpu_wrapper.hh b/source/blender/draw/intern/DRW_gpu_wrapper.hh
index 5405afd2a90..b32fdedaeb9 100644
--- a/source/blender/draw/intern/DRW_gpu_wrapper.hh
+++ b/source/blender/draw/intern/DRW_gpu_wrapper.hh
@@ -562,6 +562,11 @@ class Texture : NonCopyable {
return mip_views_[miplvl];
}
+ int mip_count() const
+ {
+ return GPU_texture_mip_count(tx_);
+ }
+
/**
* Ensure the availability of mipmap views.
* Layer views covers all layers of array textures.
diff --git a/source/blender/draw/intern/shaders/common_math_lib.glsl b/source/blender/draw/intern/shaders/common_math_lib.glsl
index e081a0243ca..e3734939b3f 100644
--- a/source/blender/draw/intern/shaders/common_math_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_math_lib.glsl
@@ -130,12 +130,17 @@ void set_flag_from_test(inout int value, bool test, int flag) { if (test) { valu
#define in_texture_range(texel, tex) \
(all(greaterThanEqual(texel, ivec2(0))) && all(lessThan(texel, textureSize(tex, 0).xy)))
-uint divide_ceil_u(uint visible_count, uint divisor)
+uint divide_ceil(uint visible_count, uint divisor)
{
return (visible_count + (divisor - 1u)) / divisor;
}
-int divide_ceil_i(int visible_count, int divisor)
+int divide_ceil(int visible_count, int divisor)
+{
+ return (visible_count + (divisor - 1)) / divisor;
+}
+
+ivec2 divide_ceil(ivec2 visible_count, ivec2 divisor)
{
return (visible_count + (divisor - 1)) / divisor;
}
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index ec964b5d8f3..92dacee1879 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -486,6 +486,7 @@ set(SRC_SHADER_CREATE_INFOS
../draw/engines/basic/shaders/infos/basic_depth_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_film_info.hh
+ ../draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh