From b43b62191cde60fee65f8ff1ad108b271f42295d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Mon, 15 Aug 2022 18:08:34 +0200 Subject: EEVEE-Next: HiZ Buffer: New implementation This new implementation does all downsampling in a single compute shader dispatch, removing a lot of complexity from the previous recursive downsampling. This is heavily inspired by the Single-Pass-Downsampler from GPUOpen: https://github.com/GPUOpen-Effects/FidelityFX-SPD However, I do not implement all the optimization bits as they require Vulkan (GL_KHR_shader_subgroup), and this implementation is not as versatile (it is only for HiZ). Timers inside RenderDoc report ~0.4ms of saving on a 2048*1024 render for the whole downsampling. Note that the previous implementation only processed 6 mips where the new one processes 8 mips. ``` EEVEE ~1.0ms EEVEE-Next ~0.6ms ``` Padding has been bumped to 128px for processing 8 mips. A new debug option has been added (debug value 2) to validate the HiZ. --- .../draw/engines/eevee_next/eevee_defines.hh | 10 +- .../draw/engines/eevee_next/eevee_hizbuffer.cc | 102 +++++++++++++++++ .../draw/engines/eevee_next/eevee_hizbuffer.hh | 81 ++++++++++++++ .../draw/engines/eevee_next/eevee_instance.cc | 1 + .../draw/engines/eevee_next/eevee_instance.hh | 3 + .../blender/draw/engines/eevee_next/eevee_light.cc | 6 +- .../draw/engines/eevee_next/eevee_pipeline.cc | 10 +- .../draw/engines/eevee_next/eevee_pipeline.hh | 1 - .../draw/engines/eevee_next/eevee_shader.cc | 4 + .../draw/engines/eevee_next/eevee_shader.hh | 3 + .../draw/engines/eevee_next/eevee_shader_shared.hh | 41 +++++-- .../blender/draw/engines/eevee_next/eevee_view.cc | 6 +- .../eevee_next/shaders/eevee_hiz_debug_frag.glsl | 24 ++++ .../eevee_next/shaders/eevee_hiz_update_comp.glsl | 121 +++++++++++++++++++++ .../shaders/eevee_light_culling_debug_frag.glsl | 12 +- .../shaders/eevee_light_culling_sort_comp.glsl | 2 +- .../shaders/eevee_light_culling_zbin_comp.glsl | 2 +- 
.../eevee_next/shaders/infos/eevee_hiz_info.hh | 31 ++++++ .../shaders/infos/eevee_light_culling_info.hh | 8 +- 19 files changed, 433 insertions(+), 35 deletions(-) create mode 100644 source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc create mode 100644 source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh create mode 100644 source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl create mode 100644 source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl create mode 100644 source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh (limited to 'source/blender/draw/engines/eevee_next') diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh index 96c5095317d..67643471639 100644 --- a/source/blender/draw/engines/eevee_next/eevee_defines.hh +++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh @@ -11,6 +11,11 @@ #pragma once +/* Hierarchical Z down-sampling. */ +#define HIZ_MIP_COUNT 8 +/* NOTE: The shader is written to update 5 mipmaps using LDS. */ +#define HIZ_GROUP_SIZE 32 + /* Avoid too much overhead caused by resizing the light buffers too many time. */ #define LIGHT_CHUNK 256 @@ -35,10 +40,7 @@ #define SHADOW_MAX_PAGE 4096 #define SHADOW_PAGE_PER_ROW 64 -#define HIZ_MIP_COUNT 6u -/* Group size is 2x smaller because we simply copy the level 0. */ -#define HIZ_GROUP_SIZE 1u << (HIZ_MIP_COUNT - 2u) - +/* Ray-tracing. */ #define RAYTRACE_GROUP_SIZE 16 #define RAYTRACE_MAX_TILES (16384 / RAYTRACE_GROUP_SIZE) * (16384 / RAYTRACE_GROUP_SIZE) diff --git a/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc new file mode 100644 index 00000000000..e2022d74093 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
+ */ + +#include "BKE_global.h" + +#include "eevee_instance.hh" + +#include "eevee_hizbuffer.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name Hierarchical-Z buffer + * + * \{ */ + +void HiZBuffer::sync() +{ + RenderBuffers &render_buffers = inst_.render_buffers; + + int2 render_extent = inst_.film.render_extent_get(); + /* Padding to avoid complexity during down-sampling and screen tracing. */ + int2 hiz_extent = math::ceil_to_multiple(render_extent, int2(1u << (HIZ_MIP_COUNT - 1))); + int2 dispatch_size = math::divide_ceil(hiz_extent, int2(HIZ_GROUP_SIZE)); + + hiz_tx_.ensure_2d(GPU_R32F, hiz_extent, nullptr, HIZ_MIP_COUNT); + hiz_tx_.ensure_mip_views(); + GPU_texture_mipmap_mode(hiz_tx_, true, false); + + data_.uv_scale = float2(render_extent) / float2(hiz_extent); + data_.push_update(); + + { + hiz_update_ps_ = DRW_pass_create("HizUpdate", DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(HIZ_UPDATE); + DRWShadingGroup *grp = DRW_shgroup_create(sh, hiz_update_ps_); + DRW_shgroup_storage_block(grp, "finished_tile_counter", atomic_tile_counter_); + DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, with_filter); + DRW_shgroup_uniform_image(grp, "out_mip_0", hiz_tx_.mip_view(0)); + DRW_shgroup_uniform_image(grp, "out_mip_1", hiz_tx_.mip_view(1)); + DRW_shgroup_uniform_image(grp, "out_mip_2", hiz_tx_.mip_view(2)); + DRW_shgroup_uniform_image(grp, "out_mip_3", hiz_tx_.mip_view(3)); + DRW_shgroup_uniform_image(grp, "out_mip_4", hiz_tx_.mip_view(4)); + DRW_shgroup_uniform_image(grp, "out_mip_5", hiz_tx_.mip_view(5)); + DRW_shgroup_uniform_image(grp, "out_mip_6", hiz_tx_.mip_view(6)); + DRW_shgroup_uniform_image(grp, "out_mip_7", hiz_tx_.mip_view(7)); + /* TODO(@fclem): There might be occasions where we might not want to + * copy mip 0 for performance reasons if there is no need for it. 
*/ + DRW_shgroup_uniform_bool_copy(grp, "update_mip_0", true); + DRW_shgroup_call_compute(grp, UNPACK2(dispatch_size), 1); + DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH); + } + + if (inst_.debug_mode == eDebugMode::DEBUG_HIZ_VALIDATION) { + DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM; + debug_draw_ps_ = DRW_pass_create("HizUpdate.Debug", state); + GPUShader *sh = inst_.shaders.static_shader_get(HIZ_DEBUG); + DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_draw_ps_); + this->bind_resources(grp); + DRW_shgroup_call_procedural_triangles(grp, nullptr, 1); + } + else { + debug_draw_ps_ = nullptr; + } +} + +void HiZBuffer::update() +{ + if (!is_dirty_) { + return; + } + + /* Bind another framebuffer in order to avoid triggering the feedback loop check. + * This is safe because we only use compute shaders in this section of the code. + * Ideally the check should be smarter. */ + GPUFrameBuffer *fb = GPU_framebuffer_active_get(); + if (G.debug & G_DEBUG_GPU) { + GPU_framebuffer_restore(); + } + + DRW_draw_pass(hiz_update_ps_); + + if (G.debug & G_DEBUG_GPU) { + GPU_framebuffer_bind(fb); + } +} + +void HiZBuffer::debug_draw(GPUFrameBuffer *view_fb) +{ + if (debug_draw_ps_ == nullptr) { + return; + } + inst_.info = "Debug Mode: HiZ Validation"; + inst_.hiz_buffer.update(); + GPU_framebuffer_bind(view_fb); + DRW_draw_pass(debug_draw_ps_); +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh new file mode 100644 index 00000000000..039f7e4f16d --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * The Hierarchical-Z buffer is texture containing a copy of the depth buffer with mipmaps. 
+ * Each mip contains the maximum depth of each 4 pixels on the upper level. + * The size of the texture is padded to avoid messing with the mipmap pixel alignment. + */ + +#pragma once + +#include "DRW_render.h" + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Hierarchical-Z buffer + * \{ */ + +class HiZBuffer { + private: + Instance &inst_; + + /** The texture containing the hiz mip chain. */ + Texture hiz_tx_ = {"hiz_tx_"}; + /** + * Atomic counter counting the number of tiles that have finished down-sampling. + * The last one will process the last few mip levels. + */ + draw::StorageBuffer atomic_tile_counter_ = {"atomic_tile_counter"}; + /** Single pass recursive downsample. */ + DRWPass *hiz_update_ps_ = nullptr; + /** Debug pass. */ + DRWPass *debug_draw_ps_ = nullptr; + /** Dirty flag to check if the update is necessary. */ + bool is_dirty_ = true; + + HiZDataBuf data_; + + public: + HiZBuffer(Instance &inst) : inst_(inst) + { + atomic_tile_counter_.clear_to_zero(); + }; + + void sync(); + + /** + * Tag the buffer for update if needed. + */ + void set_dirty() + { + is_dirty_ = true; + } + + /** + * Update the content of the HiZ buffer with the depth render target. + * Noop if the buffer has not been tagged as dirty. + * Should be called before each pass that needs to read the hiz buffer. 
+ */ + void update(); + + void debug_draw(GPUFrameBuffer *view_fb); + + void bind_resources(DRWShadingGroup *grp) + { + DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", &hiz_tx_); + DRW_shgroup_uniform_block_ref(grp, "hiz_buf", &data_); + } +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc index 57786adb657..6665b3a7c9b 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.cc +++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc @@ -106,6 +106,7 @@ void Instance::begin_sync() depth_of_field.sync(); motion_blur.sync(); + hiz_buffer.sync(); pipelines.sync(); main_view.sync(); world.sync(); diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.hh b/source/blender/draw/engines/eevee_next/eevee_instance.hh index d52e4a8e43b..cc3d1c32fde 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.hh +++ b/source/blender/draw/engines/eevee_next/eevee_instance.hh @@ -18,6 +18,7 @@ #include "eevee_camera.hh" #include "eevee_depth_of_field.hh" #include "eevee_film.hh" +#include "eevee_hizbuffer.hh" #include "eevee_light.hh" #include "eevee_material.hh" #include "eevee_motion_blur.hh" @@ -48,6 +49,7 @@ class Instance { VelocityModule velocity; MotionBlurModule motion_blur; DepthOfField depth_of_field; + HiZBuffer hiz_buffer; Sampling sampling; Camera camera; Film film; @@ -88,6 +90,7 @@ class Instance { velocity(*this), motion_blur(*this), depth_of_field(*this), + hiz_buffer(*this), sampling(*this), camera(*this), film(*this), diff --git a/source/blender/draw/engines/eevee_next/eevee_light.cc b/source/blender/draw/engines/eevee_next/eevee_light.cc index dbbf481f3f4..5392816124b 100644 --- a/source/blender/draw/engines/eevee_next/eevee_light.cc +++ b/source/blender/draw/engines/eevee_next/eevee_light.cc @@ -452,9 +452,11 @@ void LightModule::debug_pass_sync() return; } - debug_draw_ps_ = 
DRW_pass_create("LightCulling.Debug", DRW_STATE_WRITE_COLOR); + DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM; + debug_draw_ps_ = DRW_pass_create("LightCulling.Debug", state); GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_DEBUG); DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_draw_ps_); + inst_.hiz_buffer.bind_resources(grp); DRW_shgroup_storage_block_ref(grp, "light_buf", &culling_light_buf_); DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_); DRW_shgroup_storage_block_ref(grp, "light_zbin_buf", &culling_zbin_buf_); @@ -490,6 +492,8 @@ void LightModule::debug_draw(GPUFrameBuffer *view_fb) if (debug_draw_ps_ == nullptr) { return; } + inst_.info = "Debug Mode: Light Culling Validation"; + inst_.hiz_buffer.update(); GPU_framebuffer_bind(view_fb); DRW_draw_pass(debug_draw_ps_); } diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc index 9185ce7904a..9260d71b887 100644 --- a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc +++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc @@ -245,22 +245,22 @@ DRWShadingGroup *ForwardPipeline::prepass_transparent_add(::Material *blender_ma void ForwardPipeline::render(const DRWView *view, Framebuffer &prepass_fb, Framebuffer &combined_fb, - GPUTexture *depth_tx, GPUTexture *UNUSED(combined_tx)) { - UNUSED_VARS(view, depth_tx, prepass_fb, combined_fb); - // HiZBuffer &hiz = inst_.hiz_front; + UNUSED_VARS(view); DRW_stats_group_start("ForwardOpaque"); GPU_framebuffer_bind(prepass_fb); DRW_draw_pass(prepass_ps_); - // hiz.set_dirty(); + if (!DRW_pass_is_empty(prepass_ps_)) { + inst_.hiz_buffer.set_dirty(); + } // if (inst_.raytracing.enabled()) { // rt_buffer.radiance_copy(combined_tx); - // hiz.update(depth_tx); + // inst_.hiz_buffer.update(); // } // inst_.shadows.set_view(view, depth_tx); diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.hh 
b/source/blender/draw/engines/eevee_next/eevee_pipeline.hh index 3bdc718767b..ed6986b9b61 100644 --- a/source/blender/draw/engines/eevee_next/eevee_pipeline.hh +++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.hh @@ -91,7 +91,6 @@ class ForwardPipeline { void render(const DRWView *view, Framebuffer &prepass_fb, Framebuffer &combined_fb, - GPUTexture *depth_tx, GPUTexture *combined_tx); }; diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc index a535d3407ac..0e49b195ea2 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.cc +++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc @@ -82,6 +82,10 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_ return "eevee_film_frag"; case FILM_COMP: return "eevee_film_comp"; + case HIZ_DEBUG: + return "eevee_hiz_debug"; + case HIZ_UPDATE: + return "eevee_hiz_update"; case MOTION_BLUR_GATHER: return "eevee_motion_blur_gather"; case MOTION_BLUR_TILE_DILATE: diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.hh b/source/blender/draw/engines/eevee_next/eevee_shader.hh index 5b43a1abf43..9ef42c84373 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.hh +++ b/source/blender/draw/engines/eevee_next/eevee_shader.hh @@ -47,6 +47,9 @@ enum eShaderType { DOF_TILES_DILATE_MINMAX, DOF_TILES_FLATTEN, + HIZ_UPDATE, + HIZ_DEBUG, + LIGHT_CULLING_DEBUG, LIGHT_CULLING_SELECT, LIGHT_CULLING_SORT, diff --git a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh index 885317fc673..bb25f6184d4 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh +++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh @@ -41,37 +41,41 @@ enum eDebugMode : uint32_t { * Gradient showing light evaluation hotspots. */ DEBUG_LIGHT_CULLING = 1u, + /** + * Show incorrectly downsample tiles in red. 
+ */ + DEBUG_HIZ_VALIDATION = 2u, /** * Tilemaps to screen. Is also present in other modes. * - Black pixels, no pages allocated. * - Green pixels, pages cached. * - Red pixels, pages allocated. */ - DEBUG_SHADOW_TILEMAPS = 2u, + DEBUG_SHADOW_TILEMAPS = 10u, /** * Random color per pages. Validates page density allocation and sampling. */ - DEBUG_SHADOW_PAGES = 3u, + DEBUG_SHADOW_PAGES = 11u, /** * Outputs random color per tilemap (or tilemap level). Validates tilemaps coverage. * Black means not covered by any tilemaps LOD of the shadow. */ - DEBUG_SHADOW_LOD = 4u, + DEBUG_SHADOW_LOD = 12u, /** * Outputs white pixels for pages allocated and black pixels for unused pages. * This needs DEBUG_SHADOW_PAGE_ALLOCATION_ENABLED defined in order to work. */ - DEBUG_SHADOW_PAGE_ALLOCATION = 5u, + DEBUG_SHADOW_PAGE_ALLOCATION = 13u, /** * Outputs the tilemap atlas. Default tilemap is too big for the usual screen resolution. * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option. */ - DEBUG_SHADOW_TILE_ALLOCATION = 6u, + DEBUG_SHADOW_TILE_ALLOCATION = 14u, /** * Visualize linear depth stored in the atlas regions of the active light. * This way, one can check if the rendering, the copying and the shadow sampling functions works. */ - DEBUG_SHADOW_SHADOW_DEPTH = 7u + DEBUG_SHADOW_SHADOW_DEPTH = 15u }; /** \} */ @@ -612,6 +616,20 @@ BLI_STATIC_ASSERT_ALIGN(LightData, 16) /** \} */ +/* -------------------------------------------------------------------- */ +/** \name Hierarchical-Z Buffer + * \{ */ + +struct HiZData { + /** Scale factor to remove HiZBuffer padding. 
*/ + float2 uv_scale; + + float2 _pad0; +}; +BLI_STATIC_ASSERT_ALIGN(HiZData, 16) + +/** \} */ + /* -------------------------------------------------------------------- */ /** \name Ray-Tracing * \{ */ @@ -699,16 +717,17 @@ float4 utility_tx_sample(sampler2DArray util_tx, float2 uv, float layer) using AOVsInfoDataBuf = draw::StorageBuffer; using CameraDataBuf = draw::UniformBuffer; -using LightDataBuf = draw::StorageArrayBuffer; +using DepthOfFieldDataBuf = draw::UniformBuffer; +using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer; +using DrawIndirectBuf = draw::StorageBuffer; +using FilmDataBuf = draw::UniformBuffer; +using HiZDataBuf = draw::UniformBuffer; using LightCullingDataBuf = draw::StorageBuffer; using LightCullingKeyBuf = draw::StorageArrayBuffer; using LightCullingTileBuf = draw::StorageArrayBuffer; using LightCullingZbinBuf = draw::StorageArrayBuffer; using LightCullingZdistBuf = draw::StorageArrayBuffer; -using DepthOfFieldDataBuf = draw::UniformBuffer; -using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer; -using DrawIndirectBuf = draw::StorageBuffer; -using FilmDataBuf = draw::UniformBuffer; +using LightDataBuf = draw::StorageArrayBuffer; using MotionBlurDataBuf = draw::UniformBuffer; using MotionBlurTileIndirectionBuf = draw::StorageBuffer; using SamplingDataBuf = draw::StorageBuffer; diff --git a/source/blender/draw/engines/eevee_next/eevee_view.cc b/source/blender/draw/engines/eevee_next/eevee_view.cc index b7154465a70..44067aff9ca 100644 --- a/source/blender/draw/engines/eevee_next/eevee_view.cc +++ b/source/blender/draw/engines/eevee_next/eevee_view.cc @@ -102,6 +102,8 @@ void ShadingView::render() update_view(); + inst_.hiz_buffer.set_dirty(); + DRW_stats_group_start(name_); DRW_view_set_active(render_view_); @@ -128,10 +130,10 @@ void ShadingView::render() // inst_.lookdev.render_overlay(view_fb_); - inst_.pipelines.forward.render( - render_view_, prepass_fb_, combined_fb_, rbufs.depth_tx, rbufs.combined_tx); + 
inst_.pipelines.forward.render(render_view_, prepass_fb_, combined_fb_, rbufs.combined_tx); inst_.lights.debug_draw(combined_fb_); + inst_.hiz_buffer.debug_draw(combined_fb_); GPUTexture *combined_final_tx = render_postfx(rbufs.combined_tx); diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl new file mode 100644 index 00000000000..e93d0f472fa --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl @@ -0,0 +1,24 @@ + +/** + * Debug hiz down sampling pass. + * Output red if above any max pixels, blue otherwise. + */ + +void main() +{ + ivec2 texel = ivec2(gl_FragCoord.xy); + + float depth0 = texelFetch(hiz_tx, texel, 0).r; + + vec4 color = vec4(0.1, 0.1, 1.0, 1.0); + for (int i = 1; i < HIZ_MIP_COUNT; i++) { + ivec2 lvl_texel = texel / ivec2(uvec2(1) << uint(i)); + lvl_texel = min(lvl_texel, textureSize(hiz_tx, i) - 1); + if (texelFetch(hiz_tx, lvl_texel, i).r < depth0) { + color = vec4(1.0, 0.1, 0.1, 1.0); + break; + } + } + out_debug_color_add = vec4(color.rgb, 0.0) * 0.2; + out_debug_color_mul = color; +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl new file mode 100644 index 00000000000..597bc73e2ad --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl @@ -0,0 +1,121 @@ + +/** + * Shader that down-sample depth buffer, creating a Hierarchical-Z buffer. + * Saves max value of each 2x2 texel in the mipmap above the one we are + * rendering to. Adapted from + * http://rastergrid.com/blog/2010/10/hierarchical-z-map-based-occlusion-culling/ + * + * Major simplification has been made since we pad the buffer to always be + * bigger than input to avoid mipmapping misalignment. + * + * Start by copying the base level by quad loading the depth. 
+ * Then each thread computes its local depth for level 1. + * After that we use shared variables to do inter-thread communication and + * downsample to max level. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +shared float local_depths[gl_WorkGroupSize.y][gl_WorkGroupSize.x]; + +/* Load values from the previous lod level. */ +vec4 load_local_depths(ivec2 pixel) +{ + pixel *= 2; + return vec4(local_depths[pixel.y + 1][pixel.x + 0], + local_depths[pixel.y + 1][pixel.x + 1], + local_depths[pixel.y + 0][pixel.x + 1], + local_depths[pixel.y + 0][pixel.x + 0]); +} + +void store_local_depth(ivec2 pixel, float depth) +{ + local_depths[pixel.y][pixel.x] = depth; +} + +void main() +{ + ivec2 local_px = ivec2(gl_LocalInvocationID.xy); + /* Bottom left corner of the kernel. */ + ivec2 kernel_origin = ivec2(gl_WorkGroupSize.xy * gl_WorkGroupID.xy); + + /* Copy level 0. */ + ivec2 src_px = ivec2(kernel_origin + local_px) * 2; + vec2 samp_co = (vec2(src_px) + 0.5) / vec2(textureSize(depth_tx, 0)); + vec4 samp = textureGather(depth_tx, samp_co); + + if (update_mip_0) { + imageStore(out_mip_0, src_px + ivec2(0, 1), samp.xxxx); + imageStore(out_mip_0, src_px + ivec2(1, 1), samp.yyyy); + imageStore(out_mip_0, src_px + ivec2(1, 0), samp.zzzz); + imageStore(out_mip_0, src_px + ivec2(0, 0), samp.wwww); + } + + /* Level 1. (No load) */ + float max_depth = max_v4(samp); + ivec2 dst_px = ivec2(kernel_origin + local_px); + imageStore(out_mip_1, dst_px, vec4(max_depth)); + store_local_depth(local_px, max_depth); + + /* Level 2-5. */ + bool active_thread; + int mask_shift = 1; + +#define downsample_level(out_mip__, lod_) \ + active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \ + barrier(); /* Wait for previous writes to finish. 
*/ \ + if (active_thread) { \ + max_depth = max_v4(load_local_depths(local_px)); \ + dst_px = ivec2((kernel_origin >> mask_shift) + local_px); \ + imageStore(out_mip__, dst_px, vec4(max_depth)); \ + } \ + barrier(); /* Wait for previous reads to finish. */ \ + if (active_thread) { \ + store_local_depth(local_px, max_depth); \ + } \ + mask_shift++; + + downsample_level(out_mip_2, 2); + downsample_level(out_mip_3, 3); + downsample_level(out_mip_4, 4); + downsample_level(out_mip_5, 5); + + /* Since we pad the destination texture, the mip size is equal to the dispatch size. */ + uint tile_count = uint(imageSize(out_mip_5).x * imageSize(out_mip_5).y); + /* Let the last tile handle the remaining LOD. */ + bool last_tile = atomicAdd(finished_tile_counter, 1u) + 1u < tile_count; + if (last_tile == false) { + return; + } + finished_tile_counter = 0u; + + ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u)); + ivec2 image_border = imageSize(out_mip_5) - 1; + for (int y = 0; y < iter.y; y++) { + for (int x = 0; x < iter.x; x++) { + /* Load result of the other work groups. */ + kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y); + src_px = ivec2(kernel_origin + local_px) * 2; + vec4 samp; + samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x; + samp.y = imageLoad(out_mip_5, min(src_px + ivec2(1, 1), image_border)).x; + samp.z = imageLoad(out_mip_5, min(src_px + ivec2(1, 0), image_border)).x; + samp.w = imageLoad(out_mip_5, min(src_px + ivec2(0, 0), image_border)).x; + /* Level 6. */ + float max_depth = max_v4(samp); + ivec2 dst_px = ivec2(kernel_origin + local_px); + imageStore(out_mip_6, dst_px, vec4(max_depth)); + store_local_depth(local_px, max_depth); + + mask_shift = 1; + + /* Level 7. */ + downsample_level(out_mip_7, 7); + + /* Limited by OpenGL maximum of 8 image slot. 
*/ + // downsample_level(out_mip_8, 8); + // downsample_level(out_mip_9, 9); + // downsample_level(out_mip_10, 10); + } + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl index 321c99f7952..5c50a2252bd 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl @@ -14,7 +14,7 @@ void main() { ivec2 texel = ivec2(gl_FragCoord.xy); - float depth = texelFetch(depth_tx, texel, 0).r; + float depth = texelFetch(hiz_tx, texel, 0).r; float vP_z = get_view_z_from_depth(depth); vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth); @@ -42,11 +42,13 @@ void main() } LIGHT_FOREACH_END + vec4 color = vec4(heatmap_gradient(light_count / 4.0), 1.0); + if ((light_cull & light_nocull) != light_nocull) { /* ERROR. Some lights were culled incorrectly. */ - out_debug_color = vec4(0.0, 1.0, 0.0, 1.0); - } - else { - out_debug_color = vec4(heatmap_gradient(light_count / 4.0), 1.0); + color = vec4(0.0, 1.0, 0.0, 1.0); } + + out_debug_color_add = vec4(color.rgb, 0.0) * 0.2; + out_debug_color_mul = color; } \ No newline at end of file diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl index daf2016cd35..e98b170cd4c 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl @@ -25,7 +25,7 @@ void main() int prefix_sum = 0; /* Iterate over the whole key buffer. 
*/ - uint iter = divide_ceil_u(light_cull_buf.visible_count, gl_WorkGroupSize.x); + uint iter = divide_ceil(light_cull_buf.visible_count, gl_WorkGroupSize.x); for (uint i = 0u; i < iter; i++) { uint index = gl_WorkGroupSize.x * i + gl_LocalInvocationID.x; /* NOTE: This will load duplicated values, but they will be discarded. */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl index d96f191fb77..ae20153f26c 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl @@ -25,7 +25,7 @@ void main() } barrier(); - uint light_iter = divide_ceil_u(light_cull_buf.visible_count, gl_WorkGroupSize.x); + uint light_iter = divide_ceil(light_cull_buf.visible_count, gl_WorkGroupSize.x); for (uint i = 0u; i < light_iter; i++) { uint index = i * gl_WorkGroupSize.x + gl_LocalInvocationID.x; if (index >= light_cull_buf.visible_count) { diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh new file mode 100644 index 00000000000..5e32631a8f8 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(eevee_hiz_data) + .sampler(15, ImageType::FLOAT_2D, "hiz_tx") + .uniform_buf(5, "HiZData", "hiz_buf"); + +GPU_SHADER_CREATE_INFO(eevee_hiz_update) + .do_static_compilation(true) + .local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE) + .storage_buf(0, Qualifier::READ_WRITE, "uint", "finished_tile_counter") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_0") + .image(1, GPU_R32F, 
Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_1") + .image(2, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_2") + .image(3, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_3") + .image(4, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_4") + .image(5, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "out_mip_5") + .image(6, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_6") + .image(7, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_7") + .push_constant(Type::BOOL, "update_mip_0") + .compute_source("eevee_hiz_update_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_hiz_debug) + .do_static_compilation(true) + .fragment_out(0, Type::VEC4, "out_debug_color_add", DualBlend::SRC_0) + .fragment_out(0, Type::VEC4, "out_debug_color_mul", DualBlend::SRC_1) + .fragment_source("eevee_hiz_debug_frag.glsl") + .additional_info("eevee_shared", "eevee_hiz_data", "draw_fullscreen"); diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh index 56fda25ed13..c54f05719d3 100644 --- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh @@ -67,10 +67,10 @@ GPU_SHADER_CREATE_INFO(eevee_light_culling_tile) GPU_SHADER_CREATE_INFO(eevee_light_culling_debug) .do_static_compilation(true) - .sampler(0, ImageType::DEPTH_2D, "depth_tx") - .fragment_out(0, Type::VEC4, "out_debug_color") - .additional_info("eevee_shared", "draw_view") + .fragment_out(0, Type::VEC4, "out_debug_color_add", DualBlend::SRC_0) + .fragment_out(0, Type::VEC4, "out_debug_color_mul", DualBlend::SRC_1) .fragment_source("eevee_light_culling_debug_frag.glsl") - .additional_info("draw_fullscreen", "eevee_light_data"); + .additional_info( + "eevee_shared", "draw_view", "draw_fullscreen", "eevee_light_data", "eevee_hiz_data"); /** \} */ -- 
cgit v1.2.3