From 67d7792503e4f598d8620818b1d9887670e144da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Thu, 11 Aug 2022 08:13:47 +0200 Subject: EEVEE-Next: Light: New light module Compared to the previous implementation this has a limit of 65536 lights per scene. Lights exceeding this limit will be ignored. This also introduces fine-grained GPU light culling, making rendering many lights in a scene more efficient as long as they don't overlap much. Compatible light panels have been unhidden. Note: This commit does not include surface evaluation, only light culling. --- .../scripts/startup/bl_ui/properties_data_light.py | 12 +- source/blender/draw/CMakeLists.txt | 10 + .../draw/engines/eevee_next/eevee_defines.hh | 13 +- .../draw/engines/eevee_next/eevee_instance.cc | 9 +- .../draw/engines/eevee_next/eevee_instance.hh | 7 +- .../blender/draw/engines/eevee_next/eevee_light.cc | 499 +++++++++++++++++++++ .../blender/draw/engines/eevee_next/eevee_light.hh | 164 +++++++ .../draw/engines/eevee_next/eevee_pipeline.cc | 8 +- .../draw/engines/eevee_next/eevee_shader.cc | 10 + .../draw/engines/eevee_next/eevee_shader.hh | 6 + .../draw/engines/eevee_next/eevee_shader_shared.hh | 187 ++++++++ .../blender/draw/engines/eevee_next/eevee_view.cc | 13 +- .../shaders/eevee_light_culling_debug_frag.glsl | 52 +++ .../shaders/eevee_light_culling_select_comp.glsl | 62 +++ .../shaders/eevee_light_culling_sort_comp.glsl | 57 +++ .../shaders/eevee_light_culling_tile_comp.glsl | 188 ++++++++ .../shaders/eevee_light_culling_zbin_comp.glsl | 56 +++ .../eevee_next/shaders/eevee_light_eval_lib.glsl | 129 ++++++ .../eevee_next/shaders/eevee_light_iter_lib.glsl | 72 +++ .../eevee_next/shaders/eevee_light_lib.glsl | 209 +++++++++ .../engines/eevee_next/shaders/eevee_ltc_lib.glsl | 299 ++++++++++++ .../shaders/infos/eevee_light_culling_info.hh | 76 ++++ source/blender/gpu/CMakeLists.txt | 1 + 23 files changed, 2115 insertions(+), 24 deletions(-) create mode 100644 
source/blender/draw/engines/eevee_next/eevee_light.cc create mode 100644 source/blender/draw/engines/eevee_next/eevee_light.hh create mode 100644 source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl create mode 100644 source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl create mode 100644 source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl create mode 100644 source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl create mode 100644 source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl create mode 100644 source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl create mode 100644 source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl create mode 100644 source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl create mode 100644 source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl create mode 100644 source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh diff --git a/release/scripts/startup/bl_ui/properties_data_light.py b/release/scripts/startup/bl_ui/properties_data_light.py index df3ad43e6de..2980592ee0b 100644 --- a/release/scripts/startup/bl_ui/properties_data_light.py +++ b/release/scripts/startup/bl_ui/properties_data_light.py @@ -18,7 +18,7 @@ class DataButtonsPanel: class DATA_PT_context_light(DataButtonsPanel, Panel): bl_label = "" bl_options = {'HIDE_HEADER'} - COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'} + COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'} def draw(self, context): layout = self.layout @@ -36,7 +36,7 @@ class DATA_PT_context_light(DataButtonsPanel, Panel): class DATA_PT_preview(DataButtonsPanel, Panel): bl_label = "Preview" bl_options = {'DEFAULT_CLOSED'} - COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE'} + COMPAT_ENGINES = 
{'BLENDER_RENDER', 'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE'} def draw(self, context): self.layout.template_preview(context.light) @@ -62,7 +62,7 @@ class DATA_PT_light(DataButtonsPanel, Panel): class DATA_PT_EEVEE_light(DataButtonsPanel, Panel): bl_label = "Light" - COMPAT_ENGINES = {'BLENDER_EEVEE'} + COMPAT_ENGINES = {'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE'} def draw(self, context): layout = self.layout @@ -108,7 +108,7 @@ class DATA_PT_EEVEE_light_distance(DataButtonsPanel, Panel): bl_label = "Custom Distance" bl_parent_id = "DATA_PT_EEVEE_light" bl_options = {'DEFAULT_CLOSED'} - COMPAT_ENGINES = {'BLENDER_EEVEE'} + COMPAT_ENGINES = {'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE'} @classmethod def poll(cls, context): @@ -256,7 +256,7 @@ class DATA_PT_area(DataButtonsPanel, Panel): class DATA_PT_spot(DataButtonsPanel, Panel): bl_label = "Spot Shape" bl_parent_id = "DATA_PT_EEVEE_light" - COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'} + COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'} @classmethod def poll(cls, context): @@ -301,7 +301,7 @@ class DATA_PT_falloff_curve(DataButtonsPanel, Panel): class DATA_PT_custom_props_light(DataButtonsPanel, PropertyPanel, Panel): - COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'} + COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'} _context_path = "object.data" _property_type = bpy.types.Light diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt index 02b8db9cf0e..322b2e78caa 100644 --- a/source/blender/draw/CMakeLists.txt +++ b/source/blender/draw/CMakeLists.txt @@ -140,6 +140,7 @@ set(SRC engines/eevee_next/eevee_engine.cc engines/eevee_next/eevee_film.cc engines/eevee_next/eevee_instance.cc + engines/eevee_next/eevee_light.cc engines/eevee_next/eevee_material.cc engines/eevee_next/eevee_motion_blur.cc engines/eevee_next/eevee_pipeline.cc @@ -391,6 +392,15 @@ 
set(GLSL_SRC engines/eevee_next/shaders/eevee_geom_gpencil_vert.glsl engines/eevee_next/shaders/eevee_geom_mesh_vert.glsl engines/eevee_next/shaders/eevee_geom_world_vert.glsl + engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl + engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl + engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl + engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl + engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl + engines/eevee_next/shaders/eevee_light_eval_lib.glsl + engines/eevee_next/shaders/eevee_light_iter_lib.glsl + engines/eevee_next/shaders/eevee_light_lib.glsl + engines/eevee_next/shaders/eevee_ltc_lib.glsl engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh index c1e901845f1..96c5095317d 100644 --- a/source/blender/draw/engines/eevee_next/eevee_defines.hh +++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh @@ -11,12 +11,13 @@ #pragma once -/** - * Number of items in a culling batch. Needs to be Power of 2. Must be <= to 65536. - * Current limiting factor is the sorting phase which is single pass and only sort within a - * thread-group which maximum size is 1024. - */ -#define CULLING_BATCH_SIZE 1024 +/* Avoid too much overhead caused by resizing the light buffers too many times. */ +#define LIGHT_CHUNK 256 + +#define CULLING_SELECT_GROUP_SIZE 256 +#define CULLING_SORT_GROUP_SIZE 256 +#define CULLING_ZBIN_GROUP_SIZE 1024 +#define CULLING_TILE_GROUP_SIZE 1024 /** * IMPORTANT: Some data packing are tweaked for these values. 
diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc index df7a9ba7702..57786adb657 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.cc +++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc @@ -53,6 +53,10 @@ void Instance::init(const int2 &output_res, v3d = v3d_; rv3d = rv3d_; + if (assign_if_different(debug_mode, (eDebugMode)G.debug_value)) { + sampling.reset(); + } + info = ""; update_eval_members(); @@ -96,6 +100,7 @@ void Instance::begin_sync() { materials.begin_sync(); velocity.begin_sync(); /* NOTE: Also syncs camera. */ + lights.begin_sync(); gpencil_engine_enabled = false; @@ -109,7 +114,7 @@ void Instance::begin_sync() void Instance::object_sync(Object *ob) { - const bool is_renderable_type = ELEM(ob->type, OB_CURVES, OB_GPENCIL, OB_MESH); + const bool is_renderable_type = ELEM(ob->type, OB_CURVES, OB_GPENCIL, OB_MESH, OB_LAMP); const int ob_visibility = DRW_object_visibility_in_active_context(ob); const bool partsys_is_visible = (ob_visibility & OB_VISIBLE_PARTICLES) != 0 && (ob->type == OB_MESH); @@ -133,6 +138,7 @@ void Instance::object_sync(Object *ob) if (object_is_visible) { switch (ob->type) { case OB_LAMP: + lights.sync_light(ob, ob_handle); break; case OB_MESH: case OB_CURVES_LEGACY: @@ -172,6 +178,7 @@ void Instance::object_sync_render(void *instance_, void Instance::end_sync() { velocity.end_sync(); + lights.end_sync(); sampling.end_sync(); film.end_sync(); } diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.hh b/source/blender/draw/engines/eevee_next/eevee_instance.hh index 60dffd7c5ec..d52e4a8e43b 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.hh +++ b/source/blender/draw/engines/eevee_next/eevee_instance.hh @@ -18,6 +18,7 @@ #include "eevee_camera.hh" #include "eevee_depth_of_field.hh" #include "eevee_film.hh" +#include "eevee_light.hh" #include "eevee_material.hh" #include "eevee_motion_blur.hh" 
#include "eevee_pipeline.hh" @@ -43,6 +44,7 @@ class Instance { SyncModule sync; MaterialModule materials; PipelineModule pipelines; + LightModule lights; VelocityModule velocity; MotionBlurModule motion_blur; DepthOfField depth_of_field; @@ -71,8 +73,10 @@ class Instance { /** True if the grease pencil engine might be running. */ bool gpencil_engine_enabled; - /* Info string displayed at the top of the render / viewport. */ + /** Info string displayed at the top of the render / viewport. */ std::string info = ""; + /** Debug mode from debug value. */ + eDebugMode debug_mode = eDebugMode::DEBUG_NONE; public: Instance() @@ -80,6 +84,7 @@ class Instance { sync(*this), materials(*this), pipelines(*this), + lights(*this), velocity(*this), motion_blur(*this), depth_of_field(*this), diff --git a/source/blender/draw/engines/eevee_next/eevee_light.cc b/source/blender/draw/engines/eevee_next/eevee_light.cc new file mode 100644 index 00000000000..dbbf481f3f4 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_light.cc @@ -0,0 +1,499 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * The light module manages light data buffers and light culling system. + */ + +#include "draw_debug.hh" + +#include "eevee_instance.hh" + +#include "eevee_light.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name LightData + * \{ */ + +static eLightType to_light_type(short blender_light_type, short blender_area_type) +{ + switch (blender_light_type) { + default: + case LA_LOCAL: + return LIGHT_POINT; + case LA_SUN: + return LIGHT_SUN; + case LA_SPOT: + return LIGHT_SPOT; + case LA_AREA: + return ELEM(blender_area_type, LA_AREA_DISK, LA_AREA_ELLIPSE) ? 
LIGHT_ELLIPSE : LIGHT_RECT; + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Light Object + * \{ */ + +void Light::sync(/* ShadowModule &shadows , */ const Object *ob, float threshold) +{ + const ::Light *la = (const ::Light *)ob->data; + float scale[3]; + + float max_power = max_fff(la->r, la->g, la->b) * fabsf(la->energy / 100.0f); + float surface_max_power = max_ff(la->diff_fac, la->spec_fac) * max_power; + float volume_max_power = la->volume_fac * max_power; + + float influence_radius_surface = attenuation_radius_get(la, threshold, surface_max_power); + float influence_radius_volume = attenuation_radius_get(la, threshold, volume_max_power); + + this->influence_radius_max = max_ff(influence_radius_surface, influence_radius_volume); + this->influence_radius_invsqr_surface = 1.0f / square_f(max_ff(influence_radius_surface, 1e-8f)); + this->influence_radius_invsqr_volume = 1.0f / square_f(max_ff(influence_radius_volume, 1e-8f)); + + this->color = float3(&la->r) * la->energy; + normalize_m4_m4_ex(this->object_mat.ptr(), ob->obmat, scale); + /* Make sure we have consistent handedness (in case of negatively scaled Z axis). 
*/ + float3 cross = math::cross(float3(this->_right), float3(this->_up)); + if (math::dot(cross, float3(this->_back)) < 0.0f) { + negate_v3(this->_up); + } + + shape_parameters_set(la, scale); + + float shape_power = shape_power_get(la); + float point_power = point_power_get(la); + this->diffuse_power = la->diff_fac * shape_power; + this->transmit_power = la->diff_fac * point_power; + this->specular_power = la->spec_fac * shape_power; + this->volume_power = la->volume_fac * point_power; + + eLightType new_type = to_light_type(la->type, la->area_shape); + if (this->type != new_type) { + /* shadow_discard_safe(shadows); */ + this->type = new_type; + } + +#if 0 + if (la->mode & LA_SHADOW) { + if (la->type == LA_SUN) { + if (this->shadow_id == LIGHT_NO_SHADOW) { + this->shadow_id = shadows.directionals.alloc(); + } + + ShadowDirectional &shadow = shadows.directionals[this->shadow_id]; + shadow.sync(this->object_mat, la->bias * 0.05f, 1.0f); + } + else { + float cone_aperture = DEG2RAD(360.0); + if (la->type == LA_SPOT) { + cone_aperture = min_ff(DEG2RAD(179.9), la->spotsize); + } + else if (la->type == LA_AREA) { + cone_aperture = DEG2RAD(179.9); + } + + if (this->shadow_id == LIGHT_NO_SHADOW) { + this->shadow_id = shadows.punctuals.alloc(); + } + + ShadowPunctual &shadow = shadows.punctuals[this->shadow_id]; + shadow.sync(this->type, + this->object_mat, + cone_aperture, + la->clipsta, + this->influence_radius_max, + la->bias * 0.05f); + } + } + else { + shadow_discard_safe(shadows); + } +#endif + + this->initialized = true; +} + +#if 0 +void Light::shadow_discard_safe(ShadowModule &shadows) +{ + if (shadow_id != LIGHT_NO_SHADOW) { + if (this->type != LIGHT_SUN) { + shadows.punctuals.free(shadow_id); + } + else { + shadows.directionals.free(shadow_id); + } + shadow_id = LIGHT_NO_SHADOW; + } +} +#endif + +/* Returns the attenuation radius; the caller inverts & squares it for bound checking in shaders. 
*/ +float Light::attenuation_radius_get(const ::Light *la, float light_threshold, float light_power) +{ + if (la->type == LA_SUN) { + return (light_power > 1e-5f) ? 1e16f : 0.0f; + } + + if (la->mode & LA_CUSTOM_ATTENUATION) { + return la->att_dist; + } + /* Compute the distance (using the inverse square law) + * at which the light power reaches the light_threshold. */ + /* TODO take area light scale into account. */ + return sqrtf(light_power / light_threshold); +} + +void Light::shape_parameters_set(const ::Light *la, const float scale[3]) +{ + if (la->type == LA_AREA) { + float area_size_y = (ELEM(la->area_shape, LA_AREA_RECT, LA_AREA_ELLIPSE)) ? la->area_sizey : + la->area_size; + _area_size_x = max_ff(0.003f, la->area_size * scale[0] * 0.5f); + _area_size_y = max_ff(0.003f, area_size_y * scale[1] * 0.5f); + /* For volume point lighting. */ + radius_squared = max_ff(0.001f, hypotf(_area_size_x, _area_size_y) * 0.5f); + radius_squared = square_f(radius_squared); + } + else { + if (la->type == LA_SPOT) { + /* Spot size & blend */ + spot_size_inv[0] = scale[2] / scale[0]; + spot_size_inv[1] = scale[2] / scale[1]; + float spot_size = cosf(la->spotsize * 0.5f); + float spot_blend = (1.0f - spot_size) * la->spotblend; + _spot_mul = 1.0f / max_ff(1e-8f, spot_blend); + _spot_bias = -spot_size * _spot_mul; + spot_tan = tanf(min_ff(la->spotsize * 0.5f, M_PI_2 - 0.0001f)); + } + + if (la->type == LA_SUN) { + _area_size_x = tanf(min_ff(la->sun_angle, DEG2RADF(179.9f)) / 2.0f); + } + else { + _area_size_x = la->area_size; + } + _area_size_y = _area_size_x = max_ff(0.001f, _area_size_x); + radius_squared = square_f(_area_size_x); + } +} + +float Light::shape_power_get(const ::Light *la) +{ + /* Make illumination power constant */ + switch (la->type) { + case LA_AREA: { + float area = _area_size_x * _area_size_y; + float power = 1.0f / (area * 4.0f * float(M_PI)); + /* FIXME : Empirical, Fit cycles power */ + power *= 0.8f; + if (ELEM(la->area_shape, LA_AREA_DISK, 
LA_AREA_ELLIPSE)) { + /* Scale power to account for the lower area of the ellipse compared to the surrounding + * rectangle. */ + power *= 4.0f / M_PI; + } + return power; + } + case LA_SPOT: + case LA_LOCAL: { + return 1.0f / (4.0f * square_f(_radius) * float(M_PI * M_PI)); + } + default: + case LA_SUN: { + float power = 1.0f / (square_f(_radius) * float(M_PI)); + /* Make illumination power closer to cycles for bigger radii. Cycles uses a cos^3 term that + * we cannot reproduce so we account for that by scaling the light power. This function is + * the result of a rough manual fitting. */ + /* Simplification of: power *= 1 + r²/2 */ + power += 1.0f / (2.0f * M_PI); + + return power; + } + } +} + +float Light::point_power_get(const ::Light *la) +{ + /* Volume light is evaluated as point lights. Remove the shape power. */ + switch (la->type) { + case LA_AREA: { + /* Match cycles. Empirical fit... must correspond to some constant. */ + float power = 0.0792f * M_PI; + + /* This corrects for area light most representative point trick. The fit was found by + * reducing the average error compared to cycles. */ + float area = _area_size_x * _area_size_y; + float tmp = M_PI_2 / (M_PI_2 + sqrtf(area)); + /* Lerp between 1.0 and the limit (1 / pi). */ + power *= tmp + (1.0f - tmp) * M_1_PI; + + return power; + } + case LA_SPOT: + case LA_LOCAL: { + /* Match cycles. Empirical fit... must correspond to some constant. */ + return 0.0792f; + } + default: + case LA_SUN: { + return 1.0f; + } + } +} + +void Light::debug_draw() +{ +#ifdef DEBUG + drw_debug_sphere(_position, influence_radius_max, float4(0.8f, 0.3f, 0.0f, 1.0f)); +#endif +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name LightModule + * \{ */ + +void LightModule::begin_sync() +{ + use_scene_lights_ = inst_.use_scene_lights(); + + /* In begin_sync so it can be animated. 
*/ + if (assign_if_different(light_threshold_, max_ff(1e-16f, inst_.scene->eevee.light_threshold))) { + inst_.sampling.reset(); + } + + sun_lights_len_ = 0; + local_lights_len_ = 0; +} + +void LightModule::sync_light(const Object *ob, ObjectHandle &handle) +{ + if (use_scene_lights_ == false) { + return; + } + Light &light = light_map_.lookup_or_add_default(handle.object_key); + light.used = true; + if (handle.recalc != 0 || !light.initialized) { + light.sync(/* inst_.shadows, */ ob, light_threshold_); + } + sun_lights_len_ += int(light.type == LIGHT_SUN); + local_lights_len_ += int(light.type != LIGHT_SUN); +} + +void LightModule::end_sync() +{ + // ShadowModule &shadows = inst_.shadows; + + /* NOTE: We resize this buffer before removing deleted lights. */ + int lights_allocated = ceil_to_multiple_u(max_ii(light_map_.size(), 1), LIGHT_CHUNK); + light_buf_.resize(lights_allocated); + + /* Track light deletion. */ + Vector deleted_keys; + /* Indices inside GPU data array. */ + int sun_lights_idx = 0; + int local_lights_idx = sun_lights_len_; + + /* Fill GPU data with scene data. */ + for (auto item : light_map_.items()) { + Light &light = item.value; + + if (!light.used) { + /* Deleted light. */ + deleted_keys.append(item.key); + // light.shadow_discard_safe(shadows); + continue; + } + + int dst_idx = (light.type == LIGHT_SUN) ? sun_lights_idx++ : local_lights_idx++; + /* Put all light data into global data SSBO. */ + light_buf_[dst_idx] = light; + +#if 0 + if (light.shadow_id != LIGHT_NO_SHADOW) { + if (light.type == LIGHT_SUN) { + light_buf_[dst_idx].shadow_data = shadows.directionals[light.shadow_id]; + } + else { + light_buf_[dst_idx].shadow_data = shadows.punctuals[light.shadow_id]; + } + } +#endif + /* Untag for next sync. */ + light.used = false; + } + /* This scene data buffer is then immutable after this point. 
*/ + light_buf_.push_update(); + + for (auto key : deleted_keys) { + light_map_.remove(key); + } + + /* Update sampling on deletion or un-hiding (use_scene_lights). */ + if (assign_if_different(light_map_size_, light_map_.size())) { + inst_.sampling.reset(); + } + + /* If exceeding the limit, just trim off the excess to avoid glitchy rendering. */ + if (sun_lights_len_ + local_lights_len_ > CULLING_MAX_ITEM) { + sun_lights_len_ = min_ii(sun_lights_len_, CULLING_MAX_ITEM); + local_lights_len_ = min_ii(local_lights_len_, CULLING_MAX_ITEM - sun_lights_len_); + inst_.info = "Error: Too many lights in the scene."; + } + lights_len_ = sun_lights_len_ + local_lights_len_; + + /* Resize to the actual number of lights after pruning. */ + lights_allocated = ceil_to_multiple_u(max_ii(lights_len_, 1), LIGHT_CHUNK); + culling_key_buf_.resize(lights_allocated); + culling_zdist_buf_.resize(lights_allocated); + culling_light_buf_.resize(lights_allocated); + + { + /* Compute tile size and total word count. */ + uint word_per_tile = divide_ceil_u(max_ii(lights_len_, 1), 32); + int2 render_extent = inst_.film.render_extent_get(); + int2 tiles_extent; + /* Default to 32 as this is likely to be the maximum + * tile size used by hardware or compute shading. */ + uint tile_size = 16; + do { + tile_size *= 2; + tiles_extent = math::divide_ceil(render_extent, int2(tile_size)); + uint tile_count = tiles_extent.x * tiles_extent.y; + if (tile_count > max_tile_count_threshold) { + continue; + } + total_word_count_ = tile_count * word_per_tile; + + } while (total_word_count_ > max_word_count_threshold); + /* Keep aligned with storage buffer requirements. 
*/ + total_word_count_ = ceil_to_multiple_u(total_word_count_, 32); + + culling_data_buf_.tile_word_len = word_per_tile; + culling_data_buf_.tile_size = tile_size; + culling_data_buf_.tile_x_len = tiles_extent.x; + culling_data_buf_.tile_y_len = tiles_extent.y; + culling_data_buf_.items_count = lights_len_; + culling_data_buf_.local_lights_len = local_lights_len_; + culling_data_buf_.sun_lights_len = sun_lights_len_; + } + culling_tile_buf_.resize(total_word_count_); + + culling_pass_sync(); + debug_pass_sync(); +} + +void LightModule::culling_pass_sync() +{ + uint safe_lights_len = max_ii(lights_len_, 1); + uint culling_select_dispatch_size = divide_ceil_u(safe_lights_len, CULLING_SELECT_GROUP_SIZE); + uint culling_sort_dispatch_size = divide_ceil_u(safe_lights_len, CULLING_SORT_GROUP_SIZE); + uint culling_tile_dispatch_size = divide_ceil_u(total_word_count_, CULLING_TILE_GROUP_SIZE); + + /* NOTE: We reference the buffers that may be resized or updated later. */ + { + DRW_PASS_CREATE(culling_select_ps_, DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_SELECT); + DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_select_ps_); + DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_); + DRW_shgroup_storage_block(grp, "in_light_buf", light_buf_); + DRW_shgroup_storage_block(grp, "out_light_buf", culling_light_buf_); + DRW_shgroup_storage_block(grp, "out_zdist_buf", culling_zdist_buf_); + DRW_shgroup_storage_block(grp, "out_key_buf", culling_key_buf_); + DRW_shgroup_call_compute(grp, culling_select_dispatch_size, 1, 1); + DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE); + } + { + DRW_PASS_CREATE(culling_sort_ps_, DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_SORT); + DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_sort_ps_); + DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_); + DRW_shgroup_storage_block(grp, "in_light_buf", light_buf_); + 
DRW_shgroup_storage_block(grp, "out_light_buf", culling_light_buf_); + DRW_shgroup_storage_block(grp, "in_zdist_buf", culling_zdist_buf_); + DRW_shgroup_storage_block(grp, "in_key_buf", culling_key_buf_); + DRW_shgroup_call_compute(grp, culling_sort_dispatch_size, 1, 1); + DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE); + } + { + DRW_PASS_CREATE(culling_zbin_ps_, DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_ZBIN); + DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_zbin_ps_); + DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_); + DRW_shgroup_storage_block(grp, "light_buf", culling_light_buf_); + DRW_shgroup_storage_block(grp, "out_zbin_buf", culling_zbin_buf_); + DRW_shgroup_call_compute(grp, 1, 1, 1); + DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE); + } + { + DRW_PASS_CREATE(culling_tile_ps_, DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_TILE); + DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_tile_ps_); + DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_); + DRW_shgroup_storage_block(grp, "light_buf", culling_light_buf_); + DRW_shgroup_storage_block(grp, "out_light_tile_buf", culling_tile_buf_); + DRW_shgroup_call_compute(grp, culling_tile_dispatch_size, 1, 1); + DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE); + } +} + +void LightModule::debug_pass_sync() +{ + if (inst_.debug_mode != eDebugMode::DEBUG_LIGHT_CULLING) { + debug_draw_ps_ = nullptr; + return; + } + + debug_draw_ps_ = DRW_pass_create("LightCulling.Debug", DRW_STATE_WRITE_COLOR); + GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_DEBUG); + DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_draw_ps_); + DRW_shgroup_storage_block_ref(grp, "light_buf", &culling_light_buf_); + DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_); + DRW_shgroup_storage_block_ref(grp, "light_zbin_buf", &culling_zbin_buf_); + 
DRW_shgroup_storage_block_ref(grp, "light_tile_buf", &culling_tile_buf_); + DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &inst_.render_buffers.depth_tx); + DRW_shgroup_call_procedural_triangles(grp, nullptr, 1); +} + +void LightModule::set_view(const DRWView *view, const int2 extent) +{ + float far_z = DRW_view_far_distance_get(view); + float near_z = DRW_view_near_distance_get(view); + + culling_data_buf_.zbin_scale = -CULLING_ZBIN_COUNT / fabsf(far_z - near_z); + culling_data_buf_.zbin_bias = -near_z * culling_data_buf_.zbin_scale; + culling_data_buf_.tile_to_uv_fac = (culling_data_buf_.tile_size / float2(extent)); + culling_data_buf_.visible_count = 0; + culling_data_buf_.push_update(); + + DRW_stats_group_start("Light Culling"); + + DRW_view_set_active(view); + DRW_draw_pass(culling_select_ps_); + DRW_draw_pass(culling_sort_ps_); + DRW_draw_pass(culling_zbin_ps_); + DRW_draw_pass(culling_tile_ps_); + + DRW_stats_group_end(); +} + +void LightModule::debug_draw(GPUFrameBuffer *view_fb) +{ + if (debug_draw_ps_ == nullptr) { + return; + } + GPU_framebuffer_bind(view_fb); + DRW_draw_pass(debug_draw_ps_); +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_light.hh b/source/blender/draw/engines/eevee_next/eevee_light.hh new file mode 100644 index 00000000000..c2d7aad34ae --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_light.hh @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * The light module manages light data buffers and light culling system. 
+ * + * The culling follows the principles of Tiled Culling + Z binning from: + * "Improved Culling for Tiled and Clustered Rendering" + * by Michal Drobot + * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf + * + * The culling is separated in 4 compute phases: + * - View Culling (select pass): Create a z distance and an index buffer of visible lights. + * - Light sorting: Outputs visible lights sorted by Z distance. + * - Z binning: Compute the Z bins min/max light indices. + * - Tile intersection: Fine grained 2D culling of each light, outputting a bitmap per tile. + */ + +#pragma once + +#include "BLI_bitmap.h" +#include "BLI_vector.hh" +#include "DNA_light_types.h" + +#include "eevee_camera.hh" +#include "eevee_sampling.hh" +#include "eevee_shader.hh" +#include "eevee_shader_shared.hh" +#include "eevee_sync.hh" + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Light Object + * \{ */ + +struct Light : public LightData { + public: + bool initialized = false; + bool used = false; + + public: + Light() + { + shadow_id = LIGHT_NO_SHADOW; + } + + void sync(/* ShadowModule &shadows, */ const Object *ob, float threshold); + + // void shadow_discard_safe(ShadowModule &shadows); + + void debug_draw(); + + private: + float attenuation_radius_get(const ::Light *la, float light_threshold, float light_power); + void shape_parameters_set(const ::Light *la, const float scale[3]); + float shape_power_get(const ::Light *la); + float point_power_get(const ::Light *la); +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name LightModule + * \{ */ + +/** + * The light module manages light data buffers and light culling system. + */ +class LightModule { + // friend ShadowModule; + + private: + /* Keep tile count reasonable for memory usage and 2D culling performance. 
*/ + static constexpr uint max_memory_threshold = 32 * 1024 * 1024; /* 32 MiB */ + static constexpr uint max_word_count_threshold = max_memory_threshold / sizeof(uint); + static constexpr uint max_tile_count_threshold = 8192; + + Instance &inst_; + + /** Map of light objects data. Converted to flat array each frame. */ + Map light_map_; + /** Flat array sent to GPU, populated from light_map_. Source buffer for light culling. */ + LightDataBuf light_buf_ = {"Lights_no_cull"}; + /** Recorded size of light_map_ (after pruning) to detect deletion. */ + int64_t light_map_size_ = 0; + /** Luminous intensity to consider the light boundary at. Used for culling. */ + float light_threshold_ = 0.01f; + /** If false, will prevent all scene lights from being synced. */ + bool use_scene_lights_ = false; + /** Number of sun lights synced during the last sync. Used as offset. */ + int sun_lights_len_ = 0; + int local_lights_len_ = 0; + /** Sun plus local lights count for convenience. */ + int lights_len_ = 0; + + /** + * Light Culling + */ + + /** LightData buffer used for rendering. Filled by the culling pass. */ + LightDataBuf culling_light_buf_ = {"Lights_culled"}; + /** Culling infos. */ + LightCullingDataBuf culling_data_buf_ = {"LightCull_data"}; + /** Z-distance matching the key for each visible light. Used for sorting. */ + LightCullingZdistBuf culling_zdist_buf_ = {"LightCull_zdist"}; + /** Key buffer containing only visible lights indices. Used for sorting. */ + LightCullingKeyBuf culling_key_buf_ = {"LightCull_key"}; + /** Zbins containing min and max light index for each Z bin. */ + LightCullingZbinBuf culling_zbin_buf_ = {"LightCull_zbin"}; + /** Bitmap of lights touching each tile. */ + LightCullingTileBuf culling_tile_buf_ = {"LightCull_tile"}; + /** Culling compute passes. 
*/ + DRWPass *culling_select_ps_ = nullptr; + DRWPass *culling_sort_ps_ = nullptr; + DRWPass *culling_zbin_ps_ = nullptr; + DRWPass *culling_tile_ps_ = nullptr; + /** Total number of words the tile buffer needs to contain for the render resolution. */ + uint total_word_count_ = 0; + + /** Debug Culling visualization. */ + DRWPass *debug_draw_ps_ = nullptr; + GPUTexture *input_depth_tx_ = nullptr; + + public: + LightModule(Instance &inst) : inst_(inst){}; + ~LightModule(){}; + + void begin_sync(); + void sync_light(const Object *ob, ObjectHandle &handle); + void end_sync(); + + /** + * Update acceleration structure for the given view. + */ + void set_view(const DRWView *view, const int2 extent); + + void debug_draw(GPUFrameBuffer *view_fb); + + void bind_resources(DRWShadingGroup *grp) + { + DRW_shgroup_storage_block_ref(grp, "light_buf", &culling_light_buf_); + DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_); + DRW_shgroup_storage_block_ref(grp, "light_zbin_buf", &culling_zbin_buf_); + DRW_shgroup_storage_block_ref(grp, "light_tile_buf", &culling_tile_buf_); +#if 0 + DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", inst_.shadows.atlas_tx_get()); + DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", inst_.shadows.tilemap_tx_get()); +#endif + } + + private: + void culling_pass_sync(); + void debug_pass_sync(); +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc index db169ec361f..fe7d02a855c 100644 --- a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc +++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc @@ -101,12 +101,12 @@ DRWShadingGroup *ForwardPipeline::material_opaque_add(::Material *blender_mat, G { RenderBuffers &rbufs = inst_.render_buffers; DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? 
opaque_culled_ps_ : opaque_ps_; - // LightModule &lights = inst_.lights; + LightModule &lights = inst_.lights; // LightProbeModule &lightprobes = inst_.lightprobes; // RaytracingModule &raytracing = inst_.raytracing; // eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT; DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass); - // lights.shgroup_resources(grp); + lights.bind_resources(grp); // DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get()); // DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get()); // DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get()); @@ -163,12 +163,12 @@ DRWShadingGroup *ForwardPipeline::material_transparent_add(::Material *blender_m GPUMaterial *gpumat) { RenderBuffers &rbufs = inst_.render_buffers; - // LightModule &lights = inst_.lights; + LightModule &lights = inst_.lights; // LightProbeModule &lightprobes = inst_.lightprobes; // RaytracingModule &raytracing = inst_.raytracing; // eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT; DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_); - // lights.shgroup_resources(grp); + lights.bind_resources(grp); // DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get()); // DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get()); // DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get()); diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc index 357f2796a7e..a535d3407ac 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.cc +++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc @@ -124,6 +124,16 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_ return "eevee_depth_of_field_tiles_dilate_minmax"; case DOF_TILES_FLATTEN: return "eevee_depth_of_field_tiles_flatten"; + case LIGHT_CULLING_DEBUG: + return "eevee_light_culling_debug"; + case 
LIGHT_CULLING_SELECT: + return "eevee_light_culling_select"; + case LIGHT_CULLING_SORT: + return "eevee_light_culling_sort"; + case LIGHT_CULLING_TILE: + return "eevee_light_culling_tile"; + case LIGHT_CULLING_ZBIN: + return "eevee_light_culling_zbin"; /* To avoid compiler warning about missing case. */ case MAX_SHADER_TYPE: return ""; diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.hh b/source/blender/draw/engines/eevee_next/eevee_shader.hh index dd6b9c9d4ab..5b43a1abf43 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.hh +++ b/source/blender/draw/engines/eevee_next/eevee_shader.hh @@ -47,6 +47,12 @@ enum eShaderType { DOF_TILES_DILATE_MINMAX, DOF_TILES_FLATTEN, + LIGHT_CULLING_DEBUG, + LIGHT_CULLING_SELECT, + LIGHT_CULLING_SORT, + LIGHT_CULLING_TILE, + LIGHT_CULLING_ZBIN, + MOTION_BLUR_GATHER, MOTION_BLUR_TILE_DILATE, MOTION_BLUR_TILE_FLATTEN_RENDER, diff --git a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh index fe36cb1a17c..885317fc673 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh +++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh @@ -30,6 +30,52 @@ constexpr eGPUSamplerState with_filter = GPU_SAMPLER_FILTER; #define UBO_MIN_MAX_SUPPORTED_SIZE 1 << 14 +/* -------------------------------------------------------------------- */ +/** \name Debug Mode + * \{ */ + +/** These are just to make more sense of G.debug_value's values. Reserved range is 1-30. */ +enum eDebugMode : uint32_t { + DEBUG_NONE = 0u, + /** + * Gradient showing light evaluation hotspots. + */ + DEBUG_LIGHT_CULLING = 1u, + /** + * Tilemaps to screen. Is also present in other modes. + * - Black pixels, no pages allocated. + * - Green pixels, pages cached. + * - Red pixels, pages allocated. + */ + DEBUG_SHADOW_TILEMAPS = 2u, + /** + * Random color per pages. Validates page density allocation and sampling. 
+ */ + DEBUG_SHADOW_PAGES = 3u, + /** + * Outputs random color per tilemap (or tilemap level). Validates tilemaps coverage. + * Black means not covered by any tilemaps LOD of the shadow. + */ + DEBUG_SHADOW_LOD = 4u, + /** + * Outputs white pixels for pages allocated and black pixels for unused pages. + * This needs DEBUG_SHADOW_PAGE_ALLOCATION_ENABLED defined in order to work. + */ + DEBUG_SHADOW_PAGE_ALLOCATION = 5u, + /** + * Outputs the tilemap atlas. Default tilemap is too big for the usual screen resolution. + * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option. + */ + DEBUG_SHADOW_TILE_ALLOCATION = 6u, + /** + * Visualize linear depth stored in the atlas regions of the active light. + * This way, one can check if the rendering, the copying and the shadow sampling functions works. + */ + DEBUG_SHADOW_SHADOW_DEPTH = 7u +}; + +/** \} */ + /* -------------------------------------------------------------------- */ /** \name Sampling * \{ */ @@ -459,6 +505,113 @@ static inline float circle_to_polygon_angle(float sides_count, float theta) /** \} */ +/* -------------------------------------------------------------------- */ +/** \name Light Culling + * \{ */ + +/* Number of items we can cull. Limited by how we store CullingZBin. */ +#define CULLING_MAX_ITEM 65536 +/* Fine grained subdivision in the Z direction. Limited by the LDS in z-binning compute shader. */ +#define CULLING_ZBIN_COUNT 4096 +/* Max tile map resolution per axes. */ +#define CULLING_TILE_RES 16 + +struct LightCullingData { + /** Scale applied to tile pixel coordinates to get target UV coordinate. */ + float2 tile_to_uv_fac; + /** Scale and bias applied to linear Z to get zbin. */ + float zbin_scale; + float zbin_bias; + /** Valid item count in the source data array. */ + uint items_count; + /** Items that are processed by the 2.5D culling. */ + uint local_lights_len; + /** Items that are **NOT** processed by the 2.5D culling (i.e: Sun Lights). 
*/ + uint sun_lights_len; + /** Number of items that pass the first culling test. */ + uint visible_count; + /** Extent of one square tile in pixels. */ + float tile_size; + /** Number of tiles on the X/Y axis. */ + uint tile_x_len; + uint tile_y_len; + /** Number of words per tile. Depends on the maximum number of lights. */ + uint tile_word_len; +}; +BLI_STATIC_ASSERT_ALIGN(LightCullingData, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Lights + * \{ */ + +#define LIGHT_NO_SHADOW -1 + +enum eLightType : uint32_t { + LIGHT_SUN = 0u, + LIGHT_POINT = 1u, + LIGHT_SPOT = 2u, + LIGHT_RECT = 3u, + LIGHT_ELLIPSE = 4u +}; + +static inline bool is_area_light(eLightType type) +{ + return type >= LIGHT_RECT; +} + +struct LightData { + /** Normalized object matrix. Last column contains data accessible using the following macros. */ + float4x4 object_mat; + /** Packed data in the last column of the object_mat. */ +#define _area_size_x object_mat[0][3] +#define _area_size_y object_mat[1][3] +#define _radius _area_size_x +#define _spot_mul object_mat[2][3] +#define _spot_bias object_mat[3][3] + /** Aliases for axes. */ +#ifndef USE_GPU_SHADER_CREATE_INFO +# define _right object_mat[0] +# define _up object_mat[1] +# define _back object_mat[2] +# define _position object_mat[3] +#else +# define _right object_mat[0].xyz +# define _up object_mat[1].xyz +# define _back object_mat[2].xyz +# define _position object_mat[3].xyz +#endif + /** Influence radius (inverted and squared) adjusted for Surface / Volume power. */ + float influence_radius_invsqr_surface; + float influence_radius_invsqr_volume; + /** Maximum influence radius. Used for culling. */ + float influence_radius_max; + /** Index of the shadow struct on CPU. -1 means no shadow. */ + int shadow_id; + /** NOTE: It is ok to use float3 here. A float is declared right after it. + * float3 is also aligned to 16 bytes.
*/ + float3 color; + /** Power depending on shader type. */ + float diffuse_power; + float specular_power; + float volume_power; + float transmit_power; + /** Special radius factor for point lighting. */ + float radius_squared; + /** Light Type. */ + eLightType type; + /** Spot angle tangent. */ + float spot_tan; + /** Spot size. Aligned to size of float2. */ + float2 spot_size_inv; + /** Associated shadow data. Only valid if shadow_id is not LIGHT_NO_SHADOW. */ + // ShadowData shadow_data; +}; +BLI_STATIC_ASSERT_ALIGN(LightData, 16) + +/** \} */ + /* -------------------------------------------------------------------- */ /** \name Ray-Tracing * \{ */ @@ -479,6 +632,34 @@ enum eClosureBits : uint32_t { /** \} */ +/* -------------------------------------------------------------------- */ +/** \name Subsurface + * \{ */ + +#define SSS_SAMPLE_MAX 64 +#define SSS_BURLEY_TRUNCATE 16.0 +#define SSS_BURLEY_TRUNCATE_CDF 0.9963790093708328 +#define SSS_TRANSMIT_LUT_SIZE 64.0 +#define SSS_TRANSMIT_LUT_RADIUS 1.218 +#define SSS_TRANSMIT_LUT_SCALE ((SSS_TRANSMIT_LUT_SIZE - 1.0) / float(SSS_TRANSMIT_LUT_SIZE)) +#define SSS_TRANSMIT_LUT_BIAS (0.5 / float(SSS_TRANSMIT_LUT_SIZE)) +#define SSS_TRANSMIT_LUT_STEP_RES 64.0 + +struct SubsurfaceData { + /** xy: 2D sample position [-1..1], zw: sample_bounds. */ + /* NOTE(fclem) Using float4 for alignment. */ + float4 samples[SSS_SAMPLE_MAX]; + /** Sample index after which samples are not randomly rotated anymore. */ + int jitter_threshold; + /** Number of samples precomputed in the set. 
*/ + int sample_len; + int _pad0; + int _pad1; +}; +BLI_STATIC_ASSERT_ALIGN(SubsurfaceData, 16) + +/** \} */ + /* -------------------------------------------------------------------- */ /** \name Utility Texture * \{ */ @@ -518,6 +699,12 @@ float4 utility_tx_sample(sampler2DArray util_tx, float2 uv, float layer) using AOVsInfoDataBuf = draw::StorageBuffer; using CameraDataBuf = draw::UniformBuffer; +using LightDataBuf = draw::StorageArrayBuffer; +using LightCullingDataBuf = draw::StorageBuffer; +using LightCullingKeyBuf = draw::StorageArrayBuffer; +using LightCullingTileBuf = draw::StorageArrayBuffer; +using LightCullingZbinBuf = draw::StorageArrayBuffer; +using LightCullingZdistBuf = draw::StorageArrayBuffer; using DepthOfFieldDataBuf = draw::UniformBuffer; using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer; using DrawIndirectBuf = draw::StorageBuffer; diff --git a/source/blender/draw/engines/eevee_next/eevee_view.cc b/source/blender/draw/engines/eevee_next/eevee_view.cc index c195f68380c..b7154465a70 100644 --- a/source/blender/draw/engines/eevee_next/eevee_view.cc +++ b/source/blender/draw/engines/eevee_next/eevee_view.cc @@ -118,6 +118,9 @@ void ShadingView::render() inst_.pipelines.world.render(); + /* TODO(fclem): Move it after the first prepass (and hiz update) once pipeline is stabilized. 
*/ + inst_.lights.set_view(render_view_, extent_); + // inst_.pipelines.deferred.render( // render_view_, rt_buffer_opaque_, rt_buffer_refract_, depth_tx_, combined_tx_); @@ -128,13 +131,14 @@ void ShadingView::render() inst_.pipelines.forward.render( render_view_, prepass_fb_, combined_fb_, rbufs.depth_tx, rbufs.combined_tx); - // inst_.lights.debug_draw(view_fb_); - // inst_.shadows.debug_draw(view_fb_); + inst_.lights.debug_draw(combined_fb_); GPUTexture *combined_final_tx = render_postfx(rbufs.combined_tx); inst_.film.accumulate(sub_view_, combined_final_tx); + // inst_.shadows.debug_draw(); + rbufs.release(); postfx_tx_.release(); @@ -176,13 +180,10 @@ void ShadingView::update_view() window_translate_m4(winmat.ptr(), winmat.ptr(), UNPACK2(jitter)); DRW_view_update_sub(sub_view_, viewmat.ptr(), winmat.ptr()); - /* FIXME(fclem): The offset may be is noticeably large and the culling might make object pop + /* FIXME(fclem): The offset may be noticeably large and the culling might make object pop * out of the blurring radius. To fix this, use custom enlarged culling matrix. */ inst_.depth_of_field.jitter_apply(winmat, viewmat); DRW_view_update_sub(render_view_, viewmat.ptr(), winmat.ptr()); - - // inst_.lightprobes.set_view(render_view_, extent_); - // inst_.lights.set_view(render_view_, extent_, !inst_.use_scene_lights()); } /** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl new file mode 100644 index 00000000000..321c99f7952 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl @@ -0,0 +1,52 @@ + +/** + * Debug Shader outputting a gradient of orange - white - blue to mark culling hotspots. + * Green pixels are error pixels that are missing lights from the culling pass (i.e: when culling + * pass is not conservative enough).
+ */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_FragCoord.xy); + + float depth = texelFetch(depth_tx, texel, 0).r; + float vP_z = get_view_z_from_depth(depth); + vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth); + + float light_count = 0.0; + uint light_cull = 0u; + vec2 px = gl_FragCoord.xy; + LIGHT_FOREACH_BEGIN_LOCAL(light_cull_buf, light_zbin_buf, light_tile_buf, px, vP_z, l_idx) + { + LightData light = light_buf[l_idx]; + light_cull |= 1u << l_idx; + light_count += 1.0; + } + LIGHT_FOREACH_END + + uint light_nocull = 0u; + LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(light_cull_buf, l_idx) + { + LightData light = light_buf[l_idx]; + vec3 L; + float dist; + light_vector_get(light, P, L, dist); + if (light_attenuation(light_buf[l_idx], L, dist) > 0.0) { + light_nocull |= 1u << l_idx; + } + } + LIGHT_FOREACH_END + + if ((light_cull & light_nocull) != light_nocull) { + /* ERROR. Some lights were culled incorrectly. */ + out_debug_color = vec4(0.0, 1.0, 0.0, 1.0); + } + else { + out_debug_color = vec4(heatmap_gradient(light_count / 4.0), 1.0); + } +} \ No newline at end of file diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl new file mode 100644 index 00000000000..9c12b0e50e6 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl @@ -0,0 +1,62 @@ + +/** + * Select the visible items inside the active view and put them inside the sorting buffer. 
+ */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersect_lib.glsl) + +void main() +{ + uint l_idx = gl_GlobalInvocationID.x; + if (l_idx >= light_cull_buf.items_count) { + return; + } + + LightData light = in_light_buf[l_idx]; + + /* Do not select 0 power lights. */ + if (light.influence_radius_max < 1e-8) { + return; + } + + /* Sun lights are packed at the end of the array. Perform early copy. */ + if (light.type == LIGHT_SUN) { + /* NOTE: We know the index because sun lights are packed at the start of the input buffer. */ + out_light_buf[light_cull_buf.local_lights_len + l_idx] = light; + return; + } + + Sphere sphere; + switch (light.type) { + case LIGHT_SPOT: + /* Only for < ~170° Cone due to plane extraction precision. */ + if (light.spot_tan < 10.0) { + Pyramid pyramid = shape_pyramid_non_oblique( + light._position, + light._position - light._back * light.influence_radius_max, + light._right * light.influence_radius_max * light.spot_tan / light.spot_size_inv.x, + light._up * light.influence_radius_max * light.spot_tan / light.spot_size_inv.y); + if (!intersect_view(pyramid)) { + return; + } + } + case LIGHT_RECT: + case LIGHT_ELLIPSE: + case LIGHT_POINT: + sphere = Sphere(light._position, light.influence_radius_max); + break; + } + + /* TODO(fclem): HiZ culling? Could be quite beneficial given the nature of the 2.5D culling. */ + + /* TODO(fclem): Small light culling / fading? 
*/ + + if (intersect_view(sphere)) { + uint index = atomicAdd(light_cull_buf.visible_count, 1u); + + out_zdist_buf[index] = dot(cameraForward, light._position) - dot(cameraForward, cameraPos); + out_key_buf[index] = l_idx; + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl new file mode 100644 index 00000000000..daf2016cd35 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl @@ -0,0 +1,57 @@ + +/** + * Sort the lights by their Z distance to the camera. + * Outputs ordered light buffer. + * One thread processes one Light entity. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +shared float zdists_cache[gl_WorkGroupSize.x]; + +void main() +{ + uint src_index = gl_GlobalInvocationID.x; + bool valid_thread = true; + + if (src_index >= light_cull_buf.visible_count) { + /* Do not return because we use barriers later on (which need uniform control flow). + * Just process the same last item but avoid insertion. */ + src_index = light_cull_buf.visible_count - 1; + valid_thread = false; + } + + float local_zdist = in_zdist_buf[src_index]; + + int prefix_sum = 0; + /* Iterate over the whole key buffer. */ + uint iter = divide_ceil_u(light_cull_buf.visible_count, gl_WorkGroupSize.x); + for (uint i = 0u; i < iter; i++) { + uint index = gl_WorkGroupSize.x * i + gl_LocalInvocationID.x; + /* NOTE: This will load duplicated values, but they will be discarded. */ + index = min(index, light_cull_buf.visible_count - 1); + zdists_cache[gl_LocalInvocationID.x] = in_zdist_buf[index]; + + barrier(); + + /* Iterate over the cache line. 
*/ + uint line_end = min(gl_WorkGroupSize.x, light_cull_buf.visible_count - gl_WorkGroupSize.x * i); + for (uint j = 0u; j < line_end; j++) { + if (zdists_cache[j] < local_zdist) { + prefix_sum++; + } + else if (zdists_cache[j] == local_zdist) { + /* Same depth, use index to order and avoid same prefix for 2 different lights. */ + if ((gl_WorkGroupSize.x * i + j) < src_index) { + prefix_sum++; + } + } + } + } + + if (valid_thread) { + /* Copy sorted light to render light buffer. */ + uint input_index = in_key_buf[src_index]; + out_light_buf[prefix_sum] = in_light_buf[input_index]; + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl new file mode 100644 index 00000000000..37705e22b22 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl @@ -0,0 +1,188 @@ + +/** + * 2D Culling pass for lights. + * We iterate over all items and check if they intersect with the tile frustum. + * Dispatch one thread per word. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersect_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Culling shapes extraction + * \{ */ + +struct CullingTile { + IsectFrustum frustum; + vec4 bounds; +}; + +/* Corners are expected to be in viewspace so that the cone is starting from the origin. + * Corner order does not matter. 
*/ +vec4 tile_bound_cone(vec3 v00, vec3 v01, vec3 v10, vec3 v11) +{ + v00 = normalize(v00); + v01 = normalize(v01); + v10 = normalize(v10); + v11 = normalize(v11); + vec3 center = normalize(v00 + v01 + v10 + v11); + float angle_cosine = dot(center, v00); + angle_cosine = max(angle_cosine, dot(center, v01)); + angle_cosine = max(angle_cosine, dot(center, v10)); + angle_cosine = max(angle_cosine, dot(center, v11)); + return vec4(center, angle_cosine); +} + +/* Corners are expected to be in viewspace. Returns Z-aligned bounding cylinder. + * Corner order does not matter. */ +vec4 tile_bound_cylinder(vec3 v00, vec3 v01, vec3 v10, vec3 v11) +{ + vec3 center = (v00 + v01 + v10 + v11) * 0.25; + vec4 corners_dist; + float dist_sqr = distance_squared(center, v00); + dist_sqr = max(dist_sqr, distance_squared(center, v01)); + dist_sqr = max(dist_sqr, distance_squared(center, v10)); + dist_sqr = max(dist_sqr, distance_squared(center, v11)); + /* Return a cone. Later converted to cylinder. */ + return vec4(center, sqrt(dist_sqr)); +} + +vec2 tile_to_ndc(vec2 tile_co, vec2 offset) +{ + /* Add a margin to prevent culling too much if the frustum becomes too much unstable. */ + const float margin = 0.02; + tile_co += margin * (offset * 2.0 - 1.0); + + tile_co += offset; + return tile_co * light_cull_buf.tile_to_uv_fac * 2.0 - 1.0; +} + +CullingTile tile_culling_get(uvec2 tile_co) +{ + vec2 ftile = vec2(tile_co); + /* Culling frustum corners for this tile. */ + vec3 corners[8]; + /* Follow same corners order as view frustum. 
*/ + corners[1].xy = corners[0].xy = tile_to_ndc(ftile, vec2(0, 0)); + corners[5].xy = corners[4].xy = tile_to_ndc(ftile, vec2(1, 0)); + corners[6].xy = corners[7].xy = tile_to_ndc(ftile, vec2(1, 1)); + corners[2].xy = corners[3].xy = tile_to_ndc(ftile, vec2(0, 1)); + corners[1].z = corners[5].z = corners[6].z = corners[2].z = -1.0; + corners[0].z = corners[4].z = corners[7].z = corners[3].z = 1.0; + + for (int i = 0; i < 8; i++) { + /* Culling in view space for precision. */ + corners[i] = project_point(ProjectionMatrixInverse, corners[i]); + } + + bool is_persp = ProjectionMatrix[3][3] == 0.0; + CullingTile tile; + tile.bounds = (is_persp) ? tile_bound_cone(corners[0], corners[4], corners[7], corners[3]) : + tile_bound_cylinder(corners[0], corners[4], corners[7], corners[3]); + + tile.frustum = isect_data_setup(shape_frustum(corners)); + return tile; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Intersection Tests + * \{ */ + +bool intersect(CullingTile tile, Sphere sphere) +{ + bool isect = true; + /* Test tile intersection using bounding cone or bounding cylinder. + * This has less false positive cases when the sphere is large. */ + if (ProjectionMatrix[3][3] == 0.0) { + isect = intersect(shape_cone(tile.bounds.xyz, tile.bounds.w), sphere); + } + else { + /* Simplify to a 2D circle test on the view Z axis plane. */ + isect = intersect(shape_circle(tile.bounds.xy, tile.bounds.w), + shape_circle(sphere.center.xy, sphere.radius)); + } + /* Refine using frustum test. If the sphere is small it avoids intersection + * with a neighbor tile. 
*/ + if (isect) { + isect = intersect(tile.frustum, sphere); + } + return isect; +} + +bool intersect(CullingTile tile, Box bbox) +{ + return intersect(tile.frustum, bbox); +} + +bool intersect(CullingTile tile, Pyramid pyramid) +{ + return intersect(tile.frustum, pyramid); +} + +/** \} */ + +void main() +{ + uint word_idx = gl_GlobalInvocationID.x % light_cull_buf.tile_word_len; + uint tile_idx = gl_GlobalInvocationID.x / light_cull_buf.tile_word_len; + uvec2 tile_co = uvec2(tile_idx % light_cull_buf.tile_x_len, + tile_idx / light_cull_buf.tile_x_len); + + if (tile_co.y >= light_cull_buf.tile_y_len) { + return; + } + + /* TODO(fclem): We could stop the tile at the HiZ depth. */ + CullingTile tile = tile_culling_get(tile_co); + + uint l_idx = word_idx * 32u; + uint l_end = min(l_idx + 32u, light_cull_buf.visible_count); + uint word = 0u; + for (; l_idx < l_end; l_idx++) { + LightData light = light_buf[l_idx]; + + /* Culling in view space for precision and simplicity. */ + vec3 vP = transform_point(ViewMatrix, light._position); + vec3 v_right = transform_direction(ViewMatrix, light._right); + vec3 v_up = transform_direction(ViewMatrix, light._up); + vec3 v_back = transform_direction(ViewMatrix, light._back); + float radius = light.influence_radius_max; + + Sphere sphere = shape_sphere(vP, radius); + bool intersect_tile = intersect(tile, sphere); + + switch (light.type) { + case LIGHT_SPOT: + /* Only for < ~170° Cone due to plane extraction precision. */ + if (light.spot_tan < 10.0) { + Pyramid pyramid = shape_pyramid_non_oblique( + vP, + vP - v_back * radius, + v_right * radius * light.spot_tan / light.spot_size_inv.x, + v_up * radius * light.spot_tan / light.spot_size_inv.y); + intersect_tile = intersect_tile && intersect(tile, pyramid); + break; + } + /* Fallthrough to the hemispheric case. 
*/ + case LIGHT_RECT: + case LIGHT_ELLIPSE: + vec3 v000 = vP - v_right * radius - v_up * radius; + vec3 v100 = v000 + v_right * (radius * 2.0); + vec3 v010 = v000 + v_up * (radius * 2.0); + vec3 v001 = v000 - v_back * radius; + Box bbox = shape_box(v000, v100, v010, v001); + intersect_tile = intersect_tile && intersect(tile, bbox); + default: + break; + } + + if (intersect_tile) { + word |= 1u << (l_idx % 32u); + } + } + + out_light_tile_buf[gl_GlobalInvocationID.x] = word; +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl new file mode 100644 index 00000000000..d96f191fb77 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl @@ -0,0 +1,56 @@ + +/** + * Create the Zbins from Z-sorted lights. + * Perform min-max operation in LDS memory for speed. + * For this reason, we only dispatch 1 thread group. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +/* Fits the limit of 32KB. */ +shared uint zbin_max[CULLING_ZBIN_COUNT]; +shared uint zbin_min[CULLING_ZBIN_COUNT]; + +void main() +{ + const uint zbin_iter = CULLING_ZBIN_COUNT / gl_WorkGroupSize.x; + const uint zbin_local = gl_LocalInvocationID.x * zbin_iter; + + uint src_index = gl_GlobalInvocationID.x; + + for (uint i = 0u, l = zbin_local; i < zbin_iter; i++, l++) { + zbin_max[l] = 0x0u; + zbin_min[l] = ~0x0u; + } + barrier(); + + uint light_iter = divide_ceil_u(light_cull_buf.visible_count, gl_WorkGroupSize.x); + for (uint i = 0u; i < light_iter; i++) { + uint index = i * gl_WorkGroupSize.x + gl_LocalInvocationID.x; + if (index >= light_cull_buf.visible_count) { + continue; + } + vec3 P = light_buf[index]._position; + /* TODO(fclem): Could have better bounds for spot and area lights. 
*/ + float radius = light_buf[index].influence_radius_max; + float z_dist = dot(cameraForward, P) - dot(cameraForward, cameraPos); + int z_min = culling_z_to_zbin( + light_cull_buf.zbin_scale, light_cull_buf.zbin_bias, z_dist + radius); + int z_max = culling_z_to_zbin( + light_cull_buf.zbin_scale, light_cull_buf.zbin_bias, z_dist - radius); + z_min = clamp(z_min, 0, CULLING_ZBIN_COUNT - 1); + z_max = clamp(z_max, 0, CULLING_ZBIN_COUNT - 1); + /* Register to Z bins. */ + for (int z = z_min; z <= z_max; z++) { + atomicMin(zbin_min[z], index); + atomicMax(zbin_max[z], index); + } + } + barrier(); + + /* Write result to zbins buffer. Pack min & max into 1 uint. */ + for (uint i = 0u, l = zbin_local; i < zbin_iter; i++, l++) { + out_zbin_buf[l] = (zbin_max[l] << 16u) | (zbin_min[l] & 0xFFFFu); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl new file mode 100644 index 00000000000..d4abdd43aa4 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl @@ -0,0 +1,129 @@ + +/** + * The resources expected to be defined are: + * - light_buf + * - light_zbin_buf + * - light_cull_buf + * - light_tile_buf + * - shadow_atlas_tx + * - shadow_tilemaps_tx + * - sss_transmittance_tx + * - utility_tx + */ + +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl) + +/* TODO(fclem): We could reduce register pressure by only having static branches for sun lights. 
*/ +void light_eval_ex(ClosureDiffuse diffuse, + ClosureReflection reflection, + const bool is_directional, + vec3 P, + vec3 V, + float vP_z, + float thickness, + vec4 ltc_mat, + uint l_idx, + inout vec3 out_diffuse, + inout vec3 out_specular) +{ + LightData light = light_buf[l_idx]; + vec3 L; + float dist; + light_vector_get(light, P, L, dist); + + float visibility = light_attenuation(light, L, dist); + +#if 0 /* TODO(fclem): Shadows */ + if ((light.shadow_id != LIGHT_NO_SHADOW) && (visibility > 0.0)) { + vec3 lL = light_world_to_local(light, -L) * dist; + + float shadow_delta = shadow_delta_get( + shadow_atlas_tx, shadow_tilemaps_tx, light, light.shadow_data, lL, dist, P); + +# ifdef SSS_TRANSMITTANCE + /* Transmittance evaluation first to use initial visibility. */ + if (diffuse.sss_id != 0u && light.diffuse_power > 0.0) { + float delta = max(thickness, shadow_delta); + + vec3 intensity = visibility * light.transmit_power * + light_translucent(sss_transmittance_tx, + is_directional, + light, + diffuse.N, + L, + dist, + diffuse.sss_radius, + delta); + out_diffuse += light.color * intensity; + } +# endif + + visibility *= float(shadow_delta - light.shadow_data.bias <= 0.0); + } +#endif + + if (visibility < 1e-6) { + return; + } + + if (light.diffuse_power > 0.0) { + float intensity = visibility * light.diffuse_power * + light_diffuse(utility_tx, is_directional, light, diffuse.N, V, L, dist); + out_diffuse += light.color * intensity; + } + + if (light.specular_power > 0.0) { + float intensity = visibility * light.specular_power * + light_ltc( + utility_tx, is_directional, light, reflection.N, V, L, dist, ltc_mat); + out_specular += light.color * intensity; + } +} + +void light_eval(ClosureDiffuse diffuse, + ClosureReflection reflection, + vec3 P, + vec3 V, + float vP_z, + float thickness, + inout vec3 out_diffuse, + inout vec3 out_specular) +{ + vec2 uv = vec2(reflection.roughness, safe_sqrt(1.0 - dot(reflection.N, V))); + uv = uv * UTIL_TEX_UV_SCALE + 
UTIL_TEX_UV_BIAS; + vec4 ltc_mat = utility_tx_sample(utility_tx, uv, UTIL_LTC_MAT_LAYER); + + LIGHT_FOREACH_BEGIN_DIRECTIONAL(light_cull_buf, l_idx) + { + light_eval_ex(diffuse, + reflection, + true, + P, + V, + vP_z, + thickness, + ltc_mat, + l_idx, + out_diffuse, + out_specular); + } + LIGHT_FOREACH_END + + vec2 px = gl_FragCoord.xy; + LIGHT_FOREACH_BEGIN_LOCAL(light_cull_buf, light_zbin_buf, light_tile_buf, px, vP_z, l_idx) + { + light_eval_ex(diffuse, + reflection, + false, + P, + V, + vP_z, + thickness, + ltc_mat, + l_idx, + out_diffuse, + out_specular); + } + LIGHT_FOREACH_END +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl new file mode 100644 index 00000000000..22a5f98e6c3 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl @@ -0,0 +1,72 @@ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max) +{ + uint word_start = word_index * 32u; + uint word_end = word_start + 31u; + uint local_min = max(zbin_min, word_start); + uint local_max = min(zbin_max, word_end); + uint mask_width = local_max - local_min + 1; + return bit_field_mask(mask_width, local_min); +} + +int culling_z_to_zbin(float scale, float bias, float z) +{ + return int(z * scale + bias); +} + +/* Waiting to implement extensions support. 
We need: + * - GL_KHR_shader_subgroup_ballot + * - GL_KHR_shader_subgroup_arithmetic + * or + * - Vulkan 1.1 + */ +#if 1 +# define subgroupMin(a) a +# define subgroupMax(a) a +# define subgroupOr(a) a +# define subgroupBroadcastFirst(a) a +#endif + +#define LIGHT_FOREACH_BEGIN_DIRECTIONAL(_culling, _index) \ + { \ + { \ + for (uint _index = _culling.local_lights_len; _index < _culling.items_count; _index++) { + +#define LIGHT_FOREACH_BEGIN_LOCAL(_culling, _zbins, _words, _pixel, _linearz, _item_index) \ + { \ + uvec2 tile_co = uvec2(_pixel / _culling.tile_size); \ + uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \ + _culling.tile_word_len; \ + int zbin_index = culling_z_to_zbin(_culling.zbin_scale, _culling.zbin_bias, _linearz); \ + zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \ + uint zbin_data = _zbins[zbin_index]; \ + uint min_index = zbin_data & 0xFFFFu; \ + uint max_index = zbin_data >> 16u; \ + /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \ + min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \ + max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \ + /* Same as divide by 32 but avoid integer division. */ \ + uint word_min = min_index >> 5u; \ + uint word_max = max_index >> 5u; \ + for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \ + uint word = _words[tile_word_offset + word_idx]; \ + word &= zbin_mask(word_idx, min_index, max_index); \ + /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \ + word = subgroupBroadcastFirst(subgroupOr(word)); \ + int bit_index; \ + while ((bit_index = findLSB(word)) != -1) { \ + word &= ~1u << uint(bit_index); \ + uint _item_index = word_idx * 32u + bit_index; + +/* No culling. Iterate over all items.
*/ +#define LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(_culling, _item_index) \ + { /* Loops _item_index over [0, visible_count). Must be closed with LIGHT_FOREACH_END. */ \ + { \ + for (uint _item_index = 0; _item_index < _culling.visible_count; _item_index++) { + +#define LIGHT_FOREACH_END \ + } /* Closes the three scopes opened by each LIGHT_FOREACH_BEGIN_* macro. */ \ + } \ + } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl new file mode 100644 index 00000000000..58608f6e1f0 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl @@ -0,0 +1,209 @@ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_ltc_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Light Functions + * \{ */ + +void light_vector_get(LightData ld, vec3 P, out vec3 L, out float dist) /* Outputs the unit vector L from P toward the light and the distance dist to it. */ +{ + if (ld.type == LIGHT_SUN) { + L = ld._back; /* Sun lights use a fixed direction that does not depend on P. */ + dist = 1.0; + } + else { + L = ld._position - P; + dist = inversesqrt(len_squared(L)); /* Temporarily holds the inverse length of L. */ + L *= dist; /* Normalize L. */ + dist = 1.0 / dist; /* Convert back to the actual distance. */ + } +} + +/* Rotate vector to light's local space. Does not translate. */ +vec3 light_world_to_local(LightData ld, vec3 L) +{ + /* Avoid relying on compiler to optimize this.
+ * vec3 lL = transpose(mat3(ld.object_mat)) * L; */ + vec3 lL; + lL.x = dot(ld.object_mat[0].xyz, L); + lL.y = dot(ld.object_mat[1].xyz, L); + lL.z = dot(ld.object_mat[2].xyz, L); + return lL; +} + +/* From Frostbite PBR Course + * Distance based attenuation + * http://www.frostbite.com/wp-content/uploads/2014/11/course_notes_moving_frostbite_to_pbr.pdf */ +float light_influence_attenuation(float dist, float inv_sqr_influence) +{ + float factor = sqr(dist) * inv_sqr_influence; /* Squared distance scaled by the inverse squared influence radius. */ + float fac = saturate(1.0 - sqr(factor)); + return sqr(fac); +} + +float light_spot_attenuation(LightData ld, vec3 L) /* Spot cone mask, evaluated in the light's local space. */ +{ + vec3 lL = light_world_to_local(ld, L); + float ellipse = inversesqrt(1.0 + len_squared(lL.xy * ld.spot_size_inv / lL.z)); /* NOTE(review): divides by lL.z; confirm this is never evaluated with lL.z == 0.0. */ + float spotmask = smoothstep(0.0, 1.0, ellipse * ld._spot_mul + ld._spot_bias); + return spotmask; +} + +float light_attenuation(LightData ld, vec3 L, float dist) /* Product of the spot mask, the half-space test and the influence falloff that apply to ld.type. */ +{ + float vis = 1.0; + if (ld.type == LIGHT_SPOT) { + vis *= light_spot_attenuation(ld, L); + } + if (ld.type >= LIGHT_SPOT) { /* NOTE(review): relies on the ordering of the light type values; confirm which types compare >= LIGHT_SPOT. */ + vis *= step(0.0, -dot(L, -ld._back)); /* 1.0 when dot(L, ld._back) >= 0.0, otherwise 0.0. */ + } + if (ld.type != LIGHT_SUN) { +#ifdef VOLUME_LIGHTING + vis *= light_influence_attenuation(dist, ld.influence_radius_invsqr_volume); +#else + vis *= light_influence_attenuation(dist, ld.influence_radius_invsqr_surface); +#endif + } + return vis; +} + +/* Cheaper alternative to evaluating the LTC. + * The result needs to be multiplied by BSDF or Phase Function. */ +float light_point_light(LightData ld, const bool is_directional, vec3 L, float dist) +{ + if (is_directional) { + return 1.0; + } + /** + * Using "Point Light Attenuation Without Singularity" from Cem Yuksel + * http://www.cemyuksel.com/research/pointlightattenuation/pointlightattenuation.pdf + * http://www.cemyuksel.com/research/pointlightattenuation/ + **/ + float d_sqr = sqr(dist); + float r_sqr = ld.radius_squared; + /* Using reformulation that has better numerical precision.
*/ + float power = 2.0 / (d_sqr + r_sqr + dist * sqrt(d_sqr + r_sqr)); + + if (is_area_light(ld.type)) { + /* Modulate by light plane orientation / solid angle. */ + power *= saturate(dot(ld._back, L)); + } + return power; +} + +float light_diffuse(sampler2DArray utility_tx, + const bool is_directional, + LightData ld, + vec3 N, + vec3 V, + vec3 L, + float dist) +{ + if (is_directional || !is_area_light(ld.type)) { + float radius = ld._radius / dist; /* Light radius relative to its distance. */ + return ltc_evaluate_disk_simple(utility_tx, radius, dot(N, L)); + } + else if (ld.type == LIGHT_RECT) { + vec3 corners[4]; + corners[0] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y; + corners[1] = ld._right * ld._area_size_x + ld._up * ld._area_size_y; + corners[2] = -corners[0]; /* Opposite corners are mirrored through the light center. */ + corners[3] = -corners[1]; + + corners[0] = normalize(L * dist + corners[0]); /* L * dist is the offset to the light center; each corner becomes a unit direction. */ + corners[1] = normalize(L * dist + corners[1]); + corners[2] = normalize(L * dist + corners[2]); + corners[3] = normalize(L * dist + corners[3]); + + return ltc_evaluate_quad(utility_tx, corners, N); + } + else /* (ld.type == LIGHT_ELLIPSE) */ { + vec3 points[3]; + points[0] = ld._right * -ld._area_size_x + ld._up * -ld._area_size_y; + points[1] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y; + points[2] = -points[0]; + + points[0] += L * dist; + points[1] += L * dist; + points[2] += L * dist; + + return ltc_evaluate_disk(utility_tx, N, V, mat3(1.0), points); /* Identity matrix: the disk is evaluated without an LTC transform. */ + } +} + +float light_ltc(sampler2DArray utility_tx, + const bool is_directional, + LightData ld, + vec3 N, + vec3 V, + vec3 L, + float dist, + vec4 ltc_mat) +{ + if (is_directional || ld.type != LIGHT_RECT) { + vec3 Px = ld._right; + vec3 Py = ld._up; + + if (is_directional || !is_area_light(ld.type)) { + make_orthonormal_basis(L, Px, Py); /* Presumably overwrites Px and Py with a basis built around L; confirm against the helper. */ + } + + vec3 points[3]; + points[0] = Px * -ld._area_size_x + Py * -ld._area_size_y; + points[1] = Px * ld._area_size_x + Py * -ld._area_size_y; + points[2] = -points[0]; + + points[0] += L * dist; + points[1] += L * dist; + points[2] += L * dist;
+ + return ltc_evaluate_disk(utility_tx, N, V, ltc_matrix(ltc_mat), points); /* Unlike light_diffuse(), the matrix built from ltc_mat is applied. */ + } + else { + vec3 corners[4]; + corners[0] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y; + corners[1] = ld._right * ld._area_size_x + ld._up * ld._area_size_y; + corners[2] = -corners[0]; + corners[3] = -corners[1]; + + corners[0] += L * dist; + corners[1] += L * dist; + corners[2] += L * dist; + corners[3] += L * dist; + + ltc_transform_quad(N, V, ltc_matrix(ltc_mat), corners); /* Corners come back normalized and expressed in the shading basis. */ + + return ltc_evaluate_quad(utility_tx, corners, vec3(0.0, 0.0, 1.0)); /* Up axis of the shading basis, as ltc_evaluate_quad() requires for transformed corners. */ + } +} + +vec3 light_translucent(sampler1D transmittance_tx, + const bool is_directional, + LightData ld, + vec3 N, + vec3 L, + float dist, + vec3 sss_radius, + float delta) +{ + /* TODO(fclem): We should compute the power at the entry point. */ + /* NOTE(fclem): we compute the light attenuation using the light vector but the transmittance + * using the shadow depth delta. */ + float power = light_point_light(ld, is_directional, L, dist); + /* Do not add more energy on front faces. Also apply lambertian BSDF. */ + power *= max(0.0, dot(-N, L)) * M_1_PI; + + sss_radius *= SSS_TRANSMIT_LUT_RADIUS; + vec3 channels_co = saturate(delta / sss_radius) * SSS_TRANSMIT_LUT_SCALE + SSS_TRANSMIT_LUT_BIAS; /* One lookup coordinate per color channel. */ + + vec3 translucency; + translucency.x = (sss_radius.x > 0.0) ? texture(transmittance_tx, channels_co.x).r : 0.0; /* Channels with a non-positive radius transmit nothing. */ + translucency.y = (sss_radius.y > 0.0) ? texture(transmittance_tx, channels_co.y).r : 0.0; + translucency.z = (sss_radius.z > 0.0) ? texture(transmittance_tx, channels_co.z).r : 0.0; + return translucency * power; +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl new file mode 100644 index 00000000000..57e92b0b9b4 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl @@ -0,0 +1,299 @@ + +/** + * Adapted from : + * Real-Time Polygonal-Light Shading with Linearly Transformed Cosines.
+ * Eric Heitz, Jonathan Dupuy, Stephen Hill and David Neubelt. + * ACM Transactions on Graphics (Proceedings of ACM SIGGRAPH 2016) 35(4), 2016. + * Project page: https://eheitzresearch.wordpress.com/415-2/ + */ + +/* Diffuse *clipped* sphere integral. */ +float ltc_diffuse_sphere_integral(sampler2DArray utility_tx, float avg_dir_z, float form_factor) +{ +#if 1 + /* Use the tabulated horizon-clipped sphere. */ + vec2 uv = vec2(avg_dir_z * 0.5 + 0.5, form_factor); /* Affine remap of avg_dir_z: maps [-1..1] to [0..1]. */ + uv = uv * UTIL_TEX_UV_SCALE + UTIL_TEX_UV_BIAS; + + return texture(utility_tx, vec3(uv, UTIL_DISK_INTEGRAL_LAYER))[UTIL_DISK_INTEGRAL_COMP]; +#else + /* Cheap approximation. Less smooth and has energy issues. */ + return max((form_factor * form_factor + avg_dir_z) / (form_factor + 1.0), 0.0); +#endif +} + +/** + * An extended version of the implementation from + * "How to solve a cubic equation, revisited" + * http://momentsingraphics.de/?p=105 + */ +vec3 ltc_solve_cubic(vec4 coefs) +{ + /* Normalize the polynomial */ + coefs.xyz /= coefs.w; + /* Divide middle coefficients by three */ + coefs.yz /= 3.0; + + float A = coefs.w; /* NOTE(review): w itself is not divided above, so A keeps the incoming w value. */ + float B = coefs.z; + float C = coefs.y; + float D = coefs.x; + + /* Compute the Hessian and the discriminant */ + vec3 delta = vec3(-coefs.zy * coefs.zz + coefs.yx, dot(vec2(coefs.z, -coefs.y), coefs.xy)); + + /* Discriminant */ + float discr = dot(vec2(4.0 * delta.x, -delta.y), delta.zy); + + /* Clamping avoids NaN output on some platforms.
(see T67060) */ + float sqrt_discr = sqrt(clamp(discr, 0.0, FLT_MAX)); + + vec2 xlc, xsc; + + /* Algorithm A */ + { + float A_a = 1.0; /* Not read afterwards. */ + float C_a = delta.x; + float D_a = -2.0 * B * delta.x + delta.y; + + /* Take the cubic root of a normalized complex number */ + float theta = atan(sqrt_discr, -D_a) / 3.0; + + float _2_sqrt_C_a = 2.0 * sqrt(-C_a); + float x_1a = _2_sqrt_C_a * cos(theta); + float x_3a = _2_sqrt_C_a * cos(theta + (2.0 / 3.0) * M_PI); + + float xl; + if ((x_1a + x_3a) > 2.0 * B) { + xl = x_1a; + } + else { + xl = x_3a; + } + + xlc = vec2(xl - B, A); + } + + /* Algorithm D */ + { + float A_d = D; /* Not read afterwards. */ + float C_d = delta.z; + float D_d = -D * delta.y + 2.0 * C * delta.z; + + /* Take the cubic root of a normalized complex number */ + float theta = atan(D * sqrt_discr, -D_d) / 3.0; + + float _2_sqrt_C_d = 2.0 * sqrt(-C_d); + float x_1d = _2_sqrt_C_d * cos(theta); + float x_3d = _2_sqrt_C_d * cos(theta + (2.0 / 3.0) * M_PI); + + float xs; + if (x_1d + x_3d < 2.0 * C) { + xs = x_1d; + } + else { + xs = x_3d; + } + + xsc = vec2(-D, xs + C); + } + + float E = xlc.y * xsc.y; + float F = -xlc.x * xsc.y - xlc.y * xsc.x; + float G = xlc.x * xsc.x; + + vec2 xmc = vec2(C * F - B * G, -B * F + C * E); + + vec3 root = vec3(xsc.x / xsc.y, xmc.x / xmc.y, xlc.x / xlc.y); /* xsc, xmc and xlc each hold one root as a numerator/denominator pair. */ + + if (root.x < root.y && root.x < root.z) { /* Whichever root is the smallest is moved into the .y component. */ + root.xyz = root.yxz; + } + else if (root.z < root.x && root.z < root.y) { + root.xyz = root.xzy; + } + + return root; +} + +/* From Real-Time Area Lighting: a Journey from Research to Production + * Stephen Hill and Eric Heitz */ +vec3 ltc_edge_integral_vec(vec3 v1, vec3 v2) +{ + float x = dot(v1, v2); /* Cosine of the angle between v1 and v2 when both are unit length. */ + float y = abs(x); + + float a = 0.8543985 + (0.4965155 + 0.0145206 * y) * y; + float b = 3.4175940 + (4.1616724 + y) * y; + float v = a / b; /* Rational fit evaluated on abs(x). */ + + float theta_sintheta = (x > 0.0) ? v : 0.5 * inversesqrt(max(1.0 - x * x, 1e-7)) - v; /* The max() keeps the inversesqrt() argument strictly positive. */ + + return cross(v1, v2) * theta_sintheta; +} + +mat3 ltc_matrix(vec4 lut) +{ + /* Load inverse matrix.
*/ + return mat3(vec3(lut.x, 0, lut.y), vec3(0, 1, 0), vec3(lut.z, 0, lut.w)); /* The three vec3 arguments are the matrix columns. */ +} + +void ltc_transform_quad(vec3 N, vec3 V, mat3 Minv, inout vec3 corners[4]) +{ + /* Avoid dot(N, V) == 1 in ortho mode, which would make the T1 normalization fail. */ + V = normalize(V + 1e-8); + + /* Construct orthonormal basis around N. */ + vec3 T1, T2; + T1 = normalize(V - N * dot(N, V)); + T2 = cross(N, T1); + + /* Rotate area light into the (T1, T2, N) basis. */ + Minv = Minv * transpose(mat3(T1, T2, N)); + + /* Apply LTC inverse matrix. */ + corners[0] = normalize(Minv * corners[0]); + corners[1] = normalize(Minv * corners[1]); + corners[2] = normalize(Minv * corners[2]); + corners[3] = normalize(Minv * corners[3]); +} + +/* If corners have already passed through ltc_transform_quad(), + * then N **MUST** be vec3(0.0, 0.0, 1.0), corresponding to the Up axis of the shading basis. */ +float ltc_evaluate_quad(sampler2DArray utility_tx, vec3 corners[4], vec3 N) +{ + /* Approximation using a sphere of the same solid angle as the quad. + * Finding the clipped sphere diffuse integral is easier than clipping the quad. */ + vec3 avg_dir; + avg_dir = ltc_edge_integral_vec(corners[0], corners[1]); + avg_dir += ltc_edge_integral_vec(corners[1], corners[2]); + avg_dir += ltc_edge_integral_vec(corners[2], corners[3]); + avg_dir += ltc_edge_integral_vec(corners[3], corners[0]); + + float form_factor = length(avg_dir); + float avg_dir_z = dot(N, avg_dir / form_factor); /* NOTE(review): no guard against form_factor == 0.0 here. */ + return form_factor * ltc_diffuse_sphere_integral(utility_tx, avg_dir_z, form_factor); +} + +/* If disk does not need to be transformed and is already front facing.
*/ +float ltc_evaluate_disk_simple(sampler2DArray utility_tx, float disk_radius, float NL) +{ + float r_sqr = disk_radius * disk_radius; + float one_r_sqr = 1.0 + r_sqr; + float form_factor = r_sqr * inversesqrt(one_r_sqr * one_r_sqr); /* Equals r_sqr / one_r_sqr, since one_r_sqr is positive. */ + return form_factor * ltc_diffuse_sphere_integral(utility_tx, NL, form_factor); +} + +/* disk_points are WS vectors from the shading point to the disk "bounding domain" */ +float ltc_evaluate_disk(sampler2DArray utility_tx, vec3 N, vec3 V, mat3 Minv, vec3 disk_points[3]) +{ + /* Avoid dot(N, V) == 1 in ortho mode, which would make the T1 normalization fail. */ + V = normalize(V + 1e-8); + + /* Construct orthonormal basis around N. */ + vec3 T1, T2; + T1 = normalize(V - N * dot(V, N)); + T2 = cross(N, T1); + + /* Rotation into the (T1, T2, N) basis. */ + mat3 R = transpose(mat3(T1, T2, N)); + + /* Intermediate step: init ellipse. */ + vec3 L_[3]; + L_[0] = mul(R, disk_points[0]); + L_[1] = mul(R, disk_points[1]); + L_[2] = mul(R, disk_points[2]); + + vec3 C = 0.5 * (L_[0] + L_[2]); /* Midpoint of points 0 and 2. */ + vec3 V1 = 0.5 * (L_[1] - L_[2]); /* Half of the edge from point 2 to point 1. */ + vec3 V2 = 0.5 * (L_[1] - L_[0]); /* Half of the edge from point 0 to point 1. */ + + /* Transform ellipse by Minv. */ + C = Minv * C; + V1 = Minv * V1; + V2 = Minv * V2; + + /* Compute eigenvectors of new ellipse. */ + + float d11 = dot(V1, V1); + float d22 = dot(V2, V2); + float d12 = dot(V1, V2); + float a, b; /* Eigenvalues */ + const float threshold = 0.0007; /* Can be adjusted. Fix artifacts.
*/ + if (abs(d12) / sqrt(d11 * d22) > threshold) { /* V1 and V2 are not (nearly) orthogonal. */ + float tr = d11 + d22; + float det = -d12 * d12 + d11 * d22; + + /* Use sqrt matrix to solve for eigenvalues. */ + det = sqrt(det); + float u = 0.5 * sqrt(tr - 2.0 * det); + float v = 0.5 * sqrt(tr + 2.0 * det); + float e_max = (u + v); + float e_min = (u - v); + e_max *= e_max; + e_min *= e_min; + + vec3 V1_, V2_; + if (d11 > d22) { + V1_ = d12 * V1 + (e_max - d11) * V2; + V2_ = d12 * V1 + (e_min - d11) * V2; + } + else { + V1_ = d12 * V2 + (e_max - d22) * V1; + V2_ = d12 * V2 + (e_min - d22) * V1; + } + + a = 1.0 / e_max; + b = 1.0 / e_min; + V1 = normalize(V1_); + V2 = normalize(V2_); + } + else { + a = 1.0 / d11; /* Axes are treated as orthogonal: their squared lengths are used directly. */ + b = 1.0 / d22; + V1 *= sqrt(a); /* Normalizes V1, since a is the inverse of its squared length. */ + V2 *= sqrt(b); + } + + /* Now find front facing ellipse with same solid angle. */ + + vec3 V3 = normalize(cross(V1, V2)); + if (dot(C, V3) < 0.0) { /* Flip V3 so that it points to the same side as C. */ + V3 *= -1.0; + } + + float L = dot(V3, C); /* Component of C along V3. */ + float inv_L = 1.0 / L; + float x0 = dot(V1, C) * inv_L; + float y0 = dot(V2, C) * inv_L; + + float L_sqr = L * L; + a *= L_sqr; + b *= L_sqr; + + float t = 1.0 + x0 * x0; + float c0 = a * b; + float c1 = c0 * (t + y0 * y0) - a - b; + float c2 = (1.0 - a * t) - b * (1.0 + y0 * y0); + float c3 = 1.0; + + vec3 roots = ltc_solve_cubic(vec4(c0, c1, c2, c3)); /* c3 lands in the .w component, which ltc_solve_cubic() normalizes by. */ + float e1 = roots.x; + float e2 = roots.y; + float e3 = roots.z; + + vec3 avg_dir = vec3(a * x0 / (a - e2), b * y0 / (b - e2), 1.0); + + mat3 rotate = mat3(V1, V2, V3); + + avg_dir = rotate * avg_dir; + avg_dir = normalize(avg_dir); + + /* L1, L2 are the extents of the front facing ellipse. */ + float L1 = sqrt(-e2 / e3); + float L2 = sqrt(-e2 / e1); + + /* Find the sphere and compute lighting.
*/ + float form_factor = max(0.0, L1 * L2 * inversesqrt((1.0 + L1 * L1) * (1.0 + L2 * L2))); + return form_factor * ltc_diffuse_sphere_integral(utility_tx, avg_dir.z, form_factor); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh new file mode 100644 index 00000000000..56fda25ed13 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +/* -------------------------------------------------------------------- */ +/** \name Shared + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_light_data) /* Read-only bindings for the four light buffers; referenced by eevee_light_culling_debug. */ + .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]") + .storage_buf(2, Qualifier::READ, "uint", "light_zbin_buf[]") + .storage_buf(3, Qualifier::READ, "uint", "light_tile_buf[]"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Culling + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_light_culling_select) /* Compute shader: reads in_light_buf; writes out_light_buf, out_zdist_buf and out_key_buf; light_cull_buf is read-write. */ + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .local_group_size(CULLING_SELECT_GROUP_SIZE) + .storage_buf(0, Qualifier::READ_WRITE, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "in_light_buf[]") + .storage_buf(2, Qualifier::WRITE, "LightData", "out_light_buf[]") + .storage_buf(3, Qualifier::WRITE, "float", "out_zdist_buf[]") + .storage_buf(4, Qualifier::WRITE, "uint", "out_key_buf[]") + .compute_source("eevee_light_culling_select_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_light_culling_sort) /* Compute shader: reads in_light_buf, in_zdist_buf and in_key_buf; writes out_light_buf. */ + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, 
"LightData", "in_light_buf[]") + .storage_buf(2, Qualifier::WRITE, "LightData", "out_light_buf[]") + .storage_buf(3, Qualifier::READ, "float", "in_zdist_buf[]") + .storage_buf(4, Qualifier::READ, "uint", "in_key_buf[]") + .local_group_size(CULLING_SORT_GROUP_SIZE) + .compute_source("eevee_light_culling_sort_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_light_culling_zbin) /* Compute shader: reads light_cull_buf and light_buf; writes out_zbin_buf. */ + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .local_group_size(CULLING_ZBIN_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]") + .storage_buf(2, Qualifier::WRITE, "uint", "out_zbin_buf[]") + .compute_source("eevee_light_culling_zbin_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_light_culling_tile) /* Compute shader: reads light_cull_buf and light_buf; writes out_light_tile_buf. */ + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .local_group_size(CULLING_TILE_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]") + .storage_buf(2, Qualifier::WRITE, "uint", "out_light_tile_buf[]") + .compute_source("eevee_light_culling_tile_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_light_culling_debug) /* Fragment shader: samples depth_tx, pulls in the eevee_light_data bindings and outputs out_debug_color. */ + .do_static_compilation(true) + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .fragment_out(0, Type::VEC4, "out_debug_color") + .additional_info("eevee_shared", "draw_view") + .fragment_source("eevee_light_culling_debug_frag.glsl") + .additional_info("draw_fullscreen", "eevee_light_data"); + +/** \} */ diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 65a6a2dc6b7..1d67b5be4fb 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -495,6 +495,7 @@ set(SRC_SHADER_CREATE_INFOS ../draw/engines/basic/shaders/infos/basic_depth_info.hh 
../draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh ../draw/engines/eevee_next/shaders/infos/eevee_film_info.hh + ../draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh ../draw/engines/eevee_next/shaders/infos/eevee_material_info.hh ../draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh ../draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh -- cgit v1.2.3