Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--release/scripts/startup/bl_ui/properties_data_light.py12
-rw-r--r--source/blender/draw/CMakeLists.txt10
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_defines.hh13
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_instance.cc9
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_instance.hh7
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_light.cc499
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_light.hh164
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_pipeline.cc8
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_shader.cc10
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_shader.hh6
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_shader_shared.hh187
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_view.cc13
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl52
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl62
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl57
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl188
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl56
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl129
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl72
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl209
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl299
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh76
-rw-r--r--source/blender/gpu/CMakeLists.txt1
23 files changed, 2115 insertions, 24 deletions
diff --git a/release/scripts/startup/bl_ui/properties_data_light.py b/release/scripts/startup/bl_ui/properties_data_light.py
index df3ad43e6de..2980592ee0b 100644
--- a/release/scripts/startup/bl_ui/properties_data_light.py
+++ b/release/scripts/startup/bl_ui/properties_data_light.py
@@ -18,7 +18,7 @@ class DataButtonsPanel:
class DATA_PT_context_light(DataButtonsPanel, Panel):
bl_label = ""
bl_options = {'HIDE_HEADER'}
- COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'}
+ COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'}
def draw(self, context):
layout = self.layout
@@ -36,7 +36,7 @@ class DATA_PT_context_light(DataButtonsPanel, Panel):
class DATA_PT_preview(DataButtonsPanel, Panel):
bl_label = "Preview"
bl_options = {'DEFAULT_CLOSED'}
- COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE'}
+ COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE'}
def draw(self, context):
self.layout.template_preview(context.light)
@@ -62,7 +62,7 @@ class DATA_PT_light(DataButtonsPanel, Panel):
class DATA_PT_EEVEE_light(DataButtonsPanel, Panel):
bl_label = "Light"
- COMPAT_ENGINES = {'BLENDER_EEVEE'}
+ COMPAT_ENGINES = {'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE'}
def draw(self, context):
layout = self.layout
@@ -108,7 +108,7 @@ class DATA_PT_EEVEE_light_distance(DataButtonsPanel, Panel):
bl_label = "Custom Distance"
bl_parent_id = "DATA_PT_EEVEE_light"
bl_options = {'DEFAULT_CLOSED'}
- COMPAT_ENGINES = {'BLENDER_EEVEE'}
+ COMPAT_ENGINES = {'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE'}
@classmethod
def poll(cls, context):
@@ -256,7 +256,7 @@ class DATA_PT_area(DataButtonsPanel, Panel):
class DATA_PT_spot(DataButtonsPanel, Panel):
bl_label = "Spot Shape"
bl_parent_id = "DATA_PT_EEVEE_light"
- COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'}
+ COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'}
@classmethod
def poll(cls, context):
@@ -301,7 +301,7 @@ class DATA_PT_falloff_curve(DataButtonsPanel, Panel):
class DATA_PT_custom_props_light(DataButtonsPanel, PropertyPanel, Panel):
- COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'}
+ COMPAT_ENGINES = {'BLENDER_RENDER', 'BLENDER_EEVEE_NEXT', 'BLENDER_EEVEE', 'BLENDER_WORKBENCH'}
_context_path = "object.data"
_property_type = bpy.types.Light
diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt
index 02b8db9cf0e..322b2e78caa 100644
--- a/source/blender/draw/CMakeLists.txt
+++ b/source/blender/draw/CMakeLists.txt
@@ -140,6 +140,7 @@ set(SRC
engines/eevee_next/eevee_engine.cc
engines/eevee_next/eevee_film.cc
engines/eevee_next/eevee_instance.cc
+ engines/eevee_next/eevee_light.cc
engines/eevee_next/eevee_material.cc
engines/eevee_next/eevee_motion_blur.cc
engines/eevee_next/eevee_pipeline.cc
@@ -391,6 +392,15 @@ set(GLSL_SRC
engines/eevee_next/shaders/eevee_geom_gpencil_vert.glsl
engines/eevee_next/shaders/eevee_geom_mesh_vert.glsl
engines/eevee_next/shaders/eevee_geom_world_vert.glsl
+ engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl
+ engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl
+ engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl
+ engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl
+ engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl
+ engines/eevee_next/shaders/eevee_light_eval_lib.glsl
+ engines/eevee_next/shaders/eevee_light_iter_lib.glsl
+ engines/eevee_next/shaders/eevee_light_lib.glsl
+ engines/eevee_next/shaders/eevee_ltc_lib.glsl
engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl
engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl
engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl
diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh
index c1e901845f1..96c5095317d 100644
--- a/source/blender/draw/engines/eevee_next/eevee_defines.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh
@@ -11,12 +11,13 @@
#pragma once
-/**
- * Number of items in a culling batch. Needs to be Power of 2. Must be <= to 65536.
- * Current limiting factor is the sorting phase which is single pass and only sort within a
- * thread-group which maximum size is 1024.
- */
-#define CULLING_BATCH_SIZE 1024
+/* Avoid too much overhead caused by resizing the light buffers too many times. */
+#define LIGHT_CHUNK 256
+
+#define CULLING_SELECT_GROUP_SIZE 256
+#define CULLING_SORT_GROUP_SIZE 256
+#define CULLING_ZBIN_GROUP_SIZE 1024
+#define CULLING_TILE_GROUP_SIZE 1024
/**
* IMPORTANT: Some data packing are tweaked for these values.
diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc
index df7a9ba7702..57786adb657 100644
--- a/source/blender/draw/engines/eevee_next/eevee_instance.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc
@@ -53,6 +53,10 @@ void Instance::init(const int2 &output_res,
v3d = v3d_;
rv3d = rv3d_;
+ if (assign_if_different(debug_mode, (eDebugMode)G.debug_value)) {
+ sampling.reset();
+ }
+
info = "";
update_eval_members();
@@ -96,6 +100,7 @@ void Instance::begin_sync()
{
materials.begin_sync();
velocity.begin_sync(); /* NOTE: Also syncs camera. */
+ lights.begin_sync();
gpencil_engine_enabled = false;
@@ -109,7 +114,7 @@ void Instance::begin_sync()
void Instance::object_sync(Object *ob)
{
- const bool is_renderable_type = ELEM(ob->type, OB_CURVES, OB_GPENCIL, OB_MESH);
+ const bool is_renderable_type = ELEM(ob->type, OB_CURVES, OB_GPENCIL, OB_MESH, OB_LAMP);
const int ob_visibility = DRW_object_visibility_in_active_context(ob);
const bool partsys_is_visible = (ob_visibility & OB_VISIBLE_PARTICLES) != 0 &&
(ob->type == OB_MESH);
@@ -133,6 +138,7 @@ void Instance::object_sync(Object *ob)
if (object_is_visible) {
switch (ob->type) {
case OB_LAMP:
+ lights.sync_light(ob, ob_handle);
break;
case OB_MESH:
case OB_CURVES_LEGACY:
@@ -172,6 +178,7 @@ void Instance::object_sync_render(void *instance_,
void Instance::end_sync()
{
velocity.end_sync();
+ lights.end_sync();
sampling.end_sync();
film.end_sync();
}
diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.hh b/source/blender/draw/engines/eevee_next/eevee_instance.hh
index 60dffd7c5ec..d52e4a8e43b 100644
--- a/source/blender/draw/engines/eevee_next/eevee_instance.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_instance.hh
@@ -18,6 +18,7 @@
#include "eevee_camera.hh"
#include "eevee_depth_of_field.hh"
#include "eevee_film.hh"
+#include "eevee_light.hh"
#include "eevee_material.hh"
#include "eevee_motion_blur.hh"
#include "eevee_pipeline.hh"
@@ -43,6 +44,7 @@ class Instance {
SyncModule sync;
MaterialModule materials;
PipelineModule pipelines;
+ LightModule lights;
VelocityModule velocity;
MotionBlurModule motion_blur;
DepthOfField depth_of_field;
@@ -71,8 +73,10 @@ class Instance {
/** True if the grease pencil engine might be running. */
bool gpencil_engine_enabled;
- /* Info string displayed at the top of the render / viewport. */
+ /** Info string displayed at the top of the render / viewport. */
std::string info = "";
+ /** Debug mode from debug value. */
+ eDebugMode debug_mode = eDebugMode::DEBUG_NONE;
public:
Instance()
@@ -80,6 +84,7 @@ class Instance {
sync(*this),
materials(*this),
pipelines(*this),
+ lights(*this),
velocity(*this),
motion_blur(*this),
depth_of_field(*this),
diff --git a/source/blender/draw/engines/eevee_next/eevee_light.cc b/source/blender/draw/engines/eevee_next/eevee_light.cc
new file mode 100644
index 00000000000..dbbf481f3f4
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_light.cc
@@ -0,0 +1,499 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * The light module manages light data buffers and light culling system.
+ */
+
+#include "draw_debug.hh"
+
+#include "eevee_instance.hh"
+
+#include "eevee_light.hh"
+
+namespace blender::eevee {
+
+/* -------------------------------------------------------------------- */
+/** \name LightData
+ * \{ */
+
+/**
+ * Map a DNA light type (plus the area shape, for area lights) to the engine light type.
+ * Any value that is not sun, spot or area is treated as a point light.
+ */
+static eLightType to_light_type(short blender_light_type, short blender_area_type)
+{
+  if (blender_light_type == LA_SUN) {
+    return LIGHT_SUN;
+  }
+  if (blender_light_type == LA_SPOT) {
+    return LIGHT_SPOT;
+  }
+  if (blender_light_type == LA_AREA) {
+    /* Disk and ellipse shapes share the elliptic evaluation, everything else is a rectangle. */
+    const bool is_elliptic = ELEM(blender_area_type, LA_AREA_DISK, LA_AREA_ELLIPSE);
+    return is_elliptic ? LIGHT_ELLIPSE : LIGHT_RECT;
+  }
+  /* `LA_LOCAL` and any unrecognized type. */
+  return LIGHT_POINT;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Light Object
+ * \{ */
+
+/**
+ * Refresh this light from the evaluated object `ob`.
+ * `threshold` is the light power below which the light is considered to have no influence;
+ * it is used to derive the influence radii.
+ */
+void Light::sync(/* ShadowModule &shadows , */ const Object *ob, float threshold)
+{
+  const ::Light *la = (const ::Light *)ob->data;
+
+  /* Influence radii, computed separately for surface and volume contributions. */
+  const float peak_power = max_fff(la->r, la->g, la->b) * fabsf(la->energy / 100.0f);
+  const float peak_power_surface = max_ff(la->diff_fac, la->spec_fac) * peak_power;
+  const float peak_power_volume = la->volume_fac * peak_power;
+
+  const float radius_surface = attenuation_radius_get(la, threshold, peak_power_surface);
+  const float radius_volume = attenuation_radius_get(la, threshold, peak_power_volume);
+
+  this->influence_radius_max = max_ff(radius_surface, radius_volume);
+  /* Clamp before inverting to avoid dividing by zero for lights without influence. */
+  this->influence_radius_invsqr_surface = 1.0f / square_f(max_ff(radius_surface, 1e-8f));
+  this->influence_radius_invsqr_volume = 1.0f / square_f(max_ff(radius_volume, 1e-8f));
+
+  this->color = float3(&la->r) * la->energy;
+
+  /* Store the normalized object matrix; `scale` is filled by the call and consumed by the
+   * shape parameters below. */
+  float scale[3];
+  normalize_m4_m4_ex(this->object_mat.ptr(), ob->obmat, scale);
+  /* Keep the handedness consistent (in case of negatively scaled Z axis). */
+  const float3 right_cross_up = math::cross(float3(this->_right), float3(this->_up));
+  if (math::dot(right_cross_up, float3(this->_back)) < 0.0f) {
+    negate_v3(this->_up);
+  }
+
+  /* Shape parameters must be set before querying the shape power. */
+  shape_parameters_set(la, scale);
+
+  const float power_shape = shape_power_get(la);
+  const float power_point = point_power_get(la);
+  this->diffuse_power = la->diff_fac * power_shape;
+  this->specular_power = la->spec_fac * power_shape;
+  this->transmit_power = la->diff_fac * power_point;
+  this->volume_power = la->volume_fac * power_point;
+
+  const eLightType synced_type = to_light_type(la->type, la->area_shape);
+  if (this->type != synced_type) {
+    /* shadow_discard_safe(shadows); */
+    this->type = synced_type;
+  }
+
+#if 0
+  if (la->mode & LA_SHADOW) {
+    if (la->type == LA_SUN) {
+      if (this->shadow_id == LIGHT_NO_SHADOW) {
+        this->shadow_id = shadows.directionals.alloc();
+      }
+
+      ShadowDirectional &shadow = shadows.directionals[this->shadow_id];
+      shadow.sync(this->object_mat, la->bias * 0.05f, 1.0f);
+    }
+    else {
+      float cone_aperture = DEG2RAD(360.0);
+      if (la->type == LA_SPOT) {
+        cone_aperture = min_ff(DEG2RAD(179.9), la->spotsize);
+      }
+      else if (la->type == LA_AREA) {
+        cone_aperture = DEG2RAD(179.9);
+      }
+
+      if (this->shadow_id == LIGHT_NO_SHADOW) {
+        this->shadow_id = shadows.punctuals.alloc();
+      }
+
+      ShadowPunctual &shadow = shadows.punctuals[this->shadow_id];
+      shadow.sync(this->type,
+                  this->object_mat,
+                  cone_aperture,
+                  la->clipsta,
+                  this->influence_radius_max,
+                  la->bias * 0.05f);
+    }
+  }
+  else {
+    shadow_discard_safe(shadows);
+  }
+#endif
+
+  this->initialized = true;
+}
+
+#if 0
+void Light::shadow_discard_safe(ShadowModule &shadows)
+{
+ if (shadow_id != LIGHT_NO_SHADOW) {
+ if (this->type != LIGHT_SUN) {
+ shadows.punctuals.free(shadow_id);
+ }
+ else {
+ shadows.directionals.free(shadow_id);
+ }
+ shadow_id = LIGHT_NO_SHADOW;
+ }
+}
+#endif
+
+/* Returns the attenuation (influence) radius of the light as a plain distance; it is NOT
+ * inverted nor squared here, `Light::sync()` does that on the returned value.
+ * - Sun lights: 1e16 when `light_power` is above 1e-5, otherwise 0.
+ * - Custom attenuation enabled (`LA_CUSTOM_ATTENUATION`): the light's `att_dist`.
+ * - Otherwise: `sqrt(light_power / light_threshold)`. */
+float Light::attenuation_radius_get(const ::Light *la, float light_threshold, float light_power)
+{
+  if (la->type == LA_SUN) {
+    return (light_power > 1e-5f) ? 1e16f : 0.0f;
+  }
+
+  if (la->mode & LA_CUSTOM_ATTENUATION) {
+    return la->att_dist;
+  }
+  /* Compute the distance (using the inverse square law)
+   * at which the light power reaches the light_threshold. */
+  /* TODO take area light scale into account. */
+  return sqrtf(light_power / light_threshold);
+}
+
+/**
+ * Fill the shape related members (area sizes, spot parameters, squared radius) from the
+ * light settings `la` and the object `scale`.
+ */
+void Light::shape_parameters_set(const ::Light *la, const float scale[3])
+{
+  if (la->type == LA_AREA) {
+    /* Only rectangle and ellipse shapes have an independent Y size. */
+    const bool has_separate_y = ELEM(la->area_shape, LA_AREA_RECT, LA_AREA_ELLIPSE);
+    const float size_y = has_separate_y ? la->area_sizey : la->area_size;
+    /* Scaled half sizes, clamped to a minimum. */
+    _area_size_x = max_ff(0.003f, la->area_size * scale[0] * 0.5f);
+    _area_size_y = max_ff(0.003f, size_y * scale[1] * 0.5f);
+    /* For volume point lighting. */
+    const float volume_radius = max_ff(0.001f, hypotf(_area_size_x, _area_size_y) * 0.5f);
+    radius_squared = square_f(volume_radius);
+    return;
+  }
+
+  if (la->type == LA_SPOT) {
+    /* Spot size & blend. */
+    spot_size_inv[0] = scale[2] / scale[0];
+    spot_size_inv[1] = scale[2] / scale[1];
+    const float cos_half_angle = cosf(la->spotsize * 0.5f);
+    const float blend_range = (1.0f - cos_half_angle) * la->spotblend;
+    _spot_mul = 1.0f / max_ff(1e-8f, blend_range);
+    _spot_bias = -cos_half_angle * _spot_mul;
+    spot_tan = tanf(min_ff(la->spotsize * 0.5f, M_PI_2 - 0.0001f));
+  }
+
+  /* Sun lights derive their radius from the angular size, other lights use the size directly. */
+  const float raw_radius = (la->type == LA_SUN) ?
+                               tanf(min_ff(la->sun_angle, DEG2RADF(179.9f)) / 2.0f) :
+                               la->area_size;
+  _area_size_y = _area_size_x = max_ff(0.001f, raw_radius);
+  radius_squared = square_f(_area_size_x);
+}
+
+/**
+ * Return the power normalization factor for the light shape, so that the illumination
+ * power stays constant when the shape size changes.
+ * Expects the shape parameters to have been set beforehand.
+ */
+float Light::shape_power_get(const ::Light *la)
+{
+  if (la->type == LA_AREA) {
+    const float area = _area_size_x * _area_size_y;
+    float power = 1.0f / (area * 4.0f * float(M_PI));
+    /* FIXME: Empirical factor, fitted to match Cycles power. */
+    power *= 0.8f;
+    if (ELEM(la->area_shape, LA_AREA_DISK, LA_AREA_ELLIPSE)) {
+      /* An ellipse covers less area than its surrounding rectangle, compensate for it. */
+      power *= 4.0f / M_PI;
+    }
+    return power;
+  }
+
+  if (ELEM(la->type, LA_SPOT, LA_LOCAL)) {
+    return 1.0f / (4.0f * square_f(_radius) * float(M_PI * M_PI));
+  }
+
+  /* Sun lights and any other type. */
+  float power = 1.0f / (square_f(_radius) * float(M_PI));
+  /* Bring the illumination power closer to Cycles for bigger radii. Cycles uses a cos^3 term
+   * that cannot be reproduced here, so the light power is scaled instead. The function is the
+   * result of a rough manual fitting. */
+  /* Simplification of: power *= 1 + r²/2 */
+  power += 1.0f / (2.0f * M_PI);
+  return power;
+}
+
+/**
+ * Return the power factor used when the light is evaluated as a point light
+ * (volume lighting), i.e. without the shape power.
+ */
+float Light::point_power_get(const ::Light *la)
+{
+  if (la->type == LA_AREA) {
+    /* Match cycles. Empirical fit... must correspond to some constant. */
+    float power = 0.0792f * M_PI;
+
+    /* Correction for the area light most representative point trick. The fit was found by
+     * reducing the average error compared to cycles. */
+    const float area = _area_size_x * _area_size_y;
+    const float lerp_fac = M_PI_2 / (M_PI_2 + sqrtf(area));
+    /* Lerp between 1.0 and the limit (1 / pi). */
+    power *= lerp_fac + (1.0f - lerp_fac) * M_1_PI;
+    return power;
+  }
+
+  if (ELEM(la->type, LA_SPOT, LA_LOCAL)) {
+    /* Match cycles. Empirical fit... must correspond to some constant. */
+    return 0.0792f;
+  }
+
+  /* Sun lights and any other type. */
+  return 1.0f;
+}
+
+/** Draw the influence sphere of the light. Only does something when `DEBUG` is defined. */
+void Light::debug_draw()
+{
+#if defined(DEBUG)
+  const float4 sphere_color(0.8f, 0.3f, 0.0f, 1.0f);
+  drw_debug_sphere(_position, influence_radius_max, sphere_color);
+#endif
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name LightModule
+ * \{ */
+
+/** Reset the per-sync light counters and refresh the scene-level light settings. */
+void LightModule::begin_sync()
+{
+  /* The counters are rebuilt by `sync_light()`. */
+  sun_lights_len_ = 0;
+  local_lights_len_ = 0;
+
+  use_scene_lights_ = inst_.use_scene_lights();
+
+  /* Read in begin_sync so the threshold can be animated. */
+  const float scene_threshold = max_ff(1e-16f, inst_.scene->eevee.light_threshold);
+  const bool threshold_changed = assign_if_different(light_threshold_, scene_threshold);
+  if (threshold_changed) {
+    inst_.sampling.reset();
+  }
+}
+
+/**
+ * Register the light object `ob` for this sync cycle and update its data if needed.
+ * Does nothing when scene lights are disabled.
+ */
+void LightModule::sync_light(const Object *ob, ObjectHandle &handle)
+{
+  if (!use_scene_lights_) {
+    return;
+  }
+
+  Light &light = light_map_.lookup_or_add_default(handle.object_key);
+  /* Tag as alive, untagged lights are removed in `end_sync()`. */
+  light.used = true;
+
+  const bool needs_sync = (handle.recalc != 0) || !light.initialized;
+  if (needs_sync) {
+    light.sync(/* inst_.shadows, */ ob, light_threshold_);
+  }
+
+  if (light.type == LIGHT_SUN) {
+    sun_lights_len_ += 1;
+  }
+  else {
+    local_lights_len_ += 1;
+  }
+}
+
+/**
+ * Finalize the light sync:
+ * - Flatten `light_map_` into `light_buf_` (sun lights first, local lights after them).
+ * - Remove the lights that were not synced during this cycle.
+ * - Size the culling buffers and compute the culling tile layout.
+ * - (Re)create the culling and debug passes.
+ */
+void LightModule::end_sync()
+{
+  // ShadowModule &shadows = inst_.shadows;
+
+  /* NOTE: We resize this buffer before removing deleted lights. */
+  int lights_allocated = ceil_to_multiple_u(max_ii(light_map_.size(), 1), LIGHT_CHUNK);
+  light_buf_.resize(lights_allocated);
+
+  /* Track light deletion. */
+  Vector<ObjectKey, 0> deleted_keys;
+  /* Indices inside GPU data array. */
+  int sun_lights_idx = 0;
+  int local_lights_idx = sun_lights_len_;
+
+  /* Fill GPU data with scene data. */
+  for (auto item : light_map_.items()) {
+    Light &light = item.value;
+
+    if (!light.used) {
+      /* Deleted light. */
+      deleted_keys.append(item.key);
+      // light.shadow_discard_safe(shadows);
+      continue;
+    }
+
+    int dst_idx = (light.type == LIGHT_SUN) ? sun_lights_idx++ : local_lights_idx++;
+    /* Put all light data into global data SSBO. */
+    light_buf_[dst_idx] = light;
+
+#if 0
+    if (light.shadow_id != LIGHT_NO_SHADOW) {
+      if (light.type == LIGHT_SUN) {
+        light_buf_[dst_idx].shadow_data = shadows.directionals[light.shadow_id];
+      }
+      else {
+        light_buf_[dst_idx].shadow_data = shadows.punctuals[light.shadow_id];
+      }
+    }
+#endif
+    /* Untag for next sync. */
+    light.used = false;
+  }
+  /* This scene data buffer is then immutable after this point. */
+  light_buf_.push_update();
+
+  /* Removal is deferred to here so the map is not modified while being iterated. */
+  for (auto key : deleted_keys) {
+    light_map_.remove(key);
+  }
+
+  /* Update sampling on deletion or un-hiding (use_scene_lights). */
+  if (assign_if_different(light_map_size_, light_map_.size())) {
+    inst_.sampling.reset();
+  }
+
+  /* If exceeding the limit, just trim off the excess to avoid glitchy rendering. */
+  if (sun_lights_len_ + local_lights_len_ > CULLING_MAX_ITEM) {
+    sun_lights_len_ = min_ii(sun_lights_len_, CULLING_MAX_ITEM);
+    local_lights_len_ = min_ii(local_lights_len_, CULLING_MAX_ITEM - sun_lights_len_);
+    inst_.info = "Error: Too many lights in the scene.";
+  }
+  lights_len_ = sun_lights_len_ + local_lights_len_;
+
+  /* Resize to the actual number of lights after pruning. */
+  lights_allocated = ceil_to_multiple_u(max_ii(lights_len_, 1), LIGHT_CHUNK);
+  culling_key_buf_.resize(lights_allocated);
+  culling_zdist_buf_.resize(lights_allocated);
+  culling_light_buf_.resize(lights_allocated);
+
+  {
+    /* Compute tile size and total word count. */
+    uint word_per_tile = divide_ceil_u(max_ii(lights_len_, 1), 32);
+    int2 render_extent = inst_.film.render_extent_get();
+    int2 tiles_extent;
+    /* Start at 32 (16 doubled by the first iteration) as this is likely to be the maximum
+     * tile size used by hardware or compute shading. */
+    uint tile_size = 16;
+    /* Set once a tile size respecting `max_tile_count_threshold` was found. The `continue`
+     * below jumps straight to the loop condition: without this flag the condition would be
+     * evaluated against a stale `total_word_count_` (0 on the first sync) and the loop could
+     * exit with too many tiles and an outdated word count. */
+    bool tile_size_valid = false;
+    do {
+      tile_size *= 2;
+      tiles_extent = math::divide_ceil(render_extent, int2(tile_size));
+      uint tile_count = tiles_extent.x * tiles_extent.y;
+      if (tile_count > max_tile_count_threshold) {
+        /* Too many tiles, try again with a bigger tile size. */
+        continue;
+      }
+      total_word_count_ = tile_count * word_per_tile;
+      tile_size_valid = true;
+
+    } while (!tile_size_valid || total_word_count_ > max_word_count_threshold);
+    /* Keep aligned with storage buffer requirements. */
+    total_word_count_ = ceil_to_multiple_u(total_word_count_, 32);
+
+    culling_data_buf_.tile_word_len = word_per_tile;
+    culling_data_buf_.tile_size = tile_size;
+    culling_data_buf_.tile_x_len = tiles_extent.x;
+    culling_data_buf_.tile_y_len = tiles_extent.y;
+    culling_data_buf_.items_count = lights_len_;
+    culling_data_buf_.local_lights_len = local_lights_len_;
+    culling_data_buf_.sun_lights_len = sun_lights_len_;
+  }
+  culling_tile_buf_.resize(total_word_count_);
+
+  culling_pass_sync();
+  debug_pass_sync();
+}
+
+void LightModule::culling_pass_sync()
+{
+  uint safe_lights_len = max_ii(lights_len_, 1);
+  uint culling_select_dispatch_size = divide_ceil_u(safe_lights_len, CULLING_SELECT_GROUP_SIZE);
+  uint culling_sort_dispatch_size = divide_ceil_u(safe_lights_len, CULLING_SORT_GROUP_SIZE);
+  uint culling_tile_dispatch_size = divide_ceil_u(total_word_count_, CULLING_TILE_GROUP_SIZE);
+
+  /* Create the four culling compute passes (select, sort, Z-binning, tile) that `set_view()`
+   * submits in this same order. Each dispatch is followed by a shader storage barrier.
+   * NOTE: Only `culling_data_buf_` is bound by reference (`_ref`); it is written and pushed
+   * again in `set_view()`. The other buffers are bound directly. */
+  {
+    DRW_PASS_CREATE(culling_select_ps_, DRW_STATE_NO_DRAW);
+    GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_SELECT);
+    DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_select_ps_);
+    DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
+    DRW_shgroup_storage_block(grp, "in_light_buf", light_buf_);
+    DRW_shgroup_storage_block(grp, "out_light_buf", culling_light_buf_);
+    DRW_shgroup_storage_block(grp, "out_zdist_buf", culling_zdist_buf_);
+    DRW_shgroup_storage_block(grp, "out_key_buf", culling_key_buf_);
+    DRW_shgroup_call_compute(grp, culling_select_dispatch_size, 1, 1);
+    DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
+  }
+  {
+    DRW_PASS_CREATE(culling_sort_ps_, DRW_STATE_NO_DRAW);
+    GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_SORT);
+    DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_sort_ps_);
+    DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
+    DRW_shgroup_storage_block(grp, "in_light_buf", light_buf_);
+    DRW_shgroup_storage_block(grp, "out_light_buf", culling_light_buf_);
+    DRW_shgroup_storage_block(grp, "in_zdist_buf", culling_zdist_buf_);
+    DRW_shgroup_storage_block(grp, "in_key_buf", culling_key_buf_);
+    DRW_shgroup_call_compute(grp, culling_sort_dispatch_size, 1, 1);
+    DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
+  }
+  {
+    DRW_PASS_CREATE(culling_zbin_ps_, DRW_STATE_NO_DRAW);
+    GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_ZBIN);
+    DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_zbin_ps_);
+    DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
+    DRW_shgroup_storage_block(grp, "light_buf", culling_light_buf_);
+    DRW_shgroup_storage_block(grp, "out_zbin_buf", culling_zbin_buf_);
+    DRW_shgroup_call_compute(grp, 1, 1, 1);
+    DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
+  }
+  {
+    DRW_PASS_CREATE(culling_tile_ps_, DRW_STATE_NO_DRAW);
+    GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_TILE);
+    DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_tile_ps_);
+    DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
+    DRW_shgroup_storage_block(grp, "light_buf", culling_light_buf_);
+    DRW_shgroup_storage_block(grp, "out_light_tile_buf", culling_tile_buf_);
+    DRW_shgroup_call_compute(grp, culling_tile_dispatch_size, 1, 1);
+    DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
+  }
+}
+
+/**
+ * Create the light culling debug pass when the light culling debug mode is active,
+ * otherwise clear it so that `debug_draw()` does nothing.
+ */
+void LightModule::debug_pass_sync()
+{
+  const bool culling_debug_enabled = (inst_.debug_mode == eDebugMode::DEBUG_LIGHT_CULLING);
+  if (!culling_debug_enabled) {
+    debug_draw_ps_ = nullptr;
+    return;
+  }
+
+  debug_draw_ps_ = DRW_pass_create("LightCulling.Debug", DRW_STATE_WRITE_COLOR);
+  GPUShader *debug_sh = inst_.shaders.static_shader_get(LIGHT_CULLING_DEBUG);
+  DRWShadingGroup *shgrp = DRW_shgroup_create(debug_sh, debug_draw_ps_);
+  /* Culling results. */
+  DRW_shgroup_storage_block_ref(shgrp, "light_buf", &culling_light_buf_);
+  DRW_shgroup_storage_block_ref(shgrp, "light_cull_buf", &culling_data_buf_);
+  DRW_shgroup_storage_block_ref(shgrp, "light_zbin_buf", &culling_zbin_buf_);
+  DRW_shgroup_storage_block_ref(shgrp, "light_tile_buf", &culling_tile_buf_);
+  /* Scene depth. */
+  DRW_shgroup_uniform_texture_ref(shgrp, "depth_tx", &inst_.render_buffers.depth_tx);
+  /* A single procedural triangle. */
+  DRW_shgroup_call_procedural_triangles(shgrp, nullptr, 1);
+}
+
+/**
+ * Upload the view dependent culling parameters and run the culling passes for `view`.
+ * `extent` is the pixel size used to convert tile coordinates to UVs.
+ */
+void LightModule::set_view(const DRWView *view, const int2 extent)
+{
+  const float view_far = DRW_view_far_distance_get(view);
+  const float view_near = DRW_view_near_distance_get(view);
+
+  /* Z-bin mapping over the view depth range. */
+  const float zbin_scale = -CULLING_ZBIN_COUNT / fabsf(view_far - view_near);
+  culling_data_buf_.zbin_scale = zbin_scale;
+  culling_data_buf_.zbin_bias = -view_near * zbin_scale;
+  culling_data_buf_.tile_to_uv_fac = (culling_data_buf_.tile_size / float2(extent));
+  /* Reset before the passes run. */
+  culling_data_buf_.visible_count = 0;
+  culling_data_buf_.push_update();
+
+  DRW_stats_group_start("Light Culling");
+
+  DRW_view_set_active(view);
+  /* Order matters: select, sort, Z-binning, then tile intersection. */
+  DRW_draw_pass(culling_select_ps_);
+  DRW_draw_pass(culling_sort_ps_);
+  DRW_draw_pass(culling_zbin_ps_);
+  DRW_draw_pass(culling_tile_ps_);
+
+  DRW_stats_group_end();
+}
+
+/** Draw the culling debug pass into `view_fb`, if the pass was created during sync. */
+void LightModule::debug_draw(GPUFrameBuffer *view_fb)
+{
+  if (debug_draw_ps_ != nullptr) {
+    GPU_framebuffer_bind(view_fb);
+    DRW_draw_pass(debug_draw_ps_);
+  }
+}
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_light.hh b/source/blender/draw/engines/eevee_next/eevee_light.hh
new file mode 100644
index 00000000000..c2d7aad34ae
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_light.hh
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * The light module manages light data buffers and light culling system.
+ *
+ * The culling follows the principles of Tiled Culling + Z binning from:
+ * "Improved Culling for Tiled and Clustered Rendering"
+ * by Michal Drobot
+ * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf
+ *
+ * The culling is separated in 4 compute phases:
+ * - View Culling (select pass): Create a z distance and a index buffer of visible lights.
+ * - Light sorting: Outputs visible lights sorted by Z distance.
+ * - Z binning: Compute the Z bins min/max light indices.
+ * - Tile intersection: Fine grained 2D culling of each lights outputting a bitmap per tile.
+ */
+
+#pragma once
+
+#include "BLI_bitmap.h"
+#include "BLI_vector.hh"
+#include "DNA_light_types.h"
+
+#include "eevee_camera.hh"
+#include "eevee_sampling.hh"
+#include "eevee_shader.hh"
+#include "eevee_shader_shared.hh"
+#include "eevee_sync.hh"
+
+namespace blender::eevee {
+
+class Instance;
+
+/* -------------------------------------------------------------------- */
+/** \name Light Object
+ * \{ */
+
+/**
+ * Light object data kept between syncs. Extends `LightData` with the bookkeeping flags
+ * used by `LightModule`.
+ */
+struct Light : public LightData {
+ public:
+  /** Set to true at the end of `sync()`. */
+  bool initialized = false;
+  /** Tagged by `LightModule::sync_light()`, untagged in `LightModule::end_sync()`. */
+  bool used = false;
+
+  /** A new light starts without shadow. */
+  Light()
+  {
+    shadow_id = LIGHT_NO_SHADOW;
+  }
+
+  /** Update the light data from the object `ob`. */
+  void sync(/* ShadowModule &shadows, */ const Object *ob, float threshold);
+
+  // void shadow_discard_safe(ShadowModule &shadows);
+
+  /** Draw the influence sphere of the light (debug builds only). */
+  void debug_draw();
+
+ private:
+  /** Helpers used by `sync()`. */
+  float attenuation_radius_get(const ::Light *la, float light_threshold, float light_power);
+  void shape_parameters_set(const ::Light *la, const float scale[3]);
+  float shape_power_get(const ::Light *la);
+  float point_power_get(const ::Light *la);
+};
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name LightModule
+ * \{ */
+
+/**
+ * The light module manages light data buffers and light culling system.
+ *
+ * Usage per sync cycle: `begin_sync()`, one `sync_light()` per light object, then
+ * `end_sync()`. `set_view()` runs the culling passes for a view and `bind_resources()`
+ * binds the culling results to a shading group.
+ */
+class LightModule {
+  // friend ShadowModule;
+
+ private:
+  /* Keep tile count reasonable for memory usage and 2D culling performance. */
+  static constexpr uint max_memory_threshold = 32 * 1024 * 1024; /* 32 MiB */
+  static constexpr uint max_word_count_threshold = max_memory_threshold / sizeof(uint);
+  static constexpr uint max_tile_count_threshold = 8192;
+
+  Instance &inst_;
+
+  /** Map of light objects data. Converted to flat array each frame. */
+  Map<ObjectKey, Light> light_map_;
+  /** Flat array sent to GPU, populated from light_map_. Source buffer for light culling. */
+  LightDataBuf light_buf_ = {"Lights_no_cull"};
+  /** Recorded size of light_map_ (after pruning) to detect deletion. */
+  int64_t light_map_size_ = 0;
+  /** Luminous intensity to consider the light boundary at. Used for culling. */
+  float light_threshold_ = 0.01f;
+  /** If false, will prevent all scene light from being synced. */
+  bool use_scene_lights_ = false;
+  /** Number of sun lights synced during the last sync. Used as offset of the first local
+   * light inside `light_buf_`. `local_lights_len_` is the matching count of non-sun lights. */
+  int sun_lights_len_ = 0;
+  int local_lights_len_ = 0;
+  /** Sun plus local lights count for convenience. */
+  int lights_len_ = 0;
+
+  /**
+   * Light Culling
+   */
+
+  /** LightData buffer used for rendering. Filled by the culling pass. */
+  LightDataBuf culling_light_buf_ = {"Lights_culled"};
+  /** Culling parameters: tile layout, light counts and Z-bin scale / bias. */
+  LightCullingDataBuf culling_data_buf_ = {"LightCull_data"};
+  /** Z-distance matching the key for each visible lights. Used for sorting. */
+  LightCullingZdistBuf culling_zdist_buf_ = {"LightCull_zdist"};
+  /** Key buffer containing only visible lights indices. Used for sorting. */
+  LightCullingKeyBuf culling_key_buf_ = {"LightCull_key"};
+  /** Zbins containing min and max light index for each Z bin. */
+  LightCullingZbinBuf culling_zbin_buf_ = {"LightCull_zbin"};
+  /** Bitmap of lights touching each tiles. */
+  LightCullingTileBuf culling_tile_buf_ = {"LightCull_tile"};
+  /** Culling compute passes. */
+  DRWPass *culling_select_ps_ = nullptr;
+  DRWPass *culling_sort_ps_ = nullptr;
+  DRWPass *culling_zbin_ps_ = nullptr;
+  DRWPass *culling_tile_ps_ = nullptr;
+  /** Total number of words the tile buffer needs to contain for the render resolution. */
+  uint total_word_count_ = 0;
+
+  /** Debug Culling visualization. The pass is null when the debug mode is not active.
+   * NOTE(review): `input_depth_tx_` is not referenced by the light module implementation
+   * that accompanies this header, confirm it is still needed. */
+  DRWPass *debug_draw_ps_ = nullptr;
+  GPUTexture *input_depth_tx_ = nullptr;
+
+ public:
+  LightModule(Instance &inst) : inst_(inst){};
+  ~LightModule(){};
+
+  void begin_sync();
+  void sync_light(const Object *ob, ObjectHandle &handle);
+  void end_sync();
+
+  /**
+   * Update acceleration structure for the given view.
+   * Uploads the view dependent culling data and runs the culling passes.
+   */
+  void set_view(const DRWView *view, const int2 extent);
+
+  void debug_draw(GPUFrameBuffer *view_fb);
+
+  void bind_resources(DRWShadingGroup *grp)
+  {
+    DRW_shgroup_storage_block_ref(grp, "light_buf", &culling_light_buf_);
+    DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
+    DRW_shgroup_storage_block_ref(grp, "light_zbin_buf", &culling_zbin_buf_);
+    DRW_shgroup_storage_block_ref(grp, "light_tile_buf", &culling_tile_buf_);
+#if 0
+    DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", inst_.shadows.atlas_tx_get());
+    DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", inst_.shadows.tilemap_tx_get());
+#endif
+  }
+
+ private:
+  void culling_pass_sync();
+  void debug_pass_sync();
+};
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc
index db169ec361f..fe7d02a855c 100644
--- a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc
@@ -101,12 +101,12 @@ DRWShadingGroup *ForwardPipeline::material_opaque_add(::Material *blender_mat, G
{
RenderBuffers &rbufs = inst_.render_buffers;
DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? opaque_culled_ps_ : opaque_ps_;
- // LightModule &lights = inst_.lights;
+ LightModule &lights = inst_.lights;
// LightProbeModule &lightprobes = inst_.lightprobes;
// RaytracingModule &raytracing = inst_.raytracing;
// eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass);
- // lights.shgroup_resources(grp);
+ lights.bind_resources(grp);
// DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get());
// DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get());
// DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get());
@@ -163,12 +163,12 @@ DRWShadingGroup *ForwardPipeline::material_transparent_add(::Material *blender_m
GPUMaterial *gpumat)
{
RenderBuffers &rbufs = inst_.render_buffers;
- // LightModule &lights = inst_.lights;
+ LightModule &lights = inst_.lights;
// LightProbeModule &lightprobes = inst_.lightprobes;
// RaytracingModule &raytracing = inst_.raytracing;
// eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_);
- // lights.shgroup_resources(grp);
+ lights.bind_resources(grp);
// DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get());
// DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get());
// DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get());
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc
index 357f2796a7e..a535d3407ac 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc
@@ -124,6 +124,16 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
return "eevee_depth_of_field_tiles_dilate_minmax";
case DOF_TILES_FLATTEN:
return "eevee_depth_of_field_tiles_flatten";
+ case LIGHT_CULLING_DEBUG:
+ return "eevee_light_culling_debug";
+ case LIGHT_CULLING_SELECT:
+ return "eevee_light_culling_select";
+ case LIGHT_CULLING_SORT:
+ return "eevee_light_culling_sort";
+ case LIGHT_CULLING_TILE:
+ return "eevee_light_culling_tile";
+ case LIGHT_CULLING_ZBIN:
+ return "eevee_light_culling_zbin";
/* To avoid compiler warning about missing case. */
case MAX_SHADER_TYPE:
return "";
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.hh b/source/blender/draw/engines/eevee_next/eevee_shader.hh
index dd6b9c9d4ab..5b43a1abf43 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader.hh
@@ -47,6 +47,12 @@ enum eShaderType {
DOF_TILES_DILATE_MINMAX,
DOF_TILES_FLATTEN,
+ LIGHT_CULLING_DEBUG,
+ LIGHT_CULLING_SELECT,
+ LIGHT_CULLING_SORT,
+ LIGHT_CULLING_TILE,
+ LIGHT_CULLING_ZBIN,
+
MOTION_BLUR_GATHER,
MOTION_BLUR_TILE_DILATE,
MOTION_BLUR_TILE_FLATTEN_RENDER,
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
index fe36cb1a17c..885317fc673 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
@@ -31,6 +31,52 @@ constexpr eGPUSamplerState with_filter = GPU_SAMPLER_FILTER;
#define UBO_MIN_MAX_SUPPORTED_SIZE 1 << 14
/* -------------------------------------------------------------------- */
+/** \name Debug Mode
+ * \{ */
+
+/** These are just to make more sense of G.debug_value's values. Reserved range is 1-30.
+ * Only values 0 to 7 are assigned here. */
+enum eDebugMode : uint32_t {
+ DEBUG_NONE = 0u,
+ /**
+ * Gradient showing light evaluation hotspots.
+ */
+ DEBUG_LIGHT_CULLING = 1u,
+ /**
+ * Tilemaps to screen. Is also present in other modes.
+ * - Black pixels, no pages allocated.
+ * - Green pixels, pages cached.
+ * - Red pixels, pages allocated.
+ */
+ DEBUG_SHADOW_TILEMAPS = 2u,
+ /**
+ * Random color per page. Validates page density allocation and sampling.
+ */
+ DEBUG_SHADOW_PAGES = 3u,
+ /**
+ * Outputs random color per tilemap (or tilemap level). Validates tilemaps coverage.
+ * Black means not covered by any tilemap LOD of the shadow.
+ */
+ DEBUG_SHADOW_LOD = 4u,
+ /**
+ * Outputs white pixels for pages allocated and black pixels for unused pages.
+ * This needs DEBUG_SHADOW_PAGE_ALLOCATION_ENABLED defined in order to work.
+ */
+ DEBUG_SHADOW_PAGE_ALLOCATION = 5u,
+ /**
+ * Outputs the tilemap atlas. Default tilemap is too big for the usual screen resolution.
+ * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option.
+ */
+ DEBUG_SHADOW_TILE_ALLOCATION = 6u,
+ /**
+ * Visualize linear depth stored in the atlas regions of the active light.
+ * This way, one can check if the rendering, the copying and the shadow sampling functions work.
+ */
+ DEBUG_SHADOW_SHADOW_DEPTH = 7u
+};
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name Sampling
* \{ */
@@ -460,6 +506,113 @@ static inline float circle_to_polygon_angle(float sides_count, float theta)
/** \} */
/* -------------------------------------------------------------------- */
+/** \name Light Culling
+ * \{ */
+
+/* Number of items we can cull. Limited by how we store CullingZBin. */
+#define CULLING_MAX_ITEM 65536
+/* Fine grained subdivision in the Z direction. Limited by the LDS in z-binning compute shader. */
+#define CULLING_ZBIN_COUNT 4096
+/* Max tile map resolution per axis. */
+#define CULLING_TILE_RES 16
+
+struct LightCullingData {
+ /** Scale applied to tile pixel coordinates to get target UV coordinate. */
+ float2 tile_to_uv_fac;
+ /** Scale and bias applied to linear Z to get zbin. */
+ float zbin_scale;
+ float zbin_bias;
+ /** Valid item count in the source data array. */
+ uint items_count;
+ /** Items that are processed by the 2.5D culling. */
+ uint local_lights_len;
+ /** Items that are **NOT** processed by the 2.5D culling (i.e: Sun Lights). */
+ uint sun_lights_len;
+ /** Number of items that pass the first culling test. */
+ uint visible_count;
+ /** Extent of one square tile in pixels. */
+ float tile_size;
+ /** Number of tiles on the X/Y axis. */
+ uint tile_x_len;
+ uint tile_y_len;
+ /** Number of words per tile. Depends on the maximum number of lights. */
+ uint tile_word_len;
+};
+BLI_STATIC_ASSERT_ALIGN(LightCullingData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Lights
+ * \{ */
+
+#define LIGHT_NO_SHADOW -1
+
+enum eLightType : uint32_t {
+ LIGHT_SUN = 0u,
+ LIGHT_POINT = 1u,
+ LIGHT_SPOT = 2u,
+ LIGHT_RECT = 3u,
+ LIGHT_ELLIPSE = 4u
+};
+
+static inline bool is_area_light(eLightType type)
+{
+ return type >= LIGHT_RECT; /* Relies on LIGHT_RECT and LIGHT_ELLIPSE being the highest values of eLightType. */
+}
+
+struct LightData {
+ /** Normalized object matrix. Last column contains data accessible using the following macros. */
+ float4x4 object_mat;
+ /** Packed data in the last column of the object_mat. */
+#define _area_size_x object_mat[0][3]
+#define _area_size_y object_mat[1][3]
+#define _radius _area_size_x
+#define _spot_mul object_mat[2][3]
+#define _spot_bias object_mat[3][3]
+ /** Aliases for axes. The shader variant swizzles `.xyz`, the other one exposes the whole vector. */
+#ifndef USE_GPU_SHADER_CREATE_INFO
+# define _right object_mat[0]
+# define _up object_mat[1]
+# define _back object_mat[2]
+# define _position object_mat[3]
+#else
+# define _right object_mat[0].xyz
+# define _up object_mat[1].xyz
+# define _back object_mat[2].xyz
+# define _position object_mat[3].xyz
+#endif
+ /** Influence radius (inverted and squared) adjusted for Surface / Volume power. */
+ float influence_radius_invsqr_surface;
+ float influence_radius_invsqr_volume;
+ /** Maximum influence radius. Used for culling. */
+ float influence_radius_max;
+ /** Index of the shadow struct on CPU. LIGHT_NO_SHADOW (-1) means no shadow. */
+ int shadow_id;
+ /** NOTE: It is ok to use float3 here. A float is declared right after it.
+ * float3 is also aligned to 16 bytes. */
+ float3 color;
+ /** Power depending on shader type. */
+ float diffuse_power;
+ float specular_power;
+ float volume_power;
+ float transmit_power;
+ /** Special radius factor for point lighting. */
+ float radius_squared;
+ /** Light Type. */
+ eLightType type;
+ /** Spot angle tangent. */
+ float spot_tan;
+ /** Spot size. Aligned to size of float2. */
+ float2 spot_size_inv;
+ /** Associated shadow data. Only valid if shadow_id is not LIGHT_NO_SHADOW. */
+ // ShadowData shadow_data;
+};
+BLI_STATIC_ASSERT_ALIGN(LightData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name Ray-Tracing
* \{ */
@@ -480,6 +633,34 @@ enum eClosureBits : uint32_t {
/** \} */
/* -------------------------------------------------------------------- */
+/** \name Subsurface
+ * \{ */
+
+#define SSS_SAMPLE_MAX 64
+#define SSS_BURLEY_TRUNCATE 16.0
+#define SSS_BURLEY_TRUNCATE_CDF 0.9963790093708328
+#define SSS_TRANSMIT_LUT_SIZE 64.0
+#define SSS_TRANSMIT_LUT_RADIUS 1.218
+#define SSS_TRANSMIT_LUT_SCALE ((SSS_TRANSMIT_LUT_SIZE - 1.0) / float(SSS_TRANSMIT_LUT_SIZE))
+#define SSS_TRANSMIT_LUT_BIAS (0.5 / float(SSS_TRANSMIT_LUT_SIZE))
+#define SSS_TRANSMIT_LUT_STEP_RES 64.0
+
+struct SubsurfaceData {
+ /** xy: 2D sample position [-1..1], zw: sample_bounds. */
+ /* NOTE(fclem) Using float4 for alignment. */
+ float4 samples[SSS_SAMPLE_MAX];
+ /** Sample index after which samples are not randomly rotated anymore. */
+ int jitter_threshold;
+ /** Number of samples precomputed in the set. */
+ int sample_len;
+ int _pad0; /* Padding: keeps the struct size a multiple of 16 bytes (see assert below). */
+ int _pad1;
+};
+BLI_STATIC_ASSERT_ALIGN(SubsurfaceData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name Utility Texture
* \{ */
@@ -518,6 +699,12 @@ float4 utility_tx_sample(sampler2DArray util_tx, float2 uv, float layer)
using AOVsInfoDataBuf = draw::StorageBuffer<AOVsInfoData>;
using CameraDataBuf = draw::UniformBuffer<CameraData>;
+using LightDataBuf = draw::StorageArrayBuffer<LightData, LIGHT_CHUNK>;
+using LightCullingDataBuf = draw::StorageBuffer<LightCullingData>;
+using LightCullingKeyBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>;
+using LightCullingTileBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>;
+using LightCullingZbinBuf = draw::StorageArrayBuffer<uint, CULLING_ZBIN_COUNT, true>;
+using LightCullingZdistBuf = draw::StorageArrayBuffer<float, LIGHT_CHUNK, true>;
using DepthOfFieldDataBuf = draw::UniformBuffer<DepthOfFieldData>;
using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer<ScatterRect, 16, true>;
using DrawIndirectBuf = draw::StorageBuffer<DrawCommand, true>;
diff --git a/source/blender/draw/engines/eevee_next/eevee_view.cc b/source/blender/draw/engines/eevee_next/eevee_view.cc
index c195f68380c..b7154465a70 100644
--- a/source/blender/draw/engines/eevee_next/eevee_view.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_view.cc
@@ -118,6 +118,9 @@ void ShadingView::render()
inst_.pipelines.world.render();
+ /* TODO(fclem): Move it after the first prepass (and hiz update) once pipeline is stabilized. */
+ inst_.lights.set_view(render_view_, extent_);
+
// inst_.pipelines.deferred.render(
// render_view_, rt_buffer_opaque_, rt_buffer_refract_, depth_tx_, combined_tx_);
@@ -128,13 +131,14 @@ void ShadingView::render()
inst_.pipelines.forward.render(
render_view_, prepass_fb_, combined_fb_, rbufs.depth_tx, rbufs.combined_tx);
- // inst_.lights.debug_draw(view_fb_);
- // inst_.shadows.debug_draw(view_fb_);
+ inst_.lights.debug_draw(combined_fb_);
GPUTexture *combined_final_tx = render_postfx(rbufs.combined_tx);
inst_.film.accumulate(sub_view_, combined_final_tx);
+ // inst_.shadows.debug_draw();
+
rbufs.release();
postfx_tx_.release();
@@ -176,13 +180,10 @@ void ShadingView::update_view()
window_translate_m4(winmat.ptr(), winmat.ptr(), UNPACK2(jitter));
DRW_view_update_sub(sub_view_, viewmat.ptr(), winmat.ptr());
- /* FIXME(fclem): The offset may be is noticeably large and the culling might make object pop
+ /* FIXME(fclem): The offset may be noticeably large and the culling might make object pop
* out of the blurring radius. To fix this, use custom enlarged culling matrix. */
inst_.depth_of_field.jitter_apply(winmat, viewmat);
DRW_view_update_sub(render_view_, viewmat.ptr(), winmat.ptr());
-
- // inst_.lightprobes.set_view(render_view_, extent_);
- // inst_.lights.set_view(render_view_, extent_, !inst_.use_scene_lights());
}
/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl
new file mode 100644
index 00000000000..321c99f7952
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl
@@ -0,0 +1,52 @@
+
+/**
+ * Debug Shader outputting a gradient of orange - white - blue to mark culling hotspots.
+ * Green pixels are error pixels that are missing lights from the culling pass (i.e: when culling
+ * pass is not conservative enough).
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl)
+
+void main()
+{
+  /* Debug view of the light culling result for this pixel:
+   * - Heat-map of the number of local lights returned by the culled iterator.
+   * - Solid green if a light with non-zero attenuation here is missing from that culled set.
+   * Lights are tracked in 32-bit masks, so light indices wrap modulo 32. With more than
+   * 32 lights an error can be hidden by an aliasing light, but never reported by mistake. */
+  ivec2 texel = ivec2(gl_FragCoord.xy);
+
+  float depth = texelFetch(depth_tx, texel, 0).r;
+  float vP_z = get_view_z_from_depth(depth);
+  vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth);
+
+  /* Lights selected by the tile + Z-bin culling. */
+  float light_count = 0.0;
+  uint light_cull = 0u;
+  vec2 px = gl_FragCoord.xy;
+  LIGHT_FOREACH_BEGIN_LOCAL(light_cull_buf, light_zbin_buf, light_tile_buf, px, vP_z, l_idx)
+  {
+    /* Shifting by 32 or more is undefined in GLSL. Keep the shift inside the mask width. */
+    light_cull |= 1u << (l_idx % 32u);
+    light_count += 1.0;
+  }
+  LIGHT_FOREACH_END
+
+  /* Reference set: every local light that actually reaches this pixel. */
+  uint light_nocull = 0u;
+  LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(light_cull_buf, l_idx)
+  {
+    LightData light = light_buf[l_idx];
+    vec3 L;
+    float dist;
+    light_vector_get(light, P, L, dist);
+    if (light_attenuation(light, L, dist) > 0.0) {
+      light_nocull |= 1u << (l_idx % 32u);
+    }
+  }
+  LIGHT_FOREACH_END
+
+  if ((light_cull & light_nocull) != light_nocull) {
+    /* ERROR. Some lights were culled incorrectly. */
+    out_debug_color = vec4(0.0, 1.0, 0.0, 1.0);
+  }
+  else {
+    out_debug_color = vec4(heatmap_gradient(light_count / 4.0), 1.0);
+  }
+} \ No newline at end of file
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl
new file mode 100644
index 00000000000..9c12b0e50e6
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl
@@ -0,0 +1,62 @@
+
+/**
+ * Select the visible items inside the active view and put them inside the sorting buffer.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(common_intersect_lib.glsl)
+
+void main()
+{
+ uint l_idx = gl_GlobalInvocationID.x;
+ if (l_idx >= light_cull_buf.items_count) {
+ return;
+ }
+
+ LightData light = in_light_buf[l_idx];
+
+ /* Do not select 0 power lights. */
+ if (light.influence_radius_max < 1e-8) {
+ return;
+ }
+
+ /* Sun lights are packed at the end of the output array (after the local lights).
+ * Perform early copy: they skip the sorting and the 2.5D culling. */
+ if (light.type == LIGHT_SUN) {
+ /* NOTE: We know the destination index because sun lights are packed at the start of the
+ * input buffer, so `l_idx` is also the index of this light among the sun lights. */
+ out_light_buf[light_cull_buf.local_lights_len + l_idx] = light;
+ return;
+ }
+
+ Sphere sphere;
+ switch (light.type) {
+ case LIGHT_SPOT:
+ /* Only for < ~170° Cone due to plane extraction precision. */
+ if (light.spot_tan < 10.0) {
+ Pyramid pyramid = shape_pyramid_non_oblique(
+ light._position,
+ light._position - light._back * light.influence_radius_max,
+ light._right * light.influence_radius_max * light.spot_tan / light.spot_size_inv.x,
+ light._up * light.influence_radius_max * light.spot_tan / light.spot_size_inv.y);
+ if (!intersect_view(pyramid)) {
+ return;
+ }
+ } /* Fall through: spot lights also go through the bounding sphere test below. */
+ case LIGHT_RECT:
+ case LIGHT_ELLIPSE:
+ case LIGHT_POINT:
+ sphere = Sphere(light._position, light.influence_radius_max);
+ break;
+ }
+
+ /* TODO(fclem): HiZ culling? Could be quite beneficial given the nature of the 2.5D culling. */
+
+ /* TODO(fclem): Small light culling / fading? */
+
+ if (intersect_view(sphere)) {
+ uint index = atomicAdd(light_cull_buf.visible_count, 1u);
+
+ out_zdist_buf[index] = dot(cameraForward, light._position) - dot(cameraForward, cameraPos);
+ out_key_buf[index] = l_idx;
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl
new file mode 100644
index 00000000000..daf2016cd35
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl
@@ -0,0 +1,57 @@
+
+/**
+ * Sort the lights by their Z distance to the camera.
+ * Outputs ordered light buffer.
+ * One thread processes one Light entity.
+ */
+
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+
+shared float zdists_cache[gl_WorkGroupSize.x];
+
+void main()
+{
+  /* Each valid thread counts how many visible lights sort before its own light
+   * (`prefix_sum`) and copies its light to that slot of the output buffer. */
+  uint src_index = gl_GlobalInvocationID.x;
+  bool valid_thread = true;
+
+  if (src_index >= light_cull_buf.visible_count) {
+    /* Do not return because we use barriers later on (which need uniform control flow).
+     * Just process the same last item but avoid insertion.
+     * The `max()` prevents the index from wrapping around when no light is visible. */
+    src_index = max(light_cull_buf.visible_count, 1u) - 1u;
+    valid_thread = false;
+  }
+
+  float local_zdist = in_zdist_buf[src_index];
+
+  int prefix_sum = 0;
+  /* Iterate over the whole key buffer. */
+  uint iter = divide_ceil_u(light_cull_buf.visible_count, gl_WorkGroupSize.x);
+  for (uint i = 0u; i < iter; i++) {
+    uint index = gl_WorkGroupSize.x * i + gl_LocalInvocationID.x;
+    /* NOTE: This will load duplicated values, but they will be discarded. */
+    index = min(index, light_cull_buf.visible_count - 1);
+    zdists_cache[gl_LocalInvocationID.x] = in_zdist_buf[index];
+
+    /* Make the whole cache line visible to every thread of the group before reading it. */
+    barrier();
+
+    /* Iterate over the cache line. */
+    uint line_end = min(gl_WorkGroupSize.x, light_cull_buf.visible_count - gl_WorkGroupSize.x * i);
+    for (uint j = 0u; j < line_end; j++) {
+      if (zdists_cache[j] < local_zdist) {
+        prefix_sum++;
+      }
+      else if (zdists_cache[j] == local_zdist) {
+        /* Same depth, use index to order and avoid same prefix for 2 different lights. */
+        if ((gl_WorkGroupSize.x * i + j) < src_index) {
+          prefix_sum++;
+        }
+      }
+    }
+
+    /* Wait for every thread to be done reading the cache line before the next iteration
+     * overwrites it. `iter` is the same for all threads so control flow stays uniform. */
+    barrier();
+  }
+
+  if (valid_thread) {
+    /* Copy sorted light to render light buffer. */
+    uint input_index = in_key_buf[src_index];
+    out_light_buf[prefix_sum] = in_light_buf[input_index];
+  }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl
new file mode 100644
index 00000000000..37705e22b22
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl
@@ -0,0 +1,188 @@
+
+/**
+ * 2D Culling pass for lights.
+ * We iterate over all items and check if they intersect with the tile frustum.
+ * Dispatch one thread per word.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_intersect_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl)
+
+/* ---------------------------------------------------------------------- */
+/** \name Culling shapes extraction
+ * \{ */
+
+struct CullingTile {
+ IsectFrustum frustum; /* Intersection data of the frustum built from the 8 tile corners. */
+ vec4 bounds; /* Bounding cone (perspective) or bounding cylinder (orthographic) of the tile. */
+};
+
+/* Corners are expected to be in viewspace so that the cone is starting from the origin.
+ * Corner order does not matter.
+ * Returns the cone axis in `xyz` and the cosine of the cone half-angle in `w`. */
+vec4 tile_bound_cone(vec3 v00, vec3 v01, vec3 v10, vec3 v11)
+{
+  v00 = normalize(v00);
+  v01 = normalize(v01);
+  v10 = normalize(v10);
+  v11 = normalize(v11);
+  vec3 center = normalize(v00 + v01 + v10 + v11);
+  /* The cone has to contain every corner direction: keep the widest angle to the axis,
+   * which is the smallest cosine. Using the largest cosine would produce a cone that only
+   * reaches the closest corner and could reject lights overlapping the rest of the tile. */
+  float angle_cosine = dot(center, v00);
+  angle_cosine = min(angle_cosine, dot(center, v01));
+  angle_cosine = min(angle_cosine, dot(center, v10));
+  angle_cosine = min(angle_cosine, dot(center, v11));
+  return vec4(center, angle_cosine);
+}
+
+/* Corners are expected to be in viewspace. Returns Z-aligned bounding cylinder.
+ * Corner order does not matter.
+ * Returns the corner centroid in `xyz` and the distance to the furthest corner in `w`. */
+vec4 tile_bound_cylinder(vec3 v00, vec3 v01, vec3 v10, vec3 v11)
+{
+  vec3 center = (v00 + v01 + v10 + v11) * 0.25;
+  float dist_sqr = distance_squared(center, v00);
+  dist_sqr = max(dist_sqr, distance_squared(center, v01));
+  dist_sqr = max(dist_sqr, distance_squared(center, v10));
+  dist_sqr = max(dist_sqr, distance_squared(center, v11));
+  /* Center and radius. The intersection test only uses the XY center and the radius. */
+  return vec4(center, sqrt(dist_sqr));
+}
+
+vec2 tile_to_ndc(vec2 tile_co, vec2 offset)
+{
+  /* Push each corner slightly away from the tile center (`offset` is 0 or 1 per axis)
+   * so that an unstable frustum does not cull too much. */
+  const float margin = 0.02;
+  vec2 corner = tile_co + margin * (offset * 2.0 - 1.0);
+  corner += offset;
+  /* Tile coordinate to UV, then UV [0..1] to NDC [-1..1]. */
+  return corner * light_cull_buf.tile_to_uv_fac * 2.0 - 1.0;
+}
+
+CullingTile tile_culling_get(uvec2 tile_co)
+{
+ vec2 ftile = vec2(tile_co);
+ /* Culling frustum corners for this tile: 4 XY corners, each one at NDC Z = -1 and Z = 1. */
+ vec3 corners[8];
+ /* Follow same corners order as view frustum. */
+ corners[1].xy = corners[0].xy = tile_to_ndc(ftile, vec2(0, 0));
+ corners[5].xy = corners[4].xy = tile_to_ndc(ftile, vec2(1, 0));
+ corners[6].xy = corners[7].xy = tile_to_ndc(ftile, vec2(1, 1));
+ corners[2].xy = corners[3].xy = tile_to_ndc(ftile, vec2(0, 1));
+ corners[1].z = corners[5].z = corners[6].z = corners[2].z = -1.0;
+ corners[0].z = corners[4].z = corners[7].z = corners[3].z = 1.0;
+
+ for (int i = 0; i < 8; i++) {
+ /* Culling in view space for precision: unproject the NDC corners. */
+ corners[i] = project_point(ProjectionMatrixInverse, corners[i]);
+ }
+
+ bool is_persp = ProjectionMatrix[3][3] == 0.0;
+ CullingTile tile;
+ tile.bounds = (is_persp) ? tile_bound_cone(corners[0], corners[4], corners[7], corners[3]) : /* Bound from the 4 corners at NDC Z = 1. */
+ tile_bound_cylinder(corners[0], corners[4], corners[7], corners[3]);
+
+ tile.frustum = isect_data_setup(shape_frustum(corners));
+ return tile;
+}
+
+/** \} */
+
+/* ---------------------------------------------------------------------- */
+/** \name Intersection Tests
+ * \{ */
+
+bool intersect(CullingTile tile, Sphere sphere)
+{
+  /* Coarse test against the tile bounding cone (perspective) or bounding cylinder
+   * (orthographic). This has less false positive cases when the sphere is large. */
+  bool coarse_hit;
+  if (ProjectionMatrix[3][3] == 0.0) {
+    coarse_hit = intersect(shape_cone(tile.bounds.xyz, tile.bounds.w), sphere);
+  }
+  else {
+    /* Simplify to a 2D circle test on the view Z axis plane. */
+    coarse_hit = intersect(shape_circle(tile.bounds.xy, tile.bounds.w),
+                           shape_circle(sphere.center.xy, sphere.radius));
+  }
+  if (!coarse_hit) {
+    return false;
+  }
+  /* Refine using frustum test. If the sphere is small it avoids intersection
+   * with a neighbor tile. */
+  return intersect(tile.frustum, sphere);
+}
+
+bool intersect(CullingTile tile, Box bbox)
+{
+ return intersect(tile.frustum, bbox); /* Only the tile frustum is tested, `tile.bounds` is not used. */
+}
+
+bool intersect(CullingTile tile, Pyramid pyramid)
+{
+ return intersect(tile.frustum, pyramid); /* Only the tile frustum is tested, `tile.bounds` is not used. */
+}
+
+/** \} */
+
+void main()
+{
+  /* One thread per 32-bit word of a tile bitmap: bit `i` of word `w` is set when light
+   * `w * 32 + i` of the sorted light buffer intersects the tile. */
+  uint word_idx = gl_GlobalInvocationID.x % light_cull_buf.tile_word_len;
+  uint tile_idx = gl_GlobalInvocationID.x / light_cull_buf.tile_word_len;
+  uvec2 tile_co = uvec2(tile_idx % light_cull_buf.tile_x_len,
+                        tile_idx / light_cull_buf.tile_x_len);
+
+  /* Threads past the last tile have nothing to write. */
+  if (tile_co.y >= light_cull_buf.tile_y_len) {
+    return;
+  }
+
+  /* TODO(fclem): We could stop the tile at the HiZ depth. */
+  CullingTile tile = tile_culling_get(tile_co);
+
+  uint l_idx = word_idx * 32u;
+  uint l_end = min(l_idx + 32u, light_cull_buf.visible_count);
+  uint word = 0u;
+  for (; l_idx < l_end; l_idx++) {
+    LightData light = light_buf[l_idx];
+
+    /* Culling in view space for precision and simplicity. */
+    vec3 vP = transform_point(ViewMatrix, light._position);
+    vec3 v_right = transform_direction(ViewMatrix, light._right);
+    vec3 v_up = transform_direction(ViewMatrix, light._up);
+    vec3 v_back = transform_direction(ViewMatrix, light._back);
+    float radius = light.influence_radius_max;
+
+    /* Every light type is first tested using its influence sphere. */
+    Sphere sphere = shape_sphere(vP, radius);
+    bool intersect_tile = intersect(tile, sphere);
+
+    /* Then refine with a tighter shape depending on the light type. */
+    switch (light.type) {
+      case LIGHT_SPOT:
+        /* Only for < ~170° Cone due to plane extraction precision. */
+        if (light.spot_tan < 10.0) {
+          Pyramid pyramid = shape_pyramid_non_oblique(
+              vP,
+              vP - v_back * radius,
+              v_right * radius * light.spot_tan / light.spot_size_inv.x,
+              v_up * radius * light.spot_tan / light.spot_size_inv.y);
+          intersect_tile = intersect_tile && intersect(tile, pyramid);
+          break;
+        }
+        /* Fallthrough to the hemispheric case. */
+      case LIGHT_RECT:
+      case LIGHT_ELLIPSE: {
+        /* Scoped block: keeps the `default` label from jumping over these declarations. */
+        vec3 v000 = vP - v_right * radius - v_up * radius;
+        vec3 v100 = v000 + v_right * (radius * 2.0);
+        vec3 v010 = v000 + v_up * (radius * 2.0);
+        vec3 v001 = v000 - v_back * radius;
+        Box bbox = shape_box(v000, v100, v010, v001);
+        intersect_tile = intersect_tile && intersect(tile, bbox);
+        break;
+      }
+      default:
+        break;
+    }
+
+    if (intersect_tile) {
+      word |= 1u << (l_idx % 32u);
+    }
+  }
+
+  out_light_tile_buf[gl_GlobalInvocationID.x] = word;
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl
new file mode 100644
index 00000000000..d96f191fb77
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl
@@ -0,0 +1,56 @@
+
+/**
+ * Create the Zbins from Z-sorted lights.
+ * Perform min-max operation in LDS memory for speed.
+ * For this reason, we only dispatch 1 thread group.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl)
+
+/* Fits the limit of 32KB. */
+shared uint zbin_max[CULLING_ZBIN_COUNT];
+shared uint zbin_min[CULLING_ZBIN_COUNT];
+
+void main()
+{
+  /* Each thread owns a contiguous range of `zbin_iter` bins for the clear and the final write. */
+  const uint zbin_iter = CULLING_ZBIN_COUNT / gl_WorkGroupSize.x;
+  const uint zbin_local = gl_LocalInvocationID.x * zbin_iter;
+
+  /* Reset to an empty range (min > max). */
+  for (uint i = 0u, l = zbin_local; i < zbin_iter; i++, l++) {
+    zbin_max[l] = 0x0u;
+    zbin_min[l] = ~0x0u;
+  }
+  barrier();
+
+  uint light_iter = divide_ceil_u(light_cull_buf.visible_count, gl_WorkGroupSize.x);
+  for (uint i = 0u; i < light_iter; i++) {
+    uint index = i * gl_WorkGroupSize.x + gl_LocalInvocationID.x;
+    if (index >= light_cull_buf.visible_count) {
+      continue;
+    }
+    vec3 P = light_buf[index]._position;
+    /* TODO(fclem): Could have better bounds for spot and area lights. */
+    float radius = light_buf[index].influence_radius_max;
+    float z_dist = dot(cameraForward, P) - dot(cameraForward, cameraPos);
+    int z_min = culling_z_to_zbin(
+        light_cull_buf.zbin_scale, light_cull_buf.zbin_bias, z_dist + radius);
+    int z_max = culling_z_to_zbin(
+        light_cull_buf.zbin_scale, light_cull_buf.zbin_bias, z_dist - radius);
+    z_min = clamp(z_min, 0, CULLING_ZBIN_COUNT - 1);
+    z_max = clamp(z_max, 0, CULLING_ZBIN_COUNT - 1);
+    /* Register to Z bins. */
+    for (int z = z_min; z <= z_max; z++) {
+      atomicMin(zbin_min[z], index);
+      atomicMax(zbin_max[z], index);
+    }
+  }
+  barrier();
+
+  /* Write result to zbins buffer. Pack min & max into 1 uint. */
+  for (uint i = 0u, l = zbin_local; i < zbin_iter; i++, l++) {
+    out_zbin_buf[l] = (zbin_max[l] << 16u) | (zbin_min[l] & 0xFFFFu);
+  }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl
new file mode 100644
index 00000000000..d4abdd43aa4
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl
@@ -0,0 +1,129 @@
+
+/**
+ * The resources expected to be defined are:
+ * - light_buf
+ * - light_zbin_buf
+ * - light_cull_buf
+ * - light_tile_buf
+ * - shadow_atlas_tx
+ * - shadow_tilemaps_tx
+ * - sss_transmittance_tx
+ * - utility_tx
+ */
+
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl)
+
+/* Accumulate the diffuse and specular contribution of the light at index `l_idx` into
+ * `out_diffuse` and `out_specular`. The shadow and SSS transmittance evaluation is
+ * currently compiled out (`#if 0`).
+ * TODO(fclem): We could reduce register pressure by only having static branches for sun lights. */
+void light_eval_ex(ClosureDiffuse diffuse,
+ ClosureReflection reflection,
+ const bool is_directional,
+ vec3 P,
+ vec3 V,
+ float vP_z,
+ float thickness,
+ vec4 ltc_mat,
+ uint l_idx,
+ inout vec3 out_diffuse,
+ inout vec3 out_specular)
+{
+ LightData light = light_buf[l_idx];
+ vec3 L;
+ float dist;
+ light_vector_get(light, P, L, dist);
+
+ float visibility = light_attenuation(light, L, dist);
+
+#if 0 /* TODO(fclem): Shadows */
+ if ((light.shadow_id != LIGHT_NO_SHADOW) && (visibility > 0.0)) {
+ vec3 lL = light_world_to_local(light, -L) * dist;
+
+ float shadow_delta = shadow_delta_get(
+ shadow_atlas_tx, shadow_tilemaps_tx, light, light.shadow_data, lL, dist, P);
+
+# ifdef SSS_TRANSMITTANCE
+ /* Transmittance evaluation first to use initial visibility. */
+ if (diffuse.sss_id != 0u && light.diffuse_power > 0.0) {
+ float delta = max(thickness, shadow_delta);
+
+ vec3 intensity = visibility * light.transmit_power *
+ light_translucent(sss_transmittance_tx,
+ is_directional,
+ light,
+ diffuse.N,
+ L,
+ dist,
+ diffuse.sss_radius,
+ delta);
+ out_diffuse += light.color * intensity;
+ }
+# endif
+
+ visibility *= float(shadow_delta - light.shadow_data.bias <= 0.0);
+ }
+#endif
+
+ if (visibility < 1e-6) { /* Negligible contribution: skip the diffuse and specular evaluation. */
+ return;
+ }
+
+ if (light.diffuse_power > 0.0) {
+ float intensity = visibility * light.diffuse_power *
+ light_diffuse(utility_tx, is_directional, light, diffuse.N, V, L, dist);
+ out_diffuse += light.color * intensity;
+ }
+
+ if (light.specular_power > 0.0) {
+ float intensity = visibility * light.specular_power *
+ light_ltc(
+ utility_tx, is_directional, light, reflection.N, V, L, dist, ltc_mat);
+ out_specular += light.color * intensity;
+ }
+}
+
+void light_eval(ClosureDiffuse diffuse,
+ ClosureReflection reflection,
+ vec3 P,
+ vec3 V,
+ float vP_z,
+ float thickness,
+ inout vec3 out_diffuse,
+ inout vec3 out_specular)
+{
+ vec2 uv = vec2(reflection.roughness, safe_sqrt(1.0 - dot(reflection.N, V)));
+ uv = uv * UTIL_TEX_UV_SCALE + UTIL_TEX_UV_BIAS;
+ vec4 ltc_mat = utility_tx_sample(utility_tx, uv, UTIL_LTC_MAT_LAYER); /* Fetched once, shared by all lights. */
+
+ LIGHT_FOREACH_BEGIN_DIRECTIONAL(light_cull_buf, l_idx) /* Lights from `local_lights_len` to `items_count`, evaluated as directional. */
+ {
+ light_eval_ex(diffuse,
+ reflection,
+ true,
+ P,
+ V,
+ vP_z,
+ thickness,
+ ltc_mat,
+ l_idx,
+ out_diffuse,
+ out_specular);
+ }
+ LIGHT_FOREACH_END
+
+ vec2 px = gl_FragCoord.xy;
+ LIGHT_FOREACH_BEGIN_LOCAL(light_cull_buf, light_zbin_buf, light_tile_buf, px, vP_z, l_idx) /* Local lights selected by the tile and Z-bin culling for this pixel. */
+ {
+ light_eval_ex(diffuse,
+ reflection,
+ false,
+ P,
+ V,
+ vP_z,
+ thickness,
+ ltc_mat,
+ l_idx,
+ out_diffuse,
+ out_specular);
+ }
+ LIGHT_FOREACH_END
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl
new file mode 100644
index 00000000000..22a5f98e6c3
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl
@@ -0,0 +1,72 @@
+
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+
+/**
+ * Return the bits of the 32-bit word `word_index` whose item index lies inside the inclusive
+ * range [`zbin_min`..`zbin_max`].
+ *
+ * The word must overlap the range (as guaranteed by the `word_min`..`word_max` loop of
+ * `LIGHT_FOREACH_BEGIN_LOCAL`), otherwise the unsigned width computation wraps around.
+ */
+uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max)
+{
+  uint word_start = word_index * 32u;
+  uint word_end = word_start + 31u;
+  /* Clamp the item range to the items covered by this word. */
+  uint local_min = max(zbin_min, word_start);
+  uint local_max = min(zbin_max, word_end);
+  uint mask_width = local_max - local_min + 1u;
+  /* The bit offset must be relative to the start of the word (0..31). Passing the absolute item
+   * index would request a shift of 32 or more for any word but the first one, which has an
+   * undefined result in GLSL. */
+  return bit_field_mask(mask_width, local_min - word_start);
+}
+
+/**
+ * Convert a depth `z` to a Z-bin index using the affine mapping `z * scale + bias`.
+ * The result is truncated to an integer and is NOT clamped to the valid bin range.
+ */
+int culling_z_to_zbin(float scale, float bias, float z)
+{
+  float zbin = z * scale + bias;
+  return int(zbin);
+}
+
+/* Subgroup operations are not available until extension support is implemented. We need:
+ * - GL_KHR_shader_subgroup_ballot
+ * - GL_KHR_shader_subgroup_arithmetic
+ * or
+ * - Vulkan 1.1
+ *
+ * Until then, the fallbacks below expand to their argument unchanged, so every invocation
+ * simply keeps its own value.
+ */
+#if 1
+# define subgroupMin(a) a
+# define subgroupMax(a) a
+# define subgroupOr(a) a
+# define subgroupBroadcastFirst(a) a
+#endif
+
+/* Begin a loop declaring `_index` and iterating it over the item range
+ * [`_culling.local_lights_len`..`_culling.items_count`).
+ * NOTE(review): presumably directional lights are stored after the local lights; confirm against
+ * the culling code.
+ * Opens three scopes so that it can be closed by `LIGHT_FOREACH_END`. */
+#define LIGHT_FOREACH_BEGIN_DIRECTIONAL(_culling, _index) \
+ { \
+ { \
+ for (uint _index = _culling.local_lights_len; _index < _culling.items_count; _index++) {
+
+/* Begin a loop over the items flagged for the tile containing `_pixel` and whose index is inside
+ * the range stored in the Z-bin of `_linearz`. Declares `_item_index` for the loop body.
+ *
+ * - The tile is found by dividing `_pixel` by `_culling.tile_size`. Each tile owns
+ *   `_culling.tile_word_len` consecutive 32-bit words inside `_words`.
+ * - The Z-bin index is clamped to [0..CULLING_ZBIN_COUNT - 1]. Each `_zbins` entry packs the
+ *   minimum item index in its low 16 bits and the maximum item index in its high 16 bits.
+ * - Only the words overlapping this index range are read. They are masked by `zbin_mask()` and
+ *   their set bits are visited from the lowest to the highest using `findLSB()`.
+ *
+ * The macro declares the locals `tile_co`, `tile_word_offset`, `zbin_index`, `zbin_data`,
+ * `min_index`, `max_index`, `word_min`, `word_max`, `word_idx`, `word` and `bit_index`: the
+ * calling code must not use these names for something else inside the loop.
+ * Opens three scopes so that it can be closed by `LIGHT_FOREACH_END`. */
+#define LIGHT_FOREACH_BEGIN_LOCAL(_culling, _zbins, _words, _pixel, _linearz, _item_index) \
+ { \
+ uvec2 tile_co = uvec2(_pixel / _culling.tile_size); \
+ uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \
+ _culling.tile_word_len; \
+ int zbin_index = culling_z_to_zbin(_culling.zbin_scale, _culling.zbin_bias, _linearz); \
+ zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \
+ uint zbin_data = _zbins[zbin_index]; \
+ uint min_index = zbin_data & 0xFFFFu; \
+ uint max_index = zbin_data >> 16u; \
+ /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
+ min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
+ max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
+ /* Same as dividing by 32 but avoids an integer division. */ \
+ uint word_min = min_index >> 5u; \
+ uint word_max = max_index >> 5u; \
+ for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \
+ uint word = _words[tile_word_offset + word_idx]; \
+ word &= zbin_mask(word_idx, min_index, max_index); \
+ /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
+ word = subgroupBroadcastFirst(subgroupOr(word)); \
+ int bit_index; \
+ while ((bit_index = findLSB(word)) != -1) { \
+ /* Clear the bit being processed. Lower bits are already zero as it is the LSB. */ \
+ word &= ~1u << uint(bit_index); \
+ uint _item_index = word_idx * 32u + bit_index;
+
+/* No culling. Begin a loop declaring `_item_index` and iterating it over all items in the range
+ * [0..`_culling.visible_count`).
+ * Opens three scopes so that it can be closed by `LIGHT_FOREACH_END`. */
+#define LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(_culling, _item_index) \
+ { \
+ { \
+ for (uint _item_index = 0; _item_index < _culling.visible_count; _item_index++) {
+
+/* Close the three scopes opened by any of the `LIGHT_FOREACH_BEGIN_*` macros. */
+#define LIGHT_FOREACH_END \
+ } \
+ } \
+ }
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl
new file mode 100644
index 00000000000..58608f6e1f0
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl
@@ -0,0 +1,209 @@
+
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_ltc_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl)
+
+/* ---------------------------------------------------------------------- */
+/** \name Light Functions
+ * \{ */
+
+/**
+ * Compute the normalized vector `L` from the shading point `P` towards the light and the
+ * distance `dist` to it.
+ * Sun lights have no position: `L` is the light `_back` axis and `dist` is set to 1.
+ */
+void light_vector_get(LightData ld, vec3 P, out vec3 L, out float dist)
+{
+  if (ld.type != LIGHT_SUN) {
+    vec3 to_light = ld._position - P;
+    /* Normalize using the reciprocal length and recover the distance from it. */
+    float inv_len = inversesqrt(len_squared(to_light));
+    L = to_light * inv_len;
+    dist = 1.0 / inv_len;
+  }
+  else {
+    L = ld._back;
+    dist = 1.0;
+  }
+}
+
+/**
+ * Rotate the vector `L` into the light's local space. Does not translate.
+ * Equivalent to `transpose(mat3(ld.object_mat)) * L`, written as explicit dot products to avoid
+ * relying on the compiler to optimize the transpose away.
+ */
+vec3 light_world_to_local(LightData ld, vec3 L)
+{
+  return vec3(dot(ld.object_mat[0].xyz, L),
+              dot(ld.object_mat[1].xyz, L),
+              dot(ld.object_mat[2].xyz, L));
+}
+
+/**
+ * Distance based attenuation, from the Frostbite PBR course:
+ * http://www.frostbite.com/wp-content/uploads/2014/11/course_notes_moving_frostbite_to_pbr.pdf
+ *
+ * Evaluates `saturate(1 - (dist^2 * inv_sqr_influence)^2)^2`.
+ */
+float light_influence_attenuation(float dist, float inv_sqr_influence)
+{
+  float dist_ratio_sqr = sqr(dist) * inv_sqr_influence;
+  float window = saturate(1.0 - sqr(dist_ratio_sqr));
+  return sqr(window);
+}
+
+/**
+ * Spot cone mask for the light vector `L`.
+ * `L` is rotated into the light's local space and the result is a smoothstep of the scaled and
+ * biased ellipse term.
+ */
+float light_spot_attenuation(LightData ld, vec3 L)
+{
+  vec3 local_L = light_world_to_local(ld, L);
+  float ellipse = inversesqrt(1.0 + len_squared(local_L.xy * ld.spot_size_inv / local_L.z));
+  return smoothstep(0.0, 1.0, ellipse * ld._spot_mul + ld._spot_bias);
+}
+
+/**
+ * Combined visibility factor of a light for the light vector `L` at distance `dist`:
+ * - Spot lights are masked by their cone.
+ * - Light types ordered at or after `LIGHT_SPOT` are masked on the back side of their
+ *   `_back` axis.
+ * - All lights except suns are windowed by their influence radius. The volume radius is used
+ *   when `VOLUME_LIGHTING` is defined, the surface radius otherwise.
+ */
+float light_attenuation(LightData ld, vec3 L, float dist)
+{
+#ifdef VOLUME_LIGHTING
+  float influence_radius_invsqr = ld.influence_radius_invsqr_volume;
+#else
+  float influence_radius_invsqr = ld.influence_radius_invsqr_surface;
+#endif
+
+  float vis = 1.0;
+  if (ld.type == LIGHT_SPOT) {
+    vis *= light_spot_attenuation(ld, L);
+  }
+  if (ld.type >= LIGHT_SPOT) {
+    vis *= step(0.0, -dot(L, -ld._back));
+  }
+  if (ld.type != LIGHT_SUN) {
+    vis *= light_influence_attenuation(dist, influence_radius_invsqr);
+  }
+  return vis;
+}
+
+/**
+ * Cheaper alternative to evaluating the LTC.
+ * The result needs to be multiplied by the BSDF or the phase function.
+ * Directional lights always return 1.
+ */
+float light_point_light(LightData ld, const bool is_directional, vec3 L, float dist)
+{
+  if (is_directional) {
+    return 1.0;
+  }
+  /**
+   * Using "Point Light Attenuation Without Singularity" from Cem Yuksel
+   * http://www.cemyuksel.com/research/pointlightattenuation/pointlightattenuation.pdf
+   * http://www.cemyuksel.com/research/pointlightattenuation/
+   **/
+  float dist_sqr = sqr(dist);
+  float radius_sqr = ld.radius_squared;
+  /* Using the reformulation that has better numerical precision. */
+  float power = 2.0 / (dist_sqr + radius_sqr + dist * sqrt(dist_sqr + radius_sqr));
+
+  if (!is_area_light(ld.type)) {
+    return power;
+  }
+  /* Modulate by light plane orientation / solid angle. */
+  return power * saturate(dot(ld._back, L));
+}
+
+/**
+ * Diffuse light integral for the normal `N`, light vector `L` and light distance `dist`.
+ *
+ * - Directional and non-area lights use the simple front facing disk approximation.
+ * - Rectangle lights integrate the quad formed by their four corners.
+ * - Remaining area lights (ellipses) integrate a disk with an identity LTC matrix.
+ */
+float light_diffuse(sampler2DArray utility_tx,
+                    const bool is_directional,
+                    LightData ld,
+                    vec3 N,
+                    vec3 V,
+                    vec3 L,
+                    float dist)
+{
+  if (is_directional || !is_area_light(ld.type)) {
+    float radius = ld._radius / dist;
+    return ltc_evaluate_disk_simple(utility_tx, radius, dot(N, L));
+  }
+
+  if (ld.type == LIGHT_RECT) {
+    /* Corners relative to the light center. Opposite corners are mirrored. */
+    vec3 corners[4];
+    corners[0] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y;
+    corners[1] = ld._right * ld._area_size_x + ld._up * ld._area_size_y;
+    corners[2] = -corners[0];
+    corners[3] = -corners[1];
+
+    /* Make them normalized directions from the shading point. */
+    for (int i = 0; i < 4; i++) {
+      corners[i] = normalize(L * dist + corners[i]);
+    }
+    return ltc_evaluate_quad(utility_tx, corners, N);
+  }
+
+  /* (ld.type == LIGHT_ELLIPSE) */
+  vec3 points[3];
+  points[0] = ld._right * -ld._area_size_x + ld._up * -ld._area_size_y;
+  points[1] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y;
+  points[2] = -points[0];
+
+  /* Make them relative to the shading point. */
+  for (int i = 0; i < 3; i++) {
+    points[i] += L * dist;
+  }
+  return ltc_evaluate_disk(utility_tx, N, V, mat3(1.0), points);
+}
+
+/**
+ * Specular light integral using the LTC matrix packed inside `ltc_mat`.
+ *
+ * - Non-directional rectangle lights transform their quad by the LTC matrix and integrate it in
+ *   the shading basis.
+ * - Every other light is integrated as a disk. Directional and non-area lights orient the disk
+ *   towards the shading point by building the basis around `L`.
+ */
+float light_ltc(sampler2DArray utility_tx,
+                const bool is_directional,
+                LightData ld,
+                vec3 N,
+                vec3 V,
+                vec3 L,
+                float dist,
+                vec4 ltc_mat)
+{
+  if (!is_directional && ld.type == LIGHT_RECT) {
+    /* Corners relative to the light center. Opposite corners are mirrored. */
+    vec3 corners[4];
+    corners[0] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y;
+    corners[1] = ld._right * ld._area_size_x + ld._up * ld._area_size_y;
+    corners[2] = -corners[0];
+    corners[3] = -corners[1];
+
+    /* Make them relative to the shading point. */
+    for (int i = 0; i < 4; i++) {
+      corners[i] += L * dist;
+    }
+
+    ltc_transform_quad(N, V, ltc_matrix(ltc_mat), corners);
+
+    /* Corners are now in the shading basis where the normal is the Z axis. */
+    return ltc_evaluate_quad(utility_tx, corners, vec3(0.0, 0.0, 1.0));
+  }
+
+  vec3 Px = ld._right;
+  vec3 Py = ld._up;
+  if (is_directional || !is_area_light(ld.type)) {
+    make_orthonormal_basis(L, Px, Py);
+  }
+
+  vec3 points[3];
+  points[0] = Px * -ld._area_size_x + Py * -ld._area_size_y;
+  points[1] = Px * ld._area_size_x + Py * -ld._area_size_y;
+  points[2] = -points[0];
+
+  /* Make them relative to the shading point. */
+  for (int i = 0; i < 3; i++) {
+    points[i] += L * dist;
+  }
+
+  return ltc_evaluate_disk(utility_tx, N, V, ltc_matrix(ltc_mat), points);
+}
+
+/**
+ * Per channel translucent lighting through an object.
+ * The light power is evaluated using the light vector while the transmittance is looked up in
+ * `transmittance_tx` using `delta` (the shadow depth delta) scaled by the per channel
+ * `sss_radius`. Channels with a null radius do not transmit any light.
+ */
+vec3 light_translucent(sampler1D transmittance_tx,
+                       const bool is_directional,
+                       LightData ld,
+                       vec3 N,
+                       vec3 L,
+                       float dist,
+                       vec3 sss_radius,
+                       float delta)
+{
+  /* TODO(fclem): We should compute the power at the entry point. */
+  /* NOTE(fclem): we compute the light attenuation using the light vector but the transmittance
+   * using the shadow depth delta. */
+  float power = light_point_light(ld, is_directional, L, dist);
+  /* Do not add more energy on front faces. Also apply lambertian BSDF. */
+  power *= max(0.0, dot(-N, L)) * M_1_PI;
+
+  vec3 lut_radius = sss_radius * SSS_TRANSMIT_LUT_RADIUS;
+  vec3 lut_co = saturate(delta / lut_radius) * SSS_TRANSMIT_LUT_SCALE + SSS_TRANSMIT_LUT_BIAS;
+
+  vec3 translucency;
+  translucency.x = (lut_radius.x > 0.0) ? texture(transmittance_tx, lut_co.x).r : 0.0;
+  translucency.y = (lut_radius.y > 0.0) ? texture(transmittance_tx, lut_co.y).r : 0.0;
+  translucency.z = (lut_radius.z > 0.0) ? texture(transmittance_tx, lut_co.z).r : 0.0;
+  return translucency * power;
+}
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl
new file mode 100644
index 00000000000..57e92b0b9b4
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl
@@ -0,0 +1,299 @@
+
+/**
+ * Adapted from :
+ * Real-Time Polygonal-Light Shading with Linearly Transformed Cosines.
+ * Eric Heitz, Jonathan Dupuy, Stephen Hill and David Neubelt.
+ * ACM Transactions on Graphics (Proceedings of ACM SIGGRAPH 2016) 35(4), 2016.
+ * Project page: https://eheitzresearch.wordpress.com/415-2/
+ */
+
+/**
+ * Diffuse *clipped* sphere integral.
+ * `avg_dir_z` is remapped from [-1..1] to [0..1] and used with `form_factor` as coordinates
+ * inside the tabulated integral stored in `utility_tx`.
+ */
+float ltc_diffuse_sphere_integral(sampler2DArray utility_tx, float avg_dir_z, float form_factor)
+{
+#if 1
+  /* Use the tabulated horizon-clipped sphere. */
+  vec2 lut_uv = vec2(avg_dir_z * 0.5 + 0.5, form_factor);
+  lut_uv = lut_uv * UTIL_TEX_UV_SCALE + UTIL_TEX_UV_BIAS;
+
+  vec4 lut_sample = texture(utility_tx, vec3(lut_uv, UTIL_DISK_INTEGRAL_LAYER));
+  return lut_sample[UTIL_DISK_INTEGRAL_COMP];
+#else
+  /* Cheap approximation. Less smooth and has energy issues. */
+  float form_factor_sqr = form_factor * form_factor;
+  return max((form_factor_sqr + avg_dir_z) / (form_factor + 1.0), 0.0);
+#endif
+}
+
+/**
+ * An extended version of the implementation from
+ * "How to solve a cubic equation, revisited"
+ * http://momentsingraphics.de/?p=105
+ *
+ * Return the three roots of the cubic polynomial described by `coefs`.
+ * `coefs.w` is the coefficient used to normalize the polynomial.
+ * The smallest of the three roots is always stored inside the Y component of the result.
+ */
+vec3 ltc_solve_cubic(vec4 coefs)
+{
+ /* Normalize the polynomial */
+ coefs.xyz /= coefs.w;
+ /* Divide middle coefficients by three */
+ coefs.yz /= 3.0;
+
+ float A = coefs.w;
+ float B = coefs.z;
+ float C = coefs.y;
+ float D = coefs.x;
+
+ /* Compute the Hessian and the discriminant */
+ vec3 delta = vec3(-coefs.zy * coefs.zz + coefs.yx, dot(vec2(coefs.z, -coefs.y), coefs.xy));
+
+ /* Discriminant */
+ float discr = dot(vec2(4.0 * delta.x, -delta.y), delta.zy);
+
+ /* Clamping avoids NaN output on some platforms. (see T67060) */
+ float sqrt_discr = sqrt(clamp(discr, 0.0, FLT_MAX));
+
+ /* Homogeneous (numerator, denominator) pairs of the roots computed by each algorithm. */
+ vec2 xlc, xsc;
+
+ /* Algorithm A */
+ {
+ float A_a = 1.0;
+ float C_a = delta.x;
+ float D_a = -2.0 * B * delta.x + delta.y;
+
+ /* Take the cubic root of a normalized complex number */
+ float theta = atan(sqrt_discr, -D_a) / 3.0;
+
+ float _2_sqrt_C_a = 2.0 * sqrt(-C_a);
+ float x_1a = _2_sqrt_C_a * cos(theta);
+ float x_3a = _2_sqrt_C_a * cos(theta + (2.0 / 3.0) * M_PI);
+
+ float xl;
+ if ((x_1a + x_3a) > 2.0 * B) {
+ xl = x_1a;
+ }
+ else {
+ xl = x_3a;
+ }
+
+ xlc = vec2(xl - B, A);
+ }
+
+ /* Algorithm D */
+ {
+ float A_d = D;
+ float C_d = delta.z;
+ float D_d = -D * delta.y + 2.0 * C * delta.z;
+
+ /* Take the cubic root of a normalized complex number */
+ float theta = atan(D * sqrt_discr, -D_d) / 3.0;
+
+ float _2_sqrt_C_d = 2.0 * sqrt(-C_d);
+ float x_1d = _2_sqrt_C_d * cos(theta);
+ float x_3d = _2_sqrt_C_d * cos(theta + (2.0 / 3.0) * M_PI);
+
+ float xs;
+ if (x_1d + x_3d < 2.0 * C) {
+ xs = x_1d;
+ }
+ else {
+ xs = x_3d;
+ }
+
+ xsc = vec2(-D, xs + C);
+ }
+
+ /* The remaining root is derived from the two roots found above. */
+ float E = xlc.y * xsc.y;
+ float F = -xlc.x * xsc.y - xlc.y * xsc.x;
+ float G = xlc.x * xsc.x;
+
+ vec2 xmc = vec2(C * F - B * G, -B * F + C * E);
+
+ vec3 root = vec3(xsc.x / xsc.y, xmc.x / xmc.y, xlc.x / xlc.y);
+
+ /* Swap components so that the smallest root ends up in the Y component. */
+ if (root.x < root.y && root.x < root.z) {
+ root.xyz = root.yxz;
+ }
+ else if (root.z < root.x && root.z < root.y) {
+ root.xyz = root.xzy;
+ }
+
+ return root;
+}
+
+/**
+ * Vector form of the integral along the edge going from direction `v1` to direction `v2`.
+ * Uses a rational fit of `theta / sin(theta)` instead of evaluating `acos()`.
+ *
+ * From "Real-Time Area Lighting: a Journey from Research to Production"
+ * Stephen Hill and Eric Heitz.
+ */
+vec3 ltc_edge_integral_vec(vec3 v1, vec3 v2)
+{
+  float cos_theta = dot(v1, v2);
+  float abs_cos_theta = abs(cos_theta);
+
+  /* Rational polynomial fit. */
+  float numerator = 0.8543985 + (0.4965155 + 0.0145206 * abs_cos_theta) * abs_cos_theta;
+  float denominator = 3.4175940 + (4.1616724 + abs_cos_theta) * abs_cos_theta;
+  float fit = numerator / denominator;
+
+  float theta_sintheta;
+  if (cos_theta > 0.0) {
+    theta_sintheta = fit;
+  }
+  else {
+    /* The clamp avoids a division by zero when the vectors are opposite. */
+    theta_sintheta = 0.5 * inversesqrt(max(1.0 - cos_theta * cos_theta, 1e-7)) - fit;
+  }
+
+  return cross(v1, v2) * theta_sintheta;
+}
+
+/**
+ * Expand the four values of `lut` into the 3x3 inverse LTC matrix.
+ * The second column is always the Y axis.
+ */
+mat3 ltc_matrix(vec4 lut)
+{
+  /* Load inverse matrix. */
+  vec3 column_0 = vec3(lut.x, 0, lut.y);
+  vec3 column_1 = vec3(0, 1, 0);
+  vec3 column_2 = vec3(lut.z, 0, lut.w);
+  return mat3(column_0, column_1, column_2);
+}
+
+/**
+ * Transform the quad `corners` into the shading basis built around `N` and `V`, apply the
+ * inverse LTC matrix `Minv` and normalize each resulting corner.
+ */
+void ltc_transform_quad(vec3 N, vec3 V, mat3 Minv, inout vec3 corners[4])
+{
+  /* Avoid dot(N, V) == 1 in ortho mode, leading T1 normalize to fail. */
+  V = normalize(V + 1e-8);
+
+  /* Construct orthonormal basis around N. */
+  vec3 T1 = normalize(V - N * dot(N, V));
+  vec3 T2 = cross(N, T1);
+
+  /* Rotate area light in (T1, T2, N) basis, then apply the LTC inverse matrix. */
+  mat3 world_to_ltc = Minv * transpose(mat3(T1, T2, N));
+
+  for (int i = 0; i < 4; i++) {
+    corners[i] = normalize(world_to_ltc * corners[i]);
+  }
+}
+
+/**
+ * Integrate the quad described by the normalized directions `corners` for the normal `N`.
+ * If the corners have already passed through `ltc_transform_quad()`, then N **MUST** be
+ * vec3(0.0, 0.0, 1.0), corresponding to the Up axis of the shading basis.
+ */
+float ltc_evaluate_quad(sampler2DArray utility_tx, vec3 corners[4], vec3 N)
+{
+  /* Approximation using a sphere of the same solid angle as the quad.
+   * Finding the clipped sphere diffuse integral is easier than clipping the quad. */
+  vec3 edge_01 = ltc_edge_integral_vec(corners[0], corners[1]);
+  vec3 edge_12 = ltc_edge_integral_vec(corners[1], corners[2]);
+  vec3 edge_23 = ltc_edge_integral_vec(corners[2], corners[3]);
+  vec3 edge_30 = ltc_edge_integral_vec(corners[3], corners[0]);
+  vec3 avg_dir = ((edge_01 + edge_12) + edge_23) + edge_30;
+
+  float form_factor = length(avg_dir);
+  float avg_dir_z = dot(N, avg_dir / form_factor);
+  return form_factor * ltc_diffuse_sphere_integral(utility_tx, avg_dir_z, form_factor);
+}
+
+/**
+ * Integrate a disk that does not need to be transformed and is already front facing.
+ * The form factor evaluates to `r^2 / (1 + r^2)` with `r` being `disk_radius`.
+ * `NL` is used as the Z component of the average direction.
+ */
+float ltc_evaluate_disk_simple(sampler2DArray utility_tx, float disk_radius, float NL)
+{
+  float radius_sqr = disk_radius * disk_radius;
+  float denominator = 1.0 + radius_sqr;
+  float form_factor = radius_sqr * inversesqrt(denominator * denominator);
+  return form_factor * ltc_diffuse_sphere_integral(utility_tx, NL, form_factor);
+}
+
+/**
+ * Integrate a disk (or ellipse) light transformed by the inverse LTC matrix `Minv`.
+ *
+ * `disk_points` are WS vectors from the shading point to the disk "bounding domain".
+ * The center is the midpoint of points 0 and 2. The two half axes go from point 2 to point 1
+ * and from point 0 to point 1.
+ *
+ * Return the form factor of the sphere matching the transformed ellipse, multiplied by the
+ * clipped sphere integral.
+ */
+float ltc_evaluate_disk(sampler2DArray utility_tx, vec3 N, vec3 V, mat3 Minv, vec3 disk_points[3])
+{
+ /* Avoid dot(N, V) == 1 in ortho mode, leading T1 normalize to fail. */
+ V = normalize(V + 1e-8);
+
+ /* construct orthonormal basis around N */
+ vec3 T1, T2;
+ T1 = normalize(V - N * dot(V, N));
+ T2 = cross(N, T1);
+
+ /* rotate area light in (T1, T2, N) basis */
+ mat3 R = transpose(mat3(T1, T2, N));
+
+ /* Intermediate step: init ellipse. */
+ vec3 L_[3];
+ L_[0] = mul(R, disk_points[0]);
+ L_[1] = mul(R, disk_points[1]);
+ L_[2] = mul(R, disk_points[2]);
+
+ /* Center and half axes of the ellipse. */
+ vec3 C = 0.5 * (L_[0] + L_[2]);
+ vec3 V1 = 0.5 * (L_[1] - L_[2]);
+ vec3 V2 = 0.5 * (L_[1] - L_[0]);
+
+ /* Transform ellipse by Minv. */
+ C = Minv * C;
+ V1 = Minv * V1;
+ V2 = Minv * V2;
+
+ /* Compute eigenvectors of new ellipse. */
+
+ float d11 = dot(V1, V1);
+ float d22 = dot(V2, V2);
+ float d12 = dot(V1, V2);
+ float a, b; /* Eigenvalues */
+ const float threshold = 0.0007; /* Can be adjusted. Fix artifacts. */
+ /* Only solve for the eigenvectors if the transformed axes are not orthogonal anymore. */
+ if (abs(d12) / sqrt(d11 * d22) > threshold) {
+ float tr = d11 + d22;
+ float det = -d12 * d12 + d11 * d22;
+
+ /* use sqrt matrix to solve for eigenvalues */
+ det = sqrt(det);
+ float u = 0.5 * sqrt(tr - 2.0 * det);
+ float v = 0.5 * sqrt(tr + 2.0 * det);
+ float e_max = (u + v);
+ float e_min = (u - v);
+ e_max *= e_max;
+ e_min *= e_min;
+
+ vec3 V1_, V2_;
+ if (d11 > d22) {
+ V1_ = d12 * V1 + (e_max - d11) * V2;
+ V2_ = d12 * V1 + (e_min - d11) * V2;
+ }
+ else {
+ V1_ = d12 * V2 + (e_max - d22) * V1;
+ V2_ = d12 * V2 + (e_min - d22) * V1;
+ }
+
+ a = 1.0 / e_max;
+ b = 1.0 / e_min;
+ V1 = normalize(V1_);
+ V2 = normalize(V2_);
+ }
+ else {
+ /* Axes are considered orthogonal: only normalize them. */
+ a = 1.0 / d11;
+ b = 1.0 / d22;
+ V1 *= sqrt(a);
+ V2 *= sqrt(b);
+ }
+
+ /* Now find front facing ellipse with same solid angle. */
+
+ /* Ellipse normal, flipped to be on the same side as the center. */
+ vec3 V3 = normalize(cross(V1, V2));
+ if (dot(C, V3) < 0.0) {
+ V3 *= -1.0;
+ }
+
+ float L = dot(V3, C);
+ float inv_L = 1.0 / L;
+ float x0 = dot(V1, C) * inv_L;
+ float y0 = dot(V2, C) * inv_L;
+
+ float L_sqr = L * L;
+ a *= L_sqr;
+ b *= L_sqr;
+
+ /* Coefficients of the cubic polynomial to solve. */
+ float t = 1.0 + x0 * x0;
+ float c0 = a * b;
+ float c1 = c0 * (t + y0 * y0) - a - b;
+ float c2 = (1.0 - a * t) - b * (1.0 + y0 * y0);
+ float c3 = 1.0;
+
+ vec3 roots = ltc_solve_cubic(vec4(c0, c1, c2, c3));
+ float e1 = roots.x;
+ float e2 = roots.y;
+ float e3 = roots.z;
+
+ vec3 avg_dir = vec3(a * x0 / (a - e2), b * y0 / (b - e2), 1.0);
+
+ mat3 rotate = mat3(V1, V2, V3);
+
+ avg_dir = rotate * avg_dir;
+ avg_dir = normalize(avg_dir);
+
+ /* L1, L2 are the extents of the front facing ellipse. */
+ float L1 = sqrt(-e2 / e3);
+ float L2 = sqrt(-e2 / e1);
+
+ /* Find the sphere and compute lighting. */
+ float form_factor = max(0.0, L1 * L2 * inversesqrt((1.0 + L1 * L1) * (1.0 + L2 * L2)));
+ return form_factor * ltc_diffuse_sphere_integral(utility_tx, avg_dir.z, form_factor);
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh
new file mode 100644
index 00000000000..56fda25ed13
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "eevee_defines.hh"
+#include "gpu_shader_create_info.hh"
+
+/* -------------------------------------------------------------------- */
+/** \name Shared
+ * \{ */
+
+/* Read-only storage buffers (slots 0 to 3) exposing the light culling data, the light array,
+ * the Z-bin array and the tile word array to a shader. */
+GPU_SHADER_CREATE_INFO(eevee_light_data)
+ .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf")
+ .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]")
+ .storage_buf(2, Qualifier::READ, "uint", "light_zbin_buf[]")
+ .storage_buf(3, Qualifier::READ, "uint", "light_tile_buf[]");
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Culling
+ * \{ */
+
+/* Compute shader `eevee_light_culling_select_comp.glsl`.
+ * Reads `in_light_buf` and writes `out_light_buf`, `out_zdist_buf` and `out_key_buf`.
+ * This is the only culling shader declaring `light_cull_buf` as read-write. */
+GPU_SHADER_CREATE_INFO(eevee_light_culling_select)
+ .do_static_compilation(true)
+ .additional_info("eevee_shared", "draw_view")
+ .local_group_size(CULLING_SELECT_GROUP_SIZE)
+ .storage_buf(0, Qualifier::READ_WRITE, "LightCullingData", "light_cull_buf")
+ .storage_buf(1, Qualifier::READ, "LightData", "in_light_buf[]")
+ .storage_buf(2, Qualifier::WRITE, "LightData", "out_light_buf[]")
+ .storage_buf(3, Qualifier::WRITE, "float", "out_zdist_buf[]")
+ .storage_buf(4, Qualifier::WRITE, "uint", "out_key_buf[]")
+ .compute_source("eevee_light_culling_select_comp.glsl");
+
+/* Compute shader `eevee_light_culling_sort_comp.glsl`.
+ * Reads `in_light_buf`, `in_zdist_buf` and `in_key_buf` and writes `out_light_buf`.
+ * Slots 3 and 4 use the same types as the Z distance and key outputs of the select shader. */
+GPU_SHADER_CREATE_INFO(eevee_light_culling_sort)
+ .do_static_compilation(true)
+ .additional_info("eevee_shared", "draw_view")
+ .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf")
+ .storage_buf(1, Qualifier::READ, "LightData", "in_light_buf[]")
+ .storage_buf(2, Qualifier::WRITE, "LightData", "out_light_buf[]")
+ .storage_buf(3, Qualifier::READ, "float", "in_zdist_buf[]")
+ .storage_buf(4, Qualifier::READ, "uint", "in_key_buf[]")
+ .local_group_size(CULLING_SORT_GROUP_SIZE)
+ .compute_source("eevee_light_culling_sort_comp.glsl");
+
+/* Compute shader `eevee_light_culling_zbin_comp.glsl`.
+ * Reads the culling data and `light_buf` and writes `out_zbin_buf`. */
+GPU_SHADER_CREATE_INFO(eevee_light_culling_zbin)
+ .do_static_compilation(true)
+ .additional_info("eevee_shared", "draw_view")
+ .local_group_size(CULLING_ZBIN_GROUP_SIZE)
+ .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf")
+ .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]")
+ .storage_buf(2, Qualifier::WRITE, "uint", "out_zbin_buf[]")
+ .compute_source("eevee_light_culling_zbin_comp.glsl");
+
+/* Compute shader `eevee_light_culling_tile_comp.glsl`.
+ * Reads the culling data and `light_buf` and writes `out_light_tile_buf`. */
+GPU_SHADER_CREATE_INFO(eevee_light_culling_tile)
+ .do_static_compilation(true)
+ .additional_info("eevee_shared", "draw_view")
+ .local_group_size(CULLING_TILE_GROUP_SIZE)
+ .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf")
+ .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]")
+ .storage_buf(2, Qualifier::WRITE, "uint", "out_light_tile_buf[]")
+ .compute_source("eevee_light_culling_tile_comp.glsl");
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Debug
+ * \{ */
+
+/* Fragment shader `eevee_light_culling_debug_frag.glsl`.
+ * Samples the depth texture `depth_tx` and outputs a single color to `out_debug_color`.
+ * The light buffers come from the `eevee_light_data` info. */
+GPU_SHADER_CREATE_INFO(eevee_light_culling_debug)
+ .do_static_compilation(true)
+ .sampler(0, ImageType::DEPTH_2D, "depth_tx")
+ .fragment_out(0, Type::VEC4, "out_debug_color")
+ .additional_info("eevee_shared", "draw_view")
+ .fragment_source("eevee_light_culling_debug_frag.glsl")
+ .additional_info("draw_fullscreen", "eevee_light_data");
+
+/** \} */
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index 65a6a2dc6b7..1d67b5be4fb 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -495,6 +495,7 @@ set(SRC_SHADER_CREATE_INFOS
../draw/engines/basic/shaders/infos/basic_depth_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_film_info.hh
+ ../draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh