Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'source/blender/draw/engines')
-rw-r--r--source/blender/draw/engines/basic/basic_engine.c10
-rw-r--r--source/blender/draw/engines/basic/basic_private.h1
-rw-r--r--source/blender/draw/engines/basic/basic_shader.c11
-rw-r--r--source/blender/draw/engines/basic/shaders/basic_depth_curves_vert.glsl27
-rw-r--r--source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh6
-rw-r--r--source/blender/draw/engines/compositor/compositor_engine.cc203
-rw-r--r--source/blender/draw/engines/compositor/compositor_engine.h13
-rw-r--r--source/blender/draw/engines/eevee/eevee_bloom.c24
-rw-r--r--source/blender/draw/engines/eevee/eevee_cryptomatte.c17
-rw-r--r--source/blender/draw/engines/eevee/eevee_engine.c16
-rw-r--r--source/blender/draw/engines/eevee/eevee_materials.c2
-rw-r--r--source/blender/draw/engines/eevee/eevee_private.h2
-rw-r--r--source/blender/draw/engines/eevee/eevee_render.c7
-rw-r--r--source/blender/draw/engines/eevee/eevee_sampling.c5
-rw-r--r--source/blender/draw/engines/eevee/eevee_screen_raytrace.c2
-rw-r--r--source/blender/draw/engines/eevee/eevee_shaders.c14
-rw-r--r--source/blender/draw/engines/eevee/eevee_shadows_cascade.c2
-rw-r--r--source/blender/draw/engines/eevee/eevee_volumes.c17
-rw-r--r--source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl4
-rw-r--r--source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl4
-rw-r--r--source/blender/draw/engines/eevee/shaders/cryptomatte_lib.glsl19
-rw-r--r--source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl1
-rw-r--r--source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl2
-rw-r--r--source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl2
-rw-r--r--source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl2
-rw-r--r--source/blender/draw/engines/eevee/shaders/prepass_frag.glsl14
-rw-r--r--source/blender/draw/engines/eevee/shaders/shadow_vert.glsl6
-rw-r--r--source/blender/draw/engines/eevee/shaders/surface_frag.glsl7
-rw-r--r--source/blender/draw/engines/eevee/shaders/surface_lib.glsl8
-rw-r--r--source/blender/draw/engines/eevee/shaders/surface_vert.glsl6
-rw-r--r--source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl6
-rw-r--r--source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl5
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_camera.cc21
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_camera.hh26
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_defines.hh79
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc761
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh200
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_engine.cc68
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_film.cc633
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_film.hh218
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc99
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh88
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_instance.cc147
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_instance.hh48
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_light.cc488
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_light.hh169
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_material.cc95
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_material.hh15
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_motion_blur.cc256
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_motion_blur.hh130
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_pipeline.cc284
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_pipeline.hh84
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_renderbuffers.cc93
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_renderbuffers.hh55
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_sampling.cc268
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_sampling.hh195
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_shader.cc97
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_shader.hh35
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_shader_shared.hh584
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_sync.cc89
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_sync.hh12
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_velocity.cc128
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_velocity.hh86
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_view.cc135
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_view.hh30
-rw-r--r--source/blender/draw/engines/eevee_next/eevee_world.cc6
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl44
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl20
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl37
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl680
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl55
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl32
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl163
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl99
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl70
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl327
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl247
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl178
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl62
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl45
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl46
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl367
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl97
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl78
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_film_comp.glsl13
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_film_frag.glsl31
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl701
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl24
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl121
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl54
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl62
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl57
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl188
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl56
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl129
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl72
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl209
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl299
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl115
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl103
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl221
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl48
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl100
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl104
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl27
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl105
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl6
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl13
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl107
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl58
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh247
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/infos/eevee_film_info.hh47
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh31
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh76
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh71
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh46
-rw-r--r--source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh44
-rw-r--r--source/blender/draw/engines/external/external_engine.c12
-rw-r--r--source/blender/draw/engines/gpencil/gpencil_engine.h2
-rw-r--r--source/blender/draw/engines/gpencil/gpencil_shader_shared.h26
-rw-r--r--source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl2
-rw-r--r--source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl2
-rw-r--r--source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl4
-rw-r--r--source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh2
-rw-r--r--source/blender/draw/engines/overlay/overlay_antialiasing.c2
-rw-r--r--source/blender/draw/engines/overlay/overlay_armature.c4
-rw-r--r--source/blender/draw/engines/overlay/overlay_edit_text.c44
-rw-r--r--source/blender/draw/engines/overlay/overlay_edit_uv.c4
-rw-r--r--source/blender/draw/engines/overlay/overlay_engine.c17
-rw-r--r--source/blender/draw/engines/overlay/overlay_extra.c16
-rw-r--r--source/blender/draw/engines/overlay/overlay_outline.c15
-rw-r--r--source/blender/draw/engines/overlay/overlay_private.h21
-rw-r--r--source/blender/draw/engines/overlay/overlay_sculpt_curves.cc96
-rw-r--r--source/blender/draw/engines/overlay/overlay_shader.c26
-rw-r--r--source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh1
-rw-r--r--source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh10
-rw-r--r--source/blender/draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh21
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl16
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl4
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl4
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl8
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl6
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl12
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl4
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl6
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl4
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl4
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl81
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl6
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl5
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl34
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl2
-rw-r--r--source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl6
-rw-r--r--source/blender/draw/engines/select/select_engine.c2
-rw-r--r--source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl6
-rw-r--r--source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl2
-rw-r--r--source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl6
-rw-r--r--source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl2
-rw-r--r--source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl2
-rw-r--r--source/blender/draw/engines/workbench/workbench_engine.c2
-rw-r--r--source/blender/draw/engines/workbench/workbench_render.c5
173 files changed, 11975 insertions, 1024 deletions
diff --git a/source/blender/draw/engines/basic/basic_engine.c b/source/blender/draw/engines/basic/basic_engine.c
index 04a3c27959d..975d9e299bf 100644
--- a/source/blender/draw/engines/basic/basic_engine.c
+++ b/source/blender/draw/engines/basic/basic_engine.c
@@ -53,6 +53,7 @@ typedef struct BASIC_PrivateData {
DRWShadingGroup *depth_shgrp[2];
DRWShadingGroup *depth_shgrp_cull[2];
DRWShadingGroup *depth_hair_shgrp[2];
+ DRWShadingGroup *depth_curves_shgrp[2];
DRWShadingGroup *depth_pointcloud_shgrp[2];
bool use_material_slot_selection;
} BASIC_PrivateData; /* Transient data */
@@ -99,6 +100,9 @@ static void basic_cache_init(void *vedata)
stl->g_data->depth_hair_shgrp[i] = grp = DRW_shgroup_create(
BASIC_shaders_depth_sh_get(draw_ctx->sh_cfg), psl->depth_pass[i]);
+ stl->g_data->depth_curves_shgrp[i] = grp = DRW_shgroup_create(
+ BASIC_shaders_curves_depth_sh_get(draw_ctx->sh_cfg), psl->depth_pass[i]);
+
sh = DRW_state_is_select() ? BASIC_shaders_depth_conservative_sh_get(draw_ctx->sh_cfg) :
BASIC_shaders_depth_sh_get(draw_ctx->sh_cfg);
state |= DRW_STATE_CULL_BACK;
@@ -156,8 +160,12 @@ static void basic_cache_populate(void *vedata, Object *ob)
basic_cache_populate_particles(vedata, ob);
}
- /* Make flat object selectable in ortho view if wireframe is enabled. */
const bool do_in_front = (ob->dtx & OB_DRAW_IN_FRONT) != 0;
+ if (ob->type == OB_CURVES) {
+ DRW_shgroup_curves_create_sub(ob, stl->g_data->depth_curves_shgrp[do_in_front], NULL);
+ }
+
+ /* Make flat object selectable in ortho view if wireframe is enabled. */
if ((draw_ctx->v3d->overlay.flag & V3D_OVERLAY_WIREFRAMES) ||
(draw_ctx->v3d->shading.type == OB_WIRE) || (ob->dtx & OB_DRAWWIRE) || (ob->dt == OB_WIRE)) {
int flat_axis = 0;
diff --git a/source/blender/draw/engines/basic/basic_private.h b/source/blender/draw/engines/basic/basic_private.h
index 22b458baca2..197831b9ee8 100644
--- a/source/blender/draw/engines/basic/basic_private.h
+++ b/source/blender/draw/engines/basic/basic_private.h
@@ -11,6 +11,7 @@ extern "C" {
GPUShader *BASIC_shaders_depth_sh_get(eGPUShaderConfig config);
GPUShader *BASIC_shaders_pointcloud_depth_sh_get(eGPUShaderConfig config);
+GPUShader *BASIC_shaders_curves_depth_sh_get(eGPUShaderConfig config);
GPUShader *BASIC_shaders_depth_conservative_sh_get(eGPUShaderConfig config);
GPUShader *BASIC_shaders_pointcloud_depth_conservative_sh_get(eGPUShaderConfig config);
void BASIC_shaders_free(void);
diff --git a/source/blender/draw/engines/basic/basic_shader.c b/source/blender/draw/engines/basic/basic_shader.c
index 3d40c627fff..5b7636ca9fd 100644
--- a/source/blender/draw/engines/basic/basic_shader.c
+++ b/source/blender/draw/engines/basic/basic_shader.c
@@ -24,6 +24,7 @@ typedef struct BASIC_Shaders {
/* Depth Pre Pass */
struct GPUShader *depth;
struct GPUShader *pointcloud_depth;
+ struct GPUShader *curves_depth;
struct GPUShader *depth_conservative;
struct GPUShader *pointcloud_depth_conservative;
} BASIC_Shaders;
@@ -53,6 +54,16 @@ GPUShader *BASIC_shaders_pointcloud_depth_sh_get(eGPUShaderConfig config)
return sh_data->pointcloud_depth;
}
+GPUShader *BASIC_shaders_curves_depth_sh_get(eGPUShaderConfig config)
+{
+ BASIC_Shaders *sh_data = &e_data.sh_data[config];
+ if (sh_data->curves_depth == NULL) {
+ sh_data->curves_depth = GPU_shader_create_from_info_name(
+ config == GPU_SHADER_CFG_CLIPPED ? "basic_depth_curves_clipped" : "basic_depth_curves");
+ }
+ return sh_data->curves_depth;
+}
+
GPUShader *BASIC_shaders_depth_conservative_sh_get(eGPUShaderConfig config)
{
BASIC_Shaders *sh_data = &e_data.sh_data[config];
diff --git a/source/blender/draw/engines/basic/shaders/basic_depth_curves_vert.glsl b/source/blender/draw/engines/basic/shaders/basic_depth_curves_vert.glsl
new file mode 100644
index 00000000000..b0da9754fc6
--- /dev/null
+++ b/source/blender/draw/engines/basic/shaders/basic_depth_curves_vert.glsl
@@ -0,0 +1,27 @@
+
+#pragma BLENDER_REQUIRE(common_hair_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_clipping_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+
+void main()
+{
+ GPU_INTEL_VERTEX_SHADER_WORKAROUND
+
+ bool is_persp = (ProjectionMatrix[3][3] == 0.0);
+ float time, thick_time, thickness;
+ vec3 world_pos, tan, binor;
+ hair_get_pos_tan_binor_time(is_persp,
+ ModelMatrixInverse,
+ ViewMatrixInverse[3].xyz,
+ ViewMatrixInverse[2].xyz,
+ world_pos,
+ tan,
+ binor,
+ time,
+ thickness,
+ thick_time);
+
+ gl_Position = point_world_to_ndc(world_pos);
+
+ view_clipping_distances(world_pos);
+}
diff --git a/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh b/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh
index bae50eb48fa..561cef0e442 100644
--- a/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh
+++ b/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh
@@ -27,6 +27,9 @@ GPU_SHADER_CREATE_INFO(basic_pointcloud)
.vertex_source("basic_depth_pointcloud_vert.glsl")
.additional_info("draw_pointcloud");
+GPU_SHADER_CREATE_INFO(basic_curves)
+ .vertex_source("basic_depth_curves_vert.glsl")
+ .additional_info("draw_hair");
/** \} */
/* -------------------------------------------------------------------- */
@@ -46,7 +49,8 @@ GPU_SHADER_CREATE_INFO(basic_pointcloud)
#define BASIC_OBTYPE_VARIATIONS(prefix, ...) \
BASIC_CONSERVATIVE_VARIATIONS(prefix##_mesh, "basic_mesh", __VA_ARGS__) \
- BASIC_CONSERVATIVE_VARIATIONS(prefix##_pointcloud, "basic_pointcloud", __VA_ARGS__)
+ BASIC_CONSERVATIVE_VARIATIONS(prefix##_pointcloud, "basic_pointcloud", __VA_ARGS__) \
+ BASIC_CLIPPING_VARIATIONS(prefix##_curves, "basic_curves", __VA_ARGS__)
/** \} */
diff --git a/source/blender/draw/engines/compositor/compositor_engine.cc b/source/blender/draw/engines/compositor/compositor_engine.cc
new file mode 100644
index 00000000000..f36a59a4ce6
--- /dev/null
+++ b/source/blender/draw/engines/compositor/compositor_engine.cc
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "BLI_listbase.h"
+#include "BLI_math_vec_types.hh"
+#include "BLI_string_ref.hh"
+#include "BLI_utildefines.h"
+
+#include "BLT_translation.h"
+
+#include "DNA_ID_enums.h"
+#include "DNA_scene_types.h"
+
+#include "DEG_depsgraph_query.h"
+
+#include "DRW_render.h"
+
+#include "IMB_colormanagement.h"
+
+#include "COM_context.hh"
+#include "COM_evaluator.hh"
+#include "COM_texture_pool.hh"
+
+#include "GPU_texture.h"
+
+namespace blender::draw::compositor {
+
+class TexturePool : public realtime_compositor::TexturePool {
+ public:
+ GPUTexture *allocate_texture(int2 size, eGPUTextureFormat format) override
+ {
+ DrawEngineType *owner = (DrawEngineType *)this;
+ return DRW_texture_pool_query_2d(size.x, size.y, format, owner);
+ }
+};
+
+class Context : public realtime_compositor::Context {
+ private:
+ /* A pointer to the info message of the compositor engine. This is a char array of size
+ * GPU_INFO_SIZE. The message is cleared prior to updating or evaluating the compositor. */
+ char *info_message_;
+
+ public:
+ Context(realtime_compositor::TexturePool &texture_pool, char *info_message)
+ : realtime_compositor::Context(texture_pool), info_message_(info_message)
+ {
+ }
+
+ const Scene *get_scene() const override
+ {
+ return DRW_context_state_get()->scene;
+ }
+
+ int2 get_output_size() override
+ {
+ return int2(float2(DRW_viewport_size_get()));
+ }
+
+ GPUTexture *get_output_texture() override
+ {
+ return DRW_viewport_texture_list_get()->color;
+ }
+
+ GPUTexture *get_input_texture(int UNUSED(view_layer), eScenePassType UNUSED(pass_type)) override
+ {
+ return get_output_texture();
+ }
+
+ StringRef get_view_name() override
+ {
+ const SceneRenderView *view = static_cast<SceneRenderView *>(
+ BLI_findlink(&get_scene()->r.views, DRW_context_state_get()->v3d->multiview_eye));
+ return view->name;
+ }
+
+ void set_info_message(StringRef message) const override
+ {
+ message.copy(info_message_, GPU_INFO_SIZE);
+ }
+};
+
+class Engine {
+ private:
+ TexturePool texture_pool_;
+ Context context_;
+ realtime_compositor::Evaluator evaluator_;
+ /* Stores the viewport size at the time the last compositor evaluation happened. See the
+ * update_viewport_size method for more information. */
+ int2 last_viewport_size_;
+
+ public:
+ Engine(char *info_message)
+ : context_(texture_pool_, info_message),
+ evaluator_(context_, node_tree()),
+ last_viewport_size_(context_.get_output_size())
+ {
+ }
+
+ /* Update the viewport size and evaluate the compositor. */
+ void draw()
+ {
+ update_viewport_size();
+ evaluator_.evaluate();
+ }
+
+ /* If the size of the viewport changed from the last time the compositor was evaluated, update
+ * the viewport size and reset the evaluator. That's because the evaluator compiles the node tree
+ * in a manner that is specifically optimized for the size of the viewport. This should be called
+ * before evaluating the compositor. */
+ void update_viewport_size()
+ {
+ if (last_viewport_size_ == context_.get_output_size()) {
+ return;
+ }
+
+ last_viewport_size_ = context_.get_output_size();
+
+ evaluator_.reset();
+ }
+
+ /* If the compositor node tree changed, reset the evaluator. */
+ void update(const Depsgraph *depsgraph)
+ {
+ if (DEG_id_type_updated(depsgraph, ID_NT)) {
+ evaluator_.reset();
+ }
+ }
+
+ /* Get a reference to the compositor node tree. */
+ static bNodeTree &node_tree()
+ {
+ return *DRW_context_state_get()->scene->nodetree;
+ }
+};
+
+} // namespace blender::draw::compositor
+
+using namespace blender::draw::compositor;
+
+struct COMPOSITOR_Data {
+ DrawEngineType *engine_type;
+ DRWViewportEmptyList *fbl;
+ DRWViewportEmptyList *txl;
+ DRWViewportEmptyList *psl;
+ DRWViewportEmptyList *stl;
+ Engine *instance_data;
+ char info[GPU_INFO_SIZE];
+};
+
+static void compositor_engine_init(void *data)
+{
+ COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data);
+
+ if (!compositor_data->instance_data) {
+ compositor_data->instance_data = new Engine(compositor_data->info);
+ }
+}
+
+static void compositor_engine_free(void *instance_data)
+{
+ Engine *engine = static_cast<Engine *>(instance_data);
+ delete engine;
+}
+
+static void compositor_engine_draw(void *data)
+{
+ const COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data);
+ compositor_data->instance_data->draw();
+}
+
+static void compositor_engine_update(void *data)
+{
+ COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data);
+
+ /* Clear any info message that was set in a previous update. */
+ compositor_data->info[0] = '\0';
+
+ if (compositor_data->instance_data) {
+ compositor_data->instance_data->update(DRW_context_state_get()->depsgraph);
+ }
+}
+
+extern "C" {
+
+static const DrawEngineDataSize compositor_data_size = DRW_VIEWPORT_DATA_SIZE(COMPOSITOR_Data);
+
+DrawEngineType draw_engine_compositor_type = {
+ nullptr, /* next */
+ nullptr, /* prev */
+ N_("Compositor"), /* idname */
+ &compositor_data_size, /* vedata_size */
+ &compositor_engine_init, /* engine_init */
+ nullptr, /* engine_free */
+ &compositor_engine_free, /* instance_free */
+ nullptr, /* cache_init */
+ nullptr, /* cache_populate */
+ nullptr, /* cache_finish */
+ &compositor_engine_draw, /* draw_scene */
+ &compositor_engine_update, /* view_update */
+ nullptr, /* id_update */
+ nullptr, /* render_to_image */
+ nullptr, /* store_metadata */
+};
+}
diff --git a/source/blender/draw/engines/compositor/compositor_engine.h b/source/blender/draw/engines/compositor/compositor_engine.h
new file mode 100644
index 00000000000..5de0de8a0b3
--- /dev/null
+++ b/source/blender/draw/engines/compositor/compositor_engine.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern DrawEngineType draw_engine_compositor_type;
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/source/blender/draw/engines/eevee/eevee_bloom.c b/source/blender/draw/engines/eevee/eevee_bloom.c
index d12ce7213f9..4528027a9ea 100644
--- a/source/blender/draw/engines/eevee/eevee_bloom.c
+++ b/source/blender/draw/engines/eevee/eevee_bloom.c
@@ -125,7 +125,8 @@ static DRWShadingGroup *eevee_create_bloom_pass(const char *name,
struct GPUShader *sh,
DRWPass **pass,
bool upsample,
- bool resolve)
+ bool resolve,
+ bool resolve_add_base)
{
struct GPUBatch *quad = DRW_cache_fullscreen_quad_get();
@@ -141,7 +142,7 @@ static DRWShadingGroup *eevee_create_bloom_pass(const char *name,
}
if (resolve) {
DRW_shgroup_uniform_vec3(grp, "bloomColor", effects->bloom_color, 1);
- DRW_shgroup_uniform_bool_copy(grp, "bloomAddBase", true);
+ DRW_shgroup_uniform_bool_copy(grp, "bloomAddBase", resolve_add_base);
}
return grp;
@@ -193,18 +194,21 @@ void EEVEE_bloom_cache_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *ved
EEVEE_shaders_bloom_downsample_get(use_antiflicker),
&psl->bloom_downsample_first,
false,
+ false,
false);
eevee_create_bloom_pass("Bloom Downsample",
effects,
EEVEE_shaders_bloom_downsample_get(false),
&psl->bloom_downsample,
false,
+ false,
false);
eevee_create_bloom_pass("Bloom Upsample",
effects,
EEVEE_shaders_bloom_upsample_get(use_highres),
&psl->bloom_upsample,
true,
+ false,
false);
grp = eevee_create_bloom_pass("Bloom Blit",
@@ -212,6 +216,7 @@ void EEVEE_bloom_cache_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *ved
EEVEE_shaders_bloom_blit_get(use_antiflicker),
&psl->bloom_blit,
false,
+ false,
false);
DRW_shgroup_uniform_vec4(grp, "curveThreshold", effects->bloom_curve_threshold, 1);
DRW_shgroup_uniform_float(grp, "clampIntensity", &effects->bloom_clamp, 1);
@@ -221,6 +226,7 @@ void EEVEE_bloom_cache_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *ved
EEVEE_shaders_bloom_resolve_get(use_highres),
&psl->bloom_resolve,
true,
+ true,
true);
}
}
@@ -304,13 +310,13 @@ void EEVEE_bloom_output_init(EEVEE_ViewLayerData *UNUSED(sldata),
{GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(txl->bloom_accum)});
/* Create Pass and shgroup. */
- DRWShadingGroup *grp = eevee_create_bloom_pass("Bloom Accumulate",
- effects,
- EEVEE_shaders_bloom_resolve_get(use_highres),
- &psl->bloom_accum_ps,
- true,
- true);
- DRW_shgroup_uniform_bool_copy(grp, "bloomAddBase", false);
+ eevee_create_bloom_pass("Bloom Accumulate",
+ effects,
+ EEVEE_shaders_bloom_resolve_get(use_highres),
+ &psl->bloom_accum_ps,
+ true,
+ true,
+ false);
}
void EEVEE_bloom_output_accumulate(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *vedata)
diff --git a/source/blender/draw/engines/eevee/eevee_cryptomatte.c b/source/blender/draw/engines/eevee/eevee_cryptomatte.c
index 33063e14c03..d805a039e8f 100644
--- a/source/blender/draw/engines/eevee/eevee_cryptomatte.c
+++ b/source/blender/draw/engines/eevee/eevee_cryptomatte.c
@@ -25,7 +25,6 @@
* they take into account to create the render passes. When accurate mode is off the number of
* levels is used as the number of cryptomatte samples to take. When accuracy mode is on the number
* of render samples is used.
- *
*/
#include "DRW_engine.h"
@@ -94,7 +93,7 @@ BLI_INLINE int eevee_cryptomatte_pixel_stride(const ViewLayer *view_layer)
/** \} */
/* -------------------------------------------------------------------- */
-/** \name Init Renderpasses
+/** \name Init Render-Passes
* \{ */
void EEVEE_cryptomatte_renderpasses_init(EEVEE_Data *vedata)
@@ -249,7 +248,9 @@ void EEVEE_cryptomatte_object_curves_cache_populate(EEVEE_Data *vedata,
{
BLI_assert(ob->type == OB_CURVES);
Material *material = BKE_object_material_get_eval(ob, CURVES_MATERIAL_NR);
- eevee_cryptomatte_curves_cache_populate(vedata, sldata, ob, NULL, NULL, material);
+ DRWShadingGroup *grp = eevee_cryptomatte_shading_group_create(
+ vedata, sldata, ob, material, true);
+ DRW_shgroup_curves_create_sub(ob, grp, NULL);
}
void EEVEE_cryptomatte_particle_hair_cache_populate(EEVEE_Data *vedata,
@@ -420,27 +421,31 @@ void EEVEE_cryptomatte_output_accumulate(EEVEE_ViewLayerData *UNUSED(sldata), EE
void EEVEE_cryptomatte_update_passes(RenderEngine *engine, Scene *scene, ViewLayer *view_layer)
{
+ /* NOTE: Name channels lowercase rgba so that compression rules check in OpenEXR DWA code uses
+ * lossless compression. Reportedly this naming is the only one which works good from the
+ * interoperability point of view. Using XYZW naming is not portable. */
+
char cryptomatte_pass_name[MAX_NAME];
const short num_passes = eevee_cryptomatte_passes_per_layer(view_layer);
if ((view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_OBJECT) != 0) {
for (short pass = 0; pass < num_passes; pass++) {
BLI_snprintf_rlen(cryptomatte_pass_name, MAX_NAME, "CryptoObject%02d", pass);
RE_engine_register_pass(
- engine, scene, view_layer, cryptomatte_pass_name, 4, "RGBA", SOCK_RGBA);
+ engine, scene, view_layer, cryptomatte_pass_name, 4, "rgba", SOCK_RGBA);
}
}
if ((view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_MATERIAL) != 0) {
for (short pass = 0; pass < num_passes; pass++) {
BLI_snprintf_rlen(cryptomatte_pass_name, MAX_NAME, "CryptoMaterial%02d", pass);
RE_engine_register_pass(
- engine, scene, view_layer, cryptomatte_pass_name, 4, "RGBA", SOCK_RGBA);
+ engine, scene, view_layer, cryptomatte_pass_name, 4, "rgba", SOCK_RGBA);
}
}
if ((view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_ASSET) != 0) {
for (short pass = 0; pass < num_passes; pass++) {
BLI_snprintf_rlen(cryptomatte_pass_name, MAX_NAME, "CryptoAsset%02d", pass);
RE_engine_register_pass(
- engine, scene, view_layer, cryptomatte_pass_name, 4, "RGBA", SOCK_RGBA);
+ engine, scene, view_layer, cryptomatte_pass_name, 4, "rgba", SOCK_RGBA);
}
}
}
diff --git a/source/blender/draw/engines/eevee/eevee_engine.c b/source/blender/draw/engines/eevee/eevee_engine.c
index 227757bad23..5ae4b730cfa 100644
--- a/source/blender/draw/engines/eevee/eevee_engine.c
+++ b/source/blender/draw/engines/eevee/eevee_engine.c
@@ -109,7 +109,7 @@ void EEVEE_cache_populate(void *vedata, Object *ob)
}
if (DRW_object_is_renderable(ob) && (ob_visibility & OB_VISIBLE_SELF)) {
- if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL)) {
+ if (ob->type == OB_MESH) {
EEVEE_materials_cache_populate(vedata, sldata, ob, &cast_shadow);
}
else if (ob->type == OB_CURVES) {
@@ -312,12 +312,12 @@ static void eevee_draw_scene(void *vedata)
/* Volumetrics Resolve Opaque */
EEVEE_volumes_resolve(sldata, vedata);
- /* Renderpasses */
+ /* Render-passes. */
EEVEE_renderpasses_output_accumulate(sldata, vedata, false);
/* Transparent */
- /* TODO(fclem): should be its own Frame-buffer.
- * This is needed because dualsource blending only works with 1 color buffer. */
+ /* TODO(@fclem): should be its own Frame-buffer.
+ * This is needed because dual-source blending only works with 1 color buffer. */
GPU_framebuffer_texture_attach(fbl->main_color_fb, dtxl->depth, 0, 0);
GPU_framebuffer_bind(fbl->main_color_fb);
DRW_draw_pass(psl->transparent_pass);
@@ -366,7 +366,7 @@ static void eevee_draw_scene(void *vedata)
static void eevee_view_update(void *vedata)
{
EEVEE_StorageList *stl = ((EEVEE_Data *)vedata)->stl;
- if (stl->g_data) {
+ if (stl && stl->g_data) {
stl->g_data->view_updated = true;
}
}
@@ -451,8 +451,8 @@ static void eevee_render_to_image(void *vedata,
}
EEVEE_PrivateData *g_data = ved->stl->g_data;
- int initial_frame = CFRA;
- float initial_subframe = SUBFRA;
+ int initial_frame = scene->r.cfra;
+ float initial_subframe = scene->r.subframe;
float shuttertime = (do_motion_blur) ? scene->eevee.motion_blur_shutter : 0.0f;
int time_steps_tot = (do_motion_blur) ? max_ii(1, scene->eevee.motion_blur_steps) : 1;
g_data->render_timesteps = time_steps_tot;
@@ -588,7 +588,7 @@ static void eevee_render_to_image(void *vedata,
/* Restore original viewport size. */
DRW_render_viewport_size_set((int[2]){g_data->size_orig[0], g_data->size_orig[1]});
- if (CFRA != initial_frame || SUBFRA != initial_subframe) {
+ if (scene->r.cfra != initial_frame || scene->r.subframe != initial_subframe) {
/* Restore original frame number. This is because the render pipeline expects it. */
RE_engine_frame_set(engine, initial_frame, initial_subframe);
}
diff --git a/source/blender/draw/engines/eevee/eevee_materials.c b/source/blender/draw/engines/eevee/eevee_materials.c
index efd27c19654..94f29d64628 100644
--- a/source/blender/draw/engines/eevee/eevee_materials.c
+++ b/source/blender/draw/engines/eevee/eevee_materials.c
@@ -806,7 +806,7 @@ void EEVEE_materials_cache_populate(EEVEE_Data *vedata,
!DRW_state_is_image_render();
/* First get materials for this mesh. */
- if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL)) {
+ if (ELEM(ob->type, OB_MESH, OB_SURF)) {
const int materials_len = DRW_cache_object_material_count_get(ob);
EeveeMaterialCache *matcache = BLI_array_alloca(matcache, materials_len);
diff --git a/source/blender/draw/engines/eevee/eevee_private.h b/source/blender/draw/engines/eevee/eevee_private.h
index ad218d80cdf..8d47d80987c 100644
--- a/source/blender/draw/engines/eevee/eevee_private.h
+++ b/source/blender/draw/engines/eevee/eevee_private.h
@@ -1050,7 +1050,7 @@ typedef struct EEVEE_PrivateData {
float studiolight_glossy_clamp;
float studiolight_filter_quality;
- /* Renderpasses */
+ /* Render-passes */
/* Bitmask containing the active render_passes */
eViewLayerEEVEEPassType render_passes;
uint aov_hash;
diff --git a/source/blender/draw/engines/eevee/eevee_render.c b/source/blender/draw/engines/eevee/eevee_render.c
index bef19c589c2..c3b909f5fb9 100644
--- a/source/blender/draw/engines/eevee/eevee_render.c
+++ b/source/blender/draw/engines/eevee/eevee_render.c
@@ -24,6 +24,7 @@
#include "DEG_depsgraph_query.h"
#include "GPU_capabilities.h"
+#include "GPU_context.h"
#include "GPU_framebuffer.h"
#include "GPU_state.h"
@@ -223,7 +224,7 @@ void EEVEE_render_cache(void *vedata,
}
if (ob_visibility & OB_VISIBLE_SELF) {
- if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL)) {
+ if (ob->type == OB_MESH) {
EEVEE_materials_cache_populate(vedata, sldata, ob, &cast_shadow);
if (do_cryptomatte) {
EEVEE_cryptomatte_cache_populate(data, sldata, ob);
@@ -646,6 +647,10 @@ void EEVEE_render_draw(EEVEE_Data *vedata, RenderEngine *engine, RenderLayer *rl
/* XXX Seems to fix TDR issue with NVidia drivers on linux. */
GPU_finish();
+ /* Perform render step between samples to allow
+ * flushing of freed GPUBackend resources. */
+ GPU_render_step();
+
RE_engine_update_progress(engine, (float)(render_samples++) / (float)tot_sample);
}
}
diff --git a/source/blender/draw/engines/eevee/eevee_sampling.c b/source/blender/draw/engines/eevee/eevee_sampling.c
index a1a3e98f34f..34d3cd74b36 100644
--- a/source/blender/draw/engines/eevee/eevee_sampling.c
+++ b/source/blender/draw/engines/eevee/eevee_sampling.c
@@ -74,7 +74,8 @@ void EEVEE_sample_ellipse(int sample_ofs,
BLI_halton_2d(ht_primes, ht_offset, sample_ofs, ht_point);
- /* Decorelate AA and shadow samples. (see T68594) */
+ /* Decorrelate AA and shadow samples. (see T68594) */
+
ht_point[0] = fmod(ht_point[0] * 1151.0, 1.0);
ht_point[1] = fmod(ht_point[1] * 1069.0, 1.0);
@@ -97,7 +98,7 @@ void EEVEE_random_rotation_m4(int sample_ofs, float scale, float r_mat[4][4])
BLI_halton_3d(ht_primes, ht_offset, sample_ofs, ht_point);
- /* Decorelate AA and shadow samples. (see T68594) */
+ /* Decorrelate AA and shadow samples. (see T68594) */
ht_point[0] = fmod(ht_point[0] * 1151.0, 1.0);
ht_point[1] = fmod(ht_point[1] * 1069.0, 1.0);
ht_point[2] = fmod(ht_point[2] * 1151.0, 1.0);
diff --git a/source/blender/draw/engines/eevee/eevee_screen_raytrace.c b/source/blender/draw/engines/eevee/eevee_screen_raytrace.c
index 5af794c9158..0d0e551f3dc 100644
--- a/source/blender/draw/engines/eevee/eevee_screen_raytrace.c
+++ b/source/blender/draw/engines/eevee/eevee_screen_raytrace.c
@@ -198,7 +198,7 @@ void EEVEE_reflection_compute(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *v
if (((effects->enabled_effects & EFFECT_SSR) != 0) && stl->g_data->valid_double_buffer) {
DRW_stats_group_start("SSR");
- /* Raytrace. */
+ /* Ray-trace. */
GPU_framebuffer_bind(fbl->screen_tracing_fb);
DRW_draw_pass(psl->ssr_raytrace);
diff --git a/source/blender/draw/engines/eevee/eevee_shaders.c b/source/blender/draw/engines/eevee/eevee_shaders.c
index 5709621fc05..04d1168a30d 100644
--- a/source/blender/draw/engines/eevee/eevee_shaders.c
+++ b/source/blender/draw/engines/eevee/eevee_shaders.c
@@ -181,6 +181,7 @@ extern char datatoc_closure_type_lib_glsl[];
extern char datatoc_closure_eval_volume_lib_glsl[];
extern char datatoc_common_uniforms_lib_glsl[];
extern char datatoc_common_utiltex_lib_glsl[];
+extern char datatoc_cryptomatte_lib_glsl[];
extern char datatoc_cryptomatte_frag_glsl[];
extern char datatoc_cryptomatte_vert_glsl[];
extern char datatoc_cubemap_lib_glsl[];
@@ -304,6 +305,7 @@ static void eevee_shader_library_ensure(void)
DRW_SHADER_LIB_ADD(e_data.lib, closure_eval_refraction_lib);
DRW_SHADER_LIB_ADD(e_data.lib, closure_eval_surface_lib);
DRW_SHADER_LIB_ADD(e_data.lib, closure_eval_volume_lib);
+ DRW_SHADER_LIB_ADD(e_data.lib, cryptomatte_lib);
DRW_SHADER_LIB_ADD(e_data.lib, surface_vert);
e_data.surface_lit_frag = DRW_shader_library_create_shader_string(e_data.lib,
@@ -1190,8 +1192,8 @@ Material *EEVEE_material_default_diffuse_get(void)
if (!e_data.diffuse_mat) {
Material *ma = BKE_id_new_nomain(ID_MA, "EEVEEE default diffuse");
- bNodeTree *ntree = ntreeAddTree(NULL, "Shader Nodetree", ntreeType_Shader->idname);
- ma->nodetree = ntree;
+ bNodeTree *ntree = ntreeAddTreeEmbedded(
+ NULL, &ma->id, "Shader Nodetree", ntreeType_Shader->idname);
ma->use_nodes = true;
bNode *bsdf = nodeAddStaticNode(NULL, ntree, SH_NODE_BSDF_DIFFUSE);
@@ -1217,8 +1219,8 @@ Material *EEVEE_material_default_glossy_get(void)
if (!e_data.glossy_mat) {
Material *ma = BKE_id_new_nomain(ID_MA, "EEVEEE default metal");
- bNodeTree *ntree = ntreeAddTree(NULL, "Shader Nodetree", ntreeType_Shader->idname);
- ma->nodetree = ntree;
+ bNodeTree *ntree = ntreeAddTreeEmbedded(
+ NULL, &ma->id, "Shader Nodetree", ntreeType_Shader->idname);
ma->use_nodes = true;
bNode *bsdf = nodeAddStaticNode(NULL, ntree, SH_NODE_BSDF_GLOSSY);
@@ -1246,8 +1248,8 @@ Material *EEVEE_material_default_error_get(void)
if (!e_data.error_mat) {
Material *ma = BKE_id_new_nomain(ID_MA, "EEVEEE default error");
- bNodeTree *ntree = ntreeAddTree(NULL, "Shader Nodetree", ntreeType_Shader->idname);
- ma->nodetree = ntree;
+ bNodeTree *ntree = ntreeAddTreeEmbedded(
+ NULL, &ma->id, "Shader Nodetree", ntreeType_Shader->idname);
ma->use_nodes = true;
/* Use emission and output material to be compatible with both World and Material. */
diff --git a/source/blender/draw/engines/eevee/eevee_shadows_cascade.c b/source/blender/draw/engines/eevee/eevee_shadows_cascade.c
index 536242f67d8..a3ab4cdb830 100644
--- a/source/blender/draw/engines/eevee/eevee_shadows_cascade.c
+++ b/source/blender/draw/engines/eevee/eevee_shadows_cascade.c
@@ -357,7 +357,7 @@ static void eevee_shadow_cascade_setup(EEVEE_LightsInfo *linfo,
mul_m4_m4m4(csm_data->shadowmat[c], texcomat, viewprojmat);
#ifdef DEBUG_CSM
- DRW_debug_m4_as_bbox(viewprojmat, dbg_col, true);
+ DRW_debug_m4_as_bbox(viewprojmat, true, dbg_col);
#endif
}
diff --git a/source/blender/draw/engines/eevee/eevee_volumes.c b/source/blender/draw/engines/eevee/eevee_volumes.c
index b8bef61f8b1..2d96cffb4ba 100644
--- a/source/blender/draw/engines/eevee/eevee_volumes.c
+++ b/source/blender/draw/engines/eevee/eevee_volumes.c
@@ -30,6 +30,7 @@
#include "DEG_depsgraph_query.h"
#include "GPU_capabilities.h"
+#include "GPU_context.h"
#include "GPU_material.h"
#include "GPU_texture.h"
#include "eevee_private.h"
@@ -82,6 +83,13 @@ void EEVEE_volumes_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
tex_size[1] = (int)ceilf(fmaxf(1.0f, viewport_size[1] / (float)tile_size));
tex_size[2] = max_ii(scene_eval->eevee.volumetric_samples, 1);
+ /* Clamp 3D texture size based on device maximum. */
+ int maxSize = GPU_max_texture_3d_size();
+ BLI_assert(tex_size[0] <= maxSize);
+ tex_size[0] = tex_size[0] > maxSize ? maxSize : tex_size[0];
+ tex_size[1] = tex_size[1] > maxSize ? maxSize : tex_size[1];
+ tex_size[2] = tex_size[2] > maxSize ? maxSize : tex_size[2];
+
common_data->vol_coord_scale[0] = viewport_size[0] / (float)(tile_size * tex_size[0]);
common_data->vol_coord_scale[1] = viewport_size[1] / (float)(tile_size * tex_size[1]);
common_data->vol_coord_scale[2] = 1.0f / viewport_size[0];
@@ -306,9 +314,14 @@ void EEVEE_volumes_cache_object_add(EEVEE_ViewLayerData *sldata,
return;
}
+ GPUShader *sh = GPU_material_get_shader(mat);
+ if (sh == NULL) {
+ return;
+ }
+
/* TODO(fclem): Reuse main shading group to avoid shading binding cost just like for surface
* shaders. */
- DRWShadingGroup *grp = DRW_shgroup_material_create(mat, vedata->psl->volumetric_objects_ps);
+ DRWShadingGroup *grp = DRW_shgroup_create(sh, vedata->psl->volumetric_objects_ps);
grp = DRW_shgroup_volume_create_sub(scene, ob, grp, mat);
@@ -316,6 +329,8 @@ void EEVEE_volumes_cache_object_add(EEVEE_ViewLayerData *sldata,
return;
}
+ DRW_shgroup_add_material_resources(grp, mat);
+
/* TODO(fclem): remove those "unnecessary" UBOs */
DRW_shgroup_uniform_block(grp, "planar_block", sldata->planar_ubo);
DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo);
diff --git a/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl b/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl
index 0f5290a7c07..ffca97b6b8f 100644
--- a/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl
@@ -181,6 +181,8 @@ Closure closure_eval(ClosureDiffuse diffuse, ClosureReflection reflection)
/* Glue with the old system. */
CLOSURE_VARS_DECLARE_2(Diffuse, Glossy);
+ /* WORKAROUND: This is to avoid regression in 3.2 and avoid messing with EEVEE-Next. */
+ in_common.occlusion = (diffuse.sss_radius.g == -1.0) ? diffuse.sss_radius.r : 1.0;
in_Diffuse_0.N = diffuse.N;
in_Diffuse_0.albedo = diffuse.color;
in_Glossy_1.N = reflection.N;
@@ -207,6 +209,8 @@ Closure closure_eval(ClosureDiffuse diffuse,
/* Glue with the old system. */
CLOSURE_VARS_DECLARE_3(Diffuse, Glossy, Glossy);
+ /* WORKAROUND: This is to avoid regression in 3.2 and avoid messing with EEVEE-Next. */
+ in_common.occlusion = (diffuse.sss_radius.g == -1.0) ? diffuse.sss_radius.r : 1.0;
in_Diffuse_0.N = diffuse.N;
in_Diffuse_0.albedo = diffuse.color;
in_Glossy_1.N = reflection.N;
diff --git a/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl b/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl
index 4070ede116b..eeccb393a5c 100644
--- a/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl
@@ -6,8 +6,8 @@
#ifndef VOLUMETRICS
-uniform int outputSsrId; /*Default = 1;*/
-uniform int outputSssId; /*Default = 1;*/
+uniform int outputSsrId; /* Default = 1; */
+uniform int outputSssId; /* Default = 1; */
#endif
diff --git a/source/blender/draw/engines/eevee/shaders/cryptomatte_lib.glsl b/source/blender/draw/engines/eevee/shaders/cryptomatte_lib.glsl
new file mode 100644
index 00000000000..0f8810ff7ac
--- /dev/null
+++ b/source/blender/draw/engines/eevee/shaders/cryptomatte_lib.glsl
@@ -0,0 +1,19 @@
+/* NOTE: this lib is included in the cryptomatte vertex shader to work around the issue that eevee
+ * cannot use create infos for its static shaders. Keep in sync with draw_shader_shared.h */
+#ifdef HAIR_SHADER
+/* Define the maximum number of attribute we allow in a curves UBO.
+ * This should be kept in sync with `GPU_ATTR_MAX` */
+# define DRW_ATTRIBUTE_PER_CURVES_MAX 15
+
+struct CurvesInfos {
+ /* Per attribute scope, follows loading order.
+ * NOTE: uint as bool in GLSL is 4 bytes.
+ * NOTE: GLSL pad arrays of scalar to 16 bytes (std140). */
+ uvec4 is_point_attribute[DRW_ATTRIBUTE_PER_CURVES_MAX];
+};
+layout(std140) uniform drw_curves
+{
+ CurvesInfos _drw_curves;
+};
+# define drw_curves (_drw_curves)
+#endif
diff --git a/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl b/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl
index f8dbc4772e9..14fbc98469a 100644
--- a/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl
+++ b/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl
@@ -3,4 +3,5 @@
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(common_attribute_lib.glsl)
+#pragma BLENDER_REQUIRE(cryptomatte_lib.glsl)
#pragma BLENDER_REQUIRE(surface_vert.glsl)
diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl
index 688ae4915e1..7dec30a96b1 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl
@@ -124,7 +124,7 @@ void dof_slight_focus_gather(float radius, out vec4 out_color, out float out_wei
dof_gather_accumulate_resolve(total_sample_count, bg_accum, bg_col, bg_weight, unused_occlusion);
dof_gather_accumulate_resolve(total_sample_count, fg_accum, fg_col, fg_weight, unused_occlusion);
- /* Fix weighting issues on perfectly focus > slight focus transitionning areas. */
+ /* Fix weighting issues on perfectly focus > slight focus transitioning areas. */
if (abs(center_data.coc) < 0.5) {
bg_col = center_data.color;
bg_weight = 1.0;
diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl
index 06dcbeaed66..7230758a93f 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl
@@ -67,7 +67,7 @@ void main(void)
/* Occlude the sprite with geometry from the same field
* using a VSM like chebychev test (slide 85). */
float mean = occlusion_data.x;
- float variance = occlusion_data.x;
+ float variance = occlusion_data.y;
shapes *= variance * safe_rcp(variance + sqr(max(cocs * correction_fac - mean, 0.0)));
}
diff --git a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
index 9ecc50d9df5..c7f6687d2e2 100644
--- a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl
@@ -100,7 +100,7 @@ void main()
coef = 0.315392 * (3.0 * cubevec.y * cubevec.y - 1.0) * 1.0 / 4.0;
}
else if (comp == 7) {
- coef = 1.092548 * cubevec.x * cubevec.y * 1.0 / 4.0;
+ coef = -1.092548 * cubevec.x * cubevec.y * 1.0 / 4.0;
}
else { /* (comp == 8) */
coef = 0.546274 * (cubevec.x * cubevec.x - cubevec.z * cubevec.z) * 1.0 / 4.0;
diff --git a/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl b/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl
index 15c68dc5829..87e944a2ac0 100644
--- a/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl
@@ -91,3 +91,17 @@ void main()
}
#endif
}
+
+/* Passthrough. */
+float attr_load_temperature_post(float attr)
+{
+ return attr;
+}
+vec4 attr_load_color_post(vec4 attr)
+{
+ return attr;
+}
+vec4 attr_load_uniform(vec4 attr, const uint attr_hash)
+{
+ return attr;
+}
diff --git a/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl b/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl
index 2926f8c5a89..062a40f35c2 100644
--- a/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl
+++ b/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl
@@ -73,7 +73,7 @@ int g_curves_attr_id = 0;
int curves_attribute_element_id()
{
int id = hairStrandID;
- if (drw_curves.is_point_attribute[g_curves_attr_id] != 0) {
+ if (drw_curves.is_point_attribute[g_curves_attr_id][0] != 0) {
id = hair_get_base_id();
}
@@ -152,3 +152,7 @@ vec4 attr_load_color_post(vec4 attr)
{
return attr;
}
+vec4 attr_load_uniform(vec4 attr, const uint attr_hash)
+{
+ return attr;
+}
diff --git a/source/blender/draw/engines/eevee/shaders/surface_frag.glsl b/source/blender/draw/engines/eevee/shaders/surface_frag.glsl
index ace6c7d788d..88755705a53 100644
--- a/source/blender/draw/engines/eevee/shaders/surface_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/surface_frag.glsl
@@ -152,7 +152,8 @@ void main()
/* Only supported attrib for world/background shaders. */
vec3 attr_load_orco(vec4 orco)
{
- return g_data.P;
+ /* Retain precision better than g_data.P (see T99128). */
+ return -normal_view_to_world(viewCameraVec(viewPosition));
}
/* Unsupported. */
vec4 attr_load_tangent(vec4 tangent)
@@ -181,3 +182,7 @@ vec4 attr_load_color_post(vec4 attr)
{
return attr;
}
+vec4 attr_load_uniform(vec4 attr, const uint attr_hash)
+{
+ return attr;
+}
diff --git a/source/blender/draw/engines/eevee/shaders/surface_lib.glsl b/source/blender/draw/engines/eevee/shaders/surface_lib.glsl
index 8e1bafe8d92..69762027643 100644
--- a/source/blender/draw/engines/eevee/shaders/surface_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/surface_lib.glsl
@@ -97,11 +97,12 @@ GlobalData init_globals(void)
GlobalData surf;
# if defined(WORLD_BACKGROUND) || defined(PROBE_CAPTURE)
- surf.P = -cameraVec(worldPosition);
- surf.N = surf.Ng = -surf.P;
+ surf.P = transform_direction(ViewMatrixInverse, -viewCameraVec(viewPosition));
+ surf.N = surf.Ng = surf.Ni = -surf.P;
surf.ray_length = 0.0;
# else
surf.P = worldPosition;
+ surf.Ni = worldNormal;
surf.N = safe_normalize(worldNormal);
surf.Ng = safe_normalize(cross(dFdx(surf.P), dFdy(surf.P)));
surf.ray_length = distance(surf.P, cameraPos);
@@ -109,6 +110,7 @@ GlobalData init_globals(void)
surf.barycentric_coords = vec2(0.0);
surf.barycentric_dists = vec3(0.0);
surf.N = (FrontFacing) ? surf.N : -surf.N;
+ surf.Ni = (FrontFacing) ? surf.Ni : -surf.Ni;
# ifdef HAIR_SHADER
vec3 V = cameraVec(surf.P);
/* Shade as a cylinder. */
@@ -123,7 +125,7 @@ GlobalData init_globals(void)
cos_theta = hairThickTime / hairThickness;
}
float sin_theta = sqrt(max(0.0, 1.0 - cos_theta * cos_theta));
- surf.N = safe_normalize(worldNormal * sin_theta + B * cos_theta);
+ surf.N = surf.Ni = safe_normalize(worldNormal * sin_theta + B * cos_theta);
surf.curve_T = -hairTangent;
/* Costly, but follows cycles per pixel tangent space (not following curve shape). */
surf.curve_B = cross(V, surf.curve_T);
diff --git a/source/blender/draw/engines/eevee/shaders/surface_vert.glsl b/source/blender/draw/engines/eevee/shaders/surface_vert.glsl
index a8e95e13b12..54aad7891dc 100644
--- a/source/blender/draw/engines/eevee/shaders/surface_vert.glsl
+++ b/source/blender/draw/engines/eevee/shaders/surface_vert.glsl
@@ -80,7 +80,7 @@ int g_curves_attr_id = 0;
int curves_attribute_element_id()
{
int id = hairStrandID;
- if (drw_curves.is_point_attribute[g_curves_attr_id] != 0) {
+ if (drw_curves.is_point_attribute[g_curves_attr_id][0] != 0) {
id = hair_get_base_id();
}
@@ -165,3 +165,7 @@ vec4 attr_load_color_post(vec4 attr)
{
return attr;
}
+vec4 attr_load_uniform(vec4 attr, const uint attr_hash)
+{
+ return attr;
+}
diff --git a/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl b/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl
index 88ade8451a4..9ed21fc0bf5 100644
--- a/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl
@@ -86,6 +86,8 @@ void main()
discard;
return;
}
+#else /* WORLD_SHADER */
+ volumeOrco = worldPosition;
#endif
#ifdef CLEAR
@@ -176,3 +178,7 @@ vec4 attr_load_color_post(vec4 attr)
#endif
return attr;
}
+vec4 attr_load_uniform(vec4 attr, const uint attr_hash)
+{
+ return attr;
+}
diff --git a/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl b/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl
index b3b9c7af19c..2d51fbd9edc 100644
--- a/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl
+++ b/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl
@@ -87,3 +87,8 @@ vec4 attr_load_color_post(vec4 attr)
{
return attr;
}
+
+vec4 attr_load_uniform(vec4 attr, const uint attr_hash)
+{
+ return attr;
+}
diff --git a/source/blender/draw/engines/eevee_next/eevee_camera.cc b/source/blender/draw/engines/eevee_next/eevee_camera.cc
index e6d2e2db764..b9040f0f3ab 100644
--- a/source/blender/draw/engines/eevee_next/eevee_camera.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_camera.cc
@@ -29,10 +29,8 @@ namespace blender::eevee {
void Camera::init()
{
const Object *camera_eval = inst_.camera_eval_object;
- synced_ = false;
- data_.swap();
- CameraData &data = data_.current();
+ CameraData &data = data_;
if (camera_eval) {
const ::Camera *cam = reinterpret_cast<const ::Camera *>(camera_eval->data);
@@ -77,9 +75,8 @@ void Camera::init()
void Camera::sync()
{
const Object *camera_eval = inst_.camera_eval_object;
- CameraData &data = data_.current();
- data.filter_size = inst_.scene->r.gauss;
+ CameraData &data = data_;
if (inst_.drw_view) {
DRW_view_viewmat_get(inst_.drw_view, data.viewmat.ptr(), false);
@@ -127,6 +124,10 @@ void Camera::sync()
data.equirect_scale *= data.uv_scale;
data.equirect_scale_inv = 1.0f / data.equirect_scale;
+#else
+ data.fisheye_fov = data.fisheye_lens = -1.0f;
+ data.equirect_bias = float2(0.0f);
+ data.equirect_scale = float2(0.0f);
#endif
}
else if (inst_.drw_view) {
@@ -137,14 +138,8 @@ void Camera::sync()
data.equirect_scale = float2(0.0f);
}
- data_.current().push_update();
-
- synced_ = true;
-
- /* Detect changes in parameters. */
- if (data_.current() != data_.previous()) {
- // inst_.sampling.reset();
- }
+ data_.initialized = true;
+ data_.push_update();
}
/** \} */
diff --git a/source/blender/draw/engines/eevee_next/eevee_camera.hh b/source/blender/draw/engines/eevee_next/eevee_camera.hh
index dfec738b1f3..49f9b14e11b 100644
--- a/source/blender/draw/engines/eevee_next/eevee_camera.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_camera.hh
@@ -61,8 +61,7 @@ inline bool operator==(const CameraData &a, const CameraData &b)
return compare_m4m4(a.persmat.ptr(), b.persmat.ptr(), FLT_MIN) && (a.uv_scale == b.uv_scale) &&
(a.uv_bias == b.uv_bias) && (a.equirect_scale == b.equirect_scale) &&
(a.equirect_bias == b.equirect_bias) && (a.fisheye_fov == b.fisheye_fov) &&
- (a.fisheye_lens == b.fisheye_lens) && (a.filter_size == b.filter_size) &&
- (a.type == b.type);
+ (a.fisheye_lens == b.fisheye_lens) && (a.type == b.type);
}
inline bool operator!=(const CameraData &a, const CameraData &b)
@@ -83,10 +82,7 @@ class Camera {
private:
Instance &inst_;
- /** Double buffered to detect changes and have history for re-projection. */
- SwapChain<CameraDataBuf, 2> data_;
- /** Detects wrong usage. */
- bool synced_ = false;
+ CameraDataBuf data_;
public:
Camera(Instance &inst) : inst_(inst){};
@@ -100,28 +96,32 @@ class Camera {
**/
const CameraData &data_get() const
{
- BLI_assert(synced_);
- return data_.current();
+ BLI_assert(data_.initialized);
+ return data_;
}
const GPUUniformBuf *ubo_get() const
{
- return data_.current();
+ return data_;
}
bool is_panoramic() const
{
- return eevee::is_panoramic(data_.current().type);
+ return eevee::is_panoramic(data_.type);
}
bool is_orthographic() const
{
- return data_.current().type == CAMERA_ORTHO;
+ return data_.type == CAMERA_ORTHO;
+ }
+ bool is_perspective() const
+ {
+ return data_.type == CAMERA_PERSP;
}
const float3 &position() const
{
- return *reinterpret_cast<const float3 *>(data_.current().viewinv[3]);
+ return *reinterpret_cast<const float3 *>(data_.viewinv[3]);
}
const float3 &forward() const
{
- return *reinterpret_cast<const float3 *>(data_.current().viewinv[2]);
+ return *reinterpret_cast<const float3 *>(data_.viewinv[2]);
}
};
diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh
index f75ebd2bd13..2f338e707c0 100644
--- a/source/blender/draw/engines/eevee_next/eevee_defines.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh
@@ -11,12 +11,18 @@
#pragma once
-/**
- * Number of items in a culling batch. Needs to be Power of 2. Must be <= to 65536.
- * Current limiting factor is the sorting phase which is single pass and only sort within a
- * thread-group which maximum size is 1024.
- */
-#define CULLING_BATCH_SIZE 1024
+/* Hierarchical Z down-sampling. */
+#define HIZ_MIP_COUNT 8
+/* NOTE: The shader is written to update 5 mipmaps using LDS. */
+#define HIZ_GROUP_SIZE 32
+
+/* Avoid too much overhead caused by resizing the light buffers too many time. */
+#define LIGHT_CHUNK 256
+
+#define CULLING_SELECT_GROUP_SIZE 256
+#define CULLING_SORT_GROUP_SIZE 256
+#define CULLING_ZBIN_GROUP_SIZE 1024
+#define CULLING_TILE_GROUP_SIZE 1024
/**
* IMPORTANT: Some data packing are tweaked for these values.
@@ -34,12 +40,65 @@
#define SHADOW_MAX_PAGE 4096
#define SHADOW_PAGE_PER_ROW 64
-#define HIZ_MIP_COUNT 6u
-/* Group size is 2x smaller because we simply copy the level 0. */
-#define HIZ_GROUP_SIZE 1u << (HIZ_MIP_COUNT - 2u)
-
+/* Ray-tracing. */
#define RAYTRACE_GROUP_SIZE 16
#define RAYTRACE_MAX_TILES (16384 / RAYTRACE_GROUP_SIZE) * (16384 / RAYTRACE_GROUP_SIZE)
/* Minimum visibility size. */
#define LIGHTPROBE_FILTER_VIS_GROUP_SIZE 16
+
+/* Film. */
+#define FILM_GROUP_SIZE 16
+
+/* Motion Blur. */
+#define MOTION_BLUR_GROUP_SIZE 32
+#define MOTION_BLUR_DILATE_GROUP_SIZE 512
+
+/* Depth Of Field. */
+#define DOF_TILES_SIZE 8
+#define DOF_TILES_FLATTEN_GROUP_SIZE DOF_TILES_SIZE
+#define DOF_TILES_DILATE_GROUP_SIZE 8
+#define DOF_BOKEH_LUT_SIZE 32
+#define DOF_MAX_SLIGHT_FOCUS_RADIUS 5
+#define DOF_SLIGHT_FOCUS_SAMPLE_MAX 16
+#define DOF_MIP_COUNT 4
+#define DOF_REDUCE_GROUP_SIZE (1 << (DOF_MIP_COUNT - 1))
+#define DOF_DEFAULT_GROUP_SIZE 32
+#define DOF_STABILIZE_GROUP_SIZE 16
+#define DOF_FILTER_GROUP_SIZE 8
+#define DOF_GATHER_GROUP_SIZE DOF_TILES_SIZE
+#define DOF_RESOLVE_GROUP_SIZE (DOF_TILES_SIZE * 2)
+
+/* Resource bindings. */
+
+/* Texture. */
+#define RBUFS_UTILITY_TEX_SLOT 14
+
+/* Images. */
+#define RBUFS_NORMAL_SLOT 0
+#define RBUFS_LIGHT_SLOT 1
+#define RBUFS_DIFF_COLOR_SLOT 2
+#define RBUFS_SPEC_COLOR_SLOT 3
+#define RBUFS_EMISSION_SLOT 4
+#define RBUFS_AOV_COLOR_SLOT 5
+#define RBUFS_AOV_VALUE_SLOT 6
+
+/* Uniform Buffers. */
+/* Only during prepass. */
+#define VELOCITY_CAMERA_PREV_BUF 3
+#define VELOCITY_CAMERA_CURR_BUF 4
+#define VELOCITY_CAMERA_NEXT_BUF 5
+
+/* Storage Buffers. */
+#define LIGHT_CULL_BUF_SLOT 0
+#define LIGHT_BUF_SLOT 1
+#define LIGHT_ZBIN_BUF_SLOT 2
+#define LIGHT_TILE_BUF_SLOT 3
+#define RBUFS_AOV_BUF_SLOT 5
+#define SAMPLING_BUF_SLOT 6
+/* Only during pre-pass. */
+#define VELOCITY_OBJ_PREV_BUF_SLOT 0
+#define VELOCITY_OBJ_NEXT_BUF_SLOT 1
+#define VELOCITY_GEO_PREV_BUF_SLOT 2
+#define VELOCITY_GEO_NEXT_BUF_SLOT 3
+#define VELOCITY_INDIRECTION_BUF_SLOT 4
diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc
new file mode 100644
index 00000000000..bc0891ceb92
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc
@@ -0,0 +1,761 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * Depth of field post process effect.
+ *
+ * There are 2 methods to achieve this effect.
+ * - The first uses projection matrix offsetting and sample accumulation to give
+ * reference quality depth of field. But this needs many samples to hide the
+ * under-sampling.
+ * - The second one is a post-processing based one. It follows the
+ * implementation described in the presentation
+ * "Life of a Bokeh - Siggraph 2018" from Guillaume Abadie.
+ * There are some difference with our actual implementation that prioritize quality.
+ */
+
+#include "DRW_render.h"
+
+#include "BKE_camera.h"
+#include "DNA_camera_types.h"
+
+#include "GPU_platform.h"
+#include "GPU_texture.h"
+#include "GPU_uniform_buffer.h"
+
+#include "eevee_camera.hh"
+#include "eevee_instance.hh"
+#include "eevee_sampling.hh"
+#include "eevee_shader.hh"
+#include "eevee_shader_shared.hh"
+
+#include "eevee_depth_of_field.hh"
+
+namespace blender::eevee {
+
+/* -------------------------------------------------------------------- */
+/** \name Depth of field
+ * \{ */
+
+void DepthOfField::init()
+{
+ const SceneEEVEE &sce_eevee = inst_.scene->eevee;
+ const Object *camera_object_eval = inst_.camera_eval_object;
+ const ::Camera *camera = (camera_object_eval) ?
+ reinterpret_cast<const ::Camera *>(camera_object_eval->data) :
+ nullptr;
+ if (camera == nullptr) {
+ /* Set to invalid value for update detection */
+ data_.scatter_color_threshold = -1.0f;
+ return;
+ }
+ /* Reminder: These are parameters not interpolated by motion blur. */
+ int update = 0;
+ int sce_flag = sce_eevee.flag;
+ update += assign_if_different(do_jitter_, (sce_flag & SCE_EEVEE_DOF_JITTER) != 0);
+ update += assign_if_different(user_overblur_, sce_eevee.bokeh_overblur / 100.0f);
+ update += assign_if_different(fx_max_coc_, sce_eevee.bokeh_max_size);
+ update += assign_if_different(data_.scatter_color_threshold, sce_eevee.bokeh_threshold);
+ update += assign_if_different(data_.scatter_neighbor_max_color, sce_eevee.bokeh_neighbor_max);
+ update += assign_if_different(data_.bokeh_blades, float(camera->dof.aperture_blades));
+ if (update > 0) {
+ inst_.sampling.reset();
+ }
+}
+
+void DepthOfField::sync()
+{
+ const Camera &camera = inst_.camera;
+ const Object *camera_object_eval = inst_.camera_eval_object;
+ const ::Camera *camera_data = (camera_object_eval) ?
+ reinterpret_cast<const ::Camera *>(camera_object_eval->data) :
+ nullptr;
+
+ int update = 0;
+
+ if (camera_data == nullptr || (camera_data->dof.flag & CAM_DOF_ENABLED) == 0) {
+ update += assign_if_different(jitter_radius_, 0.0f);
+ update += assign_if_different(fx_radius_, 0.0f);
+ if (update > 0) {
+ inst_.sampling.reset();
+ }
+ return;
+ }
+
+ float2 anisotropic_scale = {clamp_f(1.0f / camera_data->dof.aperture_ratio, 1e-5f, 1.0f),
+ clamp_f(camera_data->dof.aperture_ratio, 1e-5f, 1.0f)};
+ update += assign_if_different(data_.bokeh_anisotropic_scale, anisotropic_scale);
+ update += assign_if_different(data_.bokeh_rotation, camera_data->dof.aperture_rotation);
+ update += assign_if_different(focus_distance_,
+ BKE_camera_object_dof_distance(camera_object_eval));
+ data_.bokeh_anisotropic_scale_inv = 1.0f / data_.bokeh_anisotropic_scale;
+
+ float fstop = max_ff(camera_data->dof.aperture_fstop, 1e-5f);
+
+ if (update) {
+ inst_.sampling.reset();
+ }
+
+ float aperture = 1.0f / (2.0f * fstop);
+ if (camera.is_perspective()) {
+ aperture *= camera_data->lens * 1e-3f;
+ }
+
+ if (camera.is_orthographic()) {
+ /* FIXME: Why is this needed? Some kind of implicit unit conversion? */
+ aperture *= 0.04f;
+ /* Really strange behavior from Cycles but replicating. */
+ focus_distance_ += camera.data_get().clip_near;
+ }
+
+ if (camera.is_panoramic()) {
+ /* FIXME: Eyeballed. */
+ aperture *= 0.185f;
+ }
+
+ if (camera_data->dof.aperture_ratio < 1.0) {
+ /* If ratio is scaling the bokeh outwards, we scale the aperture so that
+ * the gather kernel size will encompass the maximum axis. */
+ aperture /= max_ff(camera_data->dof.aperture_ratio, 1e-5f);
+ }
+
+ float jitter_radius, fx_radius;
+
+ /* Balance blur radius between fx dof and jitter dof. */
+ if (do_jitter_ && (inst_.sampling.dof_ring_count_get() > 0) && !camera.is_panoramic() &&
+ !inst_.is_viewport()) {
+ /* Compute a minimal overblur radius to fill the gaps between the samples.
+ * This is just the simplified form of dividing the area of the bokeh by
+ * the number of samples. */
+ float minimal_overblur = 1.0f / sqrtf(inst_.sampling.dof_sample_count_get());
+
+ fx_radius = (minimal_overblur + user_overblur_) * aperture;
+ /* Avoid dilating the shape. Over-blur only soften. */
+ jitter_radius = max_ff(0.0f, aperture - fx_radius);
+ }
+ else {
+ jitter_radius = 0.0f;
+ fx_radius = aperture;
+ }
+
+ /* Disable post fx if result wouldn't be noticeable. */
+ if (fx_max_coc_ <= 0.5f) {
+ fx_radius = 0.0f;
+ }
+
+ update += assign_if_different(jitter_radius_, jitter_radius);
+ update += assign_if_different(fx_radius_, fx_radius);
+ if (update > 0) {
+ inst_.sampling.reset();
+ }
+
+ if (fx_radius_ == 0.0f) {
+ return;
+ }
+
+ /* TODO(fclem): Once we render into multiple view, we will need to use the maximum resolution. */
+ int2 max_render_res = inst_.film.render_extent_get();
+ int2 half_res = math::divide_ceil(max_render_res, int2(2));
+ int2 reduce_size = math::ceil_to_multiple(half_res, int2(DOF_REDUCE_GROUP_SIZE));
+
+ data_.gather_uv_fac = 1.0f / float2(reduce_size);
+
+ /* Now that we know the maximum render resolution of every view, using depth of field, allocate
+ * the reduced buffers. Color needs to be signed format here. See note in shader for
+ * explanation. Do not use texture pool because of needs mipmaps. */
+ reduced_color_tx_.ensure_2d(GPU_RGBA16F, reduce_size, nullptr, DOF_MIP_COUNT);
+ reduced_coc_tx_.ensure_2d(GPU_R16F, reduce_size, nullptr, DOF_MIP_COUNT);
+ reduced_color_tx_.ensure_mip_views();
+ reduced_coc_tx_.ensure_mip_views();
+
+ /* Resize the scatter list to contain enough entry to cover half the screen with sprites (which
+ * is unlikely due to local contrast test). */
+ data_.scatter_max_rect = (reduced_color_tx_.pixel_count() / 4) / 2;
+ scatter_fg_list_buf_.resize(data_.scatter_max_rect);
+ scatter_bg_list_buf_.resize(data_.scatter_max_rect);
+
+ bokeh_lut_pass_sync();
+ setup_pass_sync();
+ stabilize_pass_sync();
+ downsample_pass_sync();
+ reduce_pass_sync();
+ tiles_flatten_pass_sync();
+ tiles_dilate_pass_sync();
+ gather_pass_sync();
+ filter_pass_sync();
+ scatter_pass_sync();
+ hole_fill_pass_sync();
+ resolve_pass_sync();
+}
+
+void DepthOfField::jitter_apply(float4x4 &winmat, float4x4 &viewmat)
+{
+ if (jitter_radius_ == 0.0f) {
+ return;
+ }
+
+ float radius, theta;
+ inst_.sampling.dof_disk_sample_get(&radius, &theta);
+
+ if (data_.bokeh_blades >= 3.0f) {
+ theta = circle_to_polygon_angle(data_.bokeh_blades, theta);
+ radius *= circle_to_polygon_radius(data_.bokeh_blades, theta);
+ }
+ radius *= jitter_radius_;
+ theta += data_.bokeh_rotation;
+
+ /* Sample in View Space. */
+ float2 sample = float2(radius * cosf(theta), radius * sinf(theta));
+ sample *= data_.bokeh_anisotropic_scale;
+ /* Convert to NDC Space. */
+ float3 jitter = float3(UNPACK2(sample), -focus_distance_);
+ float3 center = float3(0.0f, 0.0f, -focus_distance_);
+ mul_project_m4_v3(winmat.ptr(), jitter);
+ mul_project_m4_v3(winmat.ptr(), center);
+
+ const bool is_ortho = (winmat[2][3] != -1.0f);
+ if (is_ortho) {
+ sample *= focus_distance_;
+ }
+ /* Translate origin. */
+ sub_v2_v2(viewmat[3], sample);
+ /* Skew winmat Z axis. */
+ add_v2_v2(winmat[2], center - jitter);
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Passes setup.
+ * \{ */
+
+void DepthOfField::bokeh_lut_pass_sync()
+{
+ const bool has_anisotropy = data_.bokeh_anisotropic_scale != float2(1.0f);
+ if (!has_anisotropy && (data_.bokeh_blades == 0.0)) {
+ /* No need for LUTs in these cases. */
+ use_bokeh_lut_ = false;
+ return;
+ }
+ use_bokeh_lut_ = true;
+
+ /* Precompute bokeh texture. */
+ bokeh_lut_ps_.init();
+ bokeh_lut_ps_.shader_set(inst_.shaders.static_shader_get(DOF_BOKEH_LUT));
+ bokeh_lut_ps_.bind_ubo("dof_buf", data_);
+ bokeh_lut_ps_.bind_image("out_gather_lut_img", &bokeh_gather_lut_tx_);
+ bokeh_lut_ps_.bind_image("out_scatter_lut_img", &bokeh_scatter_lut_tx_);
+ bokeh_lut_ps_.bind_image("out_resolve_lut_img", &bokeh_resolve_lut_tx_);
+ bokeh_lut_ps_.dispatch(int3(1, 1, 1));
+}
+
+void DepthOfField::setup_pass_sync()
+{
+ RenderBuffers &render_buffers = inst_.render_buffers;
+
+ setup_ps_.init();
+ setup_ps_.shader_set(inst_.shaders.static_shader_get(DOF_SETUP));
+ setup_ps_.bind_texture("color_tx", &input_color_tx_, no_filter);
+ setup_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter);
+ setup_ps_.bind_ubo("dof_buf", data_);
+ setup_ps_.bind_image("out_color_img", &setup_color_tx_);
+ setup_ps_.bind_image("out_coc_img", &setup_coc_tx_);
+ setup_ps_.dispatch(&dispatch_setup_size_);
+ setup_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
+}
+
+void DepthOfField::stabilize_pass_sync()
+{
+ RenderBuffers &render_buffers = inst_.render_buffers;
+ VelocityModule &velocity = inst_.velocity;
+
+ stabilize_ps_.init();
+ stabilize_ps_.shader_set(inst_.shaders.static_shader_get(DOF_STABILIZE));
+ stabilize_ps_.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
+ stabilize_ps_.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
+ /* This is only for temporal stability. The next step is not needed. */
+ stabilize_ps_.bind_ubo("camera_next", &(*velocity.camera_steps[STEP_PREVIOUS]));
+ stabilize_ps_.bind_texture("coc_tx", &setup_coc_tx_, no_filter);
+ stabilize_ps_.bind_texture("color_tx", &setup_color_tx_, no_filter);
+ stabilize_ps_.bind_texture("velocity_tx", &render_buffers.vector_tx, no_filter);
+ stabilize_ps_.bind_texture("in_history_tx", &stabilize_input_, with_filter);
+ stabilize_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter);
+ stabilize_ps_.bind_ubo("dof_buf", data_);
+ stabilize_ps_.push_constant("use_history", &stabilize_valid_history_, 1);
+ stabilize_ps_.bind_image("out_coc_img", reduced_coc_tx_.mip_view(0));
+ stabilize_ps_.bind_image("out_color_img", reduced_color_tx_.mip_view(0));
+ stabilize_ps_.bind_image("out_history_img", &stabilize_output_tx_);
+ stabilize_ps_.dispatch(&dispatch_stabilize_size_);
+ stabilize_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS);
+}
+
+void DepthOfField::downsample_pass_sync()
+{
+ downsample_ps_.init();
+ downsample_ps_.shader_set(inst_.shaders.static_shader_get(DOF_DOWNSAMPLE));
+ downsample_ps_.bind_texture("color_tx", reduced_color_tx_.mip_view(0), no_filter);
+ downsample_ps_.bind_texture("coc_tx", reduced_coc_tx_.mip_view(0), no_filter);
+ downsample_ps_.bind_image("out_color_img", &downsample_tx_);
+ downsample_ps_.dispatch(&dispatch_downsample_size_);
+ downsample_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
+}
+
+void DepthOfField::reduce_pass_sync()
+{
+ reduce_ps_.init();
+ reduce_ps_.shader_set(inst_.shaders.static_shader_get(DOF_REDUCE));
+ reduce_ps_.bind_ubo("dof_buf", data_);
+ reduce_ps_.bind_texture("downsample_tx", &downsample_tx_, no_filter);
+ reduce_ps_.bind_ssbo("scatter_fg_list_buf", scatter_fg_list_buf_);
+ reduce_ps_.bind_ssbo("scatter_bg_list_buf", scatter_bg_list_buf_);
+ reduce_ps_.bind_ssbo("scatter_fg_indirect_buf", scatter_fg_indirect_buf_);
+ reduce_ps_.bind_ssbo("scatter_bg_indirect_buf", scatter_bg_indirect_buf_);
+ reduce_ps_.bind_image("inout_color_lod0_img", reduced_color_tx_.mip_view(0));
+ reduce_ps_.bind_image("out_color_lod1_img", reduced_color_tx_.mip_view(1));
+ reduce_ps_.bind_image("out_color_lod2_img", reduced_color_tx_.mip_view(2));
+ reduce_ps_.bind_image("out_color_lod3_img", reduced_color_tx_.mip_view(3));
+ reduce_ps_.bind_image("in_coc_lod0_img", reduced_coc_tx_.mip_view(0));
+ reduce_ps_.bind_image("out_coc_lod1_img", reduced_coc_tx_.mip_view(1));
+ reduce_ps_.bind_image("out_coc_lod2_img", reduced_coc_tx_.mip_view(2));
+ reduce_ps_.bind_image("out_coc_lod3_img", reduced_coc_tx_.mip_view(3));
+ reduce_ps_.dispatch(&dispatch_reduce_size_);
+ /* NOTE: Command buffer barrier is done automatically by the GPU backend. */
+ reduce_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_STORAGE);
+}
+
+void DepthOfField::tiles_flatten_pass_sync()
+{
+ tiles_flatten_ps_.init();
+ tiles_flatten_ps_.shader_set(inst_.shaders.static_shader_get(DOF_TILES_FLATTEN));
+ /* NOTE(fclem): We should use the reduced_coc_tx_ as it is stable, but we need the slight focus
+ * flag from the setup pass. A better way would be to do the brute-force in focus gather without
+ * this. */
+ tiles_flatten_ps_.bind_texture("coc_tx", &setup_coc_tx_, no_filter);
+ tiles_flatten_ps_.bind_image("out_tiles_fg_img", &tiles_fg_tx_.current());
+ tiles_flatten_ps_.bind_image("out_tiles_bg_img", &tiles_bg_tx_.current());
+ tiles_flatten_ps_.dispatch(&dispatch_tiles_flatten_size_);
+ tiles_flatten_ps_.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
+}
+
+void DepthOfField::tiles_dilate_pass_sync()
+{
+ for (int pass = 0; pass < 2; pass++) {
+ PassSimple &drw_pass = (pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_;
+ eShaderType sh_type = (pass == 0) ? DOF_TILES_DILATE_MINMAX : DOF_TILES_DILATE_MINABS;
+ drw_pass.init();
+ drw_pass.shader_set(inst_.shaders.static_shader_get(sh_type));
+ drw_pass.bind_image("in_tiles_fg_img", &tiles_fg_tx_.previous());
+ drw_pass.bind_image("in_tiles_bg_img", &tiles_bg_tx_.previous());
+ drw_pass.bind_image("out_tiles_fg_img", &tiles_fg_tx_.current());
+ drw_pass.bind_image("out_tiles_bg_img", &tiles_bg_tx_.current());
+ drw_pass.push_constant("ring_count", &tiles_dilate_ring_count_, 1);
+ drw_pass.push_constant("ring_width_multiplier", &tiles_dilate_ring_width_mul_, 1);
+ drw_pass.dispatch(&dispatch_tiles_dilate_size_);
+ drw_pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
+ }
+}
+
+void DepthOfField::gather_pass_sync()
+{
+ for (int pass = 0; pass < 2; pass++) {
+ PassSimple &drw_pass = (pass == 0) ? gather_fg_ps_ : gather_bg_ps_;
+ SwapChain<TextureFromPool, 2> &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_;
+ SwapChain<TextureFromPool, 2> &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_;
+ eShaderType sh_type = (pass == 0) ?
+ (use_bokeh_lut_ ? DOF_GATHER_FOREGROUND_LUT :
+ DOF_GATHER_FOREGROUND) :
+ (use_bokeh_lut_ ? DOF_GATHER_BACKGROUND_LUT : DOF_GATHER_BACKGROUND);
+ drw_pass.init();
+ inst_.sampling.bind_resources(&drw_pass);
+ drw_pass.shader_set(inst_.shaders.static_shader_get(sh_type));
+ drw_pass.bind_ubo("dof_buf", data_);
+ drw_pass.bind_texture("color_bilinear_tx", reduced_color_tx_, gather_bilinear);
+ drw_pass.bind_texture("color_tx", reduced_color_tx_, gather_nearest);
+ drw_pass.bind_texture("coc_tx", reduced_coc_tx_, gather_nearest);
+ drw_pass.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current());
+ drw_pass.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current());
+ drw_pass.bind_image("out_color_img", &color_chain.current());
+ drw_pass.bind_image("out_weight_img", &weight_chain.current());
+ drw_pass.bind_image("out_occlusion_img", &occlusion_tx_);
+ drw_pass.bind_texture("bokeh_lut_tx", &bokeh_gather_lut_tx_);
+ drw_pass.dispatch(&dispatch_gather_size_);
+ drw_pass.barrier(GPU_BARRIER_TEXTURE_FETCH);
+ }
+}
+
+void DepthOfField::filter_pass_sync()
+{
+ for (int pass = 0; pass < 2; pass++) {
+ PassSimple &drw_pass = (pass == 0) ? filter_fg_ps_ : filter_bg_ps_;
+ SwapChain<TextureFromPool, 2> &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_;
+ SwapChain<TextureFromPool, 2> &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_;
+ drw_pass.init();
+ drw_pass.shader_set(inst_.shaders.static_shader_get(DOF_FILTER));
+ drw_pass.bind_texture("color_tx", &color_chain.previous());
+ drw_pass.bind_texture("weight_tx", &weight_chain.previous());
+ drw_pass.bind_image("out_color_img", &color_chain.current());
+ drw_pass.bind_image("out_weight_img", &weight_chain.current());
+ drw_pass.dispatch(&dispatch_filter_size_);
+ drw_pass.barrier(GPU_BARRIER_TEXTURE_FETCH);
+ }
+}
+
+void DepthOfField::scatter_pass_sync()
+{
+ for (int pass = 0; pass < 2; pass++) {
+ PassSimple &drw_pass = (pass == 0) ? scatter_fg_ps_ : scatter_bg_ps_;
+ drw_pass.init();
+ drw_pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD_FULL);
+ drw_pass.shader_set(inst_.shaders.static_shader_get(DOF_SCATTER));
+ drw_pass.push_constant("use_bokeh_lut", use_bokeh_lut_);
+ drw_pass.bind_texture("bokeh_lut_tx", &bokeh_scatter_lut_tx_);
+ drw_pass.bind_texture("occlusion_tx", &occlusion_tx_);
+ if (pass == 0) {
+ drw_pass.bind_ssbo("scatter_list_buf", scatter_fg_list_buf_);
+ drw_pass.draw_procedural_indirect(GPU_PRIM_TRI_STRIP, scatter_fg_indirect_buf_);
+ /* Avoid background gather pass writing to the occlusion_tx mid pass. */
+ drw_pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
+ }
+ else {
+ drw_pass.bind_ssbo("scatter_list_buf", scatter_bg_list_buf_);
+ drw_pass.draw_procedural_indirect(GPU_PRIM_TRI_STRIP, scatter_bg_indirect_buf_);
+ }
+ }
+}
+
+void DepthOfField::hole_fill_pass_sync()
+{
+ hole_fill_ps_.init();
+ inst_.sampling.bind_resources(&hole_fill_ps_);
+ hole_fill_ps_.shader_set(inst_.shaders.static_shader_get(DOF_GATHER_HOLE_FILL));
+ hole_fill_ps_.bind_ubo("dof_buf", data_);
+ hole_fill_ps_.bind_texture("color_bilinear_tx", reduced_color_tx_, gather_bilinear);
+ hole_fill_ps_.bind_texture("color_tx", reduced_color_tx_, gather_nearest);
+ hole_fill_ps_.bind_texture("coc_tx", reduced_coc_tx_, gather_nearest);
+ hole_fill_ps_.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current());
+ hole_fill_ps_.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current());
+ hole_fill_ps_.bind_image("out_color_img", &hole_fill_color_tx_);
+ hole_fill_ps_.bind_image("out_weight_img", &hole_fill_weight_tx_);
+ hole_fill_ps_.dispatch(&dispatch_gather_size_);
+ hole_fill_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
+}
+
+void DepthOfField::resolve_pass_sync()
+{
+ eGPUSamplerState with_filter = GPU_SAMPLER_FILTER;
+ RenderBuffers &render_buffers = inst_.render_buffers;
+ eShaderType sh_type = use_bokeh_lut_ ? DOF_RESOLVE_LUT : DOF_RESOLVE;
+
+ resolve_ps_.init();
+ inst_.sampling.bind_resources(&resolve_ps_);
+ resolve_ps_.shader_set(inst_.shaders.static_shader_get(sh_type));
+ resolve_ps_.bind_ubo("dof_buf", data_);
+ resolve_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter);
+ resolve_ps_.bind_texture("color_tx", &input_color_tx_, no_filter);
+ resolve_ps_.bind_texture("stable_color_tx", &resolve_stable_color_tx_, no_filter);
+ resolve_ps_.bind_texture("color_bg_tx", &color_bg_tx_.current(), with_filter);
+ resolve_ps_.bind_texture("color_fg_tx", &color_fg_tx_.current(), with_filter);
+ resolve_ps_.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current());
+ resolve_ps_.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current());
+ resolve_ps_.bind_texture("weight_bg_tx", &weight_bg_tx_.current());
+ resolve_ps_.bind_texture("weight_fg_tx", &weight_fg_tx_.current());
+ resolve_ps_.bind_texture("color_hole_fill_tx", &hole_fill_color_tx_);
+ resolve_ps_.bind_texture("weight_hole_fill_tx", &hole_fill_weight_tx_);
+ resolve_ps_.bind_texture("bokeh_lut_tx", &bokeh_resolve_lut_tx_);
+ resolve_ps_.bind_image("out_color_img", &output_color_tx_);
+ resolve_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
+ resolve_ps_.dispatch(&dispatch_resolve_size_);
+ resolve_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Post-FX Rendering.
+ * \{ */
+
+/* Similar to Film::update_sample_table() but with constant filter radius and constant sample
+ * count. */
+void DepthOfField::update_sample_table()
+{
+ float2 subpixel_offset = inst_.film.pixel_jitter_get();
+ /* Since the film jitter is in full-screen res, divide by 2 to get the jitter in half res. */
+ subpixel_offset *= 0.5;
+
+ /* Same offsets as in dof_spatial_filtering(). */
+ const std::array<int2, 4> plus_offsets = {int2(-1, 0), int2(0, -1), int2(1, 0), int2(0, 1)};
+
+ const float radius = 1.5f;
+ int i = 0;
+ for (int2 offset : plus_offsets) {
+ float2 pixel_ofs = float2(offset) - subpixel_offset;
+ data_.filter_samples_weight[i++] = film_filter_weight(radius, math::length_squared(pixel_ofs));
+ }
+ data_.filter_center_weight = film_filter_weight(radius, math::length_squared(subpixel_offset));
+}
+
+void DepthOfField::render(View &view,
+ GPUTexture **input_tx,
+ GPUTexture **output_tx,
+ DepthOfFieldBuffer &dof_buffer)
+{
+ if (fx_radius_ == 0.0f) {
+ return;
+ }
+
+ input_color_tx_ = *input_tx;
+ output_color_tx_ = *output_tx;
+ extent_ = {GPU_texture_width(input_color_tx_), GPU_texture_height(input_color_tx_)};
+
+ {
+ const CameraData &cam_data = inst_.camera.data_get();
+ data_.camera_type = cam_data.type;
+ /* OPTI(fclem) Could be optimized. */
+ float3 jitter = float3(fx_radius_, 0.0f, -focus_distance_);
+ float3 center = float3(0.0f, 0.0f, -focus_distance_);
+ mul_project_m4_v3(cam_data.winmat.ptr(), jitter);
+ mul_project_m4_v3(cam_data.winmat.ptr(), center);
+ /* Simplify CoC calculation to a simple MADD. */
+ if (inst_.camera.is_orthographic()) {
+ data_.coc_mul = (center[0] - jitter[0]) * 0.5f * extent_[0];
+ data_.coc_bias = focus_distance_ * data_.coc_mul;
+ }
+ else {
+ data_.coc_bias = -(center[0] - jitter[0]) * 0.5f * extent_[0];
+ data_.coc_mul = focus_distance_ * data_.coc_bias;
+ }
+
+ float min_fg_coc = coc_radius_from_camera_depth(data_, -cam_data.clip_near);
+ float max_bg_coc = coc_radius_from_camera_depth(data_, -cam_data.clip_far);
+ if (data_.camera_type != CAMERA_ORTHO) {
+ /* Background is at infinity so maximum CoC is the limit of coc_radius_from_camera_depth
+ * at -inf. We only do this for perspective camera since orthographic coc limit is inf. */
+ max_bg_coc = data_.coc_bias;
+ }
+ /* Clamp with user defined max. */
+ data_.coc_abs_max = min_ff(max_ff(fabsf(min_fg_coc), fabsf(max_bg_coc)), fx_max_coc_);
+ /* TODO(fclem): Make this dependent of the quality of the gather pass. */
+ data_.scatter_coc_threshold = 4.0f;
+
+ update_sample_table();
+
+ data_.push_update();
+ }
+
+ int2 half_res = math::divide_ceil(extent_, int2(2));
+ int2 quarter_res = math::divide_ceil(extent_, int2(4));
+ int2 tile_res = math::divide_ceil(half_res, int2(DOF_TILES_SIZE));
+
+ dispatch_setup_size_ = int3(math::divide_ceil(half_res, int2(DOF_DEFAULT_GROUP_SIZE)), 1);
+ dispatch_stabilize_size_ = int3(math::divide_ceil(half_res, int2(DOF_STABILIZE_GROUP_SIZE)), 1);
+ dispatch_downsample_size_ = int3(math::divide_ceil(quarter_res, int2(DOF_DEFAULT_GROUP_SIZE)),
+ 1);
+ dispatch_reduce_size_ = int3(math::divide_ceil(half_res, int2(DOF_REDUCE_GROUP_SIZE)), 1);
+ dispatch_tiles_flatten_size_ = int3(math::divide_ceil(half_res, int2(DOF_TILES_SIZE)), 1);
+ dispatch_tiles_dilate_size_ = int3(
+ math::divide_ceil(tile_res, int2(DOF_TILES_DILATE_GROUP_SIZE)), 1);
+ dispatch_gather_size_ = int3(math::divide_ceil(half_res, int2(DOF_GATHER_GROUP_SIZE)), 1);
+ dispatch_filter_size_ = int3(math::divide_ceil(half_res, int2(DOF_FILTER_GROUP_SIZE)), 1);
+ dispatch_resolve_size_ = int3(math::divide_ceil(extent_, int2(DOF_RESOLVE_GROUP_SIZE)), 1);
+
+ if (GPU_type_matches_ex(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
+ /* On Mesa, there is a sync bug which can make a portion of the main pass (usually one shader)
+ * leave blocks of un-initialized memory. Doing a flush seems to alleviate the issue. */
+ GPU_flush();
+ }
+
+ DRW_stats_group_start("Depth of Field");
+
+ Manager &drw = *inst_.manager;
+
+ {
+ DRW_stats_group_start("Setup");
+ {
+ bokeh_gather_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_RG16F);
+ bokeh_scatter_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_R16F);
+ bokeh_resolve_lut_tx_.acquire(int2(DOF_MAX_SLIGHT_FOCUS_RADIUS * 2 + 1), GPU_R16F);
+
+ if (use_bokeh_lut_) {
+ drw.submit(bokeh_lut_ps_, view);
+ }
+ }
+ {
+ setup_color_tx_.acquire(half_res, GPU_RGBA16F);
+ setup_coc_tx_.acquire(half_res, GPU_R16F);
+
+ drw.submit(setup_ps_, view);
+ }
+ {
+ stabilize_output_tx_.acquire(half_res, GPU_RGBA16F);
+ stabilize_valid_history_ = !dof_buffer.stabilize_history_tx_.ensure_2d(GPU_RGBA16F,
+ half_res);
+
+ if (stabilize_valid_history_ == false) {
+ /* Avoid uninitialized memory that can contain NaNs. */
+ dof_buffer.stabilize_history_tx_.clear(float4(0.0f));
+ }
+
+ stabilize_input_ = dof_buffer.stabilize_history_tx_;
+ /* Outputs to reduced_*_tx_ mip 0. */
+ drw.submit(stabilize_ps_, view);
+
+ /* WATCH(fclem): Swap Texture an TextureFromPool internal GPUTexture in order to reuse
+ * the one that we just consumed. */
+ TextureFromPool::swap(stabilize_output_tx_, dof_buffer.stabilize_history_tx_);
+
+ /* Used by stabilize pass. */
+ stabilize_output_tx_.release();
+ setup_color_tx_.release();
+ }
+ {
+ DRW_stats_group_start("Tile Prepare");
+
+ /* WARNING: If format changes, make sure dof_tile_* GLSL constants are properly encoded. */
+ tiles_fg_tx_.previous().acquire(tile_res, GPU_R11F_G11F_B10F);
+ tiles_bg_tx_.previous().acquire(tile_res, GPU_R11F_G11F_B10F);
+ tiles_fg_tx_.current().acquire(tile_res, GPU_R11F_G11F_B10F);
+ tiles_bg_tx_.current().acquire(tile_res, GPU_R11F_G11F_B10F);
+
+ drw.submit(tiles_flatten_ps_, view);
+
+ /* Used by tile_flatten and stabilize_ps pass. */
+ setup_coc_tx_.release();
+
+ /* Error introduced by gather center jittering. */
+ const float error_multiplier = 1.0f + 1.0f / (DOF_GATHER_RING_COUNT + 0.5f);
+ int dilation_end_radius = ceilf((fx_max_coc_ * error_multiplier) / (DOF_TILES_SIZE * 2));
+
+ /* Run dilation twice. One for minmax and one for minabs. */
+ for (int pass = 0; pass < 2; pass++) {
+ /* This algorithm produce the exact dilation radius by dividing it in multiple passes. */
+ int dilation_radius = 0;
+ while (dilation_radius < dilation_end_radius) {
+ int remainder = dilation_end_radius - dilation_radius;
+ /* Do not step over any unvisited tile. */
+ int max_multiplier = dilation_radius + 1;
+
+ int ring_count = min_ii(DOF_DILATE_RING_COUNT, ceilf(remainder / (float)max_multiplier));
+ int multiplier = min_ii(max_multiplier, floorf(remainder / (float)ring_count));
+
+ dilation_radius += ring_count * multiplier;
+
+ tiles_dilate_ring_count_ = ring_count;
+ tiles_dilate_ring_width_mul_ = multiplier;
+
+ tiles_fg_tx_.swap();
+ tiles_bg_tx_.swap();
+
+ drw.submit((pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_, view);
+ }
+ }
+
+ tiles_fg_tx_.previous().release();
+ tiles_bg_tx_.previous().release();
+
+ DRW_stats_group_end();
+ }
+
+ downsample_tx_.acquire(quarter_res, GPU_RGBA16F);
+
+ drw.submit(downsample_ps_, view);
+
+ scatter_fg_indirect_buf_.clear_to_zero();
+ scatter_bg_indirect_buf_.clear_to_zero();
+
+ drw.submit(reduce_ps_, view);
+
+ /* Used by reduce pass. */
+ downsample_tx_.release();
+
+ DRW_stats_group_end();
+ }
+
+ for (int is_background = 0; is_background < 2; is_background++) {
+ DRW_stats_group_start(is_background ? "Background Convolution" : "Foreground Convolution");
+
+ SwapChain<TextureFromPool, 2> &color_tx = is_background ? color_bg_tx_ : color_fg_tx_;
+ SwapChain<TextureFromPool, 2> &weight_tx = is_background ? weight_bg_tx_ : weight_fg_tx_;
+ Framebuffer &scatter_fb = is_background ? scatter_bg_fb_ : scatter_fg_fb_;
+ PassSimple &gather_ps = is_background ? gather_bg_ps_ : gather_fg_ps_;
+ PassSimple &filter_ps = is_background ? filter_bg_ps_ : filter_fg_ps_;
+ PassSimple &scatter_ps = is_background ? scatter_bg_ps_ : scatter_fg_ps_;
+
+ color_tx.current().acquire(half_res, GPU_RGBA16F);
+ weight_tx.current().acquire(half_res, GPU_R16F);
+ occlusion_tx_.acquire(half_res, GPU_RG16F);
+
+ drw.submit(gather_ps, view);
+
+ {
+ /* Filtering pass. */
+ color_tx.swap();
+ weight_tx.swap();
+
+ color_tx.current().acquire(half_res, GPU_RGBA16F);
+ weight_tx.current().acquire(half_res, GPU_R16F);
+
+ drw.submit(filter_ps, view);
+
+ color_tx.previous().release();
+ weight_tx.previous().release();
+ }
+
+ GPU_memory_barrier(GPU_BARRIER_FRAMEBUFFER);
+
+ scatter_fb.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(color_tx.current()));
+
+ GPU_framebuffer_bind(scatter_fb);
+ drw.submit(scatter_ps, view);
+
+ /* Used by scatter pass. */
+ occlusion_tx_.release();
+
+ DRW_stats_group_end();
+ }
+ {
+ DRW_stats_group_start("Hole Fill");
+
+ bokeh_gather_lut_tx_.release();
+ bokeh_scatter_lut_tx_.release();
+
+ hole_fill_color_tx_.acquire(half_res, GPU_RGBA16F);
+ hole_fill_weight_tx_.acquire(half_res, GPU_R16F);
+
+ drw.submit(hole_fill_ps_, view);
+
+ /* NOTE: We do not filter the hole-fill pass as effect is likely to not be noticeable. */
+
+ DRW_stats_group_end();
+ }
+ {
+ DRW_stats_group_start("Resolve");
+
+ resolve_stable_color_tx_ = dof_buffer.stabilize_history_tx_;
+
+ drw.submit(resolve_ps_, view);
+
+ color_bg_tx_.current().release();
+ color_fg_tx_.current().release();
+ weight_bg_tx_.current().release();
+ weight_fg_tx_.current().release();
+ tiles_fg_tx_.current().release();
+ tiles_bg_tx_.current().release();
+ hole_fill_color_tx_.release();
+ hole_fill_weight_tx_.release();
+ bokeh_resolve_lut_tx_.release();
+
+ DRW_stats_group_end();
+ }
+
+ DRW_stats_group_end();
+
+ /* Swap buffers so that next effect has the right input. */
+ SWAP(GPUTexture *, *input_tx, *output_tx);
+}
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh
new file mode 100644
index 00000000000..bac0e394d66
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * Depth of field post process effect.
+ *
+ * There are 2 methods to achieve this effect.
+ * - The first uses projection matrix offsetting and sample accumulation to give
+ * reference quality depth of field. But this needs many samples to hide the
+ * under-sampling.
+ * - The second one is a post-processing based one. It follows the
+ * implementation described in the presentation
+ * "Life of a Bokeh - Siggraph 2018" from Guillaume Abadie.
+ * There are some difference with our actual implementation that prioritize quality.
+ */
+
+#pragma once
+
+#include "eevee_shader_shared.hh"
+
+namespace blender::eevee {
+
+class Instance;
+
+/* -------------------------------------------------------------------- */
+/** \name Depth of field
+ * \{ */
+
+struct DepthOfFieldBuffer {
+ /**
+ * Per view history texture for stabilize pass.
+ * Swapped with stabilize_output_tx_ in order to reuse the previous history during DoF
+ * processing.
+ * Note this should be private as its inner working only concerns the Depth Of Field
+ * implementation. The view itself should not touch it.
+ */
+ Texture stabilize_history_tx_ = {"dof_taa"};
+};
+
+class DepthOfField {
+ private:
+ class Instance &inst_;
+
+ /** Samplers */
+ static constexpr eGPUSamplerState gather_bilinear = GPU_SAMPLER_MIPMAP | GPU_SAMPLER_FILTER;
+ static constexpr eGPUSamplerState gather_nearest = GPU_SAMPLER_MIPMAP;
+
+ /** Input/Output texture references. */
+ GPUTexture *input_color_tx_ = nullptr;
+ GPUTexture *output_color_tx_ = nullptr;
+
+ /** Bokeh LUT precompute pass. */
+ TextureFromPool bokeh_gather_lut_tx_ = {"dof_bokeh_gather_lut"};
+ TextureFromPool bokeh_resolve_lut_tx_ = {"dof_bokeh_resolve_lut"};
+ TextureFromPool bokeh_scatter_lut_tx_ = {"dof_bokeh_scatter_lut"};
+ PassSimple bokeh_lut_ps_ = {"BokehLut"};
+
+ /** Outputs half-resolution color and Circle Of Confusion. */
+ TextureFromPool setup_coc_tx_ = {"dof_setup_coc"};
+ TextureFromPool setup_color_tx_ = {"dof_setup_color"};
+ int3 dispatch_setup_size_ = int3(-1);
+ PassSimple setup_ps_ = {"Setup"};
+
+ /** Allocated because we need mip chain. Which isn't supported by TextureFromPool. */
+ Texture reduced_coc_tx_ = {"dof_reduced_coc"};
+ Texture reduced_color_tx_ = {"dof_reduced_color"};
+
+ /** Stabilization (flicker attenuation) of Color and CoC output of the setup pass. */
+ TextureFromPool stabilize_output_tx_ = {"dof_taa"};
+ GPUTexture *stabilize_input_ = nullptr;
+ bool1 stabilize_valid_history_ = false;
+ int3 dispatch_stabilize_size_ = int3(-1);
+ PassSimple stabilize_ps_ = {"Stabilize"};
+
+ /** 1/4th res color buffer used to speedup the local contrast test in the first reduce pass. */
+ TextureFromPool downsample_tx_ = {"dof_downsample"};
+ int3 dispatch_downsample_size_ = int3(-1);
+ PassSimple downsample_ps_ = {"Downsample"};
+
+ /** Create mip-mapped color & COC textures for gather passes as well as scatter rect list. */
+ DepthOfFieldScatterListBuf scatter_fg_list_buf_;
+ DepthOfFieldScatterListBuf scatter_bg_list_buf_;
+ DrawIndirectBuf scatter_fg_indirect_buf_;
+ DrawIndirectBuf scatter_bg_indirect_buf_;
+ int3 dispatch_reduce_size_ = int3(-1);
+ PassSimple reduce_ps_ = {"Reduce"};
+
+ /** Outputs min & max COC in each 8x8 half res pixel tiles (so 1/16th of full resolution). */
+ SwapChain<TextureFromPool, 2> tiles_fg_tx_;
+ SwapChain<TextureFromPool, 2> tiles_bg_tx_;
+ int3 dispatch_tiles_flatten_size_ = int3(-1);
+ PassSimple tiles_flatten_ps_ = {"TilesFlatten"};
+
+ /** Dilates the min & max CoCs to cover maximum COC values. */
+ int tiles_dilate_ring_count_ = -1;
+ int tiles_dilate_ring_width_mul_ = -1;
+ int3 dispatch_tiles_dilate_size_ = int3(-1);
+ PassSimple tiles_dilate_minmax_ps_ = {"TilesDilateMinmax"};
+ PassSimple tiles_dilate_minabs_ps_ = {"TilesDilateMinabs"};
+
+ /** Gather convolution for low intensity pixels and low contrast areas. */
+ SwapChain<TextureFromPool, 2> color_bg_tx_;
+ SwapChain<TextureFromPool, 2> color_fg_tx_;
+ SwapChain<TextureFromPool, 2> weight_bg_tx_;
+ SwapChain<TextureFromPool, 2> weight_fg_tx_;
+ TextureFromPool occlusion_tx_ = {"dof_occlusion"};
+ int3 dispatch_gather_size_ = int3(-1);
+ PassSimple gather_fg_ps_ = {"GatherFg"};
+ PassSimple gather_bg_ps_ = {"GatherBg"};
+
+ /** Hole-fill convolution: Gather pass meant to fill areas of foreground dis-occlusion. */
+ TextureFromPool hole_fill_color_tx_ = {"dof_color_hole_fill"};
+ TextureFromPool hole_fill_weight_tx_ = {"dof_weight_hole_fill"};
+ PassSimple hole_fill_ps_ = {"HoleFill"};
+
+ /** Small Filter pass to reduce noise out of gather passes. */
+ int3 dispatch_filter_size_ = int3(-1);
+ PassSimple filter_fg_ps_ = {"FilterFg"};
+ PassSimple filter_bg_ps_ = {"FilterBg"};
+
+ /** Scatter convolution: A quad is emitted for every 4 bright enough half pixels. */
+ Framebuffer scatter_fg_fb_ = {"dof_scatter_fg"};
+ Framebuffer scatter_bg_fb_ = {"dof_scatter_bg"};
+ PassSimple scatter_fg_ps_ = {"ScatterFg"};
+ PassSimple scatter_bg_ps_ = {"ScatterBg"};
+
+ /** Recombine the results and also perform a slight out of focus gather. */
+ GPUTexture *resolve_stable_color_tx_ = nullptr;
+ int3 dispatch_resolve_size_ = int3(-1);
+ PassSimple resolve_ps_ = {"Resolve"};
+
+ DepthOfFieldDataBuf data_;
+
+ /** Scene settings that are immutable. */
+ float user_overblur_;
+ float fx_max_coc_;
+ /** Use jittered depth of field where we randomize camera location. */
+ bool do_jitter_;
+ /** Enable bokeh lookup texture. */
+ bool use_bokeh_lut_;
+
+ /** Circle of Confusion radius for FX DoF passes. Is in view X direction in [0..1] range. */
+ float fx_radius_;
+ /** Circle of Confusion radius for jittered DoF. Is in view X direction in [0..1] range. */
+ float jitter_radius_;
+ /** Focus distance in view space. */
+ float focus_distance_;
+ /** Extent of the input buffer. */
+ int2 extent_;
+
+ public:
+ DepthOfField(Instance &inst) : inst_(inst){};
+ ~DepthOfField(){};
+
+ void init();
+
+ void sync();
+
+ /**
+ * Apply Depth Of Field jittering to the view and projection matrices..
+ */
+ void jitter_apply(float4x4 &winmat, float4x4 &viewmat);
+
+ /**
+ * Will swap input and output texture if rendering happens. The actual output of this function
+ * is in input_tx.
+ */
+ void render(View &view,
+ GPUTexture **input_tx,
+ GPUTexture **output_tx,
+ DepthOfFieldBuffer &dof_buffer);
+
+ bool postfx_enabled() const
+ {
+ return fx_radius_ > 0.0f;
+ }
+
+ private:
+ void bokeh_lut_pass_sync();
+ void setup_pass_sync();
+ void stabilize_pass_sync();
+ void downsample_pass_sync();
+ void reduce_pass_sync();
+ void tiles_flatten_pass_sync();
+ void tiles_dilate_pass_sync();
+ void gather_pass_sync();
+ void filter_pass_sync();
+ void scatter_pass_sync();
+ void hole_fill_pass_sync();
+ void resolve_pass_sync();
+
+ void update_sample_table();
+};
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_engine.cc b/source/blender/draw/engines/eevee_next/eevee_engine.cc
index be0adfad568..2e476b7d891 100644
--- a/source/blender/draw/engines/eevee_next/eevee_engine.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_engine.cc
@@ -12,6 +12,8 @@
#include "DRW_render.h"
+#include "RE_pipeline.h"
+
#include "eevee_engine.h" /* Own include. */
#include "eevee_instance.hh"
@@ -97,6 +99,8 @@ static void eevee_draw_scene(void *vedata)
DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get();
ved->instance->draw_viewport(dfbl);
STRNCPY(ved->info, ved->instance->info.c_str());
+ /* Reset view for other following engines. */
+ DRW_view_set_active(nullptr);
}
static void eevee_cache_init(void *vedata)
@@ -144,7 +148,23 @@ static void eevee_render_to_image(void *UNUSED(vedata),
if (!GPU_shader_storage_buffer_objects_support()) {
return;
}
- UNUSED_VARS(engine, layer);
+
+ eevee::Instance *instance = new eevee::Instance();
+
+ Render *render = engine->re;
+ Depsgraph *depsgraph = DRW_context_state_get()->depsgraph;
+ Object *camera_original_ob = RE_GetCamera(engine->re);
+ const char *viewname = RE_GetActiveRenderView(engine->re);
+ int size[2] = {engine->resolution_x, engine->resolution_y};
+
+ rctf view_rect;
+ rcti rect;
+ RE_GetViewPlane(render, &view_rect, &rect);
+
+ instance->init(size, &rect, engine, depsgraph, nullptr, camera_original_ob, layer);
+ instance->render_frame(layer, viewname);
+
+ delete instance;
}
static void eevee_render_update_passes(RenderEngine *engine, Scene *scene, ViewLayer *view_layer)
@@ -152,7 +172,51 @@ static void eevee_render_update_passes(RenderEngine *engine, Scene *scene, ViewL
if (!GPU_shader_storage_buffer_objects_support()) {
return;
}
- UNUSED_VARS(engine, scene, view_layer);
+
+ RE_engine_register_pass(engine, scene, view_layer, RE_PASSNAME_COMBINED, 4, "RGBA", SOCK_RGBA);
+
+#define CHECK_PASS_LEGACY(name, type, channels, chanid) \
+ if (view_layer->passflag & (SCE_PASS_##name)) { \
+ RE_engine_register_pass( \
+ engine, scene, view_layer, RE_PASSNAME_##name, channels, chanid, type); \
+ } \
+ ((void)0)
+#define CHECK_PASS_EEVEE(name, type, channels, chanid) \
+ if (view_layer->eevee.render_passes & (EEVEE_RENDER_PASS_##name)) { \
+ RE_engine_register_pass( \
+ engine, scene, view_layer, RE_PASSNAME_##name, channels, chanid, type); \
+ } \
+ ((void)0)
+
+ CHECK_PASS_LEGACY(Z, SOCK_FLOAT, 1, "Z");
+ CHECK_PASS_LEGACY(MIST, SOCK_FLOAT, 1, "Z");
+ CHECK_PASS_LEGACY(NORMAL, SOCK_VECTOR, 3, "XYZ");
+ CHECK_PASS_LEGACY(DIFFUSE_DIRECT, SOCK_RGBA, 3, "RGB");
+ CHECK_PASS_LEGACY(DIFFUSE_COLOR, SOCK_RGBA, 3, "RGB");
+ CHECK_PASS_LEGACY(GLOSSY_DIRECT, SOCK_RGBA, 3, "RGB");
+ CHECK_PASS_LEGACY(GLOSSY_COLOR, SOCK_RGBA, 3, "RGB");
+ CHECK_PASS_EEVEE(VOLUME_LIGHT, SOCK_RGBA, 3, "RGB");
+ CHECK_PASS_LEGACY(EMIT, SOCK_RGBA, 3, "RGB");
+ CHECK_PASS_LEGACY(ENVIRONMENT, SOCK_RGBA, 3, "RGB");
+ /* TODO: CHECK_PASS_LEGACY(SHADOW, SOCK_RGBA, 3, "RGB");
+ * CHECK_PASS_LEGACY(AO, SOCK_RGBA, 3, "RGB");
+ * When available they should be converted from Value textures to RGB. */
+
+ LISTBASE_FOREACH (ViewLayerAOV *, aov, &view_layer->aovs) {
+ if ((aov->flag & AOV_CONFLICT) != 0) {
+ continue;
+ }
+ switch (aov->type) {
+ case AOV_TYPE_COLOR:
+ RE_engine_register_pass(engine, scene, view_layer, aov->name, 4, "RGBA", SOCK_RGBA);
+ break;
+ case AOV_TYPE_VALUE:
+ RE_engine_register_pass(engine, scene, view_layer, aov->name, 1, "X", SOCK_FLOAT);
+ break;
+ default:
+ break;
+ }
+ }
}
static const DrawEngineDataSize eevee_data_size = DRW_VIEWPORT_DATA_SIZE(EEVEE_Data);
diff --git a/source/blender/draw/engines/eevee_next/eevee_film.cc b/source/blender/draw/engines/eevee_next/eevee_film.cc
new file mode 100644
index 00000000000..4679889e59a
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_film.cc
@@ -0,0 +1,633 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * A film is a fullscreen buffer (usually at output extent)
+ * that will be able to accumulate sample in any distorted camera_type
+ * using a pixel filter.
+ *
+ * Input needs to be jittered so that the filter converges to the right result.
+ */
+
+#include "BLI_hash.h"
+#include "BLI_rect.h"
+
+#include "GPU_framebuffer.h"
+#include "GPU_texture.h"
+
+#include "DRW_render.h"
+#include "RE_pipeline.h"
+
+#include "eevee_film.hh"
+#include "eevee_instance.hh"
+
+namespace blender::eevee {
+
+ENUM_OPERATORS(eViewLayerEEVEEPassType, 1 << EEVEE_RENDER_PASS_MAX_BIT)
+
+/* -------------------------------------------------------------------- */
+/** \name Arbitrary Output Variables
+ * \{ */
+
+void Film::init_aovs()
+{
+ Vector<ViewLayerAOV *> aovs;
+
+ aovs_info.display_id = -1;
+ aovs_info.display_is_value = false;
+ aovs_info.value_len = aovs_info.color_len = 0;
+
+ if (inst_.is_viewport()) {
+ /* Viewport case. */
+ if (inst_.v3d->shading.render_pass == EEVEE_RENDER_PASS_AOV) {
+ /* AOV display, request only a single AOV. */
+ ViewLayerAOV *aov = (ViewLayerAOV *)BLI_findstring(
+ &inst_.view_layer->aovs, inst_.v3d->shading.aov_name, offsetof(ViewLayerAOV, name));
+
+ if (aov == nullptr) {
+ /* AOV not found in view layer. */
+ return;
+ }
+
+ aovs.append(aov);
+ aovs_info.display_id = 0;
+ aovs_info.display_is_value = (aov->type == AOV_TYPE_VALUE);
+ }
+ else {
+ /* TODO(fclem): The realtime compositor could ask for several AOVs. */
+ }
+ }
+ else {
+ /* Render case. */
+ LISTBASE_FOREACH (ViewLayerAOV *, aov, &inst_.view_layer->aovs) {
+ aovs.append(aov);
+ }
+ }
+
+ if (aovs.size() > AOV_MAX) {
+ inst_.info = "Error: Too many AOVs";
+ return;
+ }
+
+ for (ViewLayerAOV *aov : aovs) {
+ bool is_value = (aov->type == AOV_TYPE_VALUE);
+ uint &index = is_value ? aovs_info.value_len : aovs_info.color_len;
+ uint &hash = is_value ? aovs_info.hash_value[index] : aovs_info.hash_color[index];
+ hash = BLI_hash_string(aov->name);
+ index++;
+ }
+}
+
+float *Film::read_aov(ViewLayerAOV *aov)
+{
+ bool is_value = (aov->type == AOV_TYPE_VALUE);
+ Texture &accum_tx = is_value ? value_accum_tx_ : color_accum_tx_;
+
+ Span<uint> aovs_hash(is_value ? aovs_info.hash_value : aovs_info.hash_color,
+ is_value ? aovs_info.value_len : aovs_info.color_len);
+ /* Find AOV index. */
+ uint hash = BLI_hash_string(aov->name);
+ int aov_index = -1;
+ int i = 0;
+ for (uint candidate_hash : aovs_hash) {
+ if (candidate_hash == hash) {
+ aov_index = i;
+ break;
+ }
+ i++;
+ }
+
+ accum_tx.ensure_layer_views();
+
+ int index = aov_index + (is_value ? data_.aov_value_id : data_.aov_color_id);
+ GPUTexture *pass_tx = accum_tx.layer_view(index);
+
+ return (float *)GPU_texture_read(pass_tx, GPU_DATA_FLOAT, 0);
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Mist Pass
+ * \{ */
+
+void Film::sync_mist()
+{
+ const CameraData &cam = inst_.camera.data_get();
+ const ::World *world = inst_.scene->world;
+ float mist_start = world ? world->miststa : cam.clip_near;
+ float mist_distance = world ? world->mistdist : fabsf(cam.clip_far - cam.clip_near);
+ int mist_type = world ? world->mistype : (int)WO_MIST_LINEAR;
+
+ switch (mist_type) {
+ case WO_MIST_QUADRATIC:
+ data_.mist_exponent = 2.0f;
+ break;
+ case WO_MIST_LINEAR:
+ data_.mist_exponent = 1.0f;
+ break;
+ case WO_MIST_INVERSE_QUADRATIC:
+ data_.mist_exponent = 0.5f;
+ break;
+ }
+
+ data_.mist_scale = 1.0 / mist_distance;
+ data_.mist_bias = -mist_start / mist_distance;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name FilmData
+ * \{ */
+
+inline bool operator==(const FilmData &a, const FilmData &b)
+{
+ return (a.extent == b.extent) && (a.offset == b.offset) &&
+ (a.filter_radius == b.filter_radius) && (a.scaling_factor == b.scaling_factor) &&
+ (a.background_opacity == b.background_opacity);
+}
+
+inline bool operator!=(const FilmData &a, const FilmData &b)
+{
+ return !(a == b);
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Film
+ * \{ */
+
+void Film::init(const int2 &extent, const rcti *output_rect)
+{
+ Sampling &sampling = inst_.sampling;
+ Scene &scene = *inst_.scene;
+ SceneEEVEE &scene_eevee = scene.eevee;
+
+ init_aovs();
+
+ {
+ /* Enable passes that need to be rendered. */
+ eViewLayerEEVEEPassType render_passes = eViewLayerEEVEEPassType(0);
+
+ if (inst_.is_viewport()) {
+ /* Viewport Case. */
+ render_passes = eViewLayerEEVEEPassType(inst_.v3d->shading.render_pass);
+
+ if (inst_.overlays_enabled() || inst_.gpencil_engine_enabled) {
+ /* Overlays and Grease Pencil needs the depth for correct compositing.
+ * Using the render pass ensure we store the center depth. */
+ render_passes |= EEVEE_RENDER_PASS_Z;
+ }
+ }
+ else {
+ /* Render Case. */
+ render_passes = eViewLayerEEVEEPassType(inst_.view_layer->eevee.render_passes);
+
+#define ENABLE_FROM_LEGACY(name_legacy, name_eevee) \
+ SET_FLAG_FROM_TEST(render_passes, \
+ (inst_.view_layer->passflag & SCE_PASS_##name_legacy) != 0, \
+ EEVEE_RENDER_PASS_##name_eevee);
+
+ ENABLE_FROM_LEGACY(COMBINED, COMBINED)
+ ENABLE_FROM_LEGACY(Z, Z)
+ ENABLE_FROM_LEGACY(MIST, MIST)
+ ENABLE_FROM_LEGACY(NORMAL, NORMAL)
+ ENABLE_FROM_LEGACY(SHADOW, SHADOW)
+ ENABLE_FROM_LEGACY(AO, AO)
+ ENABLE_FROM_LEGACY(EMIT, EMIT)
+ ENABLE_FROM_LEGACY(ENVIRONMENT, ENVIRONMENT)
+ ENABLE_FROM_LEGACY(DIFFUSE_COLOR, DIFFUSE_COLOR)
+ ENABLE_FROM_LEGACY(GLOSSY_COLOR, SPECULAR_COLOR)
+ ENABLE_FROM_LEGACY(DIFFUSE_DIRECT, DIFFUSE_LIGHT)
+ ENABLE_FROM_LEGACY(GLOSSY_DIRECT, SPECULAR_LIGHT)
+ ENABLE_FROM_LEGACY(ENVIRONMENT, ENVIRONMENT)
+ ENABLE_FROM_LEGACY(VECTOR, VECTOR)
+
+#undef ENABLE_FROM_LEGACY
+ }
+
+ /* Filter obsolete passes. */
+ render_passes &= ~(EEVEE_RENDER_PASS_UNUSED_8 | EEVEE_RENDER_PASS_BLOOM);
+
+ if (scene_eevee.flag & SCE_EEVEE_MOTION_BLUR_ENABLED) {
+ /* Disable motion vector pass if motion blur is enabled. */
+ render_passes &= ~EEVEE_RENDER_PASS_VECTOR;
+ }
+
+ /* TODO(@fclem): Can't we rely on depsgraph update notification? */
+ if (assign_if_different(enabled_passes_, render_passes)) {
+ sampling.reset();
+ }
+ }
+ {
+ rcti fallback_rect;
+ if (BLI_rcti_is_empty(output_rect)) {
+ BLI_rcti_init(&fallback_rect, 0, extent[0], 0, extent[1]);
+ output_rect = &fallback_rect;
+ }
+
+ FilmData data = data_;
+ data.extent = int2(BLI_rcti_size_x(output_rect), BLI_rcti_size_y(output_rect));
+ data.offset = int2(output_rect->xmin, output_rect->ymin);
+ data.extent_inv = 1.0f / float2(data.extent);
+ /* Disable filtering if sample count is 1. */
+ data.filter_radius = (sampling.sample_count() == 1) ? 0.0f :
+ clamp_f(scene.r.gauss, 0.0f, 100.0f);
+ /* TODO(fclem): parameter hidden in experimental.
+ * We need to figure out LOD bias first in order to preserve texture crispiness. */
+ data.scaling_factor = 1;
+
+ data.background_opacity = (scene.r.alphamode == R_ALPHAPREMUL) ? 0.0f : 1.0f;
+ if (inst_.is_viewport() && false /* TODO(fclem): StudioLight */) {
+ data.background_opacity = inst_.v3d->shading.studiolight_background;
+ }
+
+ FilmData &data_prev_ = data_;
+ if (assign_if_different(data_prev_, data)) {
+ sampling.reset();
+ }
+
+ const eViewLayerEEVEEPassType data_passes = EEVEE_RENDER_PASS_Z | EEVEE_RENDER_PASS_NORMAL |
+ EEVEE_RENDER_PASS_VECTOR;
+ const eViewLayerEEVEEPassType color_passes_1 = EEVEE_RENDER_PASS_DIFFUSE_LIGHT |
+ EEVEE_RENDER_PASS_SPECULAR_LIGHT |
+ EEVEE_RENDER_PASS_VOLUME_LIGHT |
+ EEVEE_RENDER_PASS_EMIT;
+ const eViewLayerEEVEEPassType color_passes_2 = EEVEE_RENDER_PASS_DIFFUSE_COLOR |
+ EEVEE_RENDER_PASS_SPECULAR_COLOR |
+ EEVEE_RENDER_PASS_ENVIRONMENT |
+ EEVEE_RENDER_PASS_MIST |
+ EEVEE_RENDER_PASS_SHADOW | EEVEE_RENDER_PASS_AO;
+
+ data_.exposure_scale = pow2f(scene.view_settings.exposure);
+ data_.has_data = (enabled_passes_ & data_passes) != 0;
+ data_.any_render_pass_1 = (enabled_passes_ & color_passes_1) != 0;
+ data_.any_render_pass_2 = (enabled_passes_ & color_passes_2) != 0;
+ }
+ {
+ /* Set pass offsets. */
+
+ data_.display_id = aovs_info.display_id;
+ data_.display_is_value = aovs_info.display_is_value;
+
+ /* Combined is in a separate buffer. */
+ data_.combined_id = (enabled_passes_ & EEVEE_RENDER_PASS_COMBINED) ? 0 : -1;
+ /* Depth is in a separate buffer. */
+ data_.depth_id = (enabled_passes_ & EEVEE_RENDER_PASS_Z) ? 0 : -1;
+
+ data_.color_len = 0;
+ data_.value_len = 0;
+
+ auto pass_index_get = [&](eViewLayerEEVEEPassType pass_type) {
+ bool is_value = pass_is_value(pass_type);
+ int index = (enabled_passes_ & pass_type) ?
+ (is_value ? data_.value_len : data_.color_len)++ :
+ -1;
+ if (inst_.is_viewport() && inst_.v3d->shading.render_pass == pass_type) {
+ data_.display_id = index;
+ data_.display_is_value = is_value;
+ }
+ return index;
+ };
+
+ data_.mist_id = pass_index_get(EEVEE_RENDER_PASS_MIST);
+ data_.normal_id = pass_index_get(EEVEE_RENDER_PASS_NORMAL);
+ data_.vector_id = pass_index_get(EEVEE_RENDER_PASS_VECTOR);
+ data_.diffuse_light_id = pass_index_get(EEVEE_RENDER_PASS_DIFFUSE_LIGHT);
+ data_.diffuse_color_id = pass_index_get(EEVEE_RENDER_PASS_DIFFUSE_COLOR);
+ data_.specular_light_id = pass_index_get(EEVEE_RENDER_PASS_SPECULAR_LIGHT);
+ data_.specular_color_id = pass_index_get(EEVEE_RENDER_PASS_SPECULAR_COLOR);
+ data_.volume_light_id = pass_index_get(EEVEE_RENDER_PASS_VOLUME_LIGHT);
+ data_.emission_id = pass_index_get(EEVEE_RENDER_PASS_EMIT);
+ data_.environment_id = pass_index_get(EEVEE_RENDER_PASS_ENVIRONMENT);
+ data_.shadow_id = pass_index_get(EEVEE_RENDER_PASS_SHADOW);
+ data_.ambient_occlusion_id = pass_index_get(EEVEE_RENDER_PASS_AO);
+
+ data_.aov_color_id = data_.color_len;
+ data_.aov_value_id = data_.value_len;
+
+ data_.aov_color_len = aovs_info.color_len;
+ data_.aov_value_len = aovs_info.value_len;
+
+ data_.color_len += data_.aov_color_len;
+ data_.value_len += data_.aov_value_len;
+ }
+ {
+ /* TODO(@fclem): Over-scans. */
+
+ data_.render_extent = math::divide_ceil(extent, int2(data_.scaling_factor));
+ int2 weight_extent = inst_.camera.is_panoramic() ? data_.extent : int2(data_.scaling_factor);
+
+ eGPUTextureFormat color_format = GPU_RGBA16F;
+ eGPUTextureFormat float_format = GPU_R16F;
+ eGPUTextureFormat weight_format = GPU_R32F;
+ eGPUTextureFormat depth_format = GPU_R32F;
+
+ int reset = 0;
+ reset += depth_tx_.ensure_2d(depth_format, data_.extent);
+ reset += combined_tx_.current().ensure_2d(color_format, data_.extent);
+ reset += combined_tx_.next().ensure_2d(color_format, data_.extent);
+ /* Two layers, one for nearest sample weight and one for weight accumulation. */
+ reset += weight_tx_.current().ensure_2d_array(weight_format, weight_extent, 2);
+ reset += weight_tx_.next().ensure_2d_array(weight_format, weight_extent, 2);
+ reset += color_accum_tx_.ensure_2d_array(color_format,
+ (data_.color_len > 0) ? data_.extent : int2(1),
+ (data_.color_len > 0) ? data_.color_len : 1);
+ reset += value_accum_tx_.ensure_2d_array(float_format,
+ (data_.value_len > 0) ? data_.extent : int2(1),
+ (data_.value_len > 0) ? data_.value_len : 1);
+
+ if (reset > 0) {
+ sampling.reset();
+ data_.use_history = 0;
+ data_.use_reprojection = 0;
+
+ /* Avoid NaN in uninitialized texture memory making history blending dangerous. */
+ color_accum_tx_.clear(float4(0.0f));
+ value_accum_tx_.clear(float4(0.0f));
+ combined_tx_.current().clear(float4(0.0f));
+ weight_tx_.current().clear(float4(0.0f));
+ depth_tx_.clear(float4(0.0f));
+ }
+ }
+
+ force_disable_reprojection_ = (scene_eevee.flag & SCE_EEVEE_TAA_REPROJECTION) == 0;
+}
+
+void Film::sync()
+{
+ /* We use a fragment shader for viewport because we need to output the depth. */
+ bool use_compute = (inst_.is_viewport() == false);
+
+ eShaderType shader = use_compute ? FILM_COMP : FILM_FRAG;
+
+ /* TODO(fclem): Shader variation for panoramic & scaled resolution. */
+
+ RenderBuffers &rbuffers = inst_.render_buffers;
+ VelocityModule &velocity = inst_.velocity;
+
+ eGPUSamplerState filter = GPU_SAMPLER_FILTER;
+
+ /* For viewport, only previous motion is supported.
+ * Still bind previous step to avoid undefined behavior. */
+ eVelocityStep step_next = inst_.is_viewport() ? STEP_PREVIOUS : STEP_NEXT;
+
+ accumulate_ps_.init();
+ accumulate_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_ALWAYS);
+ accumulate_ps_.shader_set(inst_.shaders.static_shader_get(shader));
+ accumulate_ps_.bind_ubo("film_buf", &data_);
+ accumulate_ps_.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
+ accumulate_ps_.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
+ accumulate_ps_.bind_ubo("camera_next", &(*velocity.camera_steps[step_next]));
+ accumulate_ps_.bind_texture("depth_tx", &rbuffers.depth_tx);
+ accumulate_ps_.bind_texture("combined_tx", &combined_final_tx_);
+ accumulate_ps_.bind_texture("normal_tx", &rbuffers.normal_tx);
+ accumulate_ps_.bind_texture("vector_tx", &rbuffers.vector_tx);
+ accumulate_ps_.bind_texture("light_tx", &rbuffers.light_tx);
+ accumulate_ps_.bind_texture("diffuse_color_tx", &rbuffers.diffuse_color_tx);
+ accumulate_ps_.bind_texture("specular_color_tx", &rbuffers.specular_color_tx);
+ accumulate_ps_.bind_texture("volume_light_tx", &rbuffers.volume_light_tx);
+ accumulate_ps_.bind_texture("emission_tx", &rbuffers.emission_tx);
+ accumulate_ps_.bind_texture("environment_tx", &rbuffers.environment_tx);
+ accumulate_ps_.bind_texture("shadow_tx", &rbuffers.shadow_tx);
+ accumulate_ps_.bind_texture("ambient_occlusion_tx", &rbuffers.ambient_occlusion_tx);
+ accumulate_ps_.bind_texture("aov_color_tx", &rbuffers.aov_color_tx);
+ accumulate_ps_.bind_texture("aov_value_tx", &rbuffers.aov_value_tx);
+ /* NOTE(@fclem): 16 is the max number of sampled texture in many implementations.
+ * If we need more, we need to pack more of the similar passes in the same textures as arrays or
+ * use image binding instead. */
+ accumulate_ps_.bind_image("in_weight_img", &weight_tx_.current());
+ accumulate_ps_.bind_image("out_weight_img", &weight_tx_.next());
+ accumulate_ps_.bind_texture("in_combined_tx", &combined_tx_.current(), filter);
+ accumulate_ps_.bind_image("out_combined_img", &combined_tx_.next());
+ accumulate_ps_.bind_image("depth_img", &depth_tx_);
+ accumulate_ps_.bind_image("color_accum_img", &color_accum_tx_);
+ accumulate_ps_.bind_image("value_accum_img", &value_accum_tx_);
+ /* Sync with rendering passes. */
+ accumulate_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS);
+ if (use_compute) {
+ accumulate_ps_.dispatch(int3(math::divide_ceil(data_.extent, int2(FILM_GROUP_SIZE)), 1));
+ }
+ else {
+ accumulate_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3);
+ }
+}
+
+void Film::end_sync()
+{
+ data_.use_reprojection = inst_.sampling.interactive_mode();
+
+ /* Just bypass the reprojection and reset the accumulation. */
+ if (force_disable_reprojection_ && inst_.sampling.is_reset()) {
+ data_.use_reprojection = false;
+ data_.use_history = false;
+ }
+
+ aovs_info.push_update();
+
+ sync_mist();
+}
+
+float2 Film::pixel_jitter_get() const
+{
+ float2 jitter = inst_.sampling.rng_2d_get(SAMPLING_FILTER_U);
+
+ if (!use_box_filter && data_.filter_radius < M_SQRT1_2 && !inst_.camera.is_panoramic()) {
+ /* For filter size less than a pixel, change sampling strategy and use a uniform disk
+ * distribution covering the filter shape. This avoids putting samples in areas without any
+ * weights. */
+ /* TODO(fclem): Importance sampling could be a better option here. */
+ jitter = Sampling::sample_disk(jitter) * data_.filter_radius;
+ }
+ else {
+ /* Jitter the size of a whole pixel. [-0.5..0.5] */
+ jitter -= 0.5f;
+ }
+ /* TODO(fclem): Mixed-resolution rendering: We need to offset to each of the target pixel covered
+ * by a render pixel, ideally, by choosing one randomly using another sampling dimension, or by
+ * repeating the same sample RNG sequence for each pixel offset. */
+ return jitter;
+}
+
+eViewLayerEEVEEPassType Film::enabled_passes_get() const
+{
+ if (inst_.is_viewport() && data_.use_reprojection) {
+ /* Enable motion vector rendering but not the accumulation buffer. */
+ return enabled_passes_ | EEVEE_RENDER_PASS_VECTOR;
+ }
+ return enabled_passes_;
+}
+
+void Film::update_sample_table()
+{
+ data_.subpixel_offset = pixel_jitter_get();
+
+ int filter_radius_ceil = ceilf(data_.filter_radius);
+ float filter_radius_sqr = square_f(data_.filter_radius);
+
+ data_.samples_len = 0;
+ if (use_box_filter || data_.filter_radius < 0.01f) {
+ /* Disable gather filtering. */
+ data_.samples[0].texel = int2(0, 0);
+ data_.samples[0].weight = 1.0f;
+ data_.samples_weight_total = 1.0f;
+ data_.samples_len = 1;
+ }
+ /* NOTE: Threshold determined by hand until we don't hit the assert below. */
+ else if (data_.filter_radius < 2.20f) {
+ /* Small filter Size. */
+ int closest_index = 0;
+ float closest_distance = FLT_MAX;
+ data_.samples_weight_total = 0.0f;
+ /* TODO(fclem): For optimization, could try Z-tile ordering. */
+ for (int y = -filter_radius_ceil; y <= filter_radius_ceil; y++) {
+ for (int x = -filter_radius_ceil; x <= filter_radius_ceil; x++) {
+ float2 pixel_offset = float2(x, y) - data_.subpixel_offset;
+ float distance_sqr = math::length_squared(pixel_offset);
+ if (distance_sqr < filter_radius_sqr) {
+ if (data_.samples_len >= FILM_PRECOMP_SAMPLE_MAX) {
+ BLI_assert_msg(0, "Precomputed sample table is too small.");
+ break;
+ }
+ FilmSample &sample = data_.samples[data_.samples_len];
+ sample.texel = int2(x, y);
+ sample.weight = film_filter_weight(data_.filter_radius, distance_sqr);
+ data_.samples_weight_total += sample.weight;
+
+ if (distance_sqr < closest_distance) {
+ closest_distance = distance_sqr;
+ closest_index = data_.samples_len;
+ }
+ data_.samples_len++;
+ }
+ }
+ }
+ /* Put the closest one in first position. */
+ if (closest_index != 0) {
+ SWAP(FilmSample, data_.samples[closest_index], data_.samples[0]);
+ }
+ }
+ else {
+ /* Large Filter Size. */
+ MutableSpan<FilmSample> sample_table(data_.samples, FILM_PRECOMP_SAMPLE_MAX);
+ /* To avoid hitting driver TDR and slowing rendering too much we use random sampling. */
+ /* TODO(fclem): This case needs more work. We could distribute the samples better to avoid
+ * loading the same pixel twice. */
+ data_.samples_len = sample_table.size();
+ data_.samples_weight_total = 0.0f;
+
+ int i = 0;
+ for (FilmSample &sample : sample_table) {
+ /* TODO(fclem): Own RNG. */
+ float2 random_2d = inst_.sampling.rng_2d_get(SAMPLING_SSS_U);
+ /* This randomization makes sure we converge to the right result but also makes nearest
+ * neighbor filtering not converging rapidly. */
+ random_2d.x = (random_2d.x + i) / float(FILM_PRECOMP_SAMPLE_MAX);
+
+ float2 pixel_offset = math::floor(Sampling::sample_spiral(random_2d) * data_.filter_radius);
+ sample.texel = int2(pixel_offset);
+
+ float distance_sqr = math::length_squared(pixel_offset - data_.subpixel_offset);
+ sample.weight = film_filter_weight(data_.filter_radius, distance_sqr);
+ data_.samples_weight_total += sample.weight;
+ i++;
+ }
+ }
+}
+
+void Film::accumulate(const DRWView *view, GPUTexture *combined_final_tx)
+{
+ if (inst_.is_viewport()) {
+ DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get();
+ DefaultTextureList *dtxl = DRW_viewport_texture_list_get();
+ GPU_framebuffer_bind(dfbl->default_fb);
+ /* Clear when using render borders. */
+ if (data_.extent != int2(GPU_texture_width(dtxl->color), GPU_texture_height(dtxl->color))) {
+ float4 clear_color = {0.0f, 0.0f, 0.0f, 0.0f};
+ GPU_framebuffer_clear_color(dfbl->default_fb, clear_color);
+ }
+ GPU_framebuffer_viewport_set(dfbl->default_fb, UNPACK2(data_.offset), UNPACK2(data_.extent));
+ }
+
+ update_sample_table();
+
+ combined_final_tx_ = combined_final_tx;
+
+ data_.display_only = false;
+ data_.push_update();
+
+ draw::View drw_view("MainView", view);
+
+ DRW_manager_get()->submit(accumulate_ps_, drw_view);
+
+ combined_tx_.swap();
+ weight_tx_.swap();
+
+ /* Use history after first sample. */
+ if (data_.use_history == 0) {
+ data_.use_history = 1;
+ }
+}
+
+void Film::display()
+{
+ BLI_assert(inst_.is_viewport());
+
+ /* Acquire dummy render buffers for correct binding. They will not be used. */
+ inst_.render_buffers.acquire(int2(1));
+
+ DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get();
+ GPU_framebuffer_bind(dfbl->default_fb);
+ GPU_framebuffer_viewport_set(dfbl->default_fb, UNPACK2(data_.offset), UNPACK2(data_.extent));
+
+ combined_final_tx_ = inst_.render_buffers.combined_tx;
+
+ data_.display_only = true;
+ data_.push_update();
+
+ draw::View drw_view("MainView", DRW_view_default_get());
+
+ DRW_manager_get()->submit(accumulate_ps_, drw_view);
+
+ inst_.render_buffers.release();
+
+ /* IMPORTANT: Do not swap! No accumulation has happened. */
+}
+
+float *Film::read_pass(eViewLayerEEVEEPassType pass_type)
+{
+
+ bool is_value = pass_is_value(pass_type);
+ Texture &accum_tx = (pass_type == EEVEE_RENDER_PASS_COMBINED) ?
+ combined_tx_.current() :
+ (pass_type == EEVEE_RENDER_PASS_Z) ?
+ depth_tx_ :
+ (is_value ? value_accum_tx_ : color_accum_tx_);
+
+ accum_tx.ensure_layer_views();
+
+ int index = pass_id_get(pass_type);
+ GPUTexture *pass_tx = accum_tx.layer_view(index);
+
+ GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
+
+ float *result = (float *)GPU_texture_read(pass_tx, GPU_DATA_FLOAT, 0);
+
+ if (pass_is_float3(pass_type)) {
+ /* Convert result in place as we cannot do this conversion on GPU. */
+ for (auto px : IndexRange(accum_tx.width() * accum_tx.height())) {
+ *(reinterpret_cast<float3 *>(result) + px) = *(reinterpret_cast<float3 *>(result + px * 4));
+ }
+ }
+
+ return result;
+}
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_film.hh b/source/blender/draw/engines/eevee_next/eevee_film.hh
new file mode 100644
index 00000000000..796fcb24808
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_film.hh
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * The film class handles accumulation of samples with any distorted camera_type
+ * using a pixel filter. Inputs needs to be jittered so that the filter converges to the right
+ * result.
+ *
+ * In viewport, we switch between 2 accumulation mode depending on the scene state.
+ * - For static scene, we use a classic weighted accumulation.
+ * - For dynamic scene (if an update is detected), we use a more temporally stable accumulation
+ * following the Temporal Anti-Aliasing method (a.k.a. Temporal Super-Sampling). This does
+ * history reprojection and rectification to avoid most of the flickering.
+ */
+
+#pragma once
+
+#include "DRW_render.h"
+
+#include "eevee_shader_shared.hh"
+
+namespace blender::eevee {
+
+class Instance;
+
+/* -------------------------------------------------------------------- */
+/** \name Film
+ * \{ */
+
+class Film {
+ public:
+ /** Stores indirection table of AOVs based on their name hash and their type. */
+ AOVsInfoDataBuf aovs_info;
+ /** For debugging purpose but could be a user option in the future. */
+ static constexpr bool use_box_filter = false;
+
+ private:
+ Instance &inst_;
+
+ /** Incoming combined buffer with post FX applied (motion blur + depth of field). */
+ GPUTexture *combined_final_tx_ = nullptr;
+
+ /** Main accumulation textures containing every render-pass except depth and combined. */
+ Texture color_accum_tx_;
+ Texture value_accum_tx_;
+ /** Depth accumulation texture. Separated because using a different format. */
+ Texture depth_tx_;
+ /** Combined "Color" buffer. Double buffered to allow re-projection. */
+ SwapChain<Texture, 2> combined_tx_;
+ /** Weight buffers. Double buffered to allow updating it during accumulation. */
+ SwapChain<Texture, 2> weight_tx_;
+ /** User setting to disable reprojection. Useful for debugging or have a more precise render. */
+ bool force_disable_reprojection_ = false;
+
+ PassSimple accumulate_ps_ = {"Film.Accumulate"};
+
+ FilmDataBuf data_;
+
+ eViewLayerEEVEEPassType enabled_passes_ = eViewLayerEEVEEPassType(0);
+
+ public:
+ Film(Instance &inst) : inst_(inst){};
+ ~Film(){};
+
+ void init(const int2 &full_extent, const rcti *output_rect);
+
+ void sync();
+ void end_sync();
+
+ /** Accumulate the newly rendered sample contained in #RenderBuffers and blit to display. */
+ void accumulate(const DRWView *view, GPUTexture *combined_final_tx);
+
+ /** Blit to display. No rendered sample needed. */
+ void display();
+
+ float *read_pass(eViewLayerEEVEEPassType pass_type);
+ float *read_aov(ViewLayerAOV *aov);
+
+ /** Returns shading views internal resolution. */
+ int2 render_extent_get() const
+ {
+ return data_.render_extent;
+ }
+
+ float2 pixel_jitter_get() const;
+
+ float background_opacity_get() const
+ {
+ return data_.background_opacity;
+ }
+
+ eViewLayerEEVEEPassType enabled_passes_get() const;
+
+ static bool pass_is_value(eViewLayerEEVEEPassType pass_type)
+ {
+ switch (pass_type) {
+ case EEVEE_RENDER_PASS_Z:
+ case EEVEE_RENDER_PASS_MIST:
+ case EEVEE_RENDER_PASS_SHADOW:
+ case EEVEE_RENDER_PASS_AO:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ static bool pass_is_float3(eViewLayerEEVEEPassType pass_type)
+ {
+ switch (pass_type) {
+ case EEVEE_RENDER_PASS_NORMAL:
+ case EEVEE_RENDER_PASS_DIFFUSE_LIGHT:
+ case EEVEE_RENDER_PASS_DIFFUSE_COLOR:
+ case EEVEE_RENDER_PASS_SPECULAR_LIGHT:
+ case EEVEE_RENDER_PASS_SPECULAR_COLOR:
+ case EEVEE_RENDER_PASS_VOLUME_LIGHT:
+ case EEVEE_RENDER_PASS_EMIT:
+ case EEVEE_RENDER_PASS_ENVIRONMENT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /* Returns layer offset in the accumulation texture. -1 if the pass is not enabled. */
+ int pass_id_get(eViewLayerEEVEEPassType pass_type) const
+ {
+ switch (pass_type) {
+ case EEVEE_RENDER_PASS_COMBINED:
+ return data_.combined_id;
+ case EEVEE_RENDER_PASS_Z:
+ return data_.depth_id;
+ case EEVEE_RENDER_PASS_MIST:
+ return data_.mist_id;
+ case EEVEE_RENDER_PASS_NORMAL:
+ return data_.normal_id;
+ case EEVEE_RENDER_PASS_DIFFUSE_LIGHT:
+ return data_.diffuse_light_id;
+ case EEVEE_RENDER_PASS_DIFFUSE_COLOR:
+ return data_.diffuse_color_id;
+ case EEVEE_RENDER_PASS_SPECULAR_LIGHT:
+ return data_.specular_light_id;
+ case EEVEE_RENDER_PASS_SPECULAR_COLOR:
+ return data_.specular_color_id;
+ case EEVEE_RENDER_PASS_VOLUME_LIGHT:
+ return data_.volume_light_id;
+ case EEVEE_RENDER_PASS_EMIT:
+ return data_.emission_id;
+ case EEVEE_RENDER_PASS_ENVIRONMENT:
+ return data_.environment_id;
+ case EEVEE_RENDER_PASS_SHADOW:
+ return data_.shadow_id;
+ case EEVEE_RENDER_PASS_AO:
+ return data_.ambient_occlusion_id;
+ case EEVEE_RENDER_PASS_CRYPTOMATTE:
+ return -1; /* TODO */
+ case EEVEE_RENDER_PASS_VECTOR:
+ return data_.vector_id;
+ default:
+ return -1;
+ }
+ }
+
+ static const char *pass_to_render_pass_name(eViewLayerEEVEEPassType pass_type)
+ {
+ switch (pass_type) {
+ case EEVEE_RENDER_PASS_COMBINED:
+ return RE_PASSNAME_COMBINED;
+ case EEVEE_RENDER_PASS_Z:
+ return RE_PASSNAME_Z;
+ case EEVEE_RENDER_PASS_MIST:
+ return RE_PASSNAME_MIST;
+ case EEVEE_RENDER_PASS_NORMAL:
+ return RE_PASSNAME_NORMAL;
+ case EEVEE_RENDER_PASS_DIFFUSE_LIGHT:
+ return RE_PASSNAME_DIFFUSE_DIRECT;
+ case EEVEE_RENDER_PASS_DIFFUSE_COLOR:
+ return RE_PASSNAME_DIFFUSE_COLOR;
+ case EEVEE_RENDER_PASS_SPECULAR_LIGHT:
+ return RE_PASSNAME_GLOSSY_DIRECT;
+ case EEVEE_RENDER_PASS_SPECULAR_COLOR:
+ return RE_PASSNAME_GLOSSY_COLOR;
+ case EEVEE_RENDER_PASS_VOLUME_LIGHT:
+ return RE_PASSNAME_VOLUME_LIGHT;
+ case EEVEE_RENDER_PASS_EMIT:
+ return RE_PASSNAME_EMIT;
+ case EEVEE_RENDER_PASS_ENVIRONMENT:
+ return RE_PASSNAME_ENVIRONMENT;
+ case EEVEE_RENDER_PASS_SHADOW:
+ return RE_PASSNAME_SHADOW;
+ case EEVEE_RENDER_PASS_AO:
+ return RE_PASSNAME_AO;
+ case EEVEE_RENDER_PASS_CRYPTOMATTE:
+ BLI_assert_msg(0, "Cryptomatte is not implemented yet.");
+ return ""; /* TODO */
+ case EEVEE_RENDER_PASS_VECTOR:
+ return RE_PASSNAME_VECTOR;
+ default:
+ BLI_assert(0);
+ return "";
+ }
+ }
+
+ private:
+ void init_aovs();
+ void sync_mist();
+
+ /**
+ * Precompute sample weights if they are uniform across the whole film extent.
+ */
+ void update_sample_table();
+};
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc
new file mode 100644
index 00000000000..cf9049da514
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2022 Blender Foundation.
+ */
+
+#include "BKE_global.h"
+
+#include "eevee_instance.hh"
+
+#include "eevee_hizbuffer.hh"
+
+namespace blender::eevee {
+
+/* -------------------------------------------------------------------- */
+/** \name Hierarchical-Z buffer
+ *
+ * \{ */
+
+void HiZBuffer::sync()
+{
+ RenderBuffers &render_buffers = inst_.render_buffers;
+
+ int2 render_extent = inst_.film.render_extent_get();
+ /* Padding to avoid complexity during down-sampling and screen tracing. */
+ int2 hiz_extent = math::ceil_to_multiple(render_extent, int2(1u << (HIZ_MIP_COUNT - 1)));
+ int2 dispatch_size = math::divide_ceil(hiz_extent, int2(HIZ_GROUP_SIZE));
+
+ hiz_tx_.ensure_2d(GPU_R32F, hiz_extent, nullptr, HIZ_MIP_COUNT);
+ hiz_tx_.ensure_mip_views();
+ GPU_texture_mipmap_mode(hiz_tx_, true, false);
+
+ data_.uv_scale = float2(render_extent) / float2(hiz_extent);
+ data_.push_update();
+
+ {
+ hiz_update_ps_.init();
+ hiz_update_ps_.shader_set(inst_.shaders.static_shader_get(HIZ_UPDATE));
+ hiz_update_ps_.bind_ssbo("finished_tile_counter", atomic_tile_counter_);
+ hiz_update_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, with_filter);
+ hiz_update_ps_.bind_image("out_mip_0", hiz_tx_.mip_view(0));
+ hiz_update_ps_.bind_image("out_mip_1", hiz_tx_.mip_view(1));
+ hiz_update_ps_.bind_image("out_mip_2", hiz_tx_.mip_view(2));
+ hiz_update_ps_.bind_image("out_mip_3", hiz_tx_.mip_view(3));
+ hiz_update_ps_.bind_image("out_mip_4", hiz_tx_.mip_view(4));
+ hiz_update_ps_.bind_image("out_mip_5", hiz_tx_.mip_view(5));
+ hiz_update_ps_.bind_image("out_mip_6", hiz_tx_.mip_view(6));
+ hiz_update_ps_.bind_image("out_mip_7", hiz_tx_.mip_view(7));
+ /* TODO(@fclem): There might be occasions where we might not want to
+ * copy mip 0 for performance reasons if there is no need for it. */
+ hiz_update_ps_.push_constant("update_mip_0", true);
+ hiz_update_ps_.dispatch(int3(dispatch_size, 1));
+ hiz_update_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
+ }
+
+ if (inst_.debug_mode == eDebugMode::DEBUG_HIZ_VALIDATION) {
+ debug_draw_ps_.init();
+ debug_draw_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM);
+ debug_draw_ps_.shader_set(inst_.shaders.static_shader_get(HIZ_DEBUG));
+ this->bind_resources(&debug_draw_ps_);
+ debug_draw_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3);
+ }
+}
+
+void HiZBuffer::update()
+{
+ if (!is_dirty_) {
+ return;
+ }
+
+ /* Bind another framebuffer in order to avoid triggering the feedback loop check.
+ * This is safe because we only use compute shaders in this section of the code.
+ * Ideally the check should be smarter. */
+ GPUFrameBuffer *fb = GPU_framebuffer_active_get();
+ if (G.debug & G_DEBUG_GPU) {
+ GPU_framebuffer_restore();
+ }
+
+ inst_.manager->submit(hiz_update_ps_);
+
+ if (G.debug & G_DEBUG_GPU) {
+ GPU_framebuffer_bind(fb);
+ }
+}
+
+void HiZBuffer::debug_draw(View &view, GPUFrameBuffer *view_fb)
+{
+ if (inst_.debug_mode == eDebugMode::DEBUG_HIZ_VALIDATION) {
+ inst_.info =
+ "Debug Mode: HiZ Validation\n"
+ " - Red: pixel in front of HiZ tile value.\n"
+ " - Blue: No error.";
+ inst_.hiz_buffer.update();
+ GPU_framebuffer_bind(view_fb);
+ inst_.manager->submit(debug_draw_ps_, view);
+ }
+}
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh
new file mode 100644
index 00000000000..8b8e4de55b1
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * The Hierarchical-Z buffer is texture containing a copy of the depth buffer with mipmaps.
+ * Each mip contains the maximum depth of each 4 pixels on the upper level.
+ * The size of the texture is padded to avoid messing with the mipmap pixels alignments.
+ */
+
+#pragma once
+
+#include "DRW_render.h"
+
+#include "eevee_shader_shared.hh"
+
+namespace blender::eevee {
+
+class Instance;
+
+/* -------------------------------------------------------------------- */
+/** \name Hierarchical-Z buffer
+ * \{ */
+
+class HiZBuffer {
+ private:
+ Instance &inst_;
+
+ /** The texture containing the hiz mip chain. */
+ Texture hiz_tx_ = {"hiz_tx_"};
+ /**
+ * Atomic counter counting the number of tile that have finished down-sampling.
+ * The last one will process the last few mip level.
+ */
+ draw::StorageBuffer<uint4, true> atomic_tile_counter_ = {"atomic_tile_counter"};
+ /** Single pass recursive downsample. */
+ PassSimple hiz_update_ps_ = {"HizUpdate"};
+ /** Debug pass. */
+ PassSimple debug_draw_ps_ = {"HizUpdate.Debug"};
+ /** Dirty flag to check if the update is necessary. */
+ bool is_dirty_ = true;
+
+ HiZDataBuf data_;
+
+ public:
+ HiZBuffer(Instance &inst) : inst_(inst)
+ {
+ atomic_tile_counter_.clear_to_zero();
+ };
+
+ void sync();
+
+ /**
+ * Tag the buffer for update if needed.
+ */
+ void set_dirty()
+ {
+ is_dirty_ = true;
+ }
+
+ /**
+ * Update the content of the HiZ buffer with the depth render target.
+ * Noop if the buffer has not been tagged as dirty.
+ * Should be called before each passes that needs to read the hiz buffer.
+ */
+ void update();
+
+ void debug_draw(View &view, GPUFrameBuffer *view_fb);
+
+ void bind_resources(DRWShadingGroup *grp)
+ {
+ DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", &hiz_tx_);
+ DRW_shgroup_uniform_block_ref(grp, "hiz_buf", &data_);
+ }
+
+ /* TODO(fclem): Hardcoded bind slots. */
+ template<typename T> void bind_resources(draw::detail::PassBase<T> *pass)
+ {
+ pass->bind_texture("hiz_tx", &hiz_tx_);
+ pass->bind_ubo("hiz_buf", &data_);
+ }
+};
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc
index 606630bcdef..6150f32f150 100644
--- a/source/blender/draw/engines/eevee_next/eevee_instance.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc
@@ -17,6 +17,7 @@
#include "DNA_ID.h"
#include "DNA_lightprobe_types.h"
#include "DNA_modifier_types.h"
+#include "RE_pipeline.h"
#include "eevee_instance.hh"
@@ -43,7 +44,7 @@ void Instance::init(const int2 &output_res,
const View3D *v3d_,
const RegionView3D *rv3d_)
{
- UNUSED_VARS(light_probe_, output_rect);
+ UNUSED_VARS(light_probe_);
render = render_;
depsgraph = depsgraph_;
camera_orig_object = camera_object_;
@@ -51,12 +52,23 @@ void Instance::init(const int2 &output_res,
drw_view = drw_view_;
v3d = v3d_;
rv3d = rv3d_;
+ manager = DRW_manager_get();
+
+ if (assign_if_different(debug_mode, (eDebugMode)G.debug_value)) {
+ sampling.reset();
+ }
info = "";
update_eval_members();
- main_view.init(output_res);
+ sampling.init(scene);
+ camera.init();
+ film.init(output_res, output_rect);
+ velocity.init();
+ depth_of_field.init();
+ motion_blur.init();
+ main_view.init();
}
void Instance::set_time(float time)
@@ -88,16 +100,23 @@ void Instance::update_eval_members()
void Instance::begin_sync()
{
materials.begin_sync();
- velocity.begin_sync();
+ velocity.begin_sync(); /* NOTE: Also syncs camera. */
+ lights.begin_sync();
+
+ gpencil_engine_enabled = false;
+ depth_of_field.sync();
+ motion_blur.sync();
+ hiz_buffer.sync();
pipelines.sync();
main_view.sync();
world.sync();
+ film.sync();
}
void Instance::object_sync(Object *ob)
{
- const bool is_renderable_type = ELEM(ob->type, OB_CURVES, OB_GPENCIL, OB_MESH);
+ const bool is_renderable_type = ELEM(ob->type, OB_CURVES, OB_GPENCIL, OB_MESH, OB_LAMP);
const int ob_visibility = DRW_object_visibility_in_active_context(ob);
const bool partsys_is_visible = (ob_visibility & OB_VISIBLE_PARTICLES) != 0 &&
(ob->type == OB_MESH);
@@ -108,12 +127,16 @@ void Instance::object_sync(Object *ob)
return;
}
+ /* TODO cleanup. */
+ ObjectRef ob_ref = DRW_object_ref_get(ob);
+ ResourceHandle res_handle = manager->resource_handle(ob_ref);
+
ObjectHandle &ob_handle = sync.sync_object(ob);
if (partsys_is_visible && ob != DRW_context_state_get()->object_edit) {
LISTBASE_FOREACH (ModifierData *, md, &ob->modifiers) {
if (md->type == eModifierType_ParticleSystem) {
- sync.sync_curves(ob, ob_handle, md);
+ sync.sync_curves(ob, ob_handle, res_handle, md);
}
}
}
@@ -121,22 +144,18 @@ void Instance::object_sync(Object *ob)
if (object_is_visible) {
switch (ob->type) {
case OB_LAMP:
+ lights.sync_light(ob, ob_handle);
break;
case OB_MESH:
- case OB_CURVES_LEGACY:
- case OB_SURF:
- case OB_FONT:
- case OB_MBALL: {
- sync.sync_mesh(ob, ob_handle);
+ sync.sync_mesh(ob, ob_handle, res_handle, ob_ref);
break;
- }
case OB_VOLUME:
break;
case OB_CURVES:
- sync.sync_curves(ob, ob_handle);
+ sync.sync_curves(ob, ob_handle, res_handle);
break;
case OB_GPENCIL:
- sync.sync_gpencil(ob, ob_handle);
+ sync.sync_gpencil(ob, ob_handle, res_handle);
break;
default:
break;
@@ -146,13 +165,37 @@ void Instance::object_sync(Object *ob)
ob_handle.reset_recalc_flag();
}
+/* Wrapper to use with DRW_render_object_iter. */
+void Instance::object_sync_render(void *instance_,
+ Object *ob,
+ RenderEngine *engine,
+ Depsgraph *depsgraph)
+{
+ UNUSED_VARS(engine, depsgraph);
+ Instance &inst = *reinterpret_cast<Instance *>(instance_);
+ inst.object_sync(ob);
+}
+
void Instance::end_sync()
{
velocity.end_sync();
+ lights.end_sync();
+ sampling.end_sync();
+ film.end_sync();
}
void Instance::render_sync()
{
+ DRW_cache_restart();
+
+ begin_sync();
+ DRW_render_object_iter(this, render, depsgraph, object_sync_render);
+ end_sync();
+
+ DRW_render_instance_buffer_finish();
+ /* Also we weed to have a correct FBO bound for #DRW_hair_update */
+ // GPU_framebuffer_bind();
+ // DRW_hair_update();
}
/** \} */
@@ -167,7 +210,57 @@ void Instance::render_sync()
**/
void Instance::render_sample()
{
+ if (sampling.finished_viewport()) {
+ film.display();
+ return;
+ }
+
+ /* Motion blur may need to do re-sync after a certain number of sample. */
+ if (!is_viewport() && sampling.do_render_sync()) {
+ render_sync();
+ }
+
+ sampling.step();
+
main_view.render();
+
+ motion_blur.step();
+}
+
+void Instance::render_read_result(RenderLayer *render_layer, const char *view_name)
+{
+ eViewLayerEEVEEPassType pass_bits = film.enabled_passes_get();
+ for (auto i : IndexRange(EEVEE_RENDER_PASS_MAX_BIT)) {
+ eViewLayerEEVEEPassType pass_type = eViewLayerEEVEEPassType(pass_bits & (1 << i));
+ if (pass_type == 0) {
+ continue;
+ }
+
+ const char *pass_name = Film::pass_to_render_pass_name(pass_type);
+ RenderPass *rp = RE_pass_find_by_name(render_layer, pass_name, view_name);
+ if (rp) {
+ float *result = film.read_pass(pass_type);
+ if (result) {
+ BLI_mutex_lock(&render->update_render_passes_mutex);
+ /* WORKAROUND: We use texture read to avoid using a framebuffer to get the render result.
+ * However, on some implementation, we need a buffer with a few extra bytes for the read to
+ * happen correctly (see GLTexture::read()). So we need a custom memory allocation. */
+ /* Avoid memcpy(), replace the pointer directly. */
+ MEM_SAFE_FREE(rp->rect);
+ rp->rect = result;
+ BLI_mutex_unlock(&render->update_render_passes_mutex);
+ }
+ }
+ }
+
+ /* The vector pass is initialized to weird values. Set it to neutral value if not rendered. */
+ if ((pass_bits & EEVEE_RENDER_PASS_VECTOR) == 0) {
+ const char *vector_pass_name = Film::pass_to_render_pass_name(EEVEE_RENDER_PASS_VECTOR);
+ RenderPass *vector_rp = RE_pass_find_by_name(render_layer, vector_pass_name, view_name);
+ if (vector_rp) {
+ memset(vector_rp->rect, 0, sizeof(float) * 4 * vector_rp->rectx * vector_rp->recty);
+ }
+ }
}
/** \} */
@@ -178,7 +271,26 @@ void Instance::render_sample()
void Instance::render_frame(RenderLayer *render_layer, const char *view_name)
{
- UNUSED_VARS(render_layer, view_name);
+ while (!sampling.finished()) {
+ this->render_sample();
+
+ /* TODO(fclem) print progression. */
+#if 0
+ /* TODO(fclem): Does not currently work. But would be better to just display to 2D view like
+ * cycles does. */
+ if (G.background == false && first_read) {
+ /* Allow to preview the first sample. */
+ /* TODO(fclem): Might want to not do this during animation render to avoid too much stall. */
+ this->render_read_result(render_layer, view_name);
+ first_read = false;
+ DRW_render_context_disable(render->re);
+ /* Allow the 2D viewport to grab the ticket mutex to display the render. */
+ DRW_render_context_enable(render->re);
+ }
+#endif
+ }
+
+ this->render_read_result(render_layer, view_name);
}
void Instance::draw_viewport(DefaultFramebufferList *dfbl)
@@ -187,6 +299,13 @@ void Instance::draw_viewport(DefaultFramebufferList *dfbl)
render_sample();
velocity.step_swap();
+ /* Do not request redraw during viewport animation to lock the framerate to the animation
+ * playback rate. This is in order to preserve motion blur aspect and also to avoid TAA reset
+ * that can show flickering. */
+ if (!sampling.finished_viewport() && !DRW_state_is_playback()) {
+ DRW_viewport_request_redraw();
+ }
+
if (materials.queued_shaders_count > 0) {
std::stringstream ss;
ss << "Compiling Shaders " << materials.queued_shaders_count;
diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.hh b/source/blender/draw/engines/eevee_next/eevee_instance.hh
index 84be59fc5f0..4ab20d540bf 100644
--- a/source/blender/draw/engines/eevee_next/eevee_instance.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_instance.hh
@@ -16,8 +16,15 @@
#include "DRW_render.h"
#include "eevee_camera.hh"
+#include "eevee_depth_of_field.hh"
+#include "eevee_film.hh"
+#include "eevee_hizbuffer.hh"
+#include "eevee_light.hh"
#include "eevee_material.hh"
+#include "eevee_motion_blur.hh"
#include "eevee_pipeline.hh"
+#include "eevee_renderbuffers.hh"
+#include "eevee_sampling.hh"
#include "eevee_shader.hh"
#include "eevee_sync.hh"
#include "eevee_view.hh"
@@ -31,19 +38,28 @@ namespace blender::eevee {
*/
class Instance {
friend VelocityModule;
+ friend MotionBlurModule;
public:
ShaderModule &shaders;
SyncModule sync;
MaterialModule materials;
PipelineModule pipelines;
+ LightModule lights;
VelocityModule velocity;
+ MotionBlurModule motion_blur;
+ DepthOfField depth_of_field;
+ HiZBuffer hiz_buffer;
+ Sampling sampling;
Camera camera;
+ Film film;
+ RenderBuffers render_buffers;
MainView main_view;
World world;
/** Input data. */
Depsgraph *depsgraph;
+ Manager *manager;
/** Evaluated IDs. */
Scene *scene;
ViewLayer *view_layer;
@@ -57,8 +73,13 @@ class Instance {
const View3D *v3d;
const RegionView3D *rv3d;
- /* Info string displayed at the top of the render / viewport. */
+ /** True if the grease pencil engine might be running. */
+ bool gpencil_engine_enabled;
+
+ /** Info string displayed at the top of the render / viewport. */
std::string info = "";
+ /** Debug mode from debug value. */
+ eDebugMode debug_mode = eDebugMode::DEBUG_NONE;
public:
Instance()
@@ -66,8 +87,15 @@ class Instance {
sync(*this),
materials(*this),
pipelines(*this),
+ lights(*this),
velocity(*this),
+ motion_blur(*this),
+ depth_of_field(*this),
+ hiz_buffer(*this),
+ sampling(*this),
camera(*this),
+ film(*this),
+ render_buffers(*this),
main_view(*this),
world(*this){};
~Instance(){};
@@ -92,12 +120,17 @@ class Instance {
void draw_viewport(DefaultFramebufferList *dfbl);
- bool is_viewport(void)
+ bool is_viewport() const
+ {
+ return render == nullptr;
+ }
+
+ bool overlays_enabled() const
{
- return !DRW_state_is_scene_render();
+ return v3d && ((v3d->flag2 & V3D_HIDE_OVERLAYS) == 0);
}
- bool use_scene_lights(void) const
+ bool use_scene_lights() const
{
return (!v3d) ||
((v3d->shading.type == OB_MATERIAL) &&
@@ -107,7 +140,7 @@ class Instance {
}
/* Light the scene using the selected HDRI in the viewport shading pop-over. */
- bool use_studio_light(void) const
+ bool use_studio_light() const
{
return (v3d) && (((v3d->shading.type == OB_MATERIAL) &&
((v3d->shading.flag & V3D_SHADING_SCENE_WORLD) == 0)) ||
@@ -116,7 +149,12 @@ class Instance {
}
private:
+ static void object_sync_render(void *instance_,
+ Object *ob,
+ RenderEngine *engine,
+ Depsgraph *depsgraph);
void render_sample();
+ void render_read_result(RenderLayer *render_layer, const char *view_name);
void mesh_sync(Object *ob, ObjectHandle &ob_handle);
diff --git a/source/blender/draw/engines/eevee_next/eevee_light.cc b/source/blender/draw/engines/eevee_next/eevee_light.cc
new file mode 100644
index 00000000000..b60246fa3ab
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_light.cc
@@ -0,0 +1,488 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * The light module manages light data buffers and light culling system.
+ */
+
+#include "draw_debug.hh"
+
+#include "eevee_instance.hh"
+
+#include "eevee_light.hh"
+
+namespace blender::eevee {
+
+/* -------------------------------------------------------------------- */
+/** \name LightData
+ * \{ */
+
+static eLightType to_light_type(short blender_light_type, short blender_area_type)
+{
+ switch (blender_light_type) {
+ default:
+ case LA_LOCAL:
+ return LIGHT_POINT;
+ case LA_SUN:
+ return LIGHT_SUN;
+ case LA_SPOT:
+ return LIGHT_SPOT;
+ case LA_AREA:
+ return ELEM(blender_area_type, LA_AREA_DISK, LA_AREA_ELLIPSE) ? LIGHT_ELLIPSE : LIGHT_RECT;
+ }
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Light Object
+ * \{ */
+
+void Light::sync(/* ShadowModule &shadows , */ const Object *ob, float threshold)
+{
+ const ::Light *la = (const ::Light *)ob->data;
+ float scale[3];
+
+ float max_power = max_fff(la->r, la->g, la->b) * fabsf(la->energy / 100.0f);
+ float surface_max_power = max_ff(la->diff_fac, la->spec_fac) * max_power;
+ float volume_max_power = la->volume_fac * max_power;
+
+ float influence_radius_surface = attenuation_radius_get(la, threshold, surface_max_power);
+ float influence_radius_volume = attenuation_radius_get(la, threshold, volume_max_power);
+
+ this->influence_radius_max = max_ff(influence_radius_surface, influence_radius_volume);
+ this->influence_radius_invsqr_surface = 1.0f / square_f(max_ff(influence_radius_surface, 1e-8f));
+ this->influence_radius_invsqr_volume = 1.0f / square_f(max_ff(influence_radius_volume, 1e-8f));
+
+ this->color = float3(&la->r) * la->energy;
+ normalize_m4_m4_ex(this->object_mat.ptr(), ob->obmat, scale);
+ /* Make sure we have consistent handedness (in case of negatively scaled Z axis). */
+ float3 cross = math::cross(float3(this->_right), float3(this->_up));
+ if (math::dot(cross, float3(this->_back)) < 0.0f) {
+ negate_v3(this->_up);
+ }
+
+ shape_parameters_set(la, scale);
+
+ float shape_power = shape_power_get(la);
+ float point_power = point_power_get(la);
+ this->diffuse_power = la->diff_fac * shape_power;
+ this->transmit_power = la->diff_fac * point_power;
+ this->specular_power = la->spec_fac * shape_power;
+ this->volume_power = la->volume_fac * point_power;
+
+ eLightType new_type = to_light_type(la->type, la->area_shape);
+ if (this->type != new_type) {
+ /* shadow_discard_safe(shadows); */
+ this->type = new_type;
+ }
+
+#if 0
+ if (la->mode & LA_SHADOW) {
+ if (la->type == LA_SUN) {
+ if (this->shadow_id == LIGHT_NO_SHADOW) {
+ this->shadow_id = shadows.directionals.alloc();
+ }
+
+ ShadowDirectional &shadow = shadows.directionals[this->shadow_id];
+ shadow.sync(this->object_mat, la->bias * 0.05f, 1.0f);
+ }
+ else {
+ float cone_aperture = DEG2RAD(360.0);
+ if (la->type == LA_SPOT) {
+ cone_aperture = min_ff(DEG2RAD(179.9), la->spotsize);
+ }
+ else if (la->type == LA_AREA) {
+ cone_aperture = DEG2RAD(179.9);
+ }
+
+ if (this->shadow_id == LIGHT_NO_SHADOW) {
+ this->shadow_id = shadows.punctuals.alloc();
+ }
+
+ ShadowPunctual &shadow = shadows.punctuals[this->shadow_id];
+ shadow.sync(this->type,
+ this->object_mat,
+ cone_aperture,
+ la->clipsta,
+ this->influence_radius_max,
+ la->bias * 0.05f);
+ }
+ }
+ else {
+ shadow_discard_safe(shadows);
+ }
+#endif
+
+ this->initialized = true;
+}
+
+#if 0
+void Light::shadow_discard_safe(ShadowModule &shadows)
+{
+ if (shadow_id != LIGHT_NO_SHADOW) {
+ if (this->type != LIGHT_SUN) {
+ shadows.punctuals.free(shadow_id);
+ }
+ else {
+ shadows.directionals.free(shadow_id);
+ }
+ shadow_id = LIGHT_NO_SHADOW;
+ }
+}
+#endif
+
+/* Returns attenuation radius inverted & squared for easy bound checking inside the shader. */
+float Light::attenuation_radius_get(const ::Light *la, float light_threshold, float light_power)
+{
+ if (la->type == LA_SUN) {
+ return (light_power > 1e-5f) ? 1e16f : 0.0f;
+ }
+
+ if (la->mode & LA_CUSTOM_ATTENUATION) {
+ return la->att_dist;
+ }
+ /* Compute the distance (using the inverse square law)
+ * at which the light power reaches the light_threshold. */
+ /* TODO take area light scale into account. */
+ return sqrtf(light_power / light_threshold);
+}
+
+void Light::shape_parameters_set(const ::Light *la, const float scale[3])
+{
+ if (la->type == LA_AREA) {
+ float area_size_y = (ELEM(la->area_shape, LA_AREA_RECT, LA_AREA_ELLIPSE)) ? la->area_sizey :
+ la->area_size;
+ _area_size_x = max_ff(0.003f, la->area_size * scale[0] * 0.5f);
+ _area_size_y = max_ff(0.003f, area_size_y * scale[1] * 0.5f);
+ /* For volume point lighting. */
+ radius_squared = max_ff(0.001f, hypotf(_area_size_x, _area_size_y) * 0.5f);
+ radius_squared = square_f(radius_squared);
+ }
+ else {
+ if (la->type == LA_SPOT) {
+ /* Spot size & blend */
+ spot_size_inv[0] = scale[2] / scale[0];
+ spot_size_inv[1] = scale[2] / scale[1];
+ float spot_size = cosf(la->spotsize * 0.5f);
+ float spot_blend = (1.0f - spot_size) * la->spotblend;
+ _spot_mul = 1.0f / max_ff(1e-8f, spot_blend);
+ _spot_bias = -spot_size * _spot_mul;
+ spot_tan = tanf(min_ff(la->spotsize * 0.5f, M_PI_2 - 0.0001f));
+ }
+
+ if (la->type == LA_SUN) {
+ _area_size_x = tanf(min_ff(la->sun_angle, DEG2RADF(179.9f)) / 2.0f);
+ }
+ else {
+ _area_size_x = la->area_size;
+ }
+ _area_size_y = _area_size_x = max_ff(0.001f, _area_size_x);
+ radius_squared = square_f(_area_size_x);
+ }
+}
+
+float Light::shape_power_get(const ::Light *la)
+{
+ /* Make illumination power constant */
+ switch (la->type) {
+ case LA_AREA: {
+ float area = _area_size_x * _area_size_y;
+ float power = 1.0f / (area * 4.0f * float(M_PI));
+ /* FIXME : Empirical, Fit cycles power */
+ power *= 0.8f;
+ if (ELEM(la->area_shape, LA_AREA_DISK, LA_AREA_ELLIPSE)) {
+ /* Scale power to account for the lower area of the ellipse compared to the surrounding
+ * rectangle. */
+ power *= 4.0f / M_PI;
+ }
+ return power;
+ }
+ case LA_SPOT:
+ case LA_LOCAL: {
+ return 1.0f / (4.0f * square_f(_radius) * float(M_PI * M_PI));
+ }
+ default:
+ case LA_SUN: {
+ float power = 1.0f / (square_f(_radius) * float(M_PI));
+ /* Make illumination power closer to cycles for bigger radii. Cycles uses a cos^3 term that
+ * we cannot reproduce so we account for that by scaling the light power. This function is
+ * the result of a rough manual fitting. */
+ /* Simplification of: power *= 1 + r²/2 */
+ power += 1.0f / (2.0f * M_PI);
+
+ return power;
+ }
+ }
+}
+
+float Light::point_power_get(const ::Light *la)
+{
+ /* Volume light is evaluated as point lights. Remove the shape power. */
+ switch (la->type) {
+ case LA_AREA: {
+ /* Match cycles. Empirical fit... must correspond to some constant. */
+ float power = 0.0792f * M_PI;
+
+ /* This corrects for area light most representative point trick. The fit was found by
+ * reducing the average error compared to cycles. */
+ float area = _area_size_x * _area_size_y;
+ float tmp = M_PI_2 / (M_PI_2 + sqrtf(area));
+ /* Lerp between 1.0 and the limit (1 / pi). */
+ power *= tmp + (1.0f - tmp) * M_1_PI;
+
+ return power;
+ }
+ case LA_SPOT:
+ case LA_LOCAL: {
+ /* Match cycles. Empirical fit... must correspond to some constant. */
+ return 0.0792f;
+ }
+ default:
+ case LA_SUN: {
+ return 1.0f;
+ }
+ }
+}
+
+void Light::debug_draw()
+{
+#ifdef DEBUG
+ drw_debug_sphere(_position, influence_radius_max, float4(0.8f, 0.3f, 0.0f, 1.0f));
+#endif
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name LightModule
+ * \{ */
+
+void LightModule::begin_sync()
+{
+ use_scene_lights_ = inst_.use_scene_lights();
+
+ /* In begin_sync so it can be animated. */
+ if (assign_if_different(light_threshold_, max_ff(1e-16f, inst_.scene->eevee.light_threshold))) {
+ inst_.sampling.reset();
+ }
+
+ sun_lights_len_ = 0;
+ local_lights_len_ = 0;
+}
+
+void LightModule::sync_light(const Object *ob, ObjectHandle &handle)
+{
+ if (use_scene_lights_ == false) {
+ return;
+ }
+ Light &light = light_map_.lookup_or_add_default(handle.object_key);
+ light.used = true;
+ if (handle.recalc != 0 || !light.initialized) {
+ light.sync(/* inst_.shadows, */ ob, light_threshold_);
+ }
+ sun_lights_len_ += int(light.type == LIGHT_SUN);
+ local_lights_len_ += int(light.type != LIGHT_SUN);
+}
+
+void LightModule::end_sync()
+{
+ // ShadowModule &shadows = inst_.shadows;
+
+ /* NOTE: We resize this buffer before removing deleted lights. */
+ int lights_allocated = ceil_to_multiple_u(max_ii(light_map_.size(), 1), LIGHT_CHUNK);
+ light_buf_.resize(lights_allocated);
+
+ /* Track light deletion. */
+ Vector<ObjectKey, 0> deleted_keys;
+ /* Indices inside GPU data array. */
+ int sun_lights_idx = 0;
+ int local_lights_idx = sun_lights_len_;
+
+ /* Fill GPU data with scene data. */
+ for (auto item : light_map_.items()) {
+ Light &light = item.value;
+
+ if (!light.used) {
+ /* Deleted light. */
+ deleted_keys.append(item.key);
+ // light.shadow_discard_safe(shadows);
+ continue;
+ }
+
+ int dst_idx = (light.type == LIGHT_SUN) ? sun_lights_idx++ : local_lights_idx++;
+ /* Put all light data into global data SSBO. */
+ light_buf_[dst_idx] = light;
+
+#if 0
+ if (light.shadow_id != LIGHT_NO_SHADOW) {
+ if (light.type == LIGHT_SUN) {
+ light_buf_[dst_idx].shadow_data = shadows.directionals[light.shadow_id];
+ }
+ else {
+ light_buf_[dst_idx].shadow_data = shadows.punctuals[light.shadow_id];
+ }
+ }
+#endif
+ /* Untag for next sync. */
+ light.used = false;
+ }
+ /* This scene data buffer is then immutable after this point. */
+ light_buf_.push_update();
+
+ for (auto &key : deleted_keys) {
+ light_map_.remove(key);
+ }
+
+ /* Update sampling on deletion or un-hiding (use_scene_lights). */
+ if (assign_if_different(light_map_size_, light_map_.size())) {
+ inst_.sampling.reset();
+ }
+
+ /* If exceeding the limit, just trim off the excess to avoid glitchy rendering. */
+ if (sun_lights_len_ + local_lights_len_ > CULLING_MAX_ITEM) {
+ sun_lights_len_ = min_ii(sun_lights_len_, CULLING_MAX_ITEM);
+ local_lights_len_ = min_ii(local_lights_len_, CULLING_MAX_ITEM - sun_lights_len_);
+ inst_.info = "Error: Too many lights in the scene.";
+ }
+ lights_len_ = sun_lights_len_ + local_lights_len_;
+
+ /* Resize to the actual number of lights after pruning. */
+ lights_allocated = ceil_to_multiple_u(max_ii(lights_len_, 1), LIGHT_CHUNK);
+ culling_key_buf_.resize(lights_allocated);
+ culling_zdist_buf_.resize(lights_allocated);
+ culling_light_buf_.resize(lights_allocated);
+
+ {
+ /* Compute tile size and total word count. */
+ uint word_per_tile = divide_ceil_u(max_ii(lights_len_, 1), 32);
+ int2 render_extent = inst_.film.render_extent_get();
+ int2 tiles_extent;
+ /* Default to 32 as this is likely to be the maximum
+ * tile size used by hardware or compute shading. */
+ uint tile_size = 16;
+ do {
+ tile_size *= 2;
+ tiles_extent = math::divide_ceil(render_extent, int2(tile_size));
+ uint tile_count = tiles_extent.x * tiles_extent.y;
+ if (tile_count > max_tile_count_threshold) {
+ continue;
+ }
+ total_word_count_ = tile_count * word_per_tile;
+
+ } while (total_word_count_ > max_word_count_threshold);
+ /* Keep aligned with storage buffer requirements. */
+ total_word_count_ = ceil_to_multiple_u(total_word_count_, 32);
+
+ culling_data_buf_.tile_word_len = word_per_tile;
+ culling_data_buf_.tile_size = tile_size;
+ culling_data_buf_.tile_x_len = tiles_extent.x;
+ culling_data_buf_.tile_y_len = tiles_extent.y;
+ culling_data_buf_.items_count = lights_len_;
+ culling_data_buf_.local_lights_len = local_lights_len_;
+ culling_data_buf_.sun_lights_len = sun_lights_len_;
+ }
+ culling_tile_buf_.resize(total_word_count_);
+
+ culling_pass_sync();
+ debug_pass_sync();
+}
+
+void LightModule::culling_pass_sync()
+{
+ uint safe_lights_len = max_ii(lights_len_, 1);
+ uint culling_select_dispatch_size = divide_ceil_u(safe_lights_len, CULLING_SELECT_GROUP_SIZE);
+ uint culling_sort_dispatch_size = divide_ceil_u(safe_lights_len, CULLING_SORT_GROUP_SIZE);
+ uint culling_tile_dispatch_size = divide_ceil_u(total_word_count_, CULLING_TILE_GROUP_SIZE);
+
+ /* NOTE: We reference the buffers that may be resized or updated later. */
+
+ culling_ps_.init();
+ {
+ auto &sub = culling_ps_.sub("Select");
+ sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_SELECT));
+ sub.bind_ssbo("light_cull_buf", &culling_data_buf_);
+ sub.bind_ssbo("in_light_buf", light_buf_);
+ sub.bind_ssbo("out_light_buf", culling_light_buf_);
+ sub.bind_ssbo("out_zdist_buf", culling_zdist_buf_);
+ sub.bind_ssbo("out_key_buf", culling_key_buf_);
+ sub.dispatch(int3(culling_select_dispatch_size, 1, 1));
+ sub.barrier(GPU_BARRIER_SHADER_STORAGE);
+ }
+ {
+ auto &sub = culling_ps_.sub("Sort");
+ sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_SORT));
+ sub.bind_ssbo("light_cull_buf", &culling_data_buf_);
+ sub.bind_ssbo("in_light_buf", light_buf_);
+ sub.bind_ssbo("out_light_buf", culling_light_buf_);
+ sub.bind_ssbo("in_zdist_buf", culling_zdist_buf_);
+ sub.bind_ssbo("in_key_buf", culling_key_buf_);
+ sub.dispatch(int3(culling_sort_dispatch_size, 1, 1));
+ sub.barrier(GPU_BARRIER_SHADER_STORAGE);
+ }
+ {
+ auto &sub = culling_ps_.sub("Zbin");
+ sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_ZBIN));
+ sub.bind_ssbo("light_cull_buf", &culling_data_buf_);
+ sub.bind_ssbo("light_buf", culling_light_buf_);
+ sub.bind_ssbo("out_zbin_buf", culling_zbin_buf_);
+ sub.dispatch(int3(1, 1, 1));
+ sub.barrier(GPU_BARRIER_SHADER_STORAGE);
+ }
+ {
+ auto &sub = culling_ps_.sub("Tiles");
+ sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_TILE));
+ sub.bind_ssbo("light_cull_buf", &culling_data_buf_);
+ sub.bind_ssbo("light_buf", culling_light_buf_);
+ sub.bind_ssbo("out_light_tile_buf", culling_tile_buf_);
+ sub.dispatch(int3(culling_tile_dispatch_size, 1, 1));
+ sub.barrier(GPU_BARRIER_SHADER_STORAGE);
+ }
+}
+
+void LightModule::debug_pass_sync()
+{
+ if (inst_.debug_mode == eDebugMode::DEBUG_LIGHT_CULLING) {
+ debug_draw_ps_.init();
+ debug_draw_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM);
+ debug_draw_ps_.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_DEBUG));
+ inst_.hiz_buffer.bind_resources(&debug_draw_ps_);
+ debug_draw_ps_.bind_ssbo("light_buf", &culling_light_buf_);
+ debug_draw_ps_.bind_ssbo("light_cull_buf", &culling_data_buf_);
+ debug_draw_ps_.bind_ssbo("light_zbin_buf", &culling_zbin_buf_);
+ debug_draw_ps_.bind_ssbo("light_tile_buf", &culling_tile_buf_);
+ debug_draw_ps_.bind_texture("depth_tx", &inst_.render_buffers.depth_tx);
+ debug_draw_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3);
+ }
+}
+
+void LightModule::set_view(View &view, const int2 extent)
+{
+ float far_z = view.far_clip();
+ float near_z = view.near_clip();
+
+ culling_data_buf_.zbin_scale = -CULLING_ZBIN_COUNT / fabsf(far_z - near_z);
+ culling_data_buf_.zbin_bias = -near_z * culling_data_buf_.zbin_scale;
+ culling_data_buf_.tile_to_uv_fac = (culling_data_buf_.tile_size / float2(extent));
+ culling_data_buf_.visible_count = 0;
+ culling_data_buf_.push_update();
+
+ inst_.manager->submit(culling_ps_, view);
+}
+
+void LightModule::debug_draw(View &view, GPUFrameBuffer *view_fb)
+{
+ if (inst_.debug_mode == eDebugMode::DEBUG_LIGHT_CULLING) {
+ inst_.info = "Debug Mode: Light Culling Validation";
+ inst_.hiz_buffer.update();
+ GPU_framebuffer_bind(view_fb);
+ inst_.manager->submit(debug_draw_ps_, view);
+ }
+}
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_light.hh b/source/blender/draw/engines/eevee_next/eevee_light.hh
new file mode 100644
index 00000000000..9bacc180ea8
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_light.hh
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * The light module manages light data buffers and light culling system.
+ *
+ * The culling follows the principles of Tiled Culling + Z binning from:
+ * "Improved Culling for Tiled and Clustered Rendering"
+ * by Michal Drobot
+ * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf
+ *
+ * The culling is separated in 4 compute phases:
+ * - View Culling (select pass): Create a z distance and a index buffer of visible lights.
+ * - Light sorting: Outputs visible lights sorted by Z distance.
+ * - Z binning: Compute the Z bins min/max light indices.
+ * - Tile intersection: Fine grained 2D culling of each lights outputting a bitmap per tile.
+ */
+
+#pragma once
+
+#include "BLI_bitmap.h"
+#include "BLI_vector.hh"
+#include "DNA_light_types.h"
+
+#include "eevee_camera.hh"
+#include "eevee_sampling.hh"
+#include "eevee_shader.hh"
+#include "eevee_shader_shared.hh"
+#include "eevee_sync.hh"
+
+namespace blender::eevee {
+
+class Instance;
+
+/* -------------------------------------------------------------------- */
+/** \name Light Object
+ * \{ */
+
+struct Light : public LightData {
+ public:
+ bool initialized = false;
+ bool used = false;
+
+ public:
+ Light()
+ {
+ shadow_id = LIGHT_NO_SHADOW;
+ }
+
+ void sync(/* ShadowModule &shadows, */ const Object *ob, float threshold);
+
+ // void shadow_discard_safe(ShadowModule &shadows);
+
+ void debug_draw();
+
+ private:
+ float attenuation_radius_get(const ::Light *la, float light_threshold, float light_power);
+ void shape_parameters_set(const ::Light *la, const float scale[3]);
+ float shape_power_get(const ::Light *la);
+ float point_power_get(const ::Light *la);
+};
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name LightModule
+ * \{ */
+
+/**
+ * The light module manages light data buffers and light culling system.
+ */
+class LightModule {
+ // friend ShadowModule;
+
+ private:
+ /* Keep tile count reasonable for memory usage and 2D culling performance. */
+ static constexpr uint max_memory_threshold = 32 * 1024 * 1024; /* 32 MiB */
+ static constexpr uint max_word_count_threshold = max_memory_threshold / sizeof(uint);
+ static constexpr uint max_tile_count_threshold = 8192;
+
+ Instance &inst_;
+
+ /** Map of light objects data. Converted to flat array each frame. */
+ Map<ObjectKey, Light> light_map_;
+ /** Flat array sent to GPU, populated from light_map_. Source buffer for light culling. */
+ LightDataBuf light_buf_ = {"Lights_no_cull"};
+ /** Recorded size of light_map_ (after pruning) to detect deletion. */
+ int64_t light_map_size_ = 0;
+ /** Luminous intensity to consider the light boundary at. Used for culling. */
+ float light_threshold_ = 0.01f;
+ /** If false, will prevent all scene light from being synced. */
+ bool use_scene_lights_ = false;
+ /** Number of sun lights synced during the last sync. Used as offset. */
+ int sun_lights_len_ = 0;
+ int local_lights_len_ = 0;
+ /** Sun plus local lights count for convenience. */
+ int lights_len_ = 0;
+
+ /**
+ * Light Culling
+ */
+
+ /** LightData buffer used for rendering. Filled by the culling pass. */
+ LightDataBuf culling_light_buf_ = {"Lights_culled"};
+ /** Culling infos. */
+ LightCullingDataBuf culling_data_buf_ = {"LightCull_data"};
+ /** Z-distance matching the key for each visible lights. Used for sorting. */
+ LightCullingZdistBuf culling_zdist_buf_ = {"LightCull_zdist"};
+ /** Key buffer containing only visible lights indices. Used for sorting. */
+ LightCullingKeyBuf culling_key_buf_ = {"LightCull_key"};
+ /** Zbins containing min and max light index for each Z bin. */
+ LightCullingZbinBuf culling_zbin_buf_ = {"LightCull_zbin"};
+ /** Bitmap of lights touching each tiles. */
+ LightCullingTileBuf culling_tile_buf_ = {"LightCull_tile"};
+ /** Culling compute passes. */
+ PassSimple culling_ps_ = {"LightCulling"};
+ /** Total number of words the tile buffer needs to contain for the render resolution. */
+ uint total_word_count_ = 0;
+
+ /** Debug Culling visualization. */
+ PassSimple debug_draw_ps_ = {"LightCulling.Debug"};
+
+ public:
+ LightModule(Instance &inst) : inst_(inst){};
+ ~LightModule(){};
+
+ void begin_sync();
+ void sync_light(const Object *ob, ObjectHandle &handle);
+ void end_sync();
+
+ /**
+ * Update acceleration structure for the given view.
+ */
+ void set_view(View &view, const int2 extent);
+
+ void debug_draw(View &view, GPUFrameBuffer *view_fb);
+
+ void bind_resources(DRWShadingGroup *grp)
+ {
+ DRW_shgroup_storage_block_ref(grp, "light_buf", &culling_light_buf_);
+ DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
+ DRW_shgroup_storage_block_ref(grp, "light_zbin_buf", &culling_zbin_buf_);
+ DRW_shgroup_storage_block_ref(grp, "light_tile_buf", &culling_tile_buf_);
+#if 0
+ DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", inst_.shadows.atlas_tx_get());
+ DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", inst_.shadows.tilemap_tx_get());
+#endif
+ }
+
+ template<typename T> void bind_resources(draw::detail::PassBase<T> *pass)
+ {
+ /* Storage Buf. */
+ pass->bind_ssbo(LIGHT_CULL_BUF_SLOT, &culling_data_buf_);
+ pass->bind_ssbo(LIGHT_BUF_SLOT, &culling_light_buf_);
+ pass->bind_ssbo(LIGHT_ZBIN_BUF_SLOT, &culling_zbin_buf_);
+ pass->bind_ssbo(LIGHT_TILE_BUF_SLOT, &culling_tile_buf_);
+ }
+
+ private:
+ void culling_pass_sync();
+ void debug_pass_sync();
+};
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_material.cc b/source/blender/draw/engines/eevee_next/eevee_material.cc
index 1676c89d679..a92f96e8c70 100644
--- a/source/blender/draw/engines/eevee_next/eevee_material.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_material.cc
@@ -72,10 +72,9 @@ bNodeTree *DefaultSurfaceNodeTree::nodetree_get(::Material *ma)
MaterialModule::MaterialModule(Instance &inst) : inst_(inst)
{
{
- bNodeTree *ntree = ntreeAddTree(nullptr, "Shader Nodetree", ntreeType_Shader->idname);
-
diffuse_mat = (::Material *)BKE_id_new_nomain(ID_MA, "EEVEE default diffuse");
- diffuse_mat->nodetree = ntree;
+ bNodeTree *ntree = ntreeAddTreeEmbedded(
+ nullptr, &diffuse_mat->id, "Shader Nodetree", ntreeType_Shader->idname);
diffuse_mat->use_nodes = true;
/* To use the forward pipeline. */
diffuse_mat->blend_method = MA_BM_BLEND;
@@ -95,10 +94,9 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst)
nodeSetActive(ntree, output);
}
{
- bNodeTree *ntree = ntreeAddTree(nullptr, "Shader Nodetree", ntreeType_Shader->idname);
-
glossy_mat = (::Material *)BKE_id_new_nomain(ID_MA, "EEVEE default metal");
- glossy_mat->nodetree = ntree;
+ bNodeTree *ntree = ntreeAddTreeEmbedded(
+ nullptr, &glossy_mat->id, "Shader Nodetree", ntreeType_Shader->idname);
glossy_mat->use_nodes = true;
/* To use the forward pipeline. */
glossy_mat->blend_method = MA_BM_BLEND;
@@ -120,10 +118,9 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst)
nodeSetActive(ntree, output);
}
{
- bNodeTree *ntree = ntreeAddTree(nullptr, "Shader Nodetree", ntreeType_Shader->idname);
-
error_mat_ = (::Material *)BKE_id_new_nomain(ID_MA, "EEVEE default error");
- error_mat_->nodetree = ntree;
+ bNodeTree *ntree = ntreeAddTreeEmbedded(
+ nullptr, &error_mat_->id, "Shader Nodetree", ntreeType_Shader->idname);
error_mat_->use_nodes = true;
/* Use emission and output material to be compatible with both World and Material. */
@@ -145,9 +142,6 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst)
MaterialModule::~MaterialModule()
{
- for (Material *mat : material_map_.values()) {
- delete mat;
- }
BKE_id_free(nullptr, glossy_mat);
BKE_id_free(nullptr, diffuse_mat);
BKE_id_free(nullptr, error_mat_);
@@ -157,13 +151,12 @@ void MaterialModule::begin_sync()
{
queued_shaders_count = 0;
- for (Material *mat : material_map_.values()) {
- mat->init = false;
- }
+ material_map_.clear();
shader_map_.clear();
}
-MaterialPass MaterialModule::material_pass_get(::Material *blender_mat,
+MaterialPass MaterialModule::material_pass_get(Object *ob,
+ ::Material *blender_mat,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type)
{
@@ -195,7 +188,7 @@ MaterialPass MaterialModule::material_pass_get(::Material *blender_mat,
BLI_assert(GPU_material_status(matpass.gpumat) == GPU_MAT_SUCCESS);
if (GPU_material_recalc_flag_get(matpass.gpumat)) {
- // inst_.sampling.reset();
+ inst_.sampling.reset();
}
if ((pipeline_type == MAT_PIPE_DEFERRED) &&
@@ -203,35 +196,34 @@ MaterialPass MaterialModule::material_pass_get(::Material *blender_mat,
pipeline_type = MAT_PIPE_FORWARD;
}
- if ((pipeline_type == MAT_PIPE_FORWARD) &&
+ if (ELEM(pipeline_type,
+ MAT_PIPE_FORWARD,
+ MAT_PIPE_FORWARD_PREPASS,
+ MAT_PIPE_FORWARD_PREPASS_VELOCITY) &&
GPU_material_flag_get(matpass.gpumat, GPU_MATFLAG_TRANSPARENT)) {
- /* Transparent needs to use one shgroup per object to support reordering. */
- matpass.shgrp = inst_.pipelines.material_add(blender_mat, matpass.gpumat, pipeline_type);
+ /* Transparent pass is generated later. */
+ matpass.sub_pass = nullptr;
}
else {
ShaderKey shader_key(matpass.gpumat, geometry_type, pipeline_type);
- auto add_cb = [&]() -> DRWShadingGroup * {
- /* First time encountering this shader. Create a shading group. */
- return inst_.pipelines.material_add(blender_mat, matpass.gpumat, pipeline_type);
- };
- DRWShadingGroup *grp = shader_map_.lookup_or_add_cb(shader_key, add_cb);
-
- if (grp != nullptr) {
- /* Shading group for this shader already exists. Create a sub one for this material. */
- /* IMPORTANT: We always create a subgroup so that all subgroups are inserted after the
- * first "empty" shgroup. This avoids messing the order of subgroups when there is more
- * nested subgroup (i.e: hair drawing). */
- /* TODO(@fclem): Remove material resource binding from the first group creation. */
- matpass.shgrp = DRW_shgroup_create_sub(grp);
- DRW_shgroup_add_material_resources(matpass.shgrp, matpass.gpumat);
+ PassMain::Sub *shader_sub = shader_map_.lookup_or_add_cb(shader_key, [&]() {
+ /* First time encountering this shader. Create a sub that will contain materials using it. */
+ return inst_.pipelines.material_add(ob, blender_mat, matpass.gpumat, pipeline_type);
+ });
+
+ if (shader_sub != nullptr) {
+ /* Create a sub for this material as `shader_sub` is for sharing shader between materials. */
+ matpass.sub_pass = &shader_sub->sub(GPU_material_get_name(matpass.gpumat));
+ matpass.sub_pass->material_set(*inst_.manager, matpass.gpumat);
}
}
return matpass;
}
-Material &MaterialModule::material_sync(::Material *blender_mat,
+Material &MaterialModule::material_sync(Object *ob,
+ ::Material *blender_mat,
eMaterialGeometry geometry_type,
bool has_motion)
{
@@ -249,27 +241,32 @@ Material &MaterialModule::material_sync(::Material *blender_mat,
MaterialKey material_key(blender_mat, geometry_type, surface_pipe);
- /* TODO: allocate in blocks to avoid memory fragmentation. */
- auto add_cb = [&]() { return new Material(); };
- Material &mat = *material_map_.lookup_or_add_cb(material_key, add_cb);
-
- /* Forward pipeline needs to use one shgroup per object. */
- if (mat.init == false || (surface_pipe == MAT_PIPE_FORWARD)) {
- mat.init = true;
+ Material &mat = material_map_.lookup_or_add_cb(material_key, [&]() {
+ Material mat;
/* Order is important for transparent. */
- mat.prepass = material_pass_get(blender_mat, prepass_pipe, geometry_type);
- mat.shading = material_pass_get(blender_mat, surface_pipe, geometry_type);
+ mat.prepass = material_pass_get(ob, blender_mat, prepass_pipe, geometry_type);
+ mat.shading = material_pass_get(ob, blender_mat, surface_pipe, geometry_type);
if (blender_mat->blend_shadow == MA_BS_NONE) {
mat.shadow = MaterialPass();
}
else {
- mat.shadow = material_pass_get(blender_mat, MAT_PIPE_SHADOW, geometry_type);
+ mat.shadow = material_pass_get(ob, blender_mat, MAT_PIPE_SHADOW, geometry_type);
}
-
mat.is_alpha_blend_transparent = (blender_mat->blend_method == MA_BM_BLEND) &&
- GPU_material_flag_get(mat.prepass.gpumat,
+ GPU_material_flag_get(mat.shading.gpumat,
GPU_MATFLAG_TRANSPARENT);
+ return mat;
+ });
+
+ if (mat.is_alpha_blend_transparent) {
+ /* Transparent needs to use one sub pass per object to support reordering.
+ * NOTE: Pre-pass needs to be created first in order to be sorted first. */
+ mat.prepass.sub_pass = inst_.pipelines.forward.prepass_transparent_add(
+ ob, blender_mat, mat.shading.gpumat);
+ mat.shading.sub_pass = inst_.pipelines.forward.material_transparent_add(
+ ob, blender_mat, mat.shading.gpumat);
}
+
return mat;
}
@@ -297,7 +294,7 @@ MaterialArray &MaterialModule::material_array_get(Object *ob, bool has_motion)
for (auto i : IndexRange(materials_len)) {
::Material *blender_mat = material_from_slot(ob, i);
- Material &mat = material_sync(blender_mat, to_material_geometry(ob), has_motion);
+ Material &mat = material_sync(ob, blender_mat, to_material_geometry(ob), has_motion);
material_array_.materials.append(&mat);
material_array_.gpu_materials.append(mat.shading.gpumat);
}
@@ -310,7 +307,7 @@ Material &MaterialModule::material_get(Object *ob,
eMaterialGeometry geometry_type)
{
::Material *blender_mat = material_from_slot(ob, mat_nr);
- Material &mat = material_sync(blender_mat, geometry_type, has_motion);
+ Material &mat = material_sync(ob, blender_mat, geometry_type, has_motion);
return mat;
}
diff --git a/source/blender/draw/engines/eevee_next/eevee_material.hh b/source/blender/draw/engines/eevee_next/eevee_material.hh
index 23165a741b9..ad0c293926b 100644
--- a/source/blender/draw/engines/eevee_next/eevee_material.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_material.hh
@@ -203,12 +203,11 @@ class DefaultSurfaceNodeTree {
* \{ */
struct MaterialPass {
- GPUMaterial *gpumat = nullptr;
- DRWShadingGroup *shgrp = nullptr;
+ GPUMaterial *gpumat;
+ PassMain::Sub *sub_pass;
};
struct Material {
- bool init = false;
bool is_alpha_blend_transparent;
MaterialPass shadow, shading, prepass;
};
@@ -228,8 +227,8 @@ class MaterialModule {
private:
Instance &inst_;
- Map<MaterialKey, Material *> material_map_;
- Map<ShaderKey, DRWShadingGroup *> shader_map_;
+ Map<MaterialKey, Material> material_map_;
+ Map<ShaderKey, PassMain::Sub *> shader_map_;
MaterialArray material_array_;
@@ -254,13 +253,15 @@ class MaterialModule {
Material &material_get(Object *ob, bool has_motion, int mat_nr, eMaterialGeometry geometry_type);
private:
- Material &material_sync(::Material *blender_mat,
+ Material &material_sync(Object *ob,
+ ::Material *blender_mat,
eMaterialGeometry geometry_type,
bool has_motion);
/** Return correct material or empty default material if slot is empty. */
::Material *material_from_slot(Object *ob, int slot);
- MaterialPass material_pass_get(::Material *blender_mat,
+ MaterialPass material_pass_get(Object *ob,
+ ::Material *blender_mat,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type);
};
diff --git a/source/blender/draw/engines/eevee_next/eevee_motion_blur.cc b/source/blender/draw/engines/eevee_next/eevee_motion_blur.cc
new file mode 100644
index 00000000000..f68abafa3d4
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_motion_blur.cc
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ */
+
+// #include "BLI_map.hh"
+#include "DEG_depsgraph_query.h"
+
+#include "eevee_instance.hh"
+#include "eevee_motion_blur.hh"
+// #include "eevee_sampling.hh"
+// #include "eevee_shader_shared.hh"
+// #include "eevee_velocity.hh"
+
+namespace blender::eevee {
+
+/* -------------------------------------------------------------------- */
+/** \name MotionBlurModule
+ *
+ * \{ */
+
+void MotionBlurModule::init()
+{
+ const Scene *scene = inst_.scene;
+
+ enabled_ = (scene->eevee.flag & SCE_EEVEE_MOTION_BLUR_ENABLED) != 0;
+
+ if (!enabled_) {
+ motion_blur_fx_enabled_ = false;
+ return;
+ }
+
+ /* Take into account the steps needed for fx motion blur. */
+ int steps_count = max_ii(1, scene->eevee.motion_blur_steps) * 2 + 1;
+
+ time_steps_.resize(steps_count);
+
+ initial_frame_ = scene->r.cfra;
+ initial_subframe_ = scene->r.subframe;
+ frame_time_ = initial_frame_ + initial_subframe_;
+ shutter_position_ = scene->eevee.motion_blur_position;
+ shutter_time_ = scene->eevee.motion_blur_shutter;
+
+ data_.depth_scale = scene->eevee.motion_blur_depth_scale;
+ motion_blur_fx_enabled_ = true; /* TODO(fclem): UI option. */
+
+ /* Viewport stops here. We only do Post-FX motion blur. */
+ if (inst_.is_viewport()) {
+ enabled_ = false;
+ return;
+ }
+
+ /* Without this there is the possibility of the curve table not being allocated. */
+ BKE_curvemapping_changed((struct CurveMapping *)&scene->r.mblur_shutter_curve, false);
+
+ Vector<float> cdf(CM_TABLE);
+ Sampling::cdf_from_curvemapping(scene->r.mblur_shutter_curve, cdf);
+ Sampling::cdf_invert(cdf, time_steps_);
+
+ for (float &time : time_steps_) {
+ time = this->shutter_time_to_scene_time(time);
+ }
+
+ step_id_ = 1;
+
+ if (motion_blur_fx_enabled_) {
+ /* A bit weird but we have to sync the first 2 steps here because the step()
+ * function is only called after rendering a sample. */
+ inst_.velocity.step_sync(STEP_PREVIOUS, time_steps_[0]);
+ inst_.velocity.step_sync(STEP_NEXT, time_steps_[2]);
+ }
+ inst_.set_time(time_steps_[1]);
+}
+
+/* Runs after rendering a sample. */
+void MotionBlurModule::step()
+{
+ if (!enabled_) {
+ return;
+ }
+
+ if (inst_.sampling.finished()) {
+ /* Restore original frame number. This is because the render pipeline expects it. */
+ RE_engine_frame_set(inst_.render, initial_frame_, initial_subframe_);
+ }
+ else if (inst_.sampling.do_render_sync()) {
+ /* Time to change motion step. */
+ BLI_assert(time_steps_.size() > step_id_ + 2);
+ step_id_ += 2;
+
+ if (motion_blur_fx_enabled_) {
+ inst_.velocity.step_swap();
+ inst_.velocity.step_sync(eVelocityStep::STEP_NEXT, time_steps_[step_id_ + 1]);
+ }
+ inst_.set_time(time_steps_[step_id_]);
+ }
+}
+
+float MotionBlurModule::shutter_time_to_scene_time(float time)
+{
+ switch (shutter_position_) {
+ case SCE_EEVEE_MB_START:
+ /* No offset. */
+ break;
+ case SCE_EEVEE_MB_CENTER:
+ time -= 0.5f;
+ break;
+ case SCE_EEVEE_MB_END:
+ time -= 1.0;
+ break;
+ default:
+ BLI_assert(!"Invalid motion blur position enum!");
+ break;
+ }
+ time *= shutter_time_;
+ time += frame_time_;
+ return time;
+}
+
+void MotionBlurModule::sync()
+{
+ /* Disable motion blur in viewport when changing camera projection type.
+ * Avoids really high velocities. */
+ if (inst_.velocity.camera_changed_projection()) {
+ motion_blur_fx_enabled_ = false;
+ }
+
+ if (!motion_blur_fx_enabled_) {
+ return;
+ }
+
+ eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT;
+ RenderBuffers &render_buffers = inst_.render_buffers;
+
+ motion_blur_ps_.init();
+ inst_.velocity.bind_resources(&motion_blur_ps_);
+ inst_.sampling.bind_resources(&motion_blur_ps_);
+ {
+ /* Create max velocity tiles. */
+ PassSimple::Sub &sub = motion_blur_ps_.sub("TilesFlatten");
+ eShaderType shader = (inst_.is_viewport()) ? MOTION_BLUR_TILE_FLATTEN_VIEWPORT :
+ MOTION_BLUR_TILE_FLATTEN_RENDER;
+ sub.shader_set(inst_.shaders.static_shader_get(shader));
+ sub.bind_ubo("motion_blur_buf", data_);
+ sub.bind_texture("depth_tx", &render_buffers.depth_tx);
+ sub.bind_image("velocity_img", &render_buffers.vector_tx);
+ sub.bind_image("out_tiles_img", &tiles_tx_);
+ sub.dispatch(&dispatch_flatten_size_);
+ sub.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS | GPU_BARRIER_TEXTURE_FETCH);
+ }
+ {
+ /* Expand max velocity tiles by spreading them in their neighborhood. */
+ PassSimple::Sub &sub = motion_blur_ps_.sub("TilesDilate");
+ sub.shader_set(inst_.shaders.static_shader_get(MOTION_BLUR_TILE_DILATE));
+ sub.bind_ssbo("tile_indirection_buf", tile_indirection_buf_);
+ sub.bind_image("in_tiles_img", &tiles_tx_);
+ sub.dispatch(&dispatch_dilate_size_);
+ sub.barrier(GPU_BARRIER_SHADER_STORAGE);
+ }
+ {
+ /* Do the motion blur gather algorithm. */
+ PassSimple::Sub &sub = motion_blur_ps_.sub("ConvolveGather");
+ sub.shader_set(inst_.shaders.static_shader_get(MOTION_BLUR_GATHER));
+ sub.bind_ubo("motion_blur_buf", data_);
+ sub.bind_ssbo("tile_indirection_buf", tile_indirection_buf_);
+ sub.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter);
+ sub.bind_texture("velocity_tx", &render_buffers.vector_tx, no_filter);
+ sub.bind_texture("in_color_tx", &input_color_tx_, no_filter);
+ sub.bind_image("in_tiles_img", &tiles_tx_);
+ sub.bind_image("out_color_img", &output_color_tx_);
+
+ sub.dispatch(&dispatch_gather_size_);
+ sub.barrier(GPU_BARRIER_TEXTURE_FETCH);
+ }
+}
+
+void MotionBlurModule::render(View &view, GPUTexture **input_tx, GPUTexture **output_tx)
+{
+ if (!motion_blur_fx_enabled_) {
+ return;
+ }
+
+ const Texture &depth_tx = inst_.render_buffers.depth_tx;
+
+ int2 extent = {depth_tx.width(), depth_tx.height()};
+ int2 tiles_extent = math::divide_ceil(extent, int2(MOTION_BLUR_TILE_SIZE));
+
+ if (inst_.is_viewport()) {
+ float frame_delta = fabsf(inst_.velocity.step_time_delta_get(STEP_PREVIOUS, STEP_CURRENT));
+ /* Avoid highly disturbing blurs, during navigation with high shutter time. */
+ if (frame_delta > 0.0f && !DRW_state_is_navigating()) {
+ /* Rescale motion blur intensity to be shutter time relative and avoid long streak when we
+ * have frame skipping. Always try to stick to what the render frame would look like. */
+ data_.motion_scale = float2(shutter_time_ / frame_delta);
+ }
+ else {
+ /* There is no time change. Motion only comes from viewport navigation and object transform.
+ * Apply motion blur as smoothing and only blur towards last frame. */
+ data_.motion_scale = float2(1.0f, 0.0f);
+
+ if (was_navigating_ != DRW_state_is_navigating()) {
+ /* Special case for navigation events that only last for one frame (for instance mouse
+ * scroll for zooming). For this case we have to wait for the next frame before enabling
+ * the navigation motion blur. */
+ was_navigating_ = DRW_state_is_navigating();
+ return;
+ }
+ }
+ was_navigating_ = DRW_state_is_navigating();
+
+ /* Change texture swizzling to avoid complexity in gather pass shader. */
+ GPU_texture_swizzle_set(inst_.render_buffers.vector_tx, "rgrg");
+ }
+ else {
+ data_.motion_scale = float2(1.0f);
+ }
+ /* Second motion vector is stored inverted. */
+ data_.motion_scale.y = -data_.motion_scale.y;
+ data_.target_size_inv = 1.0f / float2(extent);
+ data_.push_update();
+
+ input_color_tx_ = *input_tx;
+ output_color_tx_ = *output_tx;
+
+ dispatch_flatten_size_ = int3(tiles_extent, 1);
+ dispatch_dilate_size_ = int3(math::divide_ceil(tiles_extent, int2(MOTION_BLUR_GROUP_SIZE)), 1);
+ dispatch_gather_size_ = int3(math::divide_ceil(extent, int2(MOTION_BLUR_GROUP_SIZE)), 1);
+
+ DRW_stats_group_start("Motion Blur");
+
+ tiles_tx_.acquire(tiles_extent, GPU_RGBA16F);
+
+ GPU_storagebuf_clear_to_zero(tile_indirection_buf_);
+
+ inst_.manager->submit(motion_blur_ps_, view);
+
+ tiles_tx_.release();
+
+ DRW_stats_group_end();
+
+ if (inst_.is_viewport()) {
+ /* Reset swizzle since this texture might be reused in other places. */
+ GPU_texture_swizzle_set(inst_.render_buffers.vector_tx, "rgba");
+ }
+
+ /* Swap buffers so that next effect has the right input. */
+ *input_tx = output_color_tx_;
+ *output_tx = input_color_tx_;
+}
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_motion_blur.hh b/source/blender/draw/engines/eevee_next/eevee_motion_blur.hh
new file mode 100644
index 00000000000..056c2e323d5
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_motion_blur.hh
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2022 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * Motion blur is done by accumulating scene samples over shutter time.
+ * Since the number of step is discrete, quite low, and not per pixel randomized,
+ * we couple this with a post processing motion blur.
+ *
+ * The post-fx motion blur is done in two directions, from the previous step and to the next.
+ *
+ * For a scene with 3 motion steps, a flat shutter curve and shutter time of 2 frame
+ * centered on frame we have:
+ *
+ * |--------------------|--------------------|
+ * -1 0 1 Frames
+ *
+ * |-------------|-------------|-------------|
+ * 1 2 3 Motion steps
+ *
+ * |------|------|------|------|------|------|
+ * 0 1 2 4 5 6 7 Time Steps
+ *
+ * |-------------| One motion step blurs this range.
+ * -1 | +1 Objects and geometry steps are recorded here.
+ * 0 Scene is rendered here.
+ *
+ * Since motion step N and N+1 share one time step we reuse it to avoid an extra scene evaluation.
+ *
+ * Note that we have to evaluate -1 and +1 time steps before rendering so eval order is -1, +1, 0.
+ * This is because all GPUBatches from the DRWCache are being free when changing a frame.
+ *
+ * For viewport, we only have the current and previous step data to work with. So we center the
+ * blur on the current frame and extrapolate the motion.
+ *
+ * The Post-FX motion blur is based on:
+ * "A Fast and Stable Feature-Aware Motion Blur Filter"
+ * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai
+ */
+
+#pragma once
+
+#include "BLI_map.hh"
+#include "DEG_depsgraph_query.h"
+
+#include "eevee_sampling.hh"
+#include "eevee_shader_shared.hh"
+#include "eevee_velocity.hh"
+
+namespace blender::eevee {
+
+/* -------------------------------------------------------------------- */
+/** \name MotionBlur
+ *
+ * \{ */
+
+/**
+ * Manages time-steps evaluations and accumulation Motion blur.
+ * Also handles Post process motion blur.
+ */
+class MotionBlurModule {
+ private:
+ Instance &inst_;
+
+ /**
+ * Array containing all steps (in scene time) we need to evaluate (not render).
+ * Only odd steps are rendered. The even ones are evaluated for fx motion blur.
+ */
+ Vector<float> time_steps_;
+
+ /** Copy of input frame and sub-frame to restore after render. */
+ int initial_frame_;
+ float initial_subframe_;
+ /** Time of the frame we are rendering. */
+ float frame_time_;
+ /** Enum controlling when the shutter opens. See SceneEEVEE.motion_blur_position. */
+ int shutter_position_;
+ /** Time in scene frame the shutter is open. Controls the amount of blur. */
+ float shutter_time_;
+
+ /** True if motion blur is enabled as a module. */
+ bool enabled_ = false;
+ /** True if motion blur post-fx is enabled. */
+ float motion_blur_fx_enabled_ = false;
+ /** True if last viewport redraw state was already in navigation state. */
+ bool was_navigating_ = false;
+
+ int step_id_ = 0;
+
+ /** Velocity tiles used to guide and speedup the gather pass. */
+ TextureFromPool tiles_tx_;
+
+ GPUTexture *input_color_tx_ = nullptr;
+ GPUTexture *output_color_tx_ = nullptr;
+
+ PassSimple motion_blur_ps_ = {"MotionBlur"};
+
+ MotionBlurTileIndirectionBuf tile_indirection_buf_;
+ MotionBlurDataBuf data_;
+ /** Dispatch size for full-screen passes. */
+ int3 dispatch_flatten_size_ = int3(0);
+ int3 dispatch_dilate_size_ = int3(0);
+ int3 dispatch_gather_size_ = int3(0);
+
+ public:
+ MotionBlurModule(Instance &inst) : inst_(inst){};
+ ~MotionBlurModule(){};
+
+ void init();
+
+ void step();
+
+ void sync();
+
+ bool postfx_enabled() const
+ {
+ return motion_blur_fx_enabled_;
+ }
+
+ void render(View &view, GPUTexture **input_tx, GPUTexture **output_tx);
+
+ private:
+ float shutter_time_to_scene_time(float time);
+};
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc
index 33853eba06c..16bdfb04d14 100644
--- a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc
@@ -24,21 +24,35 @@ namespace blender::eevee {
void WorldPipeline::sync(GPUMaterial *gpumat)
{
- DRWState state = DRW_STATE_WRITE_COLOR;
- world_ps_ = DRW_pass_create("World", state);
-
- /* Push a matrix at the same location as the camera. */
- float4x4 camera_mat = float4x4::identity();
- // copy_v3_v3(camera_mat[3], inst_.camera.data_get().viewinv[3]);
-
- DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, world_ps_);
- DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx);
- DRW_shgroup_call_obmat(grp, DRW_cache_fullscreen_quad_get(), camera_mat.ptr());
+ Manager &manager = *inst_.manager;
+ RenderBuffers &rbufs = inst_.render_buffers;
+
+ ResourceHandle handle = manager.resource_handle(float4x4::identity().ptr());
+
+ world_ps_.init();
+ world_ps_.state_set(DRW_STATE_WRITE_COLOR);
+ world_ps_.material_set(manager, gpumat);
+ world_ps_.push_constant("world_opacity_fade", inst_.film.background_opacity_get());
+ world_ps_.bind_texture("utility_tx", inst_.pipelines.utility_tx);
+ /* AOVs. */
+ world_ps_.bind_image("aov_color_img", &rbufs.aov_color_tx);
+ world_ps_.bind_image("aov_value_img", &rbufs.aov_value_tx);
+ world_ps_.bind_ssbo("aov_buf", &inst_.film.aovs_info);
+ /* RenderPasses. Cleared by background (even if bad practice). */
+ world_ps_.bind_image("rp_normal_img", &rbufs.normal_tx);
+ world_ps_.bind_image("rp_light_img", &rbufs.light_tx);
+ world_ps_.bind_image("rp_diffuse_color_img", &rbufs.diffuse_color_tx);
+ world_ps_.bind_image("rp_specular_color_img", &rbufs.specular_color_tx);
+ world_ps_.bind_image("rp_emission_img", &rbufs.emission_tx);
+
+ world_ps_.draw(DRW_cache_fullscreen_quad_get(), handle);
+ /* To allow opaque pass rendering over it. */
+ world_ps_.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
-void WorldPipeline::render()
+void WorldPipeline::render(View &view)
{
- DRW_draw_pass(world_ps_);
+ inst_.manager->submit(world_ps_, view);
}
/** \} */
@@ -51,182 +65,164 @@ void WorldPipeline::render()
void ForwardPipeline::sync()
{
+ camera_forward_ = inst_.camera.forward();
+
+ DRWState state_depth_only = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS;
+ DRWState state_depth_color = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS |
+ DRW_STATE_WRITE_COLOR;
{
- DRWState state = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS;
- prepass_ps_ = DRW_pass_create("Forward.Opaque.Prepass", state);
- prepass_velocity_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Velocity",
- state | DRW_STATE_WRITE_COLOR);
+ prepass_ps_.init();
- state |= DRW_STATE_CULL_BACK;
- prepass_culled_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Culled", state);
- prepass_culled_velocity_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Velocity",
- state | DRW_STATE_WRITE_COLOR);
+ {
+ /* Common resources. */
- DRW_pass_link(prepass_ps_, prepass_velocity_ps_);
- DRW_pass_link(prepass_velocity_ps_, prepass_culled_ps_);
- DRW_pass_link(prepass_culled_ps_, prepass_culled_velocity_ps_);
- }
- {
- DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL;
- opaque_ps_ = DRW_pass_create("Forward.Opaque", state);
+ /* Textures. */
+ prepass_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
- state |= DRW_STATE_CULL_BACK;
- opaque_culled_ps_ = DRW_pass_create("Forward.Opaque.Culled", state);
+ inst_.velocity.bind_resources(&prepass_ps_);
+ inst_.sampling.bind_resources(&prepass_ps_);
+ }
+
+ prepass_double_sided_static_ps_ = &prepass_ps_.sub("DoubleSided.Static");
+ prepass_double_sided_static_ps_->state_set(state_depth_only);
+
+ prepass_single_sided_static_ps_ = &prepass_ps_.sub("SingleSided.Static");
+ prepass_single_sided_static_ps_->state_set(state_depth_only | DRW_STATE_CULL_BACK);
- DRW_pass_link(opaque_ps_, opaque_culled_ps_);
+ prepass_double_sided_moving_ps_ = &prepass_ps_.sub("DoubleSided.Moving");
+ prepass_double_sided_moving_ps_->state_set(state_depth_color);
+
+ prepass_single_sided_moving_ps_ = &prepass_ps_.sub("SingleSided.Moving");
+ prepass_single_sided_moving_ps_->state_set(state_depth_color | DRW_STATE_CULL_BACK);
}
{
- DRWState state = DRW_STATE_DEPTH_LESS_EQUAL;
- transparent_ps_ = DRW_pass_create("Forward.Transparent", state);
+ opaque_ps_.init();
+
+ {
+ /* Common resources. */
+
+ /* RenderPasses. */
+ opaque_ps_.bind_image(RBUFS_NORMAL_SLOT, &inst_.render_buffers.normal_tx);
+ opaque_ps_.bind_image(RBUFS_LIGHT_SLOT, &inst_.render_buffers.light_tx);
+ opaque_ps_.bind_image(RBUFS_DIFF_COLOR_SLOT, &inst_.render_buffers.diffuse_color_tx);
+ opaque_ps_.bind_image(RBUFS_SPEC_COLOR_SLOT, &inst_.render_buffers.specular_color_tx);
+ opaque_ps_.bind_image(RBUFS_EMISSION_SLOT, &inst_.render_buffers.emission_tx);
+ /* AOVs. */
+ opaque_ps_.bind_image(RBUFS_AOV_COLOR_SLOT, &inst_.render_buffers.aov_color_tx);
+ opaque_ps_.bind_image(RBUFS_AOV_VALUE_SLOT, &inst_.render_buffers.aov_value_tx);
+ /* Storage Buf. */
+ opaque_ps_.bind_ssbo(RBUFS_AOV_BUF_SLOT, &inst_.film.aovs_info);
+ /* Textures. */
+ opaque_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
+
+ inst_.lights.bind_resources(&opaque_ps_);
+ inst_.sampling.bind_resources(&opaque_ps_);
+ }
+
+ opaque_single_sided_ps_ = &opaque_ps_.sub("SingleSided");
+ opaque_single_sided_ps_->state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL |
+ DRW_STATE_CULL_BACK);
+
+ opaque_double_sided_ps_ = &opaque_ps_.sub("DoubleSided");
+ opaque_double_sided_ps_->state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL);
}
-}
+ {
+ transparent_ps_.init();
+ /* Workaround limitation of PassSortable. Use dummy pass that will be sorted first in all
+ * circumstances. */
+ PassMain::Sub &sub = transparent_ps_.sub("ResourceBind", -FLT_MAX);
-DRWShadingGroup *ForwardPipeline::material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat)
-{
- DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? opaque_culled_ps_ : opaque_ps_;
- // LightModule &lights = inst_.lights;
- // LightProbeModule &lightprobes = inst_.lightprobes;
- // RaytracingModule &raytracing = inst_.raytracing;
- // eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
- DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass);
- // lights.shgroup_resources(grp);
- // DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get());
- // DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get());
- // DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get());
- // DRW_shgroup_uniform_block(grp, "probes_buf", lightprobes.info_ubo_get());
- // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
- // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
- DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx);
- /* TODO(fclem): Make this only needed if material uses it ... somehow. */
- // if (true) {
- // DRW_shgroup_uniform_texture_ref(
- // grp, "sss_transmittance_tx", inst_.subsurface.transmittance_ref_get());
- // }
- // if (raytracing.enabled()) {
- // DRW_shgroup_uniform_block(grp, "rt_diffuse_buf", raytracing.diffuse_data);
- // DRW_shgroup_uniform_block(grp, "rt_reflection_buf", raytracing.reflection_data);
- // DRW_shgroup_uniform_block(grp, "rt_refraction_buf", raytracing.refraction_data);
- // DRW_shgroup_uniform_texture_ref_ex(grp, "radiance_tx", &input_screen_radiance_tx_,
- // no_interp);
- // }
- // if (raytracing.enabled()) {
- // DRW_shgroup_uniform_block(grp, "hiz_buf", inst_.hiz.ubo_get());
- // DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", inst_.hiz_front.texture_ref_get());
- // }
- return grp;
-}
+ /* Common resources. */
-DRWShadingGroup *ForwardPipeline::prepass_opaque_add(::Material *blender_mat,
- GPUMaterial *gpumat,
- bool has_motion)
-{
- DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ?
- (has_motion ? prepass_culled_velocity_ps_ : prepass_culled_ps_) :
- (has_motion ? prepass_velocity_ps_ : prepass_ps_);
- DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass);
- if (has_motion) {
- inst_.velocity.bind_resources(grp);
+ /* Textures. */
+ sub.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
+
+ inst_.lights.bind_resources(&sub);
+ inst_.sampling.bind_resources(&sub);
}
- return grp;
}
-DRWShadingGroup *ForwardPipeline::material_transparent_add(::Material *blender_mat,
- GPUMaterial *gpumat)
+PassMain::Sub *ForwardPipeline::prepass_opaque_add(::Material *blender_mat,
+ GPUMaterial *gpumat,
+ bool has_motion)
{
- // LightModule &lights = inst_.lights;
- // LightProbeModule &lightprobes = inst_.lightprobes;
- // RaytracingModule &raytracing = inst_.raytracing;
- // eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
- DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_);
- // lights.shgroup_resources(grp);
- // DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get());
- // DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get());
- // DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get());
- // DRW_shgroup_uniform_block(grp, "probes_buf", lightprobes.info_ubo_get());
- // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
- // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
- // DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx);
- /* TODO(fclem): Make this only needed if material uses it ... somehow. */
- // if (true) {
- // DRW_shgroup_uniform_texture_ref(
- // grp, "sss_transmittance_tx", inst_.subsurface.transmittance_ref_get());
- // }
- // if (raytracing.enabled()) {
- // DRW_shgroup_uniform_block(grp, "rt_diffuse_buf", raytracing.diffuse_data);
- // DRW_shgroup_uniform_block(grp, "rt_reflection_buf", raytracing.reflection_data);
- // DRW_shgroup_uniform_block(grp, "rt_refraction_buf", raytracing.refraction_data);
- // DRW_shgroup_uniform_texture_ref_ex(
- // grp, "rt_radiance_tx", &input_screen_radiance_tx_, no_interp);
- // }
- // if (raytracing.enabled()) {
- // DRW_shgroup_uniform_block(grp, "hiz_buf", inst_.hiz.ubo_get());
- // DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", inst_.hiz_front.texture_ref_get());
- // }
+ PassMain::Sub *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ?
+ (has_motion ? prepass_single_sided_moving_ps_ :
+ prepass_single_sided_static_ps_) :
+ (has_motion ? prepass_double_sided_moving_ps_ :
+ prepass_double_sided_static_ps_);
+ return &pass->sub(GPU_material_get_name(gpumat));
+}
- DRWState state_disable = DRW_STATE_WRITE_DEPTH;
- DRWState state_enable = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM;
- if (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) {
- state_enable |= DRW_STATE_CULL_BACK;
- }
- DRW_shgroup_state_disable(grp, state_disable);
- DRW_shgroup_state_enable(grp, state_enable);
- return grp;
+PassMain::Sub *ForwardPipeline::material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat)
+{
+ PassMain::Sub *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? opaque_single_sided_ps_ :
+ opaque_double_sided_ps_;
+ return &pass->sub(GPU_material_get_name(gpumat));
}
-DRWShadingGroup *ForwardPipeline::prepass_transparent_add(::Material *blender_mat,
- GPUMaterial *gpumat)
+PassMain::Sub *ForwardPipeline::prepass_transparent_add(const Object *ob,
+ ::Material *blender_mat,
+ GPUMaterial *gpumat)
{
if ((blender_mat->blend_flag & MA_BL_HIDE_BACKFACE) == 0) {
return nullptr;
}
+ DRWState state = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS_EQUAL;
+ if ((blender_mat->blend_flag & MA_BL_CULL_BACKFACE)) {
+ state |= DRW_STATE_CULL_BACK;
+ }
+ float sorting_value = math::dot(float3(ob->obmat[3]), camera_forward_);
+ PassMain::Sub *pass = &transparent_ps_.sub(GPU_material_get_name(gpumat), sorting_value);
+ pass->state_set(state);
+ pass->material_set(*inst_.manager, gpumat);
+ return pass;
+}
- DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_);
-
- DRWState state_disable = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM;
- DRWState state_enable = DRW_STATE_WRITE_DEPTH;
- if (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) {
- state_enable |= DRW_STATE_CULL_BACK;
+PassMain::Sub *ForwardPipeline::material_transparent_add(const Object *ob,
+ ::Material *blender_mat,
+ GPUMaterial *gpumat)
+{
+ DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_LESS_EQUAL;
+ if ((blender_mat->blend_flag & MA_BL_CULL_BACKFACE)) {
+ state |= DRW_STATE_CULL_BACK;
}
- DRW_shgroup_state_disable(grp, state_disable);
- DRW_shgroup_state_enable(grp, state_enable);
- return grp;
+ float sorting_value = math::dot(float3(ob->obmat[3]), camera_forward_);
+ PassMain::Sub *pass = &transparent_ps_.sub(GPU_material_get_name(gpumat), sorting_value);
+ pass->state_set(state);
+ pass->material_set(*inst_.manager, gpumat);
+ return pass;
}
-void ForwardPipeline::render(const DRWView *view,
+void ForwardPipeline::render(View &view,
Framebuffer &prepass_fb,
Framebuffer &combined_fb,
- GPUTexture *depth_tx,
GPUTexture *UNUSED(combined_tx))
{
- UNUSED_VARS(view, depth_tx, prepass_fb, combined_fb);
- // HiZBuffer &hiz = inst_.hiz_front;
+ UNUSED_VARS(view);
- DRW_stats_group_start("ForwardOpaque");
+ DRW_stats_group_start("Forward.Opaque");
GPU_framebuffer_bind(prepass_fb);
- DRW_draw_pass(prepass_ps_);
+ inst_.manager->submit(prepass_ps_, view);
- // hiz.set_dirty();
+ // if (!DRW_pass_is_empty(prepass_ps_)) {
+ inst_.hiz_buffer.set_dirty();
+ // }
// if (inst_.raytracing.enabled()) {
// rt_buffer.radiance_copy(combined_tx);
- // hiz.update(depth_tx);
+ // inst_.hiz_buffer.update();
// }
// inst_.shadows.set_view(view, depth_tx);
GPU_framebuffer_bind(combined_fb);
- DRW_draw_pass(opaque_ps_);
+ inst_.manager->submit(opaque_ps_, view);
DRW_stats_group_end();
- DRW_stats_group_start("ForwardTransparent");
- /* TODO(fclem) This is suboptimal. We could sort during sync. */
- /* FIXME(fclem) This wont work for panoramic, where we need
- * to sort by distance to camera, not by z. */
- DRW_pass_sort_shgroup_z(transparent_ps_);
- DRW_draw_pass(transparent_ps_);
- DRW_stats_group_end();
+ inst_.manager->submit(transparent_ps_, view);
// if (inst_.raytracing.enabled()) {
// gbuffer.ray_radiance_tx.release();
diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.hh b/source/blender/draw/engines/eevee_next/eevee_pipeline.hh
index 3bdc718767b..0614a963dec 100644
--- a/source/blender/draw/engines/eevee_next/eevee_pipeline.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.hh
@@ -13,6 +13,7 @@
#pragma once
#include "DRW_render.h"
+#include "draw_shader_shared.h"
/* TODO(fclem): Move it to GPU/DRAW. */
#include "../eevee/eevee_lut.h"
@@ -31,13 +32,13 @@ class WorldPipeline {
private:
Instance &inst_;
- DRWPass *world_ps_ = nullptr;
+ PassSimple world_ps_ = {"World.Background"};
public:
WorldPipeline(Instance &inst) : inst_(inst){};
void sync(GPUMaterial *gpumat);
- void render();
+ void render(View &view);
};
/** \} */
@@ -52,13 +53,18 @@ class ForwardPipeline {
private:
Instance &inst_;
- DRWPass *prepass_ps_ = nullptr;
- DRWPass *prepass_velocity_ps_ = nullptr;
- DRWPass *prepass_culled_ps_ = nullptr;
- DRWPass *prepass_culled_velocity_ps_ = nullptr;
- DRWPass *opaque_ps_ = nullptr;
- DRWPass *opaque_culled_ps_ = nullptr;
- DRWPass *transparent_ps_ = nullptr;
+ PassMain prepass_ps_ = {"Prepass"};
+ PassMain::Sub *prepass_single_sided_static_ps_ = nullptr;
+ PassMain::Sub *prepass_single_sided_moving_ps_ = nullptr;
+ PassMain::Sub *prepass_double_sided_static_ps_ = nullptr;
+ PassMain::Sub *prepass_double_sided_moving_ps_ = nullptr;
+
+ PassMain opaque_ps_ = {"Shading"};
+ PassMain::Sub *opaque_single_sided_ps_ = nullptr;
+ PassMain::Sub *opaque_double_sided_ps_ = nullptr;
+
+ PassSortable transparent_ps_ = {"Forward.Transparent"};
+ float3 camera_forward_;
// GPUTexture *input_screen_radiance_tx_ = nullptr;
@@ -67,31 +73,19 @@ class ForwardPipeline {
void sync();
- DRWShadingGroup *material_add(::Material *blender_mat, GPUMaterial *gpumat)
- {
- return (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) ?
- material_transparent_add(blender_mat, gpumat) :
- material_opaque_add(blender_mat, gpumat);
- }
+ PassMain::Sub *prepass_opaque_add(::Material *blender_mat, GPUMaterial *gpumat, bool has_motion);
+ PassMain::Sub *material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat);
- DRWShadingGroup *prepass_add(::Material *blender_mat, GPUMaterial *gpumat, bool has_motion)
- {
- return (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) ?
- prepass_transparent_add(blender_mat, gpumat) :
- prepass_opaque_add(blender_mat, gpumat, has_motion);
- }
-
- DRWShadingGroup *material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat);
- DRWShadingGroup *prepass_opaque_add(::Material *blender_mat,
- GPUMaterial *gpumat,
- bool has_motion);
- DRWShadingGroup *material_transparent_add(::Material *blender_mat, GPUMaterial *gpumat);
- DRWShadingGroup *prepass_transparent_add(::Material *blender_mat, GPUMaterial *gpumat);
+ PassMain::Sub *prepass_transparent_add(const Object *ob,
+ ::Material *blender_mat,
+ GPUMaterial *gpumat);
+ PassMain::Sub *material_transparent_add(const Object *ob,
+ ::Material *blender_mat,
+ GPUMaterial *gpumat);
- void render(const DRWView *view,
+ void render(View &view,
Framebuffer &prepass_fb,
Framebuffer &combined_fb,
- GPUTexture *depth_tx,
GPUTexture *combined_tx);
};
@@ -193,26 +187,36 @@ class PipelineModule {
// velocity.sync();
}
- DRWShadingGroup *material_add(::Material *blender_mat,
- GPUMaterial *gpumat,
- eMaterialPipeline pipeline_type)
+ PassMain::Sub *material_add(Object *ob,
+ ::Material *blender_mat,
+ GPUMaterial *gpumat,
+ eMaterialPipeline pipeline_type)
{
switch (pipeline_type) {
case MAT_PIPE_DEFERRED_PREPASS:
// return deferred.prepass_add(blender_mat, gpumat, false);
- break;
+ case MAT_PIPE_FORWARD_PREPASS:
+ if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) {
+ return forward.prepass_transparent_add(ob, blender_mat, gpumat);
+ }
+ return forward.prepass_opaque_add(blender_mat, gpumat, false);
+
case MAT_PIPE_DEFERRED_PREPASS_VELOCITY:
// return deferred.prepass_add(blender_mat, gpumat, true);
- break;
- case MAT_PIPE_FORWARD_PREPASS:
- return forward.prepass_add(blender_mat, gpumat, false);
case MAT_PIPE_FORWARD_PREPASS_VELOCITY:
- return forward.prepass_add(blender_mat, gpumat, true);
+ if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) {
+ return forward.prepass_transparent_add(ob, blender_mat, gpumat);
+ }
+ return forward.prepass_opaque_add(blender_mat, gpumat, true);
+
case MAT_PIPE_DEFERRED:
// return deferred.material_add(blender_mat, gpumat);
- break;
case MAT_PIPE_FORWARD:
- return forward.material_add(blender_mat, gpumat);
+ if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) {
+ return forward.material_transparent_add(ob, blender_mat, gpumat);
+ }
+ return forward.material_opaque_add(blender_mat, gpumat);
+
case MAT_PIPE_VOLUME:
/* TODO(fclem) volume pass. */
return nullptr;
diff --git a/source/blender/draw/engines/eevee_next/eevee_renderbuffers.cc b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.cc
new file mode 100644
index 00000000000..c18c913d797
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.cc
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * A film is a fullscreen buffer (usually at output extent)
+ * that will be able to accumulate sample in any distorted camera_type
+ * using a pixel filter.
+ *
+ * Input needs to be jittered so that the filter converges to the right result.
+ */
+
+#include "BLI_rect.h"
+
+#include "GPU_framebuffer.h"
+#include "GPU_texture.h"
+
+#include "DRW_render.h"
+
+#include "eevee_film.hh"
+#include "eevee_instance.hh"
+
+namespace blender::eevee {
+
+void RenderBuffers::acquire(int2 extent)
+{
+ const eViewLayerEEVEEPassType enabled_passes = inst_.film.enabled_passes_get();
+
+ auto pass_extent = [&](eViewLayerEEVEEPassType pass_bit) -> int2 {
+ /* Use dummy texture for disabled passes. Allows correct bindings. */
+ return (enabled_passes & pass_bit) ? extent : int2(1);
+ };
+
+ eGPUTextureFormat color_format = GPU_RGBA16F;
+ eGPUTextureFormat float_format = GPU_R16F;
+
+ /* Depth and combined are always needed. */
+ depth_tx.acquire(extent, GPU_DEPTH24_STENCIL8);
+ combined_tx.acquire(extent, color_format);
+
+ bool do_vector_render_pass = (enabled_passes & EEVEE_RENDER_PASS_VECTOR) ||
+ (inst_.motion_blur.postfx_enabled() && !inst_.is_viewport());
+ uint32_t max_light_color_layer = max_ii(enabled_passes & EEVEE_RENDER_PASS_DIFFUSE_LIGHT ?
+ (int)RENDER_PASS_LAYER_DIFFUSE_LIGHT :
+ -1,
+ enabled_passes & EEVEE_RENDER_PASS_SPECULAR_LIGHT ?
+ (int)RENDER_PASS_LAYER_SPECULAR_LIGHT :
+ -1) +
+ 1;
+ /* Only RG16F when only doing only reprojection or motion blur. */
+ eGPUTextureFormat vector_format = do_vector_render_pass ? GPU_RGBA16F : GPU_RG16F;
+ /* TODO(fclem): Make vector pass allocation optional if no TAA or motion blur is needed. */
+ vector_tx.acquire(extent, vector_format);
+
+ normal_tx.acquire(pass_extent(EEVEE_RENDER_PASS_NORMAL), color_format);
+ diffuse_color_tx.acquire(pass_extent(EEVEE_RENDER_PASS_DIFFUSE_COLOR), color_format);
+ specular_color_tx.acquire(pass_extent(EEVEE_RENDER_PASS_SPECULAR_COLOR), color_format);
+ volume_light_tx.acquire(pass_extent(EEVEE_RENDER_PASS_VOLUME_LIGHT), color_format);
+ emission_tx.acquire(pass_extent(EEVEE_RENDER_PASS_EMIT), color_format);
+ environment_tx.acquire(pass_extent(EEVEE_RENDER_PASS_ENVIRONMENT), color_format);
+ shadow_tx.acquire(pass_extent(EEVEE_RENDER_PASS_SHADOW), float_format);
+ ambient_occlusion_tx.acquire(pass_extent(EEVEE_RENDER_PASS_AO), float_format);
+
+ light_tx.ensure_2d_array(color_format,
+ max_light_color_layer > 0 ? extent : int2(1),
+ max_ii(1, max_light_color_layer));
+
+ const AOVsInfoData &aovs = inst_.film.aovs_info;
+ aov_color_tx.ensure_2d_array(
+ color_format, (aovs.color_len > 0) ? extent : int2(1), max_ii(1, aovs.color_len));
+ aov_value_tx.ensure_2d_array(
+ float_format, (aovs.value_len > 0) ? extent : int2(1), max_ii(1, aovs.value_len));
+}
+
+void RenderBuffers::release()
+{
+ depth_tx.release();
+ combined_tx.release();
+
+ normal_tx.release();
+ vector_tx.release();
+ diffuse_color_tx.release();
+ specular_color_tx.release();
+ volume_light_tx.release();
+ emission_tx.release();
+ environment_tx.release();
+ shadow_tx.release();
+ ambient_occlusion_tx.release();
+}
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_renderbuffers.hh b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.hh
new file mode 100644
index 00000000000..0b761d618cc
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.hh
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2022 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * Render buffers are textures that are filled during a view rendering.
+ * Their content is then added to the accumulation buffers of the film class.
+ * They are short lived and can be reused when doing multi view rendering.
+ */
+
+#pragma once
+
+#include "DRW_render.h"
+
+#include "eevee_shader_shared.hh"
+
+namespace blender::eevee {
+
+class Instance;
+
+class RenderBuffers {
+ public:
+ TextureFromPool depth_tx;
+ TextureFromPool combined_tx;
+
+ // TextureFromPool mist_tx; /* Derived from depth_tx during accumulation. */
+ TextureFromPool normal_tx;
+ TextureFromPool vector_tx;
+ TextureFromPool diffuse_color_tx;
+ TextureFromPool specular_color_tx;
+ TextureFromPool volume_light_tx;
+ TextureFromPool emission_tx;
+ TextureFromPool environment_tx;
+ TextureFromPool shadow_tx;
+ TextureFromPool ambient_occlusion_tx;
+ // TextureFromPool cryptomatte_tx; /* TODO */
+ /* TODO(fclem): Use texture from pool once they support texture array. */
+ Texture light_tx;
+ Texture aov_color_tx;
+ Texture aov_value_tx;
+
+ private:
+ Instance &inst_;
+
+ public:
+ RenderBuffers(Instance &inst) : inst_(inst){};
+
+ /* Acquires (also ensures) the render buffer before rendering to them. */
+ void acquire(int2 extent);
+ void release();
+};
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_sampling.cc b/source/blender/draw/engines/eevee_next/eevee_sampling.cc
new file mode 100644
index 00000000000..76a0e98638b
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_sampling.cc
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * Random number generator, contains persistent state and sample count logic.
+ */
+
+#include "BLI_rand.h"
+
+#include "eevee_instance.hh"
+#include "eevee_sampling.hh"
+
+namespace blender::eevee {
+
+/* -------------------------------------------------------------------- */
+/** \name Sampling
+ * \{ */
+
+void Sampling::init(const Scene *scene)
+{
+ sample_count_ = inst_.is_viewport() ? scene->eevee.taa_samples : scene->eevee.taa_render_samples;
+
+ if (sample_count_ == 0) {
+ BLI_assert(inst_.is_viewport());
+ sample_count_ = infinite_sample_count_;
+ }
+
+ motion_blur_steps_ = !inst_.is_viewport() ? scene->eevee.motion_blur_steps : 1;
+ sample_count_ = divide_ceil_u(sample_count_, motion_blur_steps_);
+
+ if (scene->eevee.flag & SCE_EEVEE_DOF_JITTER) {
+ if (sample_count_ == infinite_sample_count_) {
+ /* Special case for viewport continuous rendering. We clamp to a max sample
+ * to avoid the jittered dof never converging. */
+ dof_ring_count_ = 6;
+ }
+ else {
+ dof_ring_count_ = sampling_web_ring_count_get(dof_web_density_, sample_count_);
+ }
+ dof_sample_count_ = sampling_web_sample_count_get(dof_web_density_, dof_ring_count_);
+ /* Change total sample count to fill the web pattern entirely. */
+ sample_count_ = divide_ceil_u(sample_count_, dof_sample_count_) * dof_sample_count_;
+ }
+ else {
+ dof_ring_count_ = 0;
+ dof_sample_count_ = 1;
+ }
+
+ /* Only multiply after to have full the full DoF web pattern for each time steps. */
+ sample_count_ *= motion_blur_steps_;
+}
+
+void Sampling::end_sync()
+{
+ if (reset_) {
+ viewport_sample_ = 0;
+ }
+
+ if (inst_.is_viewport()) {
+
+ interactive_mode_ = viewport_sample_ < interactive_mode_threshold;
+
+ bool interactive_mode_disabled = (inst_.scene->eevee.flag & SCE_EEVEE_TAA_REPROJECTION) == 0;
+ if (interactive_mode_disabled) {
+ interactive_mode_ = false;
+ sample_ = viewport_sample_;
+ }
+ else if (interactive_mode_) {
+ int interactive_sample_count = min_ii(interactive_sample_max_, sample_count_);
+
+ if (viewport_sample_ < interactive_sample_count) {
+ /* Loop over the same starting samples. */
+ sample_ = sample_ % interactive_sample_count;
+ }
+ else {
+ /* Break out of the loop and resume normal pattern. */
+ sample_ = interactive_sample_count;
+ }
+ }
+ }
+}
+
+void Sampling::step()
+{
+ {
+ /* TODO(fclem) we could use some persistent states to speedup the computation. */
+ double2 r, offset = {0, 0};
+ /* Using 2,3 primes as per UE4 Temporal AA presentation.
+ * http://advances.realtimerendering.com/s2014/epic/TemporalAA.pptx (slide 14) */
+ uint2 primes = {2, 3};
+ BLI_halton_2d(primes, offset, sample_ + 1, r);
+ /* WORKAROUND: We offset the distribution to make the first sample (0,0). This way, we are
+ * assured that at least one of the samples inside the TAA rotation will match the one from the
+ * draw manager. This makes sure overlays are correctly composited in static scene. */
+ data_.dimensions[SAMPLING_FILTER_U] = fractf(r[0] + (1.0 / 2.0));
+ data_.dimensions[SAMPLING_FILTER_V] = fractf(r[1] + (2.0 / 3.0));
+ /* TODO de-correlate. */
+ data_.dimensions[SAMPLING_TIME] = r[0];
+ data_.dimensions[SAMPLING_CLOSURE] = r[1];
+ data_.dimensions[SAMPLING_RAYTRACE_X] = r[0];
+ }
+ {
+ double2 r, offset = {0, 0};
+ uint2 primes = {5, 7};
+ BLI_halton_2d(primes, offset, sample_ + 1, r);
+ data_.dimensions[SAMPLING_LENS_U] = r[0];
+ data_.dimensions[SAMPLING_LENS_V] = r[1];
+ /* TODO de-correlate. */
+ data_.dimensions[SAMPLING_LIGHTPROBE] = r[0];
+ data_.dimensions[SAMPLING_TRANSPARENCY] = r[1];
+ }
+ {
+ /* Using leaped Halton sequence so we can reused the same primes as lens. */
+ double3 r, offset = {0, 0, 0};
+ uint64_t leap = 11;
+ uint3 primes = {5, 4, 7};
+ BLI_halton_3d(primes, offset, sample_ * leap, r);
+ data_.dimensions[SAMPLING_SHADOW_U] = r[0];
+ data_.dimensions[SAMPLING_SHADOW_V] = r[1];
+ data_.dimensions[SAMPLING_SHADOW_W] = r[2];
+ /* TODO de-correlate. */
+ data_.dimensions[SAMPLING_RAYTRACE_U] = r[0];
+ data_.dimensions[SAMPLING_RAYTRACE_V] = r[1];
+ data_.dimensions[SAMPLING_RAYTRACE_W] = r[2];
+ }
+ {
+ /* Using leaped Halton sequence so we can reused the same primes. */
+ double2 r, offset = {0, 0};
+ uint64_t leap = 5;
+ uint2 primes = {2, 3};
+ BLI_halton_2d(primes, offset, sample_ * leap, r);
+ data_.dimensions[SAMPLING_SHADOW_X] = r[0];
+ data_.dimensions[SAMPLING_SHADOW_Y] = r[1];
+ /* TODO de-correlate. */
+ data_.dimensions[SAMPLING_SSS_U] = r[0];
+ data_.dimensions[SAMPLING_SSS_V] = r[1];
+ }
+
+ data_.push_update();
+
+ viewport_sample_++;
+ sample_++;
+
+ reset_ = false;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Sampling patterns
+ * \{ */
+
+float3 Sampling::sample_ball(const float3 &rand)
+{
+ float3 sample;
+ sample.z = rand.x * 2.0f - 1.0f; /* cos theta */
+
+ float r = sqrtf(fmaxf(0.0f, 1.0f - square_f(sample.z))); /* sin theta */
+
+ float omega = rand.y * 2.0f * M_PI;
+ sample.x = r * cosf(omega);
+ sample.y = r * sinf(omega);
+
+ sample *= sqrtf(sqrtf(rand.z));
+ return sample;
+}
+
+float2 Sampling::sample_disk(const float2 &rand)
+{
+ float omega = rand.y * 2.0f * M_PI;
+ return sqrtf(rand.x) * float2(cosf(omega), sinf(omega));
+}
+
+float2 Sampling::sample_spiral(const float2 &rand)
+{
+ /* Fibonacci spiral. */
+ float omega = 4.0f * M_PI * (1.0f + sqrtf(5.0f)) * rand.x;
+ float r = sqrtf(rand.x);
+ /* Random rotation. */
+ omega += rand.y * 2.0f * M_PI;
+ return r * float2(cosf(omega), sinf(omega));
+}
+
+void Sampling::dof_disk_sample_get(float *r_radius, float *r_theta) const
+{
+ if (dof_ring_count_ == 0) {
+ *r_radius = *r_theta = 0.0f;
+ return;
+ }
+
+ int s = sample_ - 1;
+ int ring = 0;
+ int ring_sample_count = 1;
+ int ring_sample = 1;
+
+ s = s * (dof_web_density_ - 1);
+ s = s % dof_sample_count_;
+
+ /* Choosing sample to we get faster convergence.
+ * The issue here is that we cannot map a low discrepancy sequence to this sampling pattern
+ * because the same sample could be chosen twice in relatively short intervals. */
+ /* For now just use an ascending sequence with an offset. This gives us relatively quick
+ * initial coverage and relatively high distance between samples. */
+ /* TODO(@fclem) We can try to order samples based on a LDS into a table to avoid duplicates.
+ * The drawback would be some memory consumption and initialize time. */
+ int samples_passed = 1;
+ while (s >= samples_passed) {
+ ring++;
+ ring_sample_count = ring * dof_web_density_;
+ ring_sample = s - samples_passed;
+ ring_sample = (ring_sample + 1) % ring_sample_count;
+ samples_passed += ring_sample_count;
+ }
+
+ *r_radius = ring / (float)dof_ring_count_;
+ *r_theta = 2.0f * M_PI * ring_sample / (float)ring_sample_count;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Cumulative Distribution Function (CDF)
+ * \{ */
+
+/* Creates a discrete cumulative distribution function table from a given curvemapping.
+ * Output cdf vector is expected to already be sized according to the wanted resolution. */
+void Sampling::cdf_from_curvemapping(const CurveMapping &curve, Vector<float> &cdf)
+{
+ BLI_assert(cdf.size() > 1);
+ cdf[0] = 0.0f;
+ /* Actual CDF evaluation. */
+ for (int u : IndexRange(cdf.size() - 1)) {
+ float x = (float)(u + 1) / (float)(cdf.size() - 1);
+ cdf[u + 1] = cdf[u] + BKE_curvemapping_evaluateF(&curve, 0, x);
+ }
+ /* Normalize the CDF. */
+ for (int u : cdf.index_range()) {
+ cdf[u] /= cdf.last();
+ }
+ /* Just to make sure. */
+ cdf.last() = 1.0f;
+}
+
+/* Inverts a cumulative distribution function.
+ * Output vector is expected to already be sized according to the wanted resolution. */
+void Sampling::cdf_invert(Vector<float> &cdf, Vector<float> &inverted_cdf)
+{
+ for (int u : inverted_cdf.index_range()) {
+ float x = (float)u / (float)(inverted_cdf.size() - 1);
+ for (int i : cdf.index_range()) {
+ if (i == cdf.size() - 1) {
+ inverted_cdf[u] = 1.0f;
+ }
+ else if (cdf[i] >= x) {
+ float t = (x - cdf[i]) / (cdf[i + 1] - cdf[i]);
+ inverted_cdf[u] = ((float)i + t) / (float)(cdf.size() - 1);
+ break;
+ }
+ }
+ }
+}
+
+/** \} */
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_sampling.hh b/source/blender/draw/engines/eevee_next/eevee_sampling.hh
new file mode 100644
index 00000000000..c2bf23d20fc
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/eevee_sampling.hh
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation.
+ */
+
+/** \file
+ * \ingroup eevee
+ *
+ * Random number generator, contains persistent state and sample count logic.
+ */
+
+#pragma once
+
+#include "BKE_colortools.h"
+#include "BLI_system.h"
+#include "BLI_vector.hh"
+#include "DNA_scene_types.h"
+#include "DRW_render.h"
+
+#include "eevee_shader_shared.hh"
+
+namespace blender::eevee {
+
+class Instance;
+
+class Sampling {
+ private:
+ Instance &inst_;
+
+ /* Number of samples in the first ring of jittered depth of field. */
+ static constexpr uint64_t dof_web_density_ = 6;
+ /* High number of sample for viewport infinite rendering. */
+ static constexpr uint64_t infinite_sample_count_ = 0xFFFFFFu;
+ /* During interactive rendering, loop over the first few samples. */
+ static constexpr uint64_t interactive_sample_max_ = 8;
+
+ /** 0 based current sample. Might not increase sequentially in viewport. */
+ uint64_t sample_ = 0;
+ /** Target sample count. */
+ uint64_t sample_count_ = 64;
+ /** Number of ring in the web pattern of the jittered Depth of Field. */
+ uint64_t dof_ring_count_ = 0;
+ /** Number of samples in the web pattern of the jittered Depth of Field. */
+ uint64_t dof_sample_count_ = 1;
+ /** Motion blur steps. */
+ uint64_t motion_blur_steps_ = 1;
+ /** Increases if the view and the scene is static. Does increase sequentially. */
+ int64_t viewport_sample_ = 0;
+ /** Tag to reset sampling for the next sample. */
+ bool reset_ = false;
+ /**
+ * Switch between interactive and static accumulation.
+ * In interactive mode, image stability is prioritized over quality.
+ */
+ bool interactive_mode_ = false;
+ /**
+ * Sample count after which we use the static accumulation.
+ * Interactive sampling from sample 0 to (interactive_mode_threshold - 1).
+ * Accumulation sampling from sample interactive_mode_threshold to sample_count_.
+ */
+ static constexpr int interactive_mode_threshold = 3;
+
+ SamplingDataBuf data_;
+
+ public:
+ Sampling(Instance &inst) : inst_(inst){};
+ ~Sampling(){};
+
+ void init(const Scene *scene);
+ void end_sync();
+ void step();
+
+ /* Viewport Only: Function to call to notify something in the scene changed.
+ * This will reset accumulation. Do not call after end_sync() or during sample rendering. */
+ void reset()
+ {
+ reset_ = true;
+ }
+
+ /* Viewport Only: true if an update happened in the scene and accumulation needs reset. */
+ bool is_reset() const
+ {
+ return reset_;
+ }
+
+ void bind_resources(DRWShadingGroup *grp)
+ {
+ DRW_shgroup_storage_block_ref(grp, "sampling_buf", &data_);
+ }
+
+ template<typename T> void bind_resources(draw::detail::PassBase<T> *pass)
+ {
+ /* Storage Buf. */
+ pass->bind_ssbo(SAMPLING_BUF_SLOT, &data_);
+ }
+
+ /* Returns a pseudo random number in [0..1] range. Each dimension are de-correlated. */
+ float rng_get(eSamplingDimension dimension) const
+ {
+ return data_.dimensions[dimension];
+ }
+
+ /* Returns a pseudo random number in [0..1] range. Each dimension are de-correlated. */
+ float2 rng_2d_get(eSamplingDimension starting_dimension) const
+ {
+ return *reinterpret_cast<const float2 *>(&data_.dimensions[starting_dimension]);
+ }
+
+ /* Returns a pseudo random number in [0..1] range. Each dimension are de-correlated. */
+ float3 rng_3d_get(eSamplingDimension starting_dimension) const
+ {
+ return *reinterpret_cast<const float3 *>(&data_.dimensions[starting_dimension]);
+ }
+
+ /* Returns true if rendering has finished. */
+ bool finished() const
+ {
+ return (sample_ >= sample_count_);
+ }
+
+ /* Returns true if viewport smoothing and sampling has finished. */
+ bool finished_viewport() const
+ {
+ return (viewport_sample_ >= sample_count_) && !interactive_mode_;
+ }
+
+ /* Returns true if viewport renderer is in interactive mode and should use TAA. */
+ bool interactive_mode() const
+ {
+ return interactive_mode_;
+ }
+
+ uint64_t sample_count() const
+ {
+ return sample_count_;
+ }
+
+ /* Return true if we are starting a new motion blur step. We need to run sync again since
+ * depsgraph was updated by MotionBlur::step(). */
+ bool do_render_sync() const
+ {
+ return ((sample_ % (sample_count_ / motion_blur_steps_)) == 0);
+ }
+
+ /**
+ * Special ball distribution:
+ * Point are distributed in a way that when they are orthogonally
+ * projected into any plane, the resulting distribution is (close to)
+ * a uniform disc distribution.
+ * \a rand is 3 random float in the [0..1] range.
+ * Returns point in a ball of radius 1 and centered on the origin.
+ */
+ static float3 sample_ball(const float3 &rand);
+
+ /**
+ * Uniform disc distribution.
+ * \a rand is 2 random float in the [0..1] range.
+ * Returns point in a disk of radius 1 and centered on the origin.
+ */
+ static float2 sample_disk(const float2 &rand);
+
+ /**
+ * Uniform disc distribution using Fibonacci spiral sampling.
+ * \a rand is 2 random float in the [0..1] range.
+ * Returns point in a disk of radius 1 and centered on the origin.
+ */
+ static float2 sample_spiral(const float2 &rand);
+
+ /**
+ * Special RNG for depth of field.
+ * Returns \a radius and \a theta angle offset to apply to the web sampling pattern.
+ */
+ void dof_disk_sample_get(float *r_radius, float *r_theta) const;
+
+ /**
+ * Returns sample count inside the jittered depth of field web pattern.
+ */
+ uint64_t dof_ring_count_get() const
+ {
+ return dof_ring_count_;
+ }
+
+ /**
+ * Returns sample count inside the jittered depth of field web pattern.
+ */
+ uint64_t dof_sample_count_get() const
+ {
+ return dof_sample_count_;
+ }
+
+ /* Cumulative Distribution Function Utils. */
+ static void cdf_from_curvemapping(const CurveMapping &curve, Vector<float> &cdf);
+ static void cdf_invert(Vector<float> &cdf, Vector<float> &inverted_cdf);
+};
+
+} // namespace blender::eevee
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc
index 09aa97e49e9..7ff343d14a8 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc
@@ -9,6 +9,8 @@
* and static shader usage.
*/
+#include "GPU_capabilities.h"
+
#include "gpu_shader_create_info.hh"
#include "eevee_shader.hh"
@@ -78,8 +80,66 @@ ShaderModule::~ShaderModule()
const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_type)
{
switch (shader_type) {
- case VELOCITY_RESOLVE:
- return "eevee_velocity_resolve";
+ case FILM_FRAG:
+ return "eevee_film_frag";
+ case FILM_COMP:
+ return "eevee_film_comp";
+ case HIZ_DEBUG:
+ return "eevee_hiz_debug";
+ case HIZ_UPDATE:
+ return "eevee_hiz_update";
+ case MOTION_BLUR_GATHER:
+ return "eevee_motion_blur_gather";
+ case MOTION_BLUR_TILE_DILATE:
+ return "eevee_motion_blur_tiles_dilate";
+ case MOTION_BLUR_TILE_FLATTEN_RENDER:
+ return "eevee_motion_blur_tiles_flatten_render";
+ case MOTION_BLUR_TILE_FLATTEN_VIEWPORT:
+ return "eevee_motion_blur_tiles_flatten_viewport";
+ case DOF_BOKEH_LUT:
+ return "eevee_depth_of_field_bokeh_lut";
+ case DOF_DOWNSAMPLE:
+ return "eevee_depth_of_field_downsample";
+ case DOF_FILTER:
+ return "eevee_depth_of_field_filter";
+ case DOF_GATHER_FOREGROUND_LUT:
+ return "eevee_depth_of_field_gather_foreground_lut";
+ case DOF_GATHER_FOREGROUND:
+ return "eevee_depth_of_field_gather_foreground_no_lut";
+ case DOF_GATHER_BACKGROUND_LUT:
+ return "eevee_depth_of_field_gather_background_lut";
+ case DOF_GATHER_BACKGROUND:
+ return "eevee_depth_of_field_gather_background_no_lut";
+ case DOF_GATHER_HOLE_FILL:
+ return "eevee_depth_of_field_hole_fill";
+ case DOF_REDUCE:
+ return "eevee_depth_of_field_reduce";
+ case DOF_RESOLVE:
+ return "eevee_depth_of_field_resolve_no_lut";
+ case DOF_RESOLVE_LUT:
+ return "eevee_depth_of_field_resolve_lut";
+ case DOF_SETUP:
+ return "eevee_depth_of_field_setup";
+ case DOF_SCATTER:
+ return "eevee_depth_of_field_scatter";
+ case DOF_STABILIZE:
+ return "eevee_depth_of_field_stabilize";
+ case DOF_TILES_DILATE_MINABS:
+ return "eevee_depth_of_field_tiles_dilate_minabs";
+ case DOF_TILES_DILATE_MINMAX:
+ return "eevee_depth_of_field_tiles_dilate_minmax";
+ case DOF_TILES_FLATTEN:
+ return "eevee_depth_of_field_tiles_flatten";
+ case LIGHT_CULLING_DEBUG:
+ return "eevee_light_culling_debug";
+ case LIGHT_CULLING_SELECT:
+ return "eevee_light_culling_select";
+ case LIGHT_CULLING_SORT:
+ return "eevee_light_culling_sort";
+ case LIGHT_CULLING_TILE:
+ return "eevee_light_culling_tile";
+ case LIGHT_CULLING_ZBIN:
+ return "eevee_light_culling_zbin";
/* To avoid compiler warning about missing case. */
case MAX_SHADER_TYPE:
return "";
@@ -122,11 +182,41 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu
GPUCodegenOutput &codegen = *codegen_;
ShaderCreateInfo &info = *reinterpret_cast<ShaderCreateInfo *>(codegen.create_info);
- info.auto_resource_location(true);
+ /* WORKAROUND: Replace by new ob info. */
+ int64_t ob_info_index = info.additional_infos_.first_index_of_try("draw_object_infos");
+ if (ob_info_index != -1) {
+ info.additional_infos_[ob_info_index] = "draw_object_infos_new";
+ }
+
+ /* WORKAROUND: Add new ob attr buffer. */
+ if (GPU_material_uniform_attributes(gpumat) != nullptr) {
+ info.additional_info("draw_object_attribute_new");
+ }
+
+ /* WORKAROUND: Avoid utility texture merge error. TODO: find a cleaner fix. */
+ for (auto &resource : info.batch_resources_) {
+ if (resource.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
+ if (resource.slot == RBUFS_UTILITY_TEX_SLOT) {
+ resource.slot = GPU_max_textures_frag() - 1;
+ }
+ }
+ }
if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) {
info.define("MAT_TRANSPARENT");
+ /* Transparent material do not have any velocity specific pipeline. */
+ if (pipeline_type == MAT_PIPE_FORWARD_PREPASS_VELOCITY) {
+ pipeline_type = MAT_PIPE_FORWARD_PREPASS;
+ }
}
+
+ if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT) == false &&
+ pipeline_type == MAT_PIPE_FORWARD) {
+ /* Opaque forward do support AOVs and render pass. */
+ info.additional_info("eevee_aov_out");
+ info.additional_info("eevee_render_pass_out");
+ }
+
if (GPU_material_flag_get(gpumat, GPU_MATFLAG_BARYCENTRIC)) {
switch (geometry_type) {
case MAT_GEOM_MESH:
@@ -161,7 +251,6 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu
}
}
info.vertex_inputs_.clear();
- info.additional_info("draw_curves_infos");
break;
case MAT_GEOM_WORLD:
/**
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.hh b/source/blender/draw/engines/eevee_next/eevee_shader.hh
index 0f42e880a10..9ef42c84373 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader.hh
@@ -26,7 +26,40 @@ namespace blender::eevee {
/* Keep alphabetical order and clean prefix. */
enum eShaderType {
- VELOCITY_RESOLVE = 0,
+ FILM_FRAG = 0,
+ FILM_COMP,
+
+ DOF_BOKEH_LUT,
+ DOF_DOWNSAMPLE,
+ DOF_FILTER,
+ DOF_GATHER_BACKGROUND_LUT,
+ DOF_GATHER_BACKGROUND,
+ DOF_GATHER_FOREGROUND_LUT,
+ DOF_GATHER_FOREGROUND,
+ DOF_GATHER_HOLE_FILL,
+ DOF_REDUCE,
+ DOF_RESOLVE_LUT,
+ DOF_RESOLVE,
+ DOF_SCATTER,
+ DOF_SETUP,
+ DOF_STABILIZE,
+ DOF_TILES_DILATE_MINABS,
+ DOF_TILES_DILATE_MINMAX,
+ DOF_TILES_FLATTEN,
+
+ HIZ_UPDATE,
+ HIZ_DEBUG,
+
+ LIGHT_CULLING_DEBUG,
+ LIGHT_CULLING_SELECT,
+ LIGHT_CULLING_SORT,
+ LIGHT_CULLING_TILE,
+ LIGHT_CULLING_ZBIN,
+
+ MOTION_BLUR_GATHER,
+ MOTION_BLUR_TILE_DILATE,
+ MOTION_BLUR_TILE_FLATTEN_RENDER,
+ MOTION_BLUR_TILE_FLATTEN_VIEWPORT,
MAX_SHADER_TYPE,
};
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
index eb409f076f3..bcdb42c0093 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
@@ -12,22 +12,132 @@
# include "BLI_memory_utils.hh"
# include "DRW_gpu_wrapper.hh"
-// # include "eevee_defines.hh"
+# include "draw_manager.hh"
+# include "draw_pass.hh"
+
+# include "eevee_defines.hh"
# include "GPU_shader_shared.h"
namespace blender::eevee {
-using draw::Framebuffer;
-using draw::SwapChain;
-using draw::Texture;
-using draw::TextureFromPool;
+using namespace draw;
+
+constexpr eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT;
+constexpr eGPUSamplerState with_filter = GPU_SAMPLER_FILTER;
#endif
#define UBO_MIN_MAX_SUPPORTED_SIZE 1 << 14
/* -------------------------------------------------------------------- */
+/** \name Debug Mode
+ * \{ */
+
+/** These are just to make more sense of G.debug_value's values. Reserved range is 1-30. */
+enum eDebugMode : uint32_t {
+ DEBUG_NONE = 0u,
+ /**
+ * Gradient showing light evaluation hot-spots.
+ */
+ DEBUG_LIGHT_CULLING = 1u,
+ /**
+ * Show incorrectly downsample tiles in red.
+ */
+ DEBUG_HIZ_VALIDATION = 2u,
+ /**
+ * Tile-maps to screen. Is also present in other modes.
+ * - Black pixels, no pages allocated.
+ * - Green pixels, pages cached.
+ * - Red pixels, pages allocated.
+ */
+ DEBUG_SHADOW_TILEMAPS = 10u,
+ /**
+ * Random color per pages. Validates page density allocation and sampling.
+ */
+ DEBUG_SHADOW_PAGES = 11u,
+ /**
+ * Outputs random color per tile-map (or tile-map level). Validates tile-maps coverage.
+ * Black means not covered by any tile-maps LOD of the shadow.
+ */
+ DEBUG_SHADOW_LOD = 12u,
+ /**
+ * Outputs white pixels for pages allocated and black pixels for unused pages.
+ * This needs DEBUG_SHADOW_PAGE_ALLOCATION_ENABLED defined in order to work.
+ */
+ DEBUG_SHADOW_PAGE_ALLOCATION = 13u,
+ /**
+ * Outputs the tile-map atlas. Default tile-map is too big for the usual screen resolution.
+ * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option.
+ */
+ DEBUG_SHADOW_TILE_ALLOCATION = 14u,
+ /**
+ * Visualize linear depth stored in the atlas regions of the active light.
+ * This way, one can check if the rendering, the copying and the shadow sampling functions works.
+ */
+ DEBUG_SHADOW_SHADOW_DEPTH = 15u
+};
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Sampling
+ * \{ */
+
+enum eSamplingDimension : uint32_t {
+ SAMPLING_FILTER_U = 0u,
+ SAMPLING_FILTER_V = 1u,
+ SAMPLING_LENS_U = 2u,
+ SAMPLING_LENS_V = 3u,
+ SAMPLING_TIME = 4u,
+ SAMPLING_SHADOW_U = 5u,
+ SAMPLING_SHADOW_V = 6u,
+ SAMPLING_SHADOW_W = 7u,
+ SAMPLING_SHADOW_X = 8u,
+ SAMPLING_SHADOW_Y = 9u,
+ SAMPLING_CLOSURE = 10u,
+ SAMPLING_LIGHTPROBE = 11u,
+ SAMPLING_TRANSPARENCY = 12u,
+ SAMPLING_SSS_U = 13u,
+ SAMPLING_SSS_V = 14u,
+ SAMPLING_RAYTRACE_U = 15u,
+ SAMPLING_RAYTRACE_V = 16u,
+ SAMPLING_RAYTRACE_W = 17u,
+ SAMPLING_RAYTRACE_X = 18u
+};
+
+/**
+ * IMPORTANT: Make sure the array can contain all sampling dimensions.
+ * Also note that it needs to be multiple of 4.
+ */
+#define SAMPLING_DIMENSION_COUNT 20
+
+/* NOTE(@fclem): Needs to be used in #StorageBuffer because of arrays of scalar. */
+struct SamplingData {
+ /** Array containing random values from Low Discrepancy Sequence in [0..1) range. */
+ float dimensions[SAMPLING_DIMENSION_COUNT];
+};
+BLI_STATIC_ASSERT_ALIGN(SamplingData, 16)
+
+/* Returns total sample count in a web pattern of the given size. */
+static inline int sampling_web_sample_count_get(int web_density, int ring_count)
+{
+ return ((ring_count * ring_count + ring_count) / 2) * web_density + 1;
+}
+
+/* Returns lowest possible ring count that contains at least sample_count samples. */
+static inline int sampling_web_ring_count_get(int web_density, int sample_count)
+{
+ /* Inversion of web_sample_count_get(). */
+ float x = 2.0f * (float(sample_count) - 1.0f) / float(web_density);
+ /* Solving polynomial. We only search positive solution. */
+ float discriminant = 1.0f + 4.0f * x;
+ return int(ceilf(0.5f * (sqrtf(discriminant) - 1.0f)));
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name Camera
* \{ */
@@ -65,15 +175,164 @@ struct CameraData {
/** Clipping distances. */
float clip_near;
float clip_far;
- /** Film pixel filter radius. */
- float filter_size;
eCameraType type;
+
+ bool1 initialized;
+
+#ifdef __cplusplus
+ /* Small constructor to allow detecting new buffers. */
+ CameraData() : initialized(false){};
+#endif
};
BLI_STATIC_ASSERT_ALIGN(CameraData, 16)
/** \} */
/* -------------------------------------------------------------------- */
+/** \name Film
+ * \{ */
+
+#define FILM_PRECOMP_SAMPLE_MAX 16
+
+enum eFilmWeightLayerIndex : uint32_t {
+ FILM_WEIGHT_LAYER_ACCUMULATION = 0u,
+ FILM_WEIGHT_LAYER_DISTANCE = 1u,
+};
+
+struct FilmSample {
+ int2 texel;
+ float weight;
+ /** Used for accumulation. */
+ float weight_sum_inv;
+};
+BLI_STATIC_ASSERT_ALIGN(FilmSample, 16)
+
+struct FilmData {
+ /** Size of the film in pixels. */
+ int2 extent;
+ /** Offset of the film in the full-res frame, in pixels. */
+ int2 offset;
+ /** Extent used by the render buffers when rendering the main views. */
+ int2 render_extent;
+ /** Sub-pixel offset applied to the window matrix.
+ * NOTE: In final film pixel unit.
+ * NOTE: Positive values makes the view translate in the negative axes direction.
+ * NOTE: The origin is the center of the lower left film pixel of the area covered by a render
+ * pixel if using scaled resolution rendering.
+ */
+ float2 subpixel_offset;
+ /** Scaling factor to convert texel to uvs. */
+ float2 extent_inv;
+ /** Is true if history is valid and can be sampled. Bypass history to resets accumulation. */
+ bool1 use_history;
+ /** Is true if combined buffer is valid and can be re-projected to reduce variance. */
+ bool1 use_reprojection;
+ /** Is true if accumulation of non-filtered passes is needed. */
+ bool1 has_data;
+ /** Is true if accumulation of filtered passes is needed. */
+ bool1 any_render_pass_1;
+ bool1 any_render_pass_2;
+ /** Controlled by user in lookdev mode or by render settings. */
+ float background_opacity;
+ float _pad0;
+ /** Output counts per type. */
+ int color_len, value_len;
+ /** Index in color_accum_img or value_accum_img of each pass. -1 if pass is not enabled. */
+ int mist_id;
+ int normal_id;
+ int vector_id;
+ int diffuse_light_id;
+ int diffuse_color_id;
+ int specular_light_id;
+ int specular_color_id;
+ int volume_light_id;
+ int emission_id;
+ int environment_id;
+ int shadow_id;
+ int ambient_occlusion_id;
+ /** Not indexed but still not -1 if enabled. */
+ int depth_id;
+ int combined_id;
+ /** Id of the render-pass to be displayed. -1 for combined. */
+ int display_id;
+ /** True if the render-pass to be displayed is from the value accum buffer. */
+ bool1 display_is_value;
+ /** True if we bypass the accumulation and directly output the accumulation buffer. */
+ bool1 display_only;
+ /** Start of AOVs and number of aov. */
+ int aov_color_id, aov_color_len;
+ int aov_value_id, aov_value_len;
+ /** Settings to render mist pass */
+ float mist_scale, mist_bias, mist_exponent;
+ /** Scene exposure used for better noise reduction. */
+ float exposure_scale;
+ /** Scaling factor for scaled resolution rendering. */
+ int scaling_factor;
+ /** Film pixel filter radius. */
+ float filter_radius;
+ /** Precomputed samples. First in the table is the closest one. The rest is unordered. */
+ int samples_len;
+ /** Sum of the weights of all samples in the sample table. */
+ float samples_weight_total;
+ FilmSample samples[FILM_PRECOMP_SAMPLE_MAX];
+};
+BLI_STATIC_ASSERT_ALIGN(FilmData, 16)
+
+static inline float film_filter_weight(float filter_radius, float sample_distance_sqr)
+{
+#if 1 /* Faster */
+ /* Gaussian fitted to Blackman-Harris. */
+ float r = sample_distance_sqr / (filter_radius * filter_radius);
+ const float sigma = 0.284;
+ const float fac = -0.5 / (sigma * sigma);
+ float weight = expf(fac * r);
+#else
+ /* Blackman-Harris filter. */
+ float r = M_2PI * saturate(0.5 + sqrtf(sample_distance_sqr) / (2.0 * filter_radius));
+ float weight = 0.35875 - 0.48829 * cosf(r) + 0.14128 * cosf(2.0 * r) - 0.01168 * cosf(3.0 * r);
+#endif
+ return weight;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Render passes
+ * \{ */
+
+enum eRenderPassLayerIndex : uint32_t {
+ RENDER_PASS_LAYER_DIFFUSE_LIGHT = 0u,
+ RENDER_PASS_LAYER_SPECULAR_LIGHT = 1u,
+};
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Arbitrary Output Variables
+ * \{ */
+
+/* Theoretical max is 128 as we are using texture array and VRAM usage.
+ * However, the output_aov() function perform a linear search inside all the hashes.
+ * If we find a way to avoid this we could bump this number up. */
+#define AOV_MAX 16
+
+/* NOTE(@fclem): Needs to be used in #StorageBuffer because of arrays of scalar. */
+struct AOVsInfoData {
+ uint hash_value[AOV_MAX];
+ uint hash_color[AOV_MAX];
+ /* Length of used data. */
+ uint color_len;
+ uint value_len;
+ /** Id of the AOV to be displayed (from the start of the AOV array). -1 for combined. */
+ int display_id;
+ /** True if the AOV to be displayed is from the value accum buffer. */
+ bool1 display_is_value;
+};
+BLI_STATIC_ASSERT_ALIGN(AOVsInfoData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name VelocityModule
* \{ */
@@ -122,6 +381,272 @@ BLI_STATIC_ASSERT_ALIGN(VelocityGeometryIndex, 16)
/** \} */
/* -------------------------------------------------------------------- */
+/** \name Motion Blur
+ * \{ */
+
+#define MOTION_BLUR_TILE_SIZE 32
+#define MOTION_BLUR_MAX_TILE 512 /* 16384 / MOTION_BLUR_TILE_SIZE */
+struct MotionBlurData {
+ /** As the name suggests. Used to avoid a division in the sampling. */
+ float2 target_size_inv;
+ /** Viewport motion scaling factor. Make blur relative to frame time not render time. */
+ float2 motion_scale;
+ /** Depth scaling factor. Avoid blurring background behind moving objects. */
+ float depth_scale;
+
+ float _pad0, _pad1, _pad2;
+};
+BLI_STATIC_ASSERT_ALIGN(MotionBlurData, 16)
+
+/* For some reasons some GLSL compilers do not like this struct.
+ * So we declare it as a uint array instead and do indexing ourselves. */
+#ifdef __cplusplus
+struct MotionBlurTileIndirection {
+ /**
+ * Stores indirection to the tile with the highest velocity covering each tile.
+ * This is stored using velocity in the MSB to be able to use atomicMax operations.
+ */
+ uint prev[MOTION_BLUR_MAX_TILE][MOTION_BLUR_MAX_TILE];
+ uint next[MOTION_BLUR_MAX_TILE][MOTION_BLUR_MAX_TILE];
+};
+BLI_STATIC_ASSERT_ALIGN(MotionBlurTileIndirection, 16)
+#endif
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Depth of field
+ * \{ */
+
+/* 5% error threshold. */
+#define DOF_FAST_GATHER_COC_ERROR 0.05
+#define DOF_GATHER_RING_COUNT 5
+#define DOF_DILATE_RING_COUNT 3
+
+struct DepthOfFieldData {
+ /** Size of the render targets for gather & scatter passes. */
+ int2 extent;
+ /** Size of a pixel in uv space (1.0 / extent). */
+ float2 texel_size;
+ /** Scale factor for anisotropic bokeh. */
+ float2 bokeh_anisotropic_scale;
+ float2 bokeh_anisotropic_scale_inv;
+ /* Correction factor to align main target pixels with the filtered mipmap chain texture. */
+ float2 gather_uv_fac;
+ /** Scatter parameters. */
+ float scatter_coc_threshold;
+ float scatter_color_threshold;
+ float scatter_neighbor_max_color;
+ int scatter_sprite_per_row;
+ /** Number of side the bokeh shape has. */
+ float bokeh_blades;
+ /** Rotation of the bokeh shape. */
+ float bokeh_rotation;
+ /** Multiplier and bias to apply to linear depth to Circle of confusion (CoC). */
+ float coc_mul, coc_bias;
+ /** Maximum absolute allowed Circle of confusion (CoC). Min of computed max and user max. */
+ float coc_abs_max;
+ /** Copy of camera type. */
+ eCameraType camera_type;
+ /** Weights of spatial filtering in stabilize pass. Not array to avoid alignment restriction. */
+ float4 filter_samples_weight;
+ float filter_center_weight;
+ /** Max number of sprite in the scatter pass for each ground. */
+ int scatter_max_rect;
+
+ int _pad0, _pad1;
+};
+BLI_STATIC_ASSERT_ALIGN(DepthOfFieldData, 16)
+
+struct ScatterRect {
+ /** Color and CoC of the 4 pixels the scatter sprite represents. */
+ float4 color_and_coc[4];
+ /** Rect center position in half pixel space. */
+ float2 offset;
+ /** Rect half extent in half pixel space. */
+ float2 half_extent;
+};
+BLI_STATIC_ASSERT_ALIGN(ScatterRect, 16)
+
+/** WORKAROUND(@fclem): This is because this file is included before common_math_lib.glsl. */
+#ifndef M_PI
+# define EEVEE_PI
+# define M_PI 3.14159265358979323846 /* pi */
+#endif
+
+static inline float coc_radius_from_camera_depth(DepthOfFieldData dof, float depth)
+{
+ depth = (dof.camera_type != CAMERA_ORTHO) ? 1.0f / depth : depth;
+ return dof.coc_mul * depth + dof.coc_bias;
+}
+
+static inline float regular_polygon_side_length(float sides_count)
+{
+ return 2.0f * sinf(M_PI / sides_count);
+}
+
+/* Returns intersection ratio between the radius edge at theta and the regular polygon edge.
+ * Start first corners at theta == 0. */
+static inline float circle_to_polygon_radius(float sides_count, float theta)
+{
+ /* From Graphics Gems from CryENGINE 3 (Siggraph 2013) by Tiago Sousa (slide
+ * 36). */
+ float side_angle = (2.0f * M_PI) / sides_count;
+ return cosf(side_angle * 0.5f) /
+ cosf(theta - side_angle * floorf((sides_count * theta + M_PI) / (2.0f * M_PI)));
+}
+
+/* Remap input angle to have homogenous spacing of points along a polygon edge.
+ * Expects theta to be in [0..2pi] range. */
+static inline float circle_to_polygon_angle(float sides_count, float theta)
+{
+ float side_angle = (2.0f * M_PI) / sides_count;
+ float halfside_angle = side_angle * 0.5f;
+ float side = floorf(theta / side_angle);
+ /* Length of segment from center to the middle of polygon side. */
+ float adjacent = circle_to_polygon_radius(sides_count, 0.0f);
+
+ /* This is the relative position of the sample on the polygon half side. */
+ float local_theta = theta - side * side_angle;
+ float ratio = (local_theta - halfside_angle) / halfside_angle;
+
+ float halfside_len = regular_polygon_side_length(sides_count) * 0.5f;
+ float opposite = ratio * halfside_len;
+
+ /* NOTE: atan(y_over_x) has output range [-M_PI_2..M_PI_2]. */
+ float final_local_theta = atanf(opposite / adjacent);
+
+ return side * side_angle + final_local_theta;
+}
+
+#ifdef EEVEE_PI
+# undef M_PI
+#endif
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Light Culling
+ * \{ */
+
+/* Number of items we can cull. Limited by how we store CullingZBin. */
+#define CULLING_MAX_ITEM 65536
+/* Fine grained subdivision in the Z direction. Limited by the LDS in z-binning compute shader. */
+#define CULLING_ZBIN_COUNT 4096
+/* Max tile map resolution per axes. */
+#define CULLING_TILE_RES 16
+
+struct LightCullingData {
+ /** Scale applied to tile pixel coordinates to get target UV coordinate. */
+ float2 tile_to_uv_fac;
+ /** Scale and bias applied to linear Z to get zbin. */
+ float zbin_scale;
+ float zbin_bias;
+ /** Valid item count in the source data array. */
+ uint items_count;
+ /** Items that are processed by the 2.5D culling. */
+ uint local_lights_len;
+ /** Items that are **NOT** processed by the 2.5D culling (i.e: Sun Lights). */
+ uint sun_lights_len;
+ /** Number of items that passes the first culling test. */
+ uint visible_count;
+ /** Extent of one square tile in pixels. */
+ float tile_size;
+ /** Number of tiles on the X/Y axis. */
+ uint tile_x_len;
+ uint tile_y_len;
+ /** Number of word per tile. Depends on the maximum number of lights. */
+ uint tile_word_len;
+};
+BLI_STATIC_ASSERT_ALIGN(LightCullingData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Lights
+ * \{ */
+
+#define LIGHT_NO_SHADOW -1
+
+enum eLightType : uint32_t {
+ LIGHT_SUN = 0u,
+ LIGHT_POINT = 1u,
+ LIGHT_SPOT = 2u,
+ LIGHT_RECT = 3u,
+ LIGHT_ELLIPSE = 4u
+};
+
+static inline bool is_area_light(eLightType type)
+{
+ return type >= LIGHT_RECT;
+}
+
+struct LightData {
+ /** Normalized object matrix. Last column contains data accessible using the following macros. */
+ float4x4 object_mat;
+ /** Packed data in the last column of the object_mat. */
+#define _area_size_x object_mat[0][3]
+#define _area_size_y object_mat[1][3]
+#define _radius _area_size_x
+#define _spot_mul object_mat[2][3]
+#define _spot_bias object_mat[3][3]
+ /** Aliases for axes. */
+#ifndef USE_GPU_SHADER_CREATE_INFO
+# define _right object_mat[0]
+# define _up object_mat[1]
+# define _back object_mat[2]
+# define _position object_mat[3]
+#else
+# define _right object_mat[0].xyz
+# define _up object_mat[1].xyz
+# define _back object_mat[2].xyz
+# define _position object_mat[3].xyz
+#endif
+ /** Influence radius (inverted and squared) adjusted for Surface / Volume power. */
+ float influence_radius_invsqr_surface;
+ float influence_radius_invsqr_volume;
+ /** Maximum influence radius. Used for culling. */
+ float influence_radius_max;
+ /** Index of the shadow struct on CPU. -1 means no shadow. */
+ int shadow_id;
+ /** NOTE: It is ok to use float3 here. A float is declared right after it.
+ * float3 is also aligned to 16 bytes. */
+ float3 color;
+ /** Power depending on shader type. */
+ float diffuse_power;
+ float specular_power;
+ float volume_power;
+ float transmit_power;
+ /** Special radius factor for point lighting. */
+ float radius_squared;
+ /** Light Type. */
+ eLightType type;
+ /** Spot angle tangent. */
+ float spot_tan;
+ /** Spot size. Aligned to size of float2. */
+ float2 spot_size_inv;
+ /** Associated shadow data. Only valid if shadow_id is not LIGHT_NO_SHADOW. */
+ // ShadowData shadow_data;
+};
+BLI_STATIC_ASSERT_ALIGN(LightData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Hierarchical-Z Buffer
+ * \{ */
+
+struct HiZData {
+ /** Scale factor to remove HiZBuffer padding. */
+ float2 uv_scale;
+
+ float2 _pad0;
+};
+BLI_STATIC_ASSERT_ALIGN(HiZData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name Ray-Tracing
* \{ */
@@ -142,6 +667,34 @@ enum eClosureBits : uint32_t {
/** \} */
/* -------------------------------------------------------------------- */
+/** \name Subsurface
+ * \{ */
+
+#define SSS_SAMPLE_MAX 64
+#define SSS_BURLEY_TRUNCATE 16.0
+#define SSS_BURLEY_TRUNCATE_CDF 0.9963790093708328
+#define SSS_TRANSMIT_LUT_SIZE 64.0
+#define SSS_TRANSMIT_LUT_RADIUS 1.218
+#define SSS_TRANSMIT_LUT_SCALE ((SSS_TRANSMIT_LUT_SIZE - 1.0) / float(SSS_TRANSMIT_LUT_SIZE))
+#define SSS_TRANSMIT_LUT_BIAS (0.5 / float(SSS_TRANSMIT_LUT_SIZE))
+#define SSS_TRANSMIT_LUT_STEP_RES 64.0
+
+struct SubsurfaceData {
+ /** xy: 2D sample position [-1..1], zw: sample_bounds. */
+ /* NOTE(fclem) Using float4 for alignment. */
+ float4 samples[SSS_SAMPLE_MAX];
+ /** Sample index after which samples are not randomly rotated anymore. */
+ int jitter_threshold;
+ /** Number of samples precomputed in the set. */
+ int sample_len;
+ int _pad0;
+ int _pad1;
+};
+BLI_STATIC_ASSERT_ALIGN(SubsurfaceData, 16)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name Utility Texture
* \{ */
@@ -178,10 +731,25 @@ float4 utility_tx_sample(sampler2DArray util_tx, float2 uv, float layer)
#ifdef __cplusplus
+using AOVsInfoDataBuf = draw::StorageBuffer<AOVsInfoData>;
using CameraDataBuf = draw::UniformBuffer<CameraData>;
+using DepthOfFieldDataBuf = draw::UniformBuffer<DepthOfFieldData>;
+using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer<ScatterRect, 16, true>;
+using DrawIndirectBuf = draw::StorageBuffer<DrawCommand, true>;
+using FilmDataBuf = draw::UniformBuffer<FilmData>;
+using HiZDataBuf = draw::UniformBuffer<HiZData>;
+using LightCullingDataBuf = draw::StorageBuffer<LightCullingData>;
+using LightCullingKeyBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>;
+using LightCullingTileBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>;
+using LightCullingZbinBuf = draw::StorageArrayBuffer<uint, CULLING_ZBIN_COUNT, true>;
+using LightCullingZdistBuf = draw::StorageArrayBuffer<float, LIGHT_CHUNK, true>;
+using LightDataBuf = draw::StorageArrayBuffer<LightData, LIGHT_CHUNK>;
+using MotionBlurDataBuf = draw::UniformBuffer<MotionBlurData>;
+using MotionBlurTileIndirectionBuf = draw::StorageBuffer<MotionBlurTileIndirection, true>;
+using SamplingDataBuf = draw::StorageBuffer<SamplingData>;
+using VelocityGeometryBuf = draw::StorageArrayBuffer<float4, 16, true>;
using VelocityIndexBuf = draw::StorageArrayBuffer<VelocityIndex, 16>;
using VelocityObjectBuf = draw::StorageArrayBuffer<float4x4, 16>;
-using VelocityGeometryBuf = draw::StorageArrayBuffer<float4, 16, true>;
} // namespace blender::eevee
#endif
diff --git a/source/blender/draw/engines/eevee_next/eevee_sync.cc b/source/blender/draw/engines/eevee_next/eevee_sync.cc
index 42af251d770..5f8b87c24b9 100644
--- a/source/blender/draw/engines/eevee_next/eevee_sync.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_sync.cc
@@ -47,7 +47,7 @@ ObjectHandle &SyncModule::sync_object(Object *ob)
const int recalc_flags = ID_RECALC_COPY_ON_WRITE | ID_RECALC_TRANSFORM | ID_RECALC_SHADING |
ID_RECALC_GEOMETRY;
if ((eevee_dd.recalc & recalc_flags) != 0) {
- // inst_.sampling.reset();
+ inst_.sampling.reset();
UNUSED_VARS(inst_);
}
@@ -63,7 +63,7 @@ WorldHandle &SyncModule::sync_world(::World *world)
const int recalc_flags = ID_RECALC_ALL;
if ((eevee_dd.recalc & recalc_flags) != 0) {
- // inst_.sampling.reset();
+ inst_.sampling.reset();
}
return eevee_dd;
}
@@ -74,25 +74,12 @@ WorldHandle &SyncModule::sync_world(::World *world)
/** \name Common
* \{ */
-static inline void shgroup_geometry_call(DRWShadingGroup *grp,
- Object *ob,
- GPUBatch *geom,
- int v_first = -1,
- int v_count = -1,
- bool use_instancing = false)
+static inline void geometry_call(PassMain::Sub *sub_pass,
+ GPUBatch *geom,
+ ResourceHandle resource_handle)
{
- if (grp == nullptr) {
- return;
- }
-
- if (v_first == -1) {
- DRW_shgroup_call(grp, geom, ob);
- }
- else if (use_instancing) {
- DRW_shgroup_call_instance_range(grp, ob, geom, v_first, v_count);
- }
- else {
- DRW_shgroup_call_range(grp, ob, geom, v_first, v_count);
+ if (sub_pass != nullptr) {
+ sub_pass->draw(geom, resource_handle);
}
}
@@ -102,9 +89,13 @@ static inline void shgroup_geometry_call(DRWShadingGroup *grp,
/** \name Mesh
* \{ */
-void SyncModule::sync_mesh(Object *ob, ObjectHandle &ob_handle)
+void SyncModule::sync_mesh(Object *ob,
+ ObjectHandle &ob_handle,
+ ResourceHandle res_handle,
+ const ObjectRef &ob_ref)
{
- bool has_motion = inst_.velocity.step_object_sync(ob, ob_handle.object_key, ob_handle.recalc);
+ bool has_motion = inst_.velocity.step_object_sync(
+ ob, ob_handle.object_key, res_handle, ob_handle.recalc);
MaterialArray &material_array = inst_.materials.material_array_get(ob, has_motion);
@@ -123,14 +114,16 @@ void SyncModule::sync_mesh(Object *ob, ObjectHandle &ob_handle)
continue;
}
Material *material = material_array.materials[i];
- shgroup_geometry_call(material->shading.shgrp, ob, geom);
- shgroup_geometry_call(material->prepass.shgrp, ob, geom);
- shgroup_geometry_call(material->shadow.shgrp, ob, geom);
+ geometry_call(material->shading.sub_pass, geom, res_handle);
+ geometry_call(material->prepass.sub_pass, geom, res_handle);
+ geometry_call(material->shadow.sub_pass, geom, res_handle);
- is_shadow_caster = is_shadow_caster || material->shadow.shgrp != nullptr;
+ is_shadow_caster = is_shadow_caster || material->shadow.sub_pass != nullptr;
is_alpha_blend = is_alpha_blend || material->is_alpha_blend_transparent;
}
+ inst_.manager->extract_object_attributes(res_handle, ob_ref, material_array.gpu_materials);
+
// shadows.sync_object(ob, ob_handle, is_shadow_caster, is_alpha_blend);
}
@@ -155,11 +148,13 @@ struct gpIterData {
int vcount = 0;
bool instancing = false;
- gpIterData(Instance &inst_, Object *ob_, ObjectHandle &ob_handle)
+ gpIterData(Instance &inst_, Object *ob_, ObjectHandle &ob_handle, ResourceHandle resource_handle)
: inst(inst_),
ob(ob_),
material_array(inst_.materials.material_array_get(
- ob_, inst_.velocity.step_object_sync(ob, ob_handle.object_key, ob_handle.recalc)))
+ ob_,
+ inst_.velocity.step_object_sync(
+ ob, ob_handle.object_key, resource_handle, ob_handle.recalc)))
{
cfra = DEG_get_ctime(inst.depsgraph);
};
@@ -167,26 +162,28 @@ struct gpIterData {
static void gpencil_drawcall_flush(gpIterData &iter)
{
+#if 0 /* Incompatible with new draw manager. */
if (iter.geom != nullptr) {
- shgroup_geometry_call(iter.material->shading.shgrp,
+ geometry_call(iter.material->shading.sub_pass,
iter.ob,
iter.geom,
iter.vfirst,
iter.vcount,
iter.instancing);
- shgroup_geometry_call(iter.material->prepass.shgrp,
+ geometry_call(iter.material->prepass.sub_pass,
iter.ob,
iter.geom,
iter.vfirst,
iter.vcount,
iter.instancing);
- shgroup_geometry_call(iter.material->shadow.shgrp,
+ geometry_call(iter.material->shadow.sub_pass,
iter.ob,
iter.geom,
iter.vfirst,
iter.vcount,
iter.instancing);
}
+#endif
iter.geom = nullptr;
iter.vfirst = -1;
iter.vcount = 0;
@@ -250,18 +247,22 @@ static void gpencil_stroke_sync(bGPDlayer *UNUSED(gpl),
}
}
-void SyncModule::sync_gpencil(Object *ob, ObjectHandle &ob_handle)
+void SyncModule::sync_gpencil(Object *ob, ObjectHandle &ob_handle, ResourceHandle res_handle)
{
/* TODO(fclem): Waiting for a user option to use the render engine instead of gpencil engine. */
- return;
+ if (true) {
+ inst_.gpencil_engine_enabled = true;
+ return;
+ }
+ UNUSED_VARS(res_handle);
- gpIterData iter(inst_, ob, ob_handle);
+ gpIterData iter(inst_, ob, ob_handle, res_handle);
BKE_gpencil_visible_stroke_iter((bGPdata *)ob->data, nullptr, gpencil_stroke_sync, &iter);
gpencil_drawcall_flush(iter);
- // bool is_caster = true; /* TODO material.shadow.shgrp. */
+ // bool is_caster = true; /* TODO material.shadow.sub_pass. */
// bool is_alpha_blend = true; /* TODO material.is_alpha_blend. */
// shadows.sync_object(ob, ob_handle, is_caster, is_alpha_blend);
}
@@ -277,14 +278,24 @@ static void shgroup_curves_call(MaterialPass &matpass,
ParticleSystem *part_sys = nullptr,
ModifierData *modifier_data = nullptr)
{
- if (matpass.shgrp == nullptr) {
+ UNUSED_VARS(ob, modifier_data);
+ if (matpass.sub_pass == nullptr) {
return;
}
- DRW_shgroup_hair_create_sub(ob, part_sys, modifier_data, matpass.shgrp, matpass.gpumat);
+ if (part_sys != nullptr) {
+ // DRW_shgroup_hair_create_sub(ob, part_sys, modifier_data, matpass.sub_pass, matpass.gpumat);
+ }
+ else {
+ // DRW_shgroup_curves_create_sub(ob, matpass.sub_pass, matpass.gpumat);
+ }
}
-void SyncModule::sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData *modifier_data)
+void SyncModule::sync_curves(Object *ob,
+ ObjectHandle &ob_handle,
+ ResourceHandle res_handle,
+ ModifierData *modifier_data)
{
+ UNUSED_VARS(res_handle);
int mat_nr = CURVES_MATERIAL_NR;
ParticleSystem *part_sys = nullptr;
@@ -312,7 +323,7 @@ void SyncModule::sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData *
/* TODO(fclem) Hair velocity. */
// shading_passes.velocity.gpencil_add(ob, ob_handle);
- // bool is_caster = material.shadow.shgrp != nullptr;
+ // bool is_caster = material.shadow.sub_pass != nullptr;
// bool is_alpha_blend = material.is_alpha_blend_transparent;
// shadows.sync_object(ob, ob_handle, is_caster, is_alpha_blend);
}
diff --git a/source/blender/draw/engines/eevee_next/eevee_sync.hh b/source/blender/draw/engines/eevee_next/eevee_sync.hh
index bd8147a2882..ab883ce44c2 100644
--- a/source/blender/draw/engines/eevee_next/eevee_sync.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_sync.hh
@@ -150,9 +150,15 @@ class SyncModule {
ObjectHandle &sync_object(Object *ob);
WorldHandle &sync_world(::World *world);
- void sync_mesh(Object *ob, ObjectHandle &ob_handle);
- void sync_gpencil(Object *ob, ObjectHandle &ob_handle);
- void sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData *modifier_data = nullptr);
+ void sync_mesh(Object *ob,
+ ObjectHandle &ob_handle,
+ ResourceHandle res_handle,
+ const ObjectRef &ob_ref);
+ void sync_gpencil(Object *ob, ObjectHandle &ob_handle, ResourceHandle res_handle);
+ void sync_curves(Object *ob,
+ ObjectHandle &ob_handle,
+ ResourceHandle res_handle,
+ ModifierData *modifier_data = nullptr);
};
/** \} */
diff --git a/source/blender/draw/engines/eevee_next/eevee_velocity.cc b/source/blender/draw/engines/eevee_next/eevee_velocity.cc
index ceae9df44d0..7af311a8ccc 100644
--- a/source/blender/draw/engines/eevee_next/eevee_velocity.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_velocity.cc
@@ -9,10 +9,6 @@
* temporal re-projection or motion blur.
*
* It is the module that tracks the objects between frames updates.
- *
- * #VelocityModule contains all motion steps data and logic.
- * #VelocityPass contains the resolve pass for static geometry.
- * #VelocityView is a per view instance that contain the velocity buffer.
*/
#include "BKE_duplilist.h"
@@ -36,16 +32,21 @@ namespace blender::eevee {
void VelocityModule::init()
{
-#if 0 /* TODO renderpasses */
- if (inst_.render && (inst_.render_passes.vector != nullptr)) {
- /* No motion blur and the vector pass was requested. Do the step sync here. */
+ if (inst_.render && (inst_.film.enabled_passes_get() & EEVEE_RENDER_PASS_VECTOR) != 0) {
+ /* No motion blur and the vector pass was requested. Do the steps sync here. */
const Scene *scene = inst_.scene;
float initial_time = scene->r.cfra + scene->r.subframe;
step_sync(STEP_PREVIOUS, initial_time - 1.0f);
step_sync(STEP_NEXT, initial_time + 1.0f);
+
inst_.set_time(initial_time);
+ step_ = STEP_CURRENT;
+ /* Let the main sync loop handle the current step. */
}
-#endif
+
+ /* For viewport, only previous motion is supported.
+ * Still bind previous step to avoid undefined behavior. */
+ next_step_ = inst_.is_viewport() ? STEP_PREVIOUS : STEP_NEXT;
}
static void step_object_sync_render(void *velocity,
@@ -54,7 +55,9 @@ static void step_object_sync_render(void *velocity,
Depsgraph *UNUSED(depsgraph))
{
ObjectKey object_key(ob);
- reinterpret_cast<VelocityModule *>(velocity)->step_object_sync(ob, object_key);
+ /* NOTE: Dummy resource handle since this will not be used for drawing. */
+ ResourceHandle resource_handle(0);
+ reinterpret_cast<VelocityModule *>(velocity)->step_object_sync(ob, object_key, resource_handle);
}
void VelocityModule::step_sync(eVelocityStep step, float time)
@@ -70,10 +73,18 @@ void VelocityModule::step_camera_sync()
{
inst_.camera.sync();
*camera_steps[step_] = inst_.camera.data_get();
+ step_time[step_] = inst_.scene->r.cfra + inst_.scene->r.subframe;
+ /* Fix undefined camera steps when rendering is starting. */
+ if ((step_ == STEP_CURRENT) && (camera_steps[STEP_PREVIOUS]->initialized == false)) {
+ *camera_steps[STEP_PREVIOUS] = *static_cast<CameraData *>(camera_steps[step_]);
+ camera_steps[STEP_PREVIOUS]->initialized = true;
+ step_time[STEP_PREVIOUS] = step_time[step_];
+ }
}
bool VelocityModule::step_object_sync(Object *ob,
ObjectKey &object_key,
+ ResourceHandle resource_handle,
int /*IDRecalcFlag*/ recalc)
{
bool has_motion = object_has_velocity(ob) || (recalc & ID_RECALC_TRANSFORM);
@@ -85,8 +96,6 @@ bool VelocityModule::step_object_sync(Object *ob,
return false;
}
- uint32_t resource_id = DRW_object_resource_id_get(ob);
-
/* Object motion. */
/* FIXME(fclem) As we are using original objects pointers, there is a chance the previous
* object key matches a totally different object if the scene was changed by user or python
@@ -95,7 +104,7 @@ bool VelocityModule::step_object_sync(Object *ob,
* We live with that until we have a correct way of identifying new objects. */
VelocityObjectData &vel = velocity_map.lookup_or_add_default(object_key);
vel.obj.ofs[step_] = object_steps_usage[step_]++;
- vel.obj.resource_id = resource_id;
+ vel.obj.resource_id = resource_handle.resource_index();
vel.id = (ID *)ob->data;
object_steps[step_]->get_or_resize(vel.obj.ofs[step_]) = ob->obmat;
if (step_ == STEP_CURRENT) {
@@ -162,7 +171,7 @@ bool VelocityModule::step_object_sync(Object *ob,
}
/* TODO(@fclem): Reset sampling here? Should ultimately be covered by depsgraph update tags. */
- // inst_.sampling.reset();
+ inst_.sampling.reset();
return true;
}
@@ -213,6 +222,7 @@ void VelocityModule::step_swap()
SWAP(VelocityObjectBuf *, object_steps[step_a], object_steps[step_b]);
SWAP(VelocityGeometryBuf *, geometry_steps[step_a], geometry_steps[step_b]);
SWAP(CameraDataBuf *, camera_steps[step_a], camera_steps[step_b]);
+ SWAP(float, step_time[step_a], step_time[step_b]);
for (VelocityObjectData &vel : velocity_map.values()) {
vel.obj.ofs[step_a] = vel.obj.ofs[step_b];
@@ -239,10 +249,7 @@ void VelocityModule::step_swap()
void VelocityModule::begin_sync()
{
- if (inst_.is_viewport()) {
- /* Viewport always evaluate current step. */
- step_ = STEP_CURRENT;
- }
+ step_ = STEP_CURRENT;
step_camera_sync();
object_steps_usage[step_] = 0;
}
@@ -255,7 +262,7 @@ void VelocityModule::end_sync()
uint32_t max_resource_id_ = 0u;
for (Map<ObjectKey, VelocityObjectData>::Item item : velocity_map.items()) {
- if (item.value.obj.resource_id == (uint)-1) {
+ if (item.value.obj.resource_id == (uint32_t)-1) {
deleted_obj.append(item.key);
}
else {
@@ -264,14 +271,18 @@ void VelocityModule::end_sync()
}
if (deleted_obj.size() > 0) {
- // inst_.sampling.reset();
+ inst_.sampling.reset();
+ }
+
+ if (inst_.is_viewport() && camera_has_motion()) {
+ inst_.sampling.reset();
}
- for (auto key : deleted_obj) {
+ for (auto &key : deleted_obj) {
velocity_map.remove(key);
}
- indirection_buf.resize(power_of_2_max_u(max_resource_id_ + 1));
+ indirection_buf.resize(ceil_to_multiple_u(max_resource_id_, 128));
/* Avoid uploading more data to the GPU as well as an extra level of
* indirection on the GPU by copying back offsets the to VelocityIndex. */
@@ -300,19 +311,6 @@ void VelocityModule::end_sync()
camera_steps[STEP_CURRENT]->push_update();
camera_steps[STEP_NEXT]->push_update();
indirection_buf.push_update();
-
- {
- resolve_ps_ = DRW_pass_create("Velocity.Resolve", (DRWState)0);
- GPUShader *sh = inst_.shaders.static_shader_get(VELOCITY_RESOLVE);
- DRWShadingGroup *grp = DRW_shgroup_create(sh, resolve_ps_);
- DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &input_depth_tx_);
- DRW_shgroup_uniform_image_ref(grp, "velocity_view_img", &velocity_view_tx_);
- DRW_shgroup_uniform_image_ref(grp, "velocity_camera_img", &velocity_camera_tx_);
- DRW_shgroup_uniform_block(grp, "camera_prev", *camera_steps[STEP_PREVIOUS]);
- DRW_shgroup_uniform_block(grp, "camera_curr", *camera_steps[STEP_CURRENT]);
- DRW_shgroup_uniform_block(grp, "camera_next", *camera_steps[STEP_NEXT]);
- DRW_shgroup_call_compute_ref(grp, resolve_dispatch_size_);
- }
}
bool VelocityModule::object_has_velocity(const Object *ob)
@@ -359,60 +357,30 @@ void VelocityModule::bind_resources(DRWShadingGroup *grp)
DRW_shgroup_storage_block_ref(grp, "velocity_indirection_buf", &indirection_buf);
}
-/* Resolve pass for static geometry and to camera space projection. */
-void VelocityModule::resolve_camera_motion(GPUTexture *depth_tx,
- GPUTexture *velocity_view_tx,
- GPUTexture *velocity_camera_tx)
+bool VelocityModule::camera_has_motion() const
{
- input_depth_tx_ = depth_tx;
- velocity_view_tx_ = velocity_view_tx;
- velocity_camera_tx_ = velocity_camera_tx;
-
- resolve_dispatch_size_.x = divide_ceil_u(GPU_texture_width(depth_tx), 8);
- resolve_dispatch_size_.y = divide_ceil_u(GPU_texture_height(depth_tx), 8);
-
- DRW_draw_pass(resolve_ps_);
-}
-
-/** \} */
-
-/* -------------------------------------------------------------------- */
-/** \name Velocity View
- * \{ */
-
-void VelocityView::sync()
-{
- /* TODO: Remove. */
- velocity_view_tx_.sync();
- velocity_camera_tx_.sync();
-}
-
-void VelocityView::acquire(int2 extent)
-{
- /* WORKAROUND: View name should be unique and static.
- * With this, we can reuse the same texture across views. */
- DrawEngineType *owner = (DrawEngineType *)view_name_.c_str();
-
- /* Only RG16F when only doing only reprojection or motion blur. */
- eGPUTextureFormat format = inst_.is_viewport() ? GPU_RG16F : GPU_RGBA16F;
- velocity_view_tx_.acquire(extent, format, owner);
- if (false /* TODO(fclem): Panoramic camera. */) {
- velocity_camera_tx_.acquire(extent, format, owner);
- }
- else {
- velocity_camera_tx_.acquire(int2(1), format, owner);
+ /* Only valid after sync. */
+ if (inst_.is_viewport()) {
+ /* Viewport has no next step. */
+ return *camera_steps[STEP_PREVIOUS] != *camera_steps[STEP_CURRENT];
}
+ return *camera_steps[STEP_PREVIOUS] != *camera_steps[STEP_CURRENT] &&
+ *camera_steps[STEP_NEXT] != *camera_steps[STEP_CURRENT];
}
-void VelocityView::resolve(GPUTexture *depth_tx)
+bool VelocityModule::camera_changed_projection() const
{
- inst_.velocity.resolve_camera_motion(depth_tx, velocity_view_tx_, velocity_camera_tx_);
+ /* Only valid after sync. */
+ if (inst_.is_viewport()) {
+ return camera_steps[STEP_PREVIOUS]->type != camera_steps[STEP_CURRENT]->type;
+ }
+ /* Cannot happen in render mode since we set the type during the init phase. */
+ return false;
}
-void VelocityView::release()
+float VelocityModule::step_time_delta_get(eVelocityStep start, eVelocityStep end) const
{
- velocity_view_tx_.release();
- velocity_camera_tx_.release();
+ return step_time[end] - step_time[start];
}
/** \} */
diff --git a/source/blender/draw/engines/eevee_next/eevee_velocity.hh b/source/blender/draw/engines/eevee_next/eevee_velocity.hh
index e2606c061e1..6f18b05d476 100644
--- a/source/blender/draw/engines/eevee_next/eevee_velocity.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_velocity.hh
@@ -27,8 +27,6 @@ namespace blender::eevee {
/** Container for scene velocity data. */
class VelocityModule {
- friend class VelocityView;
-
public:
struct VelocityObjectData : public VelocityIndex {
/** ID to retrieve the corresponding #VelocityGeometryData after copy. */
@@ -58,6 +56,8 @@ class VelocityModule {
int3 object_steps_usage = int3(0);
/** Buffer of all #VelocityIndex used in this frame. Indexed by draw manager resource id. */
VelocityIndexBuf indirection_buf;
+ /** Frame time at which each steps were evaluated. */
+ float3 step_time;
/**
* Copies of camera data. One for previous and one for next time step.
@@ -67,16 +67,10 @@ class VelocityModule {
private:
Instance &inst_;
+ /** Step being synced. */
eVelocityStep step_ = STEP_CURRENT;
-
- DRWPass *resolve_ps_ = nullptr;
-
- /** Reference only. Not owned. */
- GPUTexture *input_depth_tx_;
- GPUTexture *velocity_view_tx_;
- GPUTexture *velocity_camera_tx_;
-
- int3 resolve_dispatch_size_ = int3(1, 1, 1);
+ /** Step referenced as next step. */
+ eVelocityStep next_step_ = STEP_NEXT;
public:
VelocityModule(Instance &inst) : inst_(inst)
@@ -111,7 +105,10 @@ class VelocityModule {
void step_sync(eVelocityStep step, float time);
/* Gather motion data. Returns true if the object **can** have motion. */
- bool step_object_sync(Object *ob, ObjectKey &object_key, int recalc = 0);
+ bool step_object_sync(Object *ob,
+ ObjectKey &object_key,
+ ResourceHandle resource_handle,
+ int recalc = 0);
/* Moves next frame data to previous frame data. Nullify next frame data. */
void step_swap();
@@ -121,56 +118,29 @@ class VelocityModule {
void bind_resources(DRWShadingGroup *grp);
- private:
- bool object_has_velocity(const Object *ob);
- bool object_is_deform(const Object *ob);
-
- void resolve_camera_motion(GPUTexture *depth_tx,
- GPUTexture *velocity_view_tx,
- GPUTexture *velocity_camera_tx);
-};
+ template<typename T> void bind_resources(draw::detail::Pass<T> *pass)
+ {
+ /* Storage Buf. */
+ pass->bind_ssbo(VELOCITY_OBJ_PREV_BUF_SLOT, &(*object_steps[STEP_PREVIOUS]));
+ pass->bind_ssbo(VELOCITY_OBJ_NEXT_BUF_SLOT, &(*object_steps[next_step_]));
+ pass->bind_ssbo(VELOCITY_GEO_PREV_BUF_SLOT, &(*geometry_steps[STEP_PREVIOUS]));
+ pass->bind_ssbo(VELOCITY_GEO_NEXT_BUF_SLOT, &(*geometry_steps[next_step_]));
+ pass->bind_ssbo(VELOCITY_INDIRECTION_BUF_SLOT, &indirection_buf);
+ /* Uniform Buf. */
+ pass->bind_ubo(VELOCITY_CAMERA_PREV_BUF, &(*camera_steps[STEP_PREVIOUS]));
+ pass->bind_ubo(VELOCITY_CAMERA_CURR_BUF, &(*camera_steps[STEP_CURRENT]));
+ pass->bind_ubo(VELOCITY_CAMERA_NEXT_BUF, &(*camera_steps[next_step_]));
+ }
-/** \} */
+ bool camera_has_motion() const;
+ bool camera_changed_projection() const;
-/* -------------------------------------------------------------------- */
-/** \name Velocity
- *
- * \{ */
+ /* Returns frame time difference between two steps. */
+ float step_time_delta_get(eVelocityStep start, eVelocityStep end) const;
-/**
- * Per view module.
- */
-class VelocityView {
private:
- Instance &inst_;
-
- StringRefNull view_name_;
-
- TextureFromPool velocity_camera_tx_ = {"velocity_camera_tx_"};
- TextureFromPool velocity_view_tx_ = {"velocity_view_tx_"};
-
- public:
- VelocityView(Instance &inst, const char *name) : inst_(inst), view_name_(name){};
- ~VelocityView(){};
-
- void sync();
-
- void acquire(int2 extent);
- void release();
-
- void resolve(GPUTexture *depth_tx);
-
- /**
- * Getters
- **/
- GPUTexture *view_vectors_get() const
- {
- return velocity_view_tx_;
- }
- GPUTexture *camera_vectors_get() const
- {
- return (velocity_camera_tx_.is_valid()) ? velocity_camera_tx_ : velocity_view_tx_;
- }
+ bool object_has_velocity(const Object *ob);
+ bool object_is_deform(const Object *ob);
};
/** \} */
diff --git a/source/blender/draw/engines/eevee_next/eevee_view.cc b/source/blender/draw/engines/eevee_next/eevee_view.cc
index e21342c5ef6..48951c2bae7 100644
--- a/source/blender/draw/engines/eevee_next/eevee_view.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_view.cc
@@ -34,17 +34,19 @@ void ShadingView::init()
// mb_.init();
}
-void ShadingView::sync(int2 render_extent_)
+void ShadingView::sync()
{
+ int2 render_extent = inst_.film.render_extent_get();
+
if (false /* inst_.camera.is_panoramic() */) {
- int64_t render_pixel_count = render_extent_.x * (int64_t)render_extent_.y;
+ int64_t render_pixel_count = render_extent.x * (int64_t)render_extent.y;
/* Divide pixel count between the 6 views. Rendering to a square target. */
extent_[0] = extent_[1] = ceilf(sqrtf(1 + (render_pixel_count / 6)));
/* TODO(@fclem): Clip unused views here. */
is_enabled_ = true;
}
else {
- extent_ = render_extent_;
+ extent_ = render_extent;
/* Only enable -Z view. */
is_enabled_ = (StringRefNull(name_) == "negZ_view");
}
@@ -54,47 +56,34 @@ void ShadingView::sync(int2 render_extent_)
}
/* Create views. */
- // const CameraData &data = inst_.camera.data_get();
+ const CameraData &cam = inst_.camera.data_get();
float4x4 viewmat, winmat;
const float(*viewmat_p)[4] = viewmat.ptr(), (*winmat_p)[4] = winmat.ptr();
-#if 0
if (false /* inst_.camera.is_panoramic() */) {
/* TODO(@fclem) Over-scans. */
/* For now a mandatory 5% over-scan for DoF. */
- float side = data.clip_near * 1.05f;
- float near = data.clip_near;
- float far = data.clip_far;
+ float side = cam.clip_near * 1.05f;
+ float near = cam.clip_near;
+ float far = cam.clip_far;
perspective_m4(winmat.ptr(), -side, side, -side, side, near, far);
- viewmat = face_matrix_ * data.viewmat;
+ viewmat = face_matrix_ * cam.viewmat;
}
else {
- viewmat_p = data.viewmat.ptr();
- winmat_p = data.winmat.ptr();
+ viewmat_p = cam.viewmat.ptr();
+ winmat_p = cam.winmat.ptr();
}
-#else
- /* TEMP */
- UNUSED_VARS(face_matrix_);
- const DRWView *default_view = DRW_view_default_get();
- DRW_view_winmat_get(default_view, winmat.ptr(), false);
- DRW_view_viewmat_get(default_view, viewmat.ptr(), false);
-#endif
main_view_ = DRW_view_create(viewmat_p, winmat_p, nullptr, nullptr, nullptr);
sub_view_ = DRW_view_create_sub(main_view_, viewmat_p, winmat_p);
render_view_ = DRW_view_create_sub(main_view_, viewmat_p, winmat_p);
// dof_.sync(winmat_p, extent_);
- // mb_.sync(extent_);
- velocity_.sync();
// rt_buffer_opaque_.sync(extent_);
// rt_buffer_refract_.sync(extent_);
// inst_.hiz_back.view_sync(extent_);
// inst_.hiz_front.view_sync(extent_);
// inst_.gbuffer.view_sync(extent_);
-
- combined_tx_.sync();
- postfx_tx_.sync();
}
void ShadingView::render()
@@ -103,29 +92,25 @@ void ShadingView::render()
return;
}
- /* Query temp textures and create framebuffers. */
- /* HACK: View name should be unique and static.
- * With this, we can reuse the same texture across views. */
- DrawEngineType *owner = (DrawEngineType *)name_;
-
- DefaultTextureList *dtxl = DRW_viewport_texture_list_get();
-
- depth_tx_.ensure_2d(GPU_DEPTH24_STENCIL8, extent_);
- combined_tx_.acquire(extent_, GPU_RGBA16F, owner);
- velocity_.acquire(extent_);
- // combined_fb_.ensure(GPU_ATTACHMENT_TEXTURE(depth_tx_), GPU_ATTACHMENT_TEXTURE(combined_tx_));
- // prepass_fb_.ensure(GPU_ATTACHMENT_TEXTURE(depth_tx_),
- // GPU_ATTACHMENT_TEXTURE(velocity_.view_vectors_get()));
- combined_fb_.ensure(GPU_ATTACHMENT_TEXTURE(dtxl->depth), GPU_ATTACHMENT_TEXTURE(dtxl->color));
- prepass_fb_.ensure(GPU_ATTACHMENT_TEXTURE(dtxl->depth),
- GPU_ATTACHMENT_TEXTURE(velocity_.view_vectors_get()));
+ /* Query temp textures and create frame-buffers. */
+ RenderBuffers &rbufs = inst_.render_buffers;
+ rbufs.acquire(extent_);
+ combined_fb_.ensure(GPU_ATTACHMENT_TEXTURE(rbufs.depth_tx),
+ GPU_ATTACHMENT_TEXTURE(rbufs.combined_tx));
+ prepass_fb_.ensure(GPU_ATTACHMENT_TEXTURE(rbufs.depth_tx),
+ GPU_ATTACHMENT_TEXTURE(rbufs.vector_tx));
update_view();
+ inst_.hiz_buffer.set_dirty();
+
DRW_stats_group_start(name_);
- // DRW_view_set_active(render_view_);
+ DRW_view_set_active(render_view_);
+
+ /* If camera has any motion, compute motion vector in the film pass. Otherwise, we avoid float
+ * precision issue by setting the motion of all static geometry to 0. */
+ float4 clear_velocity = float4(inst_.velocity.camera_has_motion() ? VELOCITY_INVALID : 0.0f);
- float4 clear_velocity(VELOCITY_INVALID);
GPU_framebuffer_bind(prepass_fb_);
GPU_framebuffer_clear_color(prepass_fb_, clear_velocity);
/* Alpha stores transmittance. So start at 1. */
@@ -133,7 +118,10 @@ void ShadingView::render()
GPU_framebuffer_bind(combined_fb_);
GPU_framebuffer_clear_color_depth(combined_fb_, clear_color, 1.0f);
- inst_.pipelines.world.render();
+ inst_.pipelines.world.render(render_view_new_);
+
+ /* TODO(fclem): Move it after the first prepass (and hiz update) once pipeline is stabilized. */
+ inst_.lights.set_view(render_view_new_, extent_);
// inst_.pipelines.deferred.render(
// render_view_, rt_buffer_opaque_, rt_buffer_refract_, depth_tx_, combined_tx_);
@@ -142,52 +130,36 @@ void ShadingView::render()
// inst_.lookdev.render_overlay(view_fb_);
- inst_.pipelines.forward.render(render_view_, prepass_fb_, combined_fb_, depth_tx_, combined_tx_);
+ inst_.pipelines.forward.render(render_view_new_, prepass_fb_, combined_fb_, rbufs.combined_tx);
- // inst_.lights.debug_draw(view_fb_);
- // inst_.shadows.debug_draw(view_fb_);
+ inst_.lights.debug_draw(render_view_new_, combined_fb_);
+ inst_.hiz_buffer.debug_draw(render_view_new_, combined_fb_);
- // velocity_.resolve(depth_tx_);
- velocity_.resolve(dtxl->depth);
+ GPUTexture *combined_final_tx = render_postfx(rbufs.combined_tx);
- // if (inst_.render_passes.vector) {
- // inst_.render_passes.vector->accumulate(velocity_.camera_vectors_get(), sub_view_);
- // }
+ inst_.film.accumulate(sub_view_, combined_final_tx);
- // GPUTexture *final_radiance_tx = render_post(combined_tx_);
+ // inst_.shadows.debug_draw();
- // if (inst_.render_passes.combined) {
- // inst_.render_passes.combined->accumulate(final_radiance_tx, sub_view_);
- // }
-
- // if (inst_.render_passes.depth) {
- // inst_.render_passes.depth->accumulate(depth_tx_, sub_view_);
- // }
+ rbufs.release();
+ postfx_tx_.release();
DRW_stats_group_end();
-
- combined_tx_.release();
- postfx_tx_.release();
- velocity_.release();
}
-GPUTexture *ShadingView::render_post(GPUTexture *input_tx)
+GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx)
{
-#if 0
- if (!dof_.postfx_enabled() && !mb_.enabled()) {
+ if (!inst_.depth_of_field.postfx_enabled() && !inst_.motion_blur.postfx_enabled()) {
return input_tx;
}
- /* HACK: View name should be unique and static.
- * With this, we can reuse the same texture across views. */
- postfx_tx_.acquire(extent_, GPU_RGBA16F, (void *)name_);
+ postfx_tx_.acquire(extent_, GPU_RGBA16F);
- GPUTexture *velocity_tx = velocity_.view_vectors_get();
GPUTexture *output_tx = postfx_tx_;
/* Swapping is done internally. Actual output is set to the next input. */
- dof_.render(depth_tx_, &input_tx, &output_tx);
- mb_.render(depth_tx_, velocity_tx, &input_tx, &output_tx);
-#endif
+ inst_.depth_of_field.render(render_view_new_, &input_tx, &output_tx, dof_buffer_);
+ inst_.motion_blur.render(render_view_new_, &input_tx, &output_tx);
+
return input_tx;
}
@@ -197,20 +169,25 @@ void ShadingView::update_view()
DRW_view_viewmat_get(main_view_, viewmat.ptr(), false);
DRW_view_winmat_get(main_view_, winmat.ptr(), false);
+ /* TODO(fclem): Mixed-resolution rendering: We need to make sure we render with exactly the same
+ * distances between pixels to line up render samples and target pixels.
+ * So if the target resolution is not a multiple of the resolution divisor, we need to make the
+ * projection window bigger in the +X and +Y directions. */
+
/* Anti-Aliasing / Super-Sampling jitter. */
- // float jitter_u = 2.0f * (inst_.sampling.rng_get(SAMPLING_FILTER_U) - 0.5f) / extent_[0];
- // float jitter_v = 2.0f * (inst_.sampling.rng_get(SAMPLING_FILTER_V) - 0.5f) / extent_[1];
+ float2 jitter = inst_.film.pixel_jitter_get() / float2(extent_);
+ /* Transform to NDC space. */
+ jitter *= 2.0f;
- // window_translate_m4(winmat.ptr(), winmat.ptr(), jitter_u, jitter_v);
+ window_translate_m4(winmat.ptr(), winmat.ptr(), UNPACK2(jitter));
DRW_view_update_sub(sub_view_, viewmat.ptr(), winmat.ptr());
- /* FIXME(fclem): The offset may be is noticeably large and the culling might make object pop
+ /* FIXME(fclem): The offset may be noticeably large and the culling might make object pop
* out of the blurring radius. To fix this, use custom enlarged culling matrix. */
- // dof_.jitter_apply(winmat, viewmat);
+ inst_.depth_of_field.jitter_apply(winmat, viewmat);
DRW_view_update_sub(render_view_, viewmat.ptr(), winmat.ptr());
- // inst_.lightprobes.set_view(render_view_, extent_);
- // inst_.lights.set_view(render_view_, extent_, !inst_.use_scene_lights());
+ render_view_new_.sync(viewmat, winmat);
}
/** \} */
diff --git a/source/blender/draw/engines/eevee_next/eevee_view.hh b/source/blender/draw/engines/eevee_next/eevee_view.hh
index fb74412f557..74e513357cd 100644
--- a/source/blender/draw/engines/eevee_next/eevee_view.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_view.hh
@@ -41,19 +41,13 @@ class ShadingView {
/** Matrix to apply to the viewmat. */
const float (*face_matrix_)[4];
- /** Post-FX modules. */
- // DepthOfField dof_;
- // MotionBlur mb_;
- VelocityView velocity_;
-
/** Raytracing persistent buffers. Only opaque and refraction can have surface tracing. */
// RaytraceBuffer rt_buffer_opaque_;
// RaytraceBuffer rt_buffer_refract_;
+ DepthOfFieldBuffer dof_buffer_;
Framebuffer prepass_fb_;
Framebuffer combined_fb_;
- Texture depth_tx_;
- TextureFromPool combined_tx_;
TextureFromPool postfx_tx_;
/** Main views is created from the camera (or is from the viewport). It is not jittered. */
@@ -63,6 +57,7 @@ class ShadingView {
DRWView *sub_view_ = nullptr;
/** Same as sub_view_ but has Depth Of Field jitter applied. */
DRWView *render_view_ = nullptr;
+ View render_view_new_;
/** Render size of the view. Can change between scene sample eval. */
int2 extent_ = {-1, -1};
@@ -71,17 +66,17 @@ class ShadingView {
public:
ShadingView(Instance &inst, const char *name, const float (*face_matrix)[4])
- : inst_(inst), name_(name), face_matrix_(face_matrix), velocity_(inst, name){};
+ : inst_(inst), name_(name), face_matrix_(face_matrix), render_view_new_(name){};
~ShadingView(){};
void init();
- void sync(int2 render_extent_);
+ void sync();
void render();
- GPUTexture *render_post(GPUTexture *input_tx);
+ GPUTexture *render_postfx(GPUTexture *input_tx);
private:
void update_view();
@@ -94,7 +89,7 @@ class ShadingView {
*
* Container for all views needed to render the final image.
* We might need up to 6 views for panoramic cameras.
- * All views are always available but only enabled for if need.
+ * All views are always available but only enabled for if needed.
* \{ */
class MainView {
@@ -109,8 +104,6 @@ class MainView {
ShadingView shading_views_4;
ShadingView shading_views_5;
#define shading_views_ (&shading_views_0)
- /** Internal render size. */
- int render_extent_[2];
public:
MainView(Instance &inst)
@@ -123,15 +116,8 @@ class MainView {
{
}
- void init(const int2 full_extent_)
+ void init()
{
- /* TODO(fclem) parameter hidden in experimental. We need to figure out mipmap bias to preserve
- * texture crispiness. */
- float resolution_scale = 1.0f;
- for (int i = 0; i < 2; i++) {
- render_extent_[i] = max_ii(1, roundf(full_extent_[i] * resolution_scale));
- }
-
for (auto i : IndexRange(6)) {
shading_views_[i].init();
}
@@ -140,7 +126,7 @@ class MainView {
void sync()
{
for (auto i : IndexRange(6)) {
- shading_views_[i].sync(render_extent_);
+ shading_views_[i].sync();
}
}
diff --git a/source/blender/draw/engines/eevee_next/eevee_world.cc b/source/blender/draw/engines/eevee_next/eevee_world.cc
index b9cb24fe30a..313c0bda42e 100644
--- a/source/blender/draw/engines/eevee_next/eevee_world.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_world.cc
@@ -42,10 +42,10 @@ DefaultWorldNodeTree::~DefaultWorldNodeTree()
MEM_SAFE_FREE(ntree_);
}
-/* Configure a default nodetree with the given world. */
+/* Configure a default node-tree with the given world. */
bNodeTree *DefaultWorldNodeTree::nodetree_get(::World *wo)
{
- /* WARNING: This function is not threadsafe. Which is not a problem for the moment. */
+ /* WARNING: This function is not thread-safe. Which is not a problem for the moment. */
copy_v3_fl3(color_socket_->value, wo->horr, wo->horg, wo->horb);
return ntree_;
}
@@ -79,7 +79,7 @@ void World::sync()
/* TODO(fclem) This should be detected to scene level. */
::World *orig_world = (::World *)DEG_get_original_id(&bl_world->id);
if (assign_if_different(prev_original_world, orig_world)) {
- // inst_.sampling.reset();
+ inst_.sampling.reset();
}
bNodeTree *ntree = (bl_world->nodetree && bl_world->use_nodes) ?
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl
index a65bb7decb6..6fe5fa01fa3 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl
@@ -3,6 +3,8 @@
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl)
+#define EEVEE_ATTRIBUTE_LIB
+
#if defined(MAT_GEOM_MESH)
/* -------------------------------------------------------------------- */
@@ -131,7 +133,7 @@ int g_curves_attr_id = 0;
int curves_attribute_element_id()
{
int id = interp.curves_strand_id;
- if (drw_curves.is_point_attribute[g_curves_attr_id] != 0) {
+ if (drw_curves.is_point_attribute[g_curves_attr_id][0] != 0) {
# ifdef COMMON_HAIR_LIB
id = hair_get_base_id();
# endif
@@ -282,43 +284,3 @@ vec3 attr_load_uv(vec3 attr)
/** \} */
#endif
-
-/* -------------------------------------------------------------------- */
-/** \name Volume Attribute post
- *
- * TODO(@fclem): These implementation details should concern the DRWManager and not be a fix on
- * the engine side. But as of now, the engines are responsible for loading the attributes.
- *
- * \{ */
-
-#if defined(MAT_GEOM_VOLUME)
-
-float attr_load_temperature_post(float attr)
-{
- /* Bring the into standard range without having to modify the grid values */
- attr = (attr > 0.01) ? (attr * drw_volume.temperature_mul + drw_volume.temperature_bias) : 0.0;
- return attr;
-}
-vec4 attr_load_color_post(vec4 attr)
-{
- /* Density is premultiplied for interpolation, divide it out here. */
- attr.rgb *= safe_rcp(attr.a);
- attr.rgb *= drw_volume.color_mul.rgb;
- attr.a = 1.0;
- return attr;
-}
-
-#else /* Noop for any other surface. */
-
-float attr_load_temperature_post(float attr)
-{
- return attr;
-}
-vec4 attr_load_color_post(vec4 attr)
-{
- return attr;
-}
-
-#endif
-
-/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl
index f79e9102d76..2611f714b59 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl
@@ -143,24 +143,10 @@ vec2 camera_uv_from_view(CameraData cam, vec3 vV)
}
}
-vec2 camera_uv_from_world(CameraData cam, vec3 V)
+vec2 camera_uv_from_world(CameraData cam, vec3 P)
{
- vec3 vV = transform_point(cam.viewmat, V);
- switch (cam.type) {
- default:
- case CAMERA_ORTHO:
- return camera_uv_from_view(cam.persmat, false, V);
- case CAMERA_PERSP:
- return camera_uv_from_view(cam.persmat, true, V);
- case CAMERA_PANO_EQUIRECT:
- return camera_equirectangular_from_direction(cam, vV);
- case CAMERA_PANO_EQUISOLID:
- /* ATTR_FALLTHROUGH; */
- case CAMERA_PANO_EQUIDISTANT:
- return camera_fisheye_from_direction(cam, vV);
- case CAMERA_PANO_MIRROR:
- return camera_mirror_ball_from_direction(cam, vV);
- }
+ vec3 vV = transform_direction(cam.viewmat, normalize(P));
+ return camera_uv_from_view(cam, vV);
}
/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl
new file mode 100644
index 00000000000..d5fdaae6fc1
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl
@@ -0,0 +1,37 @@
+
+/* -------------------------------------------------------------------- */
+/** \name YCoCg
+ * \{ */
+
+vec3 colorspace_YCoCg_from_scene_linear(vec3 rgb_color)
+{
+ const mat3 colorspace_tx = transpose(mat3(vec3(1, 2, 1), /* Y */
+ vec3(2, 0, -2), /* Co */
+ vec3(-1, 2, -1))); /* Cg */
+ return colorspace_tx * rgb_color;
+}
+
+vec4 colorspace_YCoCg_from_scene_linear(vec4 rgba_color)
+{
+ return vec4(colorspace_YCoCg_from_scene_linear(rgba_color.rgb), rgba_color.a);
+}
+
+vec3 colorspace_scene_linear_from_YCoCg(vec3 ycocg_color)
+{
+ float Y = ycocg_color.x;
+ float Co = ycocg_color.y;
+ float Cg = ycocg_color.z;
+
+ vec3 rgb_color;
+ rgb_color.r = Y + Co - Cg;
+ rgb_color.g = Y + Cg;
+ rgb_color.b = Y - Co - Cg;
+ return rgb_color * 0.25;
+}
+
+vec4 colorspace_scene_linear_from_YCoCg(vec4 ycocg_color)
+{
+ return vec4(colorspace_scene_linear_from_YCoCg(ycocg_color.rgb), ycocg_color.a);
+}
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl
new file mode 100644
index 00000000000..99a47c541e9
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl
@@ -0,0 +1,680 @@
+
+/**
+ * Depth of Field Gather accumulator.
+ * We currently have only 2 which are very similar.
+ * One is for the halfres gather passes and the other one for slight in focus regions.
+ **/
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+
+/* -------------------------------------------------------------------- */
+/** \name Options.
+ * \{ */
+
+/* Quality options */
+#ifdef DOF_HOLEFILL_PASS
+/* No need for very high density for hole_fill. */
+const int gather_ring_count = 3;
+const int gather_ring_density = 3;
+const int gather_max_density_change = 0;
+const int gather_density_change_ring = 1;
+#else
+const int gather_ring_count = DOF_GATHER_RING_COUNT;
+const int gather_ring_density = 3;
+const int gather_max_density_change = 50; /* Dictates the maximum good quality blur. */
+const int gather_density_change_ring = 1;
+#endif
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Constants.
+ * \{ */
+
+const float unit_ring_radius = 1.0 / float(gather_ring_count);
+const float unit_sample_radius = 1.0 / float(gather_ring_count + 0.5);
+const float large_kernel_radius = 0.5 + float(gather_ring_count);
+const float smaller_kernel_radius = 0.5 + float(gather_ring_count - gather_density_change_ring);
+/* NOTE(fclem) the bias is reducing issues with density change visible transition. */
+const float radius_downscale_factor = smaller_kernel_radius / large_kernel_radius;
+const int change_density_at_ring = (gather_ring_count - gather_density_change_ring + 1);
+const float coc_radius_error = 2.0;
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Gather common.
+ * \{ */
+
+struct DofGatherData {
+ vec4 color;
+ float weight;
+ float dist; /* TODO remove */
+ /* For scatter occlusion. */
+ float coc;
+ float coc_sqr;
+ /* For ring bucket merging. */
+ float transparency;
+
+ float layer_opacity;
+};
+
+#define GATHER_DATA_INIT DofGatherData(vec4(0.0), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
+
+/* Intersection with the center of the kernel. */
+float dof_intersection_weight(float coc, float distance_from_center, float intersection_multiplier)
+{
+ if (no_smooth_intersection) {
+ return step(0.0, (abs(coc) - distance_from_center));
+ }
+ else {
+ /* (Slide 64). */
+ return saturate((abs(coc) - distance_from_center) * intersection_multiplier + 0.5);
+ }
+}
+
+/* Returns weight of the sample for the outer bucket (containing previous
+ * rings). */
+float dof_gather_accum_weight(float coc, float bordering_radius, bool first_ring)
+{
+ /* First ring has nothing to be mixed against. */
+ if (first_ring) {
+ return 0.0;
+ }
+ return saturate(coc - bordering_radius);
+}
+
+void dof_gather_ammend_weight(inout DofGatherData sample_data, float weight)
+{
+ sample_data.color *= weight;
+ sample_data.coc *= weight;
+ sample_data.coc_sqr *= weight;
+ sample_data.weight *= weight;
+}
+
+void dof_gather_accumulate_sample(DofGatherData sample_data,
+ float weight,
+ inout DofGatherData accum_data)
+{
+ accum_data.color += sample_data.color * weight;
+ accum_data.coc += sample_data.coc * weight;
+ accum_data.coc_sqr += sample_data.coc * (sample_data.coc * weight);
+ accum_data.weight += weight;
+}
+
+void dof_gather_accumulate_sample_pair(DofGatherData pair_data[2],
+ float bordering_radius,
+ float intersection_multiplier,
+ bool first_ring,
+ const bool do_fast_gather,
+ const bool is_foreground,
+ inout DofGatherData ring_data,
+ inout DofGatherData accum_data)
+{
+ if (do_fast_gather) {
+ for (int i = 0; i < 2; i++) {
+ dof_gather_accumulate_sample(pair_data[i], 1.0, accum_data);
+ accum_data.layer_opacity += 1.0;
+ }
+ return;
+ }
+
+#if 0
+ const float mirroring_threshold = -dof_layer_threshold - dof_layer_offset;
+ /* TODO(fclem) Promote to parameter? dither with Noise? */
+ const float mirroring_min_distance = 15.0;
+ if (pair_data[0].coc < mirroring_threshold &&
+ (pair_data[1].coc - mirroring_min_distance) > pair_data[0].coc) {
+ pair_data[1].coc = pair_data[0].coc;
+ }
+ else if (pair_data[1].coc < mirroring_threshold &&
+ (pair_data[0].coc - mirroring_min_distance) > pair_data[1].coc) {
+ pair_data[0].coc = pair_data[1].coc;
+ }
+#endif
+
+ for (int i = 0; i < 2; i++) {
+ float sample_weight = dof_sample_weight(pair_data[i].coc);
+ float layer_weight = dof_layer_weight(pair_data[i].coc, is_foreground);
+ float inter_weight = dof_intersection_weight(
+ pair_data[i].coc, pair_data[i].dist, intersection_multiplier);
+ float weight = inter_weight * layer_weight * sample_weight;
+
+ /**
+ * If a CoC is larger than bordering radius we accumulate it to the general accumulator.
+ * If not, we accumulate to the ring bucket. This is to have more consistent sample occlusion.
+ **/
+ float accum_weight = dof_gather_accum_weight(pair_data[i].coc, bordering_radius, first_ring);
+ dof_gather_accumulate_sample(pair_data[i], weight * accum_weight, accum_data);
+ dof_gather_accumulate_sample(pair_data[i], weight * (1.0 - accum_weight), ring_data);
+
+ accum_data.layer_opacity += layer_weight;
+
+ if (is_foreground) {
+ ring_data.transparency += 1.0 - inter_weight * layer_weight;
+ }
+ else {
+ float coc = is_foreground ? -pair_data[i].coc : pair_data[i].coc;
+ ring_data.transparency += saturate(coc - bordering_radius);
+ }
+ }
+}
+
+void dof_gather_accumulate_sample_ring(DofGatherData ring_data,
+ int sample_count,
+ bool first_ring,
+ const bool do_fast_gather,
+ /* accum_data occludes the ring_data if true. */
+ const bool reversed_occlusion,
+ inout DofGatherData accum_data)
+{
+ if (do_fast_gather) {
+ /* Do nothing as ring_data contains nothing. All samples are already in
+ * accum_data. */
+ return;
+ }
+
+ if (first_ring) {
+ /* Layer opacity is directly accumulated into accum_data data. */
+ accum_data.color = ring_data.color;
+ accum_data.coc = ring_data.coc;
+ accum_data.coc_sqr = ring_data.coc_sqr;
+ accum_data.weight = ring_data.weight;
+
+ accum_data.transparency = ring_data.transparency / float(sample_count);
+ return;
+ }
+
+ if (ring_data.weight == 0.0) {
+ return;
+ }
+
+ float ring_avg_coc = ring_data.coc / ring_data.weight;
+ float accum_avg_coc = accum_data.coc / accum_data.weight;
+
+ /* Smooth test to set opacity to see if the ring average coc occludes the
+ * accumulation. Test is reversed to be multiplied against opacity. */
+ float ring_occlu = saturate(accum_avg_coc - ring_avg_coc);
+ /* The bias here is arbitrary. Seems to avoid weird looking foreground in most
+ * cases. We might need to make it a parameter or find a relative bias. */
+ float accum_occlu = saturate((ring_avg_coc - accum_avg_coc) * 0.1 - 1.0);
+
+ if (is_resolve) {
+ ring_occlu = accum_occlu = 0.0;
+ }
+
+ if (no_gather_occlusion) {
+ ring_occlu = 0.0;
+ accum_occlu = 0.0;
+ }
+
+ /* (Slide 40) */
+ float ring_opacity = saturate(1.0 - ring_data.transparency / float(sample_count));
+ float accum_opacity = 1.0 - accum_data.transparency;
+
+ if (reversed_occlusion) {
+ /* Accum_data occludes the ring. */
+ float alpha = (accum_data.weight == 0.0) ? 0.0 : accum_opacity * accum_occlu;
+ float one_minus_alpha = 1.0 - alpha;
+
+ accum_data.color += ring_data.color * one_minus_alpha;
+ accum_data.coc += ring_data.coc * one_minus_alpha;
+ accum_data.coc_sqr += ring_data.coc_sqr * one_minus_alpha;
+ accum_data.weight += ring_data.weight * one_minus_alpha;
+
+ accum_data.transparency *= 1.0 - ring_opacity;
+ }
+ else {
+ /* Ring occludes the accum_data (Same as reference). */
+ float alpha = (accum_data.weight == 0.0) ? 1.0 : (ring_opacity * ring_occlu);
+ float one_minus_alpha = 1.0 - alpha;
+
+ accum_data.color = accum_data.color * one_minus_alpha + ring_data.color;
+ accum_data.coc = accum_data.coc * one_minus_alpha + ring_data.coc;
+ accum_data.coc_sqr = accum_data.coc_sqr * one_minus_alpha + ring_data.coc_sqr;
+ accum_data.weight = accum_data.weight * one_minus_alpha + ring_data.weight;
+ }
+}
+
+/* FIXME(fclem) Seems to be wrong since it needs ringcount+1 as input for
+ * slightfocus gather. */
+/* This should be replaced by web_sample_count_get() but doing so is breaking other things. */
+int dof_gather_total_sample_count(const int ring_count, const int ring_density)
+{
+ return (ring_count * ring_count - ring_count) * ring_density + 1;
+}
+
+void dof_gather_accumulate_center_sample(DofGatherData center_data,
+ float bordering_radius,
+ int i_radius,
+ const bool do_fast_gather,
+ const bool is_foreground,
+ const bool is_resolve,
+ inout DofGatherData accum_data)
+{
+ float layer_weight = dof_layer_weight(center_data.coc, is_foreground);
+ float sample_weight = dof_sample_weight(center_data.coc);
+ float weight = layer_weight * sample_weight;
+ float accum_weight = dof_gather_accum_weight(center_data.coc, bordering_radius, false);
+
+ if (do_fast_gather) {
+ /* Hope for the compiler to optimize the above. */
+ layer_weight = 1.0;
+ sample_weight = 1.0;
+ accum_weight = 1.0;
+ weight = 1.0;
+ }
+
+ center_data.transparency = 1.0 - weight;
+
+ dof_gather_accumulate_sample(center_data, weight * accum_weight, accum_data);
+
+ if (!do_fast_gather) {
+ if (is_resolve) {
+ /* NOTE(fclem): Hack to smooth transition to full in-focus opacity. */
+ int total_sample_count = dof_gather_total_sample_count(i_radius + 1,
+ DOF_SLIGHT_FOCUS_DENSITY);
+ float fac = saturate(1.0 - abs(center_data.coc) / float(dof_layer_threshold));
+ accum_data.layer_opacity += float(total_sample_count) * fac * fac;
+ }
+ accum_data.layer_opacity += layer_weight;
+
+ /* Logic of dof_gather_accumulate_sample(). */
+ weight *= (1.0 - accum_weight);
+ center_data.coc_sqr = center_data.coc * (center_data.coc * weight);
+ center_data.color *= weight;
+ center_data.coc *= weight;
+ center_data.weight = weight;
+
+ if (is_foreground && !is_resolve) {
+ /* Reduce issue with closer foreground over distant foreground. */
+ float ring_area = sqr(bordering_radius);
+ dof_gather_ammend_weight(center_data, ring_area);
+ }
+
+ /* Accumulate center as its own ring. */
+ dof_gather_accumulate_sample_ring(
+ center_data, 1, false, do_fast_gather, is_foreground, accum_data);
+ }
+}
+
+int dof_gather_total_sample_count_with_density_change(const int ring_count,
+ const int ring_density,
+ int density_change)
+{
+ int sample_count_per_density_change = dof_gather_total_sample_count(ring_count, ring_density) -
+ dof_gather_total_sample_count(
+ ring_count - gather_density_change_ring, ring_density);
+
+ return dof_gather_total_sample_count(ring_count, ring_density) +
+ sample_count_per_density_change * density_change;
+}
+
+void dof_gather_accumulate_resolve(int total_sample_count,
+ DofGatherData accum_data,
+ out vec4 out_col,
+ out float out_weight,
+ out vec2 out_occlusion)
+{
+ float weight_inv = safe_rcp(accum_data.weight);
+ out_col = accum_data.color * weight_inv;
+ out_occlusion = vec2(abs(accum_data.coc), accum_data.coc_sqr) * weight_inv;
+
+ if (is_foreground) {
+ out_weight = 1.0 - accum_data.transparency;
+ }
+ else if (accum_data.weight > 0.0) {
+ out_weight = accum_data.layer_opacity / float(total_sample_count);
+ }
+ else {
+ out_weight = 0.0;
+ }
+ /* Gathering may not accumulate to 1.0 alpha because of float precision. */
+ if (out_weight > 0.99) {
+ out_weight = 1.0;
+ }
+ else if (out_weight < 0.01) {
+ out_weight = 0.0;
+ }
+ /* Same thing for alpha channel. */
+ if (out_col.a > 0.993) {
+ out_col.a = 1.0;
+ }
+ else if (out_col.a < 0.003) {
+ out_col.a = 0.0;
+ }
+}
+
+float dof_load_gather_coc(sampler2D gather_input_coc_tx, vec2 uv, float lod)
+{
+ float coc = textureLod(gather_input_coc_tx, uv, lod).r;
+ /* We gather at halfres. CoC must be divided by 2 to be compared against radii. */
+ return coc * 0.5;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Common Gather accumulator.
+ * \{ */
+
+/* Radii needs to be halfres CoC sizes. */
+bool dof_do_density_change(float base_radius, float min_intersectable_radius)
+{
+ /* Reduce artifact for very large blur. */
+ min_intersectable_radius *= 0.1;
+
+ bool need_new_density = (base_radius * unit_ring_radius > min_intersectable_radius);
+ bool larger_than_min_density = (base_radius * radius_downscale_factor >
+ float(gather_ring_count));
+
+ return need_new_density && larger_than_min_density;
+}
+
+void dof_gather_init(float base_radius,
+ vec2 noise,
+ out vec2 center_co,
+ out float lod,
+ out float intersection_multiplier)
+{
+ /* Jitter center half a ring to reduce undersampling. */
+ vec2 jitter_ofs = 0.499 * sample_disk(noise);
+ if (DOF_BOKEH_TEXTURE) {
+ jitter_ofs *= dof_buf.bokeh_anisotropic_scale;
+ }
+ vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5;
+ center_co = frag_coord + jitter_ofs * base_radius * unit_sample_radius;
+
+ /* TODO(fclem) Seems like the default lod selection is too big. Bias to avoid blocky moving out
+ * of focus shapes. */
+ const float lod_bias = -2.0;
+ lod = max(floor(log2(base_radius * unit_sample_radius) + 0.5) + lod_bias, 0.0);
+
+ if (no_gather_mipmaps) {
+ lod = 0.0;
+ }
+ /* (Slide 64). */
+ intersection_multiplier = pow(0.5, lod);
+}
+
+void dof_gather_accumulator(sampler2D color_tx,
+ sampler2D color_bilinear_tx,
+ sampler2D coc_tx,
+ sampler2D bkh_lut_tx, /* Renamed because of ugly macro. */
+ float base_radius,
+ float min_intersectable_radius,
+ const bool do_fast_gather,
+ const bool do_density_change,
+ out vec4 out_color,
+ out float out_weight,
+ out vec2 out_occlusion)
+{
+ vec2 frag_coord = vec2(gl_GlobalInvocationID.xy);
+ vec2 noise_offset = sampling_rng_2D_get(SAMPLING_LENS_U);
+ vec2 noise = no_gather_random ? vec2(0.0, 0.0) :
+ vec2(interlieved_gradient_noise(frag_coord, 0, noise_offset.x),
+ interlieved_gradient_noise(frag_coord, 1, noise_offset.y));
+
+ if (!do_fast_gather) {
+ /* Jitter the radius to reduce noticeable density changes. */
+ base_radius += noise.x * unit_ring_radius * base_radius;
+ }
+ else {
+ /* Jittering the radius more than we need means we are going to feather the bokeh shape half a
+ * ring. So we need to compensate for fast gather that does not check CoC intersection. */
+ base_radius += (0.5 - noise.x) * 1.5 * unit_ring_radius * base_radius;
+ }
+ /* TODO(fclem) another seed? For now Cranly-Partterson rotation with golden ratio. */
+ noise.x = fract(noise.x * 6.1803398875);
+
+ float lod, isect_mul;
+ vec2 center_co;
+ dof_gather_init(base_radius, noise, center_co, lod, isect_mul);
+
+ bool first_ring = true;
+
+ DofGatherData accum_data = GATHER_DATA_INIT;
+
+ int density_change = 0;
+ for (int ring = gather_ring_count; ring > 0; ring--) {
+ int sample_pair_count = gather_ring_density * ring;
+
+ float step_rot = M_PI / float(sample_pair_count);
+ mat2 step_rot_mat = rot2_from_angle(step_rot);
+
+ float angle_offset = noise.y * step_rot;
+ vec2 offset = vec2(cos(angle_offset), sin(angle_offset));
+
+ float ring_radius = float(ring) * unit_sample_radius * base_radius;
+
+ /* Slide 38. */
+ float bordering_radius = ring_radius +
+ (0.5 + coc_radius_error) * base_radius * unit_sample_radius;
+ DofGatherData ring_data = GATHER_DATA_INIT;
+ for (int sample_pair = 0; sample_pair < sample_pair_count; sample_pair++) {
+ offset = step_rot_mat * offset;
+
+ DofGatherData pair_data[2];
+ for (int i = 0; i < 2; i++) {
+ vec2 offset_co = ((i == 0) ? offset : -offset);
+ if (DOF_BOKEH_TEXTURE) {
+ /* Scaling to 0.25 for speed. Improves texture cache hit. */
+ offset_co = texture(bkh_lut_tx, offset_co * 0.25 + 0.5).rg;
+ offset_co *= (is_foreground) ? -dof_buf.bokeh_anisotropic_scale :
+ dof_buf.bokeh_anisotropic_scale;
+ }
+ vec2 sample_co = center_co + offset_co * ring_radius;
+ vec2 sample_uv = sample_co * dof_buf.gather_uv_fac;
+ if (do_fast_gather) {
+ pair_data[i].color = textureLod(color_bilinear_tx, sample_uv, lod);
+ }
+ else {
+ pair_data[i].color = textureLod(color_tx, sample_uv, lod);
+ }
+ pair_data[i].coc = dof_load_gather_coc(coc_tx, sample_uv, lod);
+ pair_data[i].dist = ring_radius;
+ }
+
+ dof_gather_accumulate_sample_pair(pair_data,
+ bordering_radius,
+ isect_mul,
+ first_ring,
+ do_fast_gather,
+ is_foreground,
+ ring_data,
+ accum_data);
+ }
+
+ if (is_foreground) {
+ /* Reduce issue with closer foreground over distant foreground. */
+ /* TODO(fclem) this seems to not be completely correct as the issue remains. */
+ float ring_area = (sqr(float(ring) + 0.5 + coc_radius_error) -
+ sqr(float(ring) - 0.5 + coc_radius_error)) *
+ sqr(base_radius * unit_sample_radius);
+ dof_gather_ammend_weight(ring_data, ring_area);
+ }
+
+ dof_gather_accumulate_sample_ring(
+ ring_data, sample_pair_count * 2, first_ring, do_fast_gather, is_foreground, accum_data);
+
+ first_ring = false;
+
+ if (do_density_change && (ring == change_density_at_ring) &&
+ (density_change < gather_max_density_change)) {
+ if (dof_do_density_change(base_radius, min_intersectable_radius)) {
+ base_radius *= radius_downscale_factor;
+ ring += gather_density_change_ring;
+ /* We need to account for the density change in the weights (slide 62).
+ * For that multiply old kernel data by its area divided by the new kernel area. */
+ const float outer_rings_weight = 1.0 / (radius_downscale_factor * radius_downscale_factor);
+ /* Samples are already weighted per ring in foreground pass. */
+ if (!is_foreground) {
+ dof_gather_ammend_weight(accum_data, outer_rings_weight);
+ }
+ /* Re-init kernel position & sampling parameters. */
+ dof_gather_init(base_radius, noise, center_co, lod, isect_mul);
+ density_change++;
+ }
+ }
+ }
+
+ {
+ /* Center sample. */
+ vec2 sample_uv = center_co * dof_buf.gather_uv_fac;
+ DofGatherData center_data;
+ if (do_fast_gather) {
+ center_data.color = textureLod(color_bilinear_tx, sample_uv, lod);
+ }
+ else {
+ center_data.color = textureLod(color_tx, sample_uv, lod);
+ }
+ center_data.coc = dof_load_gather_coc(coc_tx, sample_uv, lod);
+ center_data.dist = 0.0;
+
+ /* Slide 38. */
+ float bordering_radius = (0.5 + coc_radius_error) * base_radius * unit_sample_radius;
+
+ dof_gather_accumulate_center_sample(
+ center_data, bordering_radius, 0, do_fast_gather, is_foreground, false, accum_data);
+ }
+
+ int total_sample_count = dof_gather_total_sample_count_with_density_change(
+ gather_ring_count, gather_ring_density, density_change);
+ dof_gather_accumulate_resolve(
+ total_sample_count, accum_data, out_color, out_weight, out_occlusion);
+
+ if (debug_gather_perf && density_change > 0) {
+ float fac = saturate(float(density_change) / float(10.0));
+ out_color.rgb = avg(out_color.rgb) * neon_gradient(fac);
+ }
+ if (debug_gather_perf && do_fast_gather) {
+ out_color.rgb = avg(out_color.rgb) * vec3(0.0, 1.0, 0.0);
+ }
+ if (debug_scatter_perf) {
+ out_color.rgb = avg(out_color.rgb) * vec3(0.0, 1.0, 0.0);
+ }
+
+ /* Output premultiplied color so we can use bilinear sampler in resolve pass. */
+ out_color *= out_weight;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Slight focus accumulator.
+ *
+ * The full pixel neighborhood is gathered.
+ * \{ */
+
+void dof_slight_focus_gather(sampler2D depth_tx,
+ sampler2D color_tx,
+ sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */
+ float radius,
+ out vec4 out_color,
+ out float out_weight,
+ out float out_center_coc)
+{
+ vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5;
+ vec2 noise_offset = sampling_rng_2D_get(SAMPLING_LENS_U);
+ vec2 noise = no_gather_random ? vec2(0.0) :
+ vec2(interlieved_gradient_noise(frag_coord, 3, noise_offset.x),
+ interlieved_gradient_noise(frag_coord, 5, noise_offset.y));
+
+ DofGatherData fg_accum = GATHER_DATA_INIT;
+ DofGatherData bg_accum = GATHER_DATA_INIT;
+
+ int i_radius = clamp(int(radius), 0, int(dof_layer_threshold));
+
+ const float sample_count_max = float(DOF_SLIGHT_FOCUS_SAMPLE_MAX);
+ /* Scale by search area. */
+ float sample_count = sample_count_max * saturate(sqr(radius) / sqr(dof_layer_threshold));
+
+ bool first_ring = true;
+
+ for (float s = 0.0; s < sample_count; s++) {
+ vec2 rand2 = fract(hammersley_2d(s, sample_count) + noise);
+ vec2 offset = sample_disk(rand2);
+ float ring_dist = sqrt(rand2.y);
+
+ DofGatherData pair_data[2];
+ for (int i = 0; i < 2; i++) {
+ vec2 sample_offset = ((i == 0) ? offset : -offset);
+ /* OPTI: could precompute the factor. */
+ vec2 sample_uv = (frag_coord + sample_offset) / vec2(textureSize(depth_tx, 0));
+ float depth = textureLod(depth_tx, sample_uv, 0.0).r;
+ pair_data[i].coc = dof_coc_from_depth(dof_buf, sample_uv, depth);
+ pair_data[i].color = safe_color(textureLod(color_tx, sample_uv, 0.0));
+ pair_data[i].dist = ring_dist;
+ if (DOF_BOKEH_TEXTURE) {
+ /* Contains subpixel distance to bokeh shape. */
+ ivec2 lut_texel = ivec2(round(sample_offset)) + dof_max_slight_focus_radius;
+ pair_data[i].dist = texelFetch(bkh_lut_tx, lut_texel, 0).r;
+ }
+ pair_data[i].coc = clamp(pair_data[i].coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max);
+ }
+
+ float bordering_radius = ring_dist + 0.5;
+ const float isect_mul = 1.0;
+ DofGatherData bg_ring = GATHER_DATA_INIT;
+ dof_gather_accumulate_sample_pair(
+ pair_data, bordering_radius, isect_mul, first_ring, false, false, bg_ring, bg_accum);
+ /* Treat each sample as a ring. */
+ dof_gather_accumulate_sample_ring(bg_ring, 2, first_ring, false, false, bg_accum);
+
+ if (DOF_BOKEH_TEXTURE) {
+ /* Swap distances in order to flip bokeh shape for foreground. */
+ float tmp = pair_data[0].dist;
+ pair_data[0].dist = pair_data[1].dist;
+ pair_data[1].dist = tmp;
+ }
+ DofGatherData fg_ring = GATHER_DATA_INIT;
+ dof_gather_accumulate_sample_pair(
+ pair_data, bordering_radius, isect_mul, first_ring, false, true, fg_ring, fg_accum);
+ /* Treat each sample as a ring. */
+ dof_gather_accumulate_sample_ring(fg_ring, 2, first_ring, false, true, fg_accum);
+
+ first_ring = false;
+ }
+
+ /* Center sample. */
+ vec2 sample_uv = frag_coord / vec2(textureSize(depth_tx, 0));
+ DofGatherData center_data;
+ center_data.color = safe_color(textureLod(color_tx, sample_uv, 0.0));
+ center_data.coc = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r);
+ center_data.coc = clamp(center_data.coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max);
+ center_data.dist = 0.0;
+
+ out_center_coc = center_data.coc;
+
+ /* Slide 38. */
+ float bordering_radius = 0.5;
+
+ dof_gather_accumulate_center_sample(
+ center_data, bordering_radius, i_radius, false, true, true, fg_accum);
+ dof_gather_accumulate_center_sample(
+ center_data, bordering_radius, i_radius, false, false, true, bg_accum);
+
+ vec4 bg_col, fg_col;
+ float bg_weight, fg_weight;
+ vec2 unused_occlusion;
+
+ int total_sample_count = int(sample_count) * 2 + 1;
+ dof_gather_accumulate_resolve(total_sample_count, bg_accum, bg_col, bg_weight, unused_occlusion);
+ dof_gather_accumulate_resolve(total_sample_count, fg_accum, fg_col, fg_weight, unused_occlusion);
+
+ /* Fix weighting issues on perfectly focus to slight focus transitioning areas. */
+ if (abs(center_data.coc) < 0.5) {
+ bg_col = center_data.color;
+ bg_weight = 1.0;
+ }
+
+ /* Alpha Over */
+ float alpha = 1.0 - fg_weight;
+ out_weight = bg_weight * alpha + fg_weight;
+ out_color = bg_col * bg_weight * alpha + fg_col * fg_weight;
+}
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl
new file mode 100644
index 00000000000..26a597b04e8
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl
@@ -0,0 +1,55 @@
+
+/**
+ * Bokeh Look Up Table: This outputs a radius multiplier to shape the sampling in gather pass or
+ * the scatter sprite appearance. This is only used if bokeh shape is either anamorphic or is not
+ * a perfect circle.
+ * We correct samples spacing for polygonal bokeh shapes. However, we do not for anamorphic bokeh
+ * as it is way more complex and expensive to do.
+ */
+
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+
+void main()
+{
+ vec2 gather_uv = ((vec2(gl_GlobalInvocationID.xy) + 0.5) / float(DOF_BOKEH_LUT_SIZE));
+ /* Center uv in range [-1..1]. */
+ gather_uv = gather_uv * 2.0 - 1.0;
+
+ vec2 slight_focus_texel = vec2(gl_GlobalInvocationID.xy) - float(dof_max_slight_focus_radius);
+
+ float radius = length(gather_uv);
+
+ if (dof_buf.bokeh_blades > 0.0) {
+ /* NOTE: atan(y,x) has output range [-M_PI..M_PI], so add 2pi to avoid negative angles. */
+ float theta = atan(gather_uv.y, gather_uv.x) + M_2PI;
+ float r = length(gather_uv);
+
+ radius /= circle_to_polygon_radius(dof_buf.bokeh_blades, theta - dof_buf.bokeh_rotation);
+
+ float theta_new = circle_to_polygon_angle(dof_buf.bokeh_blades, theta);
+ float r_new = circle_to_polygon_radius(dof_buf.bokeh_blades, theta_new);
+
+ theta_new -= dof_buf.bokeh_rotation;
+
+ gather_uv = r_new * vec2(-cos(theta_new), sin(theta_new));
+
+ {
+ /* Slight focus distance */
+ slight_focus_texel *= dof_buf.bokeh_anisotropic_scale_inv;
+ float theta = atan(slight_focus_texel.y, -slight_focus_texel.x) + M_2PI;
+ slight_focus_texel /= circle_to_polygon_radius(dof_buf.bokeh_blades,
+ theta + dof_buf.bokeh_rotation);
+ }
+ }
+ else {
+ gather_uv *= safe_rcp(length(gather_uv));
+ }
+
+ ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+ /* For gather store the normalized UV. */
+ imageStore(out_gather_lut_img, texel, gather_uv.xyxy);
+ /* For scatter store distance. LUT will be scaled by COC. */
+ imageStore(out_scatter_lut_img, texel, vec4(radius));
+ /* For slight focus gather store pixel perfect distance. */
+ imageStore(out_resolve_lut_img, texel, vec4(length(slight_focus_texel)));
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl
new file mode 100644
index 00000000000..3d45f285da9
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl
@@ -0,0 +1,32 @@
+
+/**
+ * Downsample pass: CoC aware downsample to quarter resolution.
+ *
+ * Pretty much identical to the setup pass but get CoC from buffer.
+ * Also does not weight luma for the bilateral weights.
+ */
+
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+
+void main()
+{
+ vec2 halfres_texel_size = 1.0 / vec2(textureSize(color_tx, 0).xy);
+ /* Center uv around the 4 halfres pixels. */
+ vec2 quad_center = vec2(gl_GlobalInvocationID * 2 + 1) * halfres_texel_size;
+
+ vec4 colors[4];
+ vec4 cocs;
+ for (int i = 0; i < 4; i++) {
+ vec2 sample_uv = quad_center + quad_offsets[i] * halfres_texel_size;
+ colors[i] = textureLod(color_tx, sample_uv, 0.0);
+ cocs[i] = textureLod(coc_tx, sample_uv, 0.0).r;
+ }
+
+ vec4 weights = dof_bilateral_coc_weights(cocs);
+ /* Normalize so that the sum is 1. */
+ weights *= safe_rcp(sum(weights));
+
+ vec4 out_color = weighted_sum_array(colors, weights);
+
+ imageStore(out_color_img, ivec2(gl_GlobalInvocationID.xy), out_color);
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
new file mode 100644
index 00000000000..49c93ca63cd
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
@@ -0,0 +1,163 @@
+
+/**
+ * Gather Filter pass: Filter the gather pass result to reduce noise.
+ *
+ * This is a simple 3x3 median filter to avoid dilating highlights with a 3x3 max filter even if
+ * cheaper.
+ */
+
+struct FilterSample {
+ vec4 color;
+ float weight;
+};
+
+/* -------------------------------------------------------------------- */
+/** \name Pixel cache.
+ * \{ */
+
+const uint cache_size = gl_WorkGroupSize.x + 2;
+shared vec4 color_cache[cache_size][cache_size];
+shared float weight_cache[cache_size][cache_size];
+
+void cache_init()
+{
+ /**
+ * Load enough values into LDS to perform the filter.
+ *
+ * ┌──────────────────────────────┐
+ * │ │ < Border texels that needs to be loaded.
+ * │ x x x x x x x x │ ─┐
+ * │ x x x x x x x x │ │
+ * │ x x x x x x x x │ │
+ * │ x x x x x x x x │ │ Thread Group Size 8x8.
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ ─┘
+ * │ L L L L L │ < Border texels that needs to be loaded.
+ * └──────────────────────────────┘
+ * └───────────┘
+ * Load using 5x5 threads.
+ */
+
+ ivec2 texel = ivec2(gl_GlobalInvocationID.xy) - 1;
+ if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) {
+ for (int y = 0; y < 2; y++) {
+ for (int x = 0; x < 2; x++) {
+ ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u);
+ ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+ ivec2 load_texel = clamp(texel + offset, ivec2(0), textureSize(color_tx, 0) - 1);
+
+ color_cache[cache_texel.y][cache_texel.x] = texelFetch(color_tx, load_texel, 0);
+ weight_cache[cache_texel.y][cache_texel.x] = texelFetch(weight_tx, load_texel, 0).r;
+ }
+ }
+ }
+ barrier();
+}
+
+FilterSample cache_sample(int x, int y)
+{
+ return FilterSample(color_cache[y][x], weight_cache[y][x]);
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Median filter
+ * From:
+ * Implementing Median Filters in XC4000E FPGAs
+ * JOHN L. SMITH, Univision Technologies Inc., Billerica, MA
+ * http://users.utcluj.ro/~baruch/resources/Image/xl23_16.pdf
+ * Figure 1
+ * \{ */
+
+FilterSample filter_min(FilterSample a, FilterSample b)
+{
+ return FilterSample(min(a.color, b.color), min(a.weight, b.weight));
+}
+
+FilterSample filter_max(FilterSample a, FilterSample b)
+{
+ return FilterSample(max(a.color, b.color), max(a.weight, b.weight));
+}
+
+FilterSample filter_min(FilterSample a, FilterSample b, FilterSample c)
+{
+ return FilterSample(min(a.color, min(c.color, b.color)), min(a.weight, min(c.weight, b.weight)));
+}
+
+FilterSample filter_max(FilterSample a, FilterSample b, FilterSample c)
+{
+ return FilterSample(max(a.color, max(c.color, b.color)), max(a.weight, max(c.weight, b.weight)));
+}
+
+FilterSample filter_median(FilterSample s1, FilterSample s2, FilterSample s3)
+{
+ /* From diagram, with nodes numbered from top to bottom. */
+ FilterSample l1 = filter_min(s2, s3);
+ FilterSample h1 = filter_max(s2, s3);
+ FilterSample h2 = filter_max(s1, l1);
+ FilterSample l3 = filter_min(h2, h1);
+ return l3;
+}
+
+struct FilterLmhResult {
+ FilterSample low;
+ FilterSample median;
+ FilterSample high;
+};
+
+FilterLmhResult filter_lmh(FilterSample s1, FilterSample s2, FilterSample s3)
+{
+ /* From diagram, with nodes numbered from top to bottom. */
+ FilterSample h1 = filter_max(s2, s3);
+ FilterSample l1 = filter_min(s2, s3);
+
+ FilterSample h2 = filter_max(s1, l1);
+ FilterSample l2 = filter_min(s1, l1);
+
+ FilterSample h3 = filter_max(h2, h1);
+ FilterSample l3 = filter_min(h2, h1);
+
+ FilterLmhResult result;
+ result.low = l2;
+ result.median = l3;
+ result.high = h3;
+
+ return result;
+}
+
+/** \} */
+
+void main()
+{
+ /**
+ * NOTE: We can **NOT** optimize by discarding some tiles as the result is sampled using bilinear
+ * filtering in the resolve pass. Not outputting to a tile means that border texels have
+ * undefined value and tile border will be noticeable in the final image.
+ */
+
+ cache_init();
+
+ ivec2 texel = ivec2(gl_LocalInvocationID.xy);
+
+ FilterLmhResult rows[3];
+ for (int y = 0; y < 3; y++) {
+ rows[y] = filter_lmh(cache_sample(texel.x + 0, texel.y + y),
+ cache_sample(texel.x + 1, texel.y + y),
+ cache_sample(texel.x + 2, texel.y + y));
+ }
+ /* Left nodes. */
+ FilterSample high = filter_max(rows[0].low, rows[1].low, rows[2].low);
+ /* Right nodes. */
+ FilterSample low = filter_min(rows[0].high, rows[1].high, rows[2].high);
+ /* Center nodes. */
+ FilterSample median = filter_median(rows[0].median, rows[1].median, rows[2].median);
+ /* Last bottom nodes. */
+ median = filter_median(low, median, high);
+
+ ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
+ imageStore(out_color_img, out_texel, median.color);
+ imageStore(out_weight_img, out_texel, vec4(median.weight));
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl
new file mode 100644
index 00000000000..cf8dd7a36e6
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl
@@ -0,0 +1,99 @@
+
+/**
+ * Gather pass: Convolve foreground and background parts in separate passes.
+ *
+ * Using the min&max CoC tile buffer, we select the best appropriate method to blur the scene
+ *color. A fast gather path is taken if there is not many CoC variation inside the tile.
+ *
+ * We sample using an octaweb sampling pattern. We randomize the kernel center and each ring
+ * rotation to ensure maximum coverage.
+ *
+ * Outputs:
+ * - Color * Weight, Weight, Occlusion 'CoC' Depth (mean and variance)
+ **/
+
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl)
+
+void main()
+{
+ ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy / DOF_TILES_SIZE);
+ CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co);
+ CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile);
+
+ float base_radius, min_radius, min_intersectable_radius;
+ bool can_early_out;
+ if (is_foreground) {
+ base_radius = -coc_tile.fg_min_coc;
+ min_radius = -coc_tile.fg_max_coc;
+ min_intersectable_radius = -coc_tile.fg_max_intersectable_coc;
+ can_early_out = !prediction.do_foreground;
+ }
+ else {
+ base_radius = coc_tile.bg_max_coc;
+ min_radius = coc_tile.bg_min_coc;
+ min_intersectable_radius = coc_tile.bg_min_intersectable_coc;
+ can_early_out = !prediction.do_background;
+ }
+
+ bool do_fast_gather = dof_do_fast_gather(base_radius, min_radius, is_foreground);
+
+ /* Gather at half resolution. Divide CoC by 2. */
+ base_radius *= 0.5;
+ min_intersectable_radius *= 0.5;
+
+ bool do_density_change = dof_do_density_change(base_radius, min_intersectable_radius);
+
+ vec4 out_color;
+ float out_weight;
+ vec2 out_occlusion;
+
+ if (can_early_out) {
+ out_color = vec4(0.0);
+ out_weight = 0.0;
+ out_occlusion = vec2(0.0, 0.0);
+ }
+ else if (do_fast_gather) {
+ dof_gather_accumulator(color_tx,
+ color_bilinear_tx,
+ coc_tx,
+ bokeh_lut_tx,
+ base_radius,
+ min_intersectable_radius,
+ true,
+ false,
+ out_color,
+ out_weight,
+ out_occlusion);
+ }
+ else if (do_density_change) {
+ dof_gather_accumulator(color_tx,
+ color_bilinear_tx,
+ coc_tx,
+ bokeh_lut_tx,
+ base_radius,
+ min_intersectable_radius,
+ false,
+ true,
+ out_color,
+ out_weight,
+ out_occlusion);
+ }
+ else {
+ dof_gather_accumulator(color_tx,
+ color_bilinear_tx,
+ coc_tx,
+ bokeh_lut_tx,
+ base_radius,
+ min_intersectable_radius,
+ false,
+ false,
+ out_color,
+ out_weight,
+ out_occlusion);
+ }
+
+ ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
+ imageStore(out_color_img, out_texel, out_color);
+ imageStore(out_weight_img, out_texel, vec4(out_weight));
+ imageStore(out_occlusion_img, out_texel, out_occlusion.xyxy);
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl
new file mode 100644
index 00000000000..5cdabbc2d4b
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl
@@ -0,0 +1,70 @@
+
+/**
+ * Holefill pass: Gather background parts where foreground is present.
+ *
+ * Using the min&max CoC tile buffer, we select the best appropriate method to blur the scene
+ *color. A fast gather path is taken if there is not many CoC variation inside the tile.
+ *
+ * We sample using an octaweb sampling pattern. We randomize the kernel center and each ring
+ * rotation to ensure maximum coverage.
+ **/
+
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl)
+
+void main()
+{
+ ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy / DOF_TILES_SIZE);
+ CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co);
+ CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile);
+
+ float base_radius = -coc_tile.fg_min_coc;
+ float min_radius = -coc_tile.fg_max_coc;
+ float min_intersectable_radius = dof_tile_large_coc;
+ bool can_early_out = !prediction.do_hole_fill;
+
+ bool do_fast_gather = dof_do_fast_gather(base_radius, min_radius, is_foreground);
+
+ /* Gather at half resolution. Divide CoC by 2. */
+ base_radius *= 0.5;
+ min_intersectable_radius *= 0.5;
+
+ bool do_density_change = dof_do_density_change(base_radius, min_intersectable_radius);
+
+ vec4 out_color = vec4(0.0);
+ float out_weight = 0.0;
+ vec2 unused_occlusion = vec2(0.0, 0.0);
+
+ if (can_early_out) {
+ /* Early out. */
+ }
+ else if (do_fast_gather) {
+ dof_gather_accumulator(color_tx,
+ color_bilinear_tx,
+ coc_tx,
+ coc_tx,
+ base_radius,
+ min_intersectable_radius,
+ true,
+ false,
+ out_color,
+ out_weight,
+ unused_occlusion);
+ }
+ else {
+ dof_gather_accumulator(color_tx,
+ color_bilinear_tx,
+ coc_tx,
+ coc_tx,
+ base_radius,
+ min_intersectable_radius,
+ false,
+ false,
+ out_color,
+ out_weight,
+ unused_occlusion);
+ }
+
+ ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
+ imageStore(out_color_img, out_texel, out_color);
+ imageStore(out_weight_img, out_texel, vec4(out_weight));
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl
new file mode 100644
index 00000000000..f89da641446
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl
@@ -0,0 +1,327 @@
+
+/**
+ * Depth of Field utils.
+ **/
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+
+/* -------------------------------------------------------------------- */
+/** \name Constants.
+ * \{ */
+
+#ifndef DOF_SLIGHT_FOCUS_DENSITY
+# define DOF_SLIGHT_FOCUS_DENSITY 2
+#endif
+
+#ifdef DOF_RESOLVE_PASS
+const bool is_resolve = true;
+#else
+const bool is_resolve = false;
+#endif
+#ifdef DOF_FOREGROUND_PASS
+const bool is_foreground = DOF_FOREGROUND_PASS;
+#else
+const bool is_foreground = false;
+#endif
+/* Debug options */
+const bool debug_gather_perf = false;
+const bool debug_scatter_perf = false;
+const bool debug_resolve_perf = false;
+
+const bool no_smooth_intersection = false;
+const bool no_gather_occlusion = false;
+const bool no_gather_mipmaps = false;
+const bool no_gather_random = false;
+const bool no_gather_filtering = false;
+const bool no_scatter_occlusion = false;
+const bool no_scatter_pass = false;
+const bool no_foreground_pass = false;
+const bool no_background_pass = false;
+const bool no_slight_focus_pass = false;
+const bool no_focus_pass = false;
+const bool no_hole_fill_pass = false;
+
+/* Distribute weights between near/slightfocus/far fields (slide 117). */
+const float dof_layer_threshold = 4.0;
+/* Make sure it overlaps. */
+const float dof_layer_offset_fg = 0.5 + 1.0;
+/* Extra offset for convolution layers to avoid light leaking from background. */
+const float dof_layer_offset = 0.5 + 0.5;
+
+const int dof_max_slight_focus_radius = DOF_MAX_SLIGHT_FOCUS_RADIUS;
+
+const vec2 quad_offsets[4] = vec2[4](
+ vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(0.5, -0.5), vec2(-0.5, -0.5));
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Weighting and downsampling utils.
+ * \{ */
+
+float dof_hdr_color_weight(vec4 color)
+{
+ /* Very fast "luma" weighting. */
+ float luma = (color.g * 2.0) + (color.r + color.b);
+ /* TODO(fclem) Pass correct exposure. */
+ const float exposure = 1.0;
+ return 1.0 / (luma * exposure + 4.0);
+}
+
+float dof_coc_select(vec4 cocs)
+{
+ /* Select biggest coc. */
+ float selected_coc = cocs.x;
+ if (abs(cocs.y) > abs(selected_coc)) {
+ selected_coc = cocs.y;
+ }
+ if (abs(cocs.z) > abs(selected_coc)) {
+ selected_coc = cocs.z;
+ }
+ if (abs(cocs.w) > abs(selected_coc)) {
+ selected_coc = cocs.w;
+ }
+ return selected_coc;
+}
+
+/* NOTE: Do not forget to normalize weights afterwards. */
+vec4 dof_bilateral_coc_weights(vec4 cocs)
+{
+ float chosen_coc = dof_coc_select(cocs);
+
+ const float scale = 4.0; /* TODO(fclem) revisit. */
+ /* NOTE: The difference between the cocs should be inside a abs() function,
+ * but we follow UE4 implementation to improve how dithered transparency looks (see slide 19). */
+ return saturate(1.0 - (chosen_coc - cocs) * scale);
+}
+
+/* NOTE: Do not forget to normalize weights afterwards. */
+vec4 dof_bilateral_color_weights(vec4 colors[4])
+{
+ vec4 weights;
+ for (int i = 0; i < 4; i++) {
+ weights[i] = dof_hdr_color_weight(colors[i]);
+ }
+ return weights;
+}
+
+/* Returns signed Circle of confusion radius (in pixel) based on depth buffer value [0..1]. */
+float dof_coc_from_depth(DepthOfFieldData dof_data, vec2 uv, float depth)
+{
+ if (is_panoramic(dof_data.camera_type)) {
+ /* Use radial depth. */
+ depth = -length(get_view_space_from_depth(uv, depth));
+ }
+ else {
+ depth = get_view_z_from_depth(depth);
+ }
+ return coc_radius_from_camera_depth(dof_data, depth);
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Gather & Scatter Weighting
+ * \{ */
+
+float dof_layer_weight(float coc, const bool is_foreground)
+{
+ /* NOTE: These are fullres pixel CoC value. */
+ if (is_resolve) {
+ return saturate(-abs(coc) + dof_layer_threshold + dof_layer_offset) *
+ float(is_foreground ? (coc <= 0.5) : (coc > -0.5));
+ }
+ else {
+ coc *= 2.0; /* Account for half pixel gather. */
+ float threshold = dof_layer_threshold -
+ ((is_foreground) ? dof_layer_offset_fg : dof_layer_offset);
+ return saturate(((is_foreground) ? -coc : coc) - threshold);
+ }
+}
+vec4 dof_layer_weight(vec4 coc)
+{
+ /* NOTE: Used for scatter pass which already flipped the sign correctly. */
+ coc *= 2.0; /* Account for half pixel gather. */
+ return saturate(coc - dof_layer_threshold + dof_layer_offset);
+}
+
+/* NOTE: This is halfres CoC radius. */
+float dof_sample_weight(float coc)
+{
+#if 1 /* Optimized */
+ return min(1.0, 1.0 / sqr(coc));
+#else
+ /* Full intensity if CoC radius is below the pixel footprint. */
+ const float min_coc = 1.0;
+ coc = max(min_coc, abs(coc));
+ return (M_PI * min_coc * min_coc) / (M_PI * coc * coc);
+#endif
+}
+vec4 dof_sample_weight(vec4 coc)
+{
+#if 1 /* Optimized */
+ return min(vec4(1.0), 1.0 / sqr(coc));
+#else
+ /* Full intensity if CoC radius is below the pixel footprint. */
+ const float min_coc = 1.0;
+ coc = max(vec4(min_coc), abs(coc));
+ return (M_PI * min_coc * min_coc) / (M_PI * coc * coc);
+#endif
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Circle of Confusion tiles
+ * \{ */
+
+struct CocTile {
+ float fg_min_coc;
+ float fg_max_coc;
+ float fg_max_intersectable_coc;
+ float bg_min_coc;
+ float bg_max_coc;
+ float bg_min_intersectable_coc;
+};
+
+/* WATCH: Might have to change depending on the texture format. */
+const float dof_tile_large_coc = 1024.0;
+
+/* Init a CoC tile for reduction algorithms. */
+CocTile dof_coc_tile_init()
+{
+ CocTile tile;
+ tile.fg_min_coc = 0.0;
+ tile.fg_max_coc = -dof_tile_large_coc;
+ tile.fg_max_intersectable_coc = dof_tile_large_coc;
+ tile.bg_min_coc = dof_tile_large_coc;
+ tile.bg_max_coc = 0.0;
+ tile.bg_min_intersectable_coc = dof_tile_large_coc;
+ return tile;
+}
+
+CocTile dof_coc_tile_unpack(vec3 fg, vec3 bg)
+{
+ CocTile tile;
+ tile.fg_min_coc = -fg.x;
+ tile.fg_max_coc = -fg.y;
+ tile.fg_max_intersectable_coc = -fg.z;
+ tile.bg_min_coc = bg.x;
+ tile.bg_max_coc = bg.y;
+ tile.bg_min_intersectable_coc = bg.z;
+ return tile;
+}
+
+/* WORKAROUND(fclem): GLSL compilers differs in what qualifiers are requires to pass images as
+ * parameters. Workaround by using defines. */
+#define dof_coc_tile_load(tiles_fg_img_, tiles_bg_img_, texel_) \
+ dof_coc_tile_unpack( \
+ imageLoad(tiles_fg_img_, clamp(texel_, ivec2(0), imageSize(tiles_fg_img_) - 1)).xyz, \
+ imageLoad(tiles_bg_img_, clamp(texel_, ivec2(0), imageSize(tiles_bg_img_) - 1)).xyz)
+
+void dof_coc_tile_pack(CocTile tile, out vec3 out_fg, out vec3 out_bg)
+{
+ out_fg.x = -tile.fg_min_coc;
+ out_fg.y = -tile.fg_max_coc;
+ out_fg.z = -tile.fg_max_intersectable_coc;
+ out_bg.x = tile.bg_min_coc;
+ out_bg.y = tile.bg_max_coc;
+ out_bg.z = tile.bg_min_intersectable_coc;
+}
+
+#define dof_coc_tile_store(tiles_fg_img_, tiles_bg_img_, texel_out_, tile_data_) \
+ if (true) { \
+ vec3 out_fg; \
+ vec3 out_bg; \
+ dof_coc_tile_pack(tile_data_, out_fg, out_bg); \
+ imageStore(tiles_fg_img_, texel_out_, out_fg.xyzz); \
+ imageStore(tiles_bg_img_, texel_out_, out_bg.xyzz); \
+ }
+
+bool dof_do_fast_gather(float max_absolute_coc, float min_absolute_coc, const bool is_foreground)
+{
+ float min_weight = dof_layer_weight((is_foreground) ? -min_absolute_coc : min_absolute_coc,
+ is_foreground);
+ if (min_weight < 1.0) {
+ return false;
+ }
+ /* FIXME(fclem): This is a workaround to fast gather triggering too early. Since we use custom
+ * opacity mask, the opacity is not given to be 100% even for after normal threshold. */
+ if (is_foreground && min_absolute_coc < dof_layer_threshold) {
+ return false;
+ }
+ return (max_absolute_coc - min_absolute_coc) < (DOF_FAST_GATHER_COC_ERROR * max_absolute_coc);
+}
+
+struct CocTilePrediction {
+ bool do_foreground;
+ bool do_slight_focus;
+ bool do_focus;
+ bool do_background;
+ bool do_hole_fill;
+};
+
+/**
+ * Using the tile CoC infos, predict which convolutions are required and the ones that can be
+ * skipped.
+ */
+CocTilePrediction dof_coc_tile_prediction_get(CocTile tile)
+{
+ /* Based on tile value, predict what pass we need to load. */
+ CocTilePrediction predict;
+
+ predict.do_foreground = (-tile.fg_min_coc > dof_layer_threshold - dof_layer_offset_fg);
+ bool fg_fully_opaque = predict.do_foreground &&
+ dof_do_fast_gather(-tile.fg_min_coc, -tile.fg_max_coc, true);
+ predict.do_background = !fg_fully_opaque &&
+ (tile.bg_max_coc > dof_layer_threshold - dof_layer_offset);
+ bool bg_fully_opaque = predict.do_background &&
+ dof_do_fast_gather(-tile.bg_max_coc, tile.bg_min_coc, false);
+ predict.do_hole_fill = !fg_fully_opaque && -tile.fg_min_coc > 0.0;
+ predict.do_focus = !fg_fully_opaque;
+ predict.do_slight_focus = !fg_fully_opaque;
+
+#if 0 /* Debug */
+ predict.do_foreground = predict.do_background = predict.do_hole_fill = true;
+#endif
+ return predict;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Gathering
+ * \{ */
+
+/**
+ * Generate samples in a square pattern with the ring radius. X is the center tile.
+ *
+ * Dist1 Dist2
+ * 6 5 4 3 2
+ * 3 2 1 7 1
+ * . X 0 . X 0
+ * . . . . .
+ * . . . . .
+ *
+ * Samples are expected to be mirrored to complete the pattern.
+ **/
+ivec2 dof_square_ring_sample_offset(int ring_distance, int sample_id)
+{
+ ivec2 offset;
+ if (sample_id < ring_distance) {
+ offset.x = ring_distance;
+ offset.y = sample_id;
+ }
+ else if (sample_id < ring_distance * 3) {
+ offset.x = ring_distance - sample_id + ring_distance;
+ offset.y = ring_distance;
+ }
+ else {
+ offset.x = -ring_distance;
+ offset.y = ring_distance - sample_id + 3 * ring_distance;
+ }
+ return offset;
+}
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
new file mode 100644
index 00000000000..a6426cd06e4
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
@@ -0,0 +1,247 @@
+
+/**
+ * Reduce copy pass: filter fireflies and split color between scatter and gather input.
+ *
+ * NOTE: The texture can end up being too big because of the mipmap padding. We correct for
+ * that during the convolution phase.
+ *
+ * Inputs:
+ * - Output of setup pass (halfres) and reduce downsample pass (quarter res).
+ * Outputs:
+ * - Halfres padded to avoid mipmap misalignment (so possibly not matching input size).
+ * - Gather input color (whole mip chain), Scatter rect list, Signed CoC (whole mip chain).
+ **/
+
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+
+/* NOTE: Do not compare alpha as it is not scattered by the scatter pass. */
+float dof_scatter_neighborhood_rejection(vec3 color)
+{
+ color = min(vec3(dof_buf.scatter_neighbor_max_color), color);
+
+ float validity = 0.0;
+
+ /* Centered in the middle of 4 quarter res texel. */
+ vec2 texel_size = 1.0 / vec2(textureSize(downsample_tx, 0).xy);
+ vec2 uv = ((vec2(gl_GlobalInvocationID.xy) + 0.5) * 0.5) * texel_size;
+
+ vec3 max_diff = vec3(0.0);
+ for (int i = 0; i < 4; i++) {
+ vec2 sample_uv = uv + quad_offsets[i] * texel_size;
+ vec3 ref = textureLod(downsample_tx, sample_uv, 0.0).rgb;
+
+ ref = min(vec3(dof_buf.scatter_neighbor_max_color), ref);
+ float diff = max_v3(max(vec3(0.0), abs(ref - color)));
+
+ const float rejection_threshold = 0.7;
+ diff = saturate(diff / rejection_threshold - 1.0);
+ validity = max(validity, diff);
+ }
+
+ return validity;
+}
+
+/* This avoids Bokeh sprite popping in and out at the screen border and
+ * drawing Bokeh sprites larger than the screen. */
+float dof_scatter_screen_border_rejection(float coc, ivec2 texel)
+{
+ vec2 screen_size = vec2(imageSize(inout_color_lod0_img));
+ vec2 uv = (vec2(texel) + 0.5) / screen_size;
+ vec2 screen_pos = uv * screen_size;
+ float min_screen_border_distance = min_v2(min(screen_pos, screen_size - screen_pos));
+ /* Fullres to halfres CoC. */
+ coc *= 0.5;
+ /* Allow 10px transition. */
+ const float rejection_hardeness = 1.0 / 10.0;
+ return saturate((min_screen_border_distance - abs(coc)) * rejection_hardeness + 1.0);
+}
+
+float dof_scatter_luminosity_rejection(vec3 color)
+{
+ const float rejection_hardness = 1.0;
+ return saturate(max_v3(color - dof_buf.scatter_color_threshold) * rejection_hardness);
+}
+
+float dof_scatter_coc_radius_rejection(float coc)
+{
+ const float rejection_hardness = 0.3;
+ return saturate((abs(coc) - dof_buf.scatter_coc_threshold) * rejection_hardness);
+}
+
+float fast_luma(vec3 color)
+{
+ return (2.0 * color.g) + color.r + color.b;
+}
+
+const uint cache_size = gl_WorkGroupSize.x;
+shared vec4 color_cache[cache_size][cache_size];
+shared float coc_cache[cache_size][cache_size];
+shared float do_scatter[cache_size][cache_size];
+
+void main()
+{
+ ivec2 texel = min(ivec2(gl_GlobalInvocationID.xy), imageSize(inout_color_lod0_img) - 1);
+ uvec2 texel_local = gl_LocalInvocationID.xy;
+ /* Increase readablility. */
+#define LOCAL_INDEX texel_local.y][texel_local.x
+#define LOCAL_OFFSET(x_, y_) texel_local.y + (y_)][texel_local.x + (x_)
+
+ /* Load level 0 into cache. */
+ color_cache[LOCAL_INDEX] = imageLoad(inout_color_lod0_img, texel);
+ coc_cache[LOCAL_INDEX] = imageLoad(in_coc_lod0_img, texel).r;
+
+ /* Only scatter if luminous enough. */
+ do_scatter[LOCAL_INDEX] = dof_scatter_luminosity_rejection(color_cache[LOCAL_INDEX].rgb);
+ /* Only scatter if CoC is big enough. */
+ do_scatter[LOCAL_INDEX] *= dof_scatter_coc_radius_rejection(coc_cache[LOCAL_INDEX]);
+ /* Only scatter if CoC is not too big to avoid performance issues. */
+ do_scatter[LOCAL_INDEX] *= dof_scatter_screen_border_rejection(coc_cache[LOCAL_INDEX], texel);
+ /* Only scatter if neighborhood is different enough. */
+ do_scatter[LOCAL_INDEX] *= dof_scatter_neighborhood_rejection(color_cache[LOCAL_INDEX].rgb);
+ /* For debugging. */
+ if (no_scatter_pass) {
+ do_scatter[LOCAL_INDEX] = 0.0;
+ }
+
+ barrier();
+
+ /* Add a scatter sprite for each 2x2 pixel neighborhood passing the threshold. */
+ if (all(equal(texel_local & 1u, uvec2(0)))) {
+ vec4 do_scatter4;
+ /* Follows quad_offsets order. */
+ do_scatter4.x = do_scatter[LOCAL_OFFSET(0, 1)];
+ do_scatter4.y = do_scatter[LOCAL_OFFSET(1, 1)];
+ do_scatter4.z = do_scatter[LOCAL_OFFSET(1, 0)];
+ do_scatter4.w = do_scatter[LOCAL_OFFSET(0, 0)];
+ if (any(greaterThan(do_scatter4, vec4(0.0)))) {
+ /* Apply energy conservation to anamorphic scattered bokeh. */
+ do_scatter4 *= max_v2(dof_buf.bokeh_anisotropic_scale_inv);
+
+ /* Circle of Confusion. */
+ vec4 coc4;
+ coc4.x = coc_cache[LOCAL_OFFSET(0, 1)];
+ coc4.y = coc_cache[LOCAL_OFFSET(1, 1)];
+ coc4.z = coc_cache[LOCAL_OFFSET(1, 0)];
+ coc4.w = coc_cache[LOCAL_OFFSET(0, 0)];
+ /* We are scattering at half resolution, so divide CoC by 2. */
+ coc4 *= 0.5;
+ /* Sprite center position. Center sprite around the 4 texture taps. */
+ vec2 offset = vec2(gl_GlobalInvocationID.xy) + 1;
+ /* Add 2.5 to max_coc because the max_coc may not be centered on the sprite origin
+ * and because we smooth the bokeh shape a bit in the pixel shader. */
+ vec2 half_extent = max_v4(abs(coc4)) * dof_buf.bokeh_anisotropic_scale + 2.5;
+ /* Issue a sprite for each field if any CoC matches. */
+ if (any(lessThan(do_scatter4 * sign(coc4), vec4(0.0)))) {
+ /* Same value for all threads. Not an issue if we don't sync access to it. */
+ scatter_fg_indirect_buf.vertex_len = 4u;
+ /* Issue 1 strip instance per sprite. */
+ uint rect_id = atomicAdd(scatter_fg_indirect_buf.instance_len, 1u);
+ if (rect_id < dof_buf.scatter_max_rect) {
+
+ vec4 coc4_fg = max(vec4(0.0), -coc4);
+ vec4 fg_weights = dof_layer_weight(coc4_fg) * dof_sample_weight(coc4_fg) * do_scatter4;
+ /* Filter NaNs. */
+ fg_weights = select(fg_weights, vec4(0.0), equal(coc4_fg, vec4(0.0)));
+
+ ScatterRect rect_fg;
+ rect_fg.offset = offset;
+ /* Negate extent to flip the sprite. Mimics optical phenomenon. */
+ rect_fg.half_extent = -half_extent;
+ /* NOTE: Since we fliped the quad along (1,-1) line, we need to also swap the (1,1) and
+ * (0,0) values so that quad_offsets is in the right order in the vertex shader. */
+
+ /* Circle of Confusion absolute radius in halfres pixels. */
+ rect_fg.color_and_coc[0].a = coc4_fg[0];
+ rect_fg.color_and_coc[1].a = coc4_fg[3];
+ rect_fg.color_and_coc[2].a = coc4_fg[2];
+ rect_fg.color_and_coc[3].a = coc4_fg[1];
+ /* Apply weights. */
+ rect_fg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * fg_weights[0];
+ rect_fg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * fg_weights[3];
+ rect_fg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * fg_weights[2];
+ rect_fg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * fg_weights[1];
+
+ scatter_fg_list_buf[rect_id] = rect_fg;
+ }
+ }
+ if (any(greaterThan(do_scatter4 * sign(coc4), vec4(0.0)))) {
+ /* Same value for all threads. Not an issue if we don't sync access to it. */
+ scatter_bg_indirect_buf.vertex_len = 4u;
+ /* Issue 1 strip instance per sprite. */
+ uint rect_id = atomicAdd(scatter_bg_indirect_buf.instance_len, 1u);
+ if (rect_id < dof_buf.scatter_max_rect) {
+ vec4 coc4_bg = max(vec4(0.0), coc4);
+ vec4 bg_weights = dof_layer_weight(coc4_bg) * dof_sample_weight(coc4_bg) * do_scatter4;
+ /* Filter NaNs. */
+ bg_weights = select(bg_weights, vec4(0.0), equal(coc4_bg, vec4(0.0)));
+
+ ScatterRect rect_bg;
+ rect_bg.offset = offset;
+ rect_bg.half_extent = half_extent;
+
+ /* Circle of Confusion absolute radius in halfres pixels. */
+ rect_bg.color_and_coc[0].a = coc4_bg[0];
+ rect_bg.color_and_coc[1].a = coc4_bg[1];
+ rect_bg.color_and_coc[2].a = coc4_bg[2];
+ rect_bg.color_and_coc[3].a = coc4_bg[3];
+ /* Apply weights. */
+ rect_bg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * bg_weights[0];
+ rect_bg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * bg_weights[1];
+ rect_bg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * bg_weights[2];
+ rect_bg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * bg_weights[3];
+
+ scatter_bg_list_buf[rect_id] = rect_bg;
+ }
+ }
+ }
+ }
+
+ /* Remove scatter color from gather. */
+ color_cache[LOCAL_INDEX].rgb *= 1.0 - do_scatter[LOCAL_INDEX];
+ imageStore(inout_color_lod0_img, texel, color_cache[LOCAL_INDEX]);
+
+ /* Recursive downsample. */
+ for (uint i = 1u; i < DOF_MIP_COUNT; i++) {
+ barrier();
+ uint mask = ~(~0u << i);
+ if (all(equal(gl_LocalInvocationID.xy & mask, uvec2(0)))) {
+ uint ofs = 1u << (i - 1u);
+
+ /* TODO(fclem): Could use wave shuffle intrinsics to avoid LDS as suggested by the paper. */
+ vec4 coc4;
+ coc4.x = coc_cache[LOCAL_OFFSET(0, ofs)];
+ coc4.y = coc_cache[LOCAL_OFFSET(ofs, ofs)];
+ coc4.z = coc_cache[LOCAL_OFFSET(ofs, 0)];
+ coc4.w = coc_cache[LOCAL_OFFSET(0, 0)];
+
+ vec4 colors[4];
+ colors[0] = color_cache[LOCAL_OFFSET(0, ofs)];
+ colors[1] = color_cache[LOCAL_OFFSET(ofs, ofs)];
+ colors[2] = color_cache[LOCAL_OFFSET(ofs, 0)];
+ colors[3] = color_cache[LOCAL_OFFSET(0, 0)];
+
+ vec4 weights = dof_bilateral_coc_weights(coc4);
+ weights *= dof_bilateral_color_weights(colors);
+ /* Normalize so that the sum is 1. */
+ weights *= safe_rcp(sum(weights));
+
+ color_cache[LOCAL_INDEX] = weighted_sum_array(colors, weights);
+ coc_cache[LOCAL_INDEX] = dot(coc4, weights);
+
+ ivec2 texel = ivec2(gl_GlobalInvocationID.xy >> i);
+
+ if (i == 1) {
+ imageStore(out_color_lod1_img, texel, color_cache[LOCAL_INDEX]);
+ imageStore(out_coc_lod1_img, texel, vec4(coc_cache[LOCAL_INDEX]));
+ }
+ else if (i == 2) {
+ imageStore(out_color_lod2_img, texel, color_cache[LOCAL_INDEX]);
+ imageStore(out_coc_lod2_img, texel, vec4(coc_cache[LOCAL_INDEX]));
+ }
+ else /* if (i == 3) */ {
+ imageStore(out_color_lod3_img, texel, color_cache[LOCAL_INDEX]);
+ imageStore(out_coc_lod3_img, texel, vec4(coc_cache[LOCAL_INDEX]));
+ }
+ }
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl
new file mode 100644
index 00000000000..5123eb0c238
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl
@@ -0,0 +1,178 @@
+
+/**
+ * Recombine Pass: Load separate convolution layer and composite with self
+ * slight defocus convolution and in-focus fields.
+ *
+ * The halfres gather methods are fast but lack precision for small CoC areas.
+ * To fix this we do a bruteforce gather to have a smooth transition between
+ * in-focus and defocus regions.
+ */
+
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl)
+
+shared uint shared_max_slight_focus_abs_coc;
+
+/**
+ * Returns The max CoC in the Slight Focus range inside this compute tile.
+ */
+float dof_slight_focus_coc_tile_get(vec2 frag_coord)
+{
+ if (all(equal(gl_LocalInvocationID, uvec3(0)))) {
+ shared_max_slight_focus_abs_coc = floatBitsToUint(0.0);
+ }
+ barrier();
+
+ float local_abs_max = 0.0;
+ /* Sample in a cross (X) pattern. This covers all pixels over the whole tile, as long as
+ * dof_max_slight_focus_radius is less than the group size. */
+ for (int i = 0; i < 4; i++) {
+ vec2 sample_uv = (frag_coord + quad_offsets[i] * 2.0 * dof_max_slight_focus_radius) /
+ vec2(textureSize(color_tx, 0));
+ float coc = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r);
+ coc = clamp(coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max);
+ if (abs(coc) < dof_max_slight_focus_radius) {
+ local_abs_max = max(local_abs_max, abs(coc));
+ }
+ }
+ /* Use atomic reduce operation. */
+ atomicMax(shared_max_slight_focus_abs_coc, floatBitsToUint(local_abs_max));
+ /* "Broadcast" result across all threads. */
+ barrier();
+
+ return uintBitsToFloat(shared_max_slight_focus_abs_coc);
+}
+
+vec3 dof_neighborhood_clamp(vec2 frag_coord, vec3 color, float center_coc, float weight)
+{
+ /* Stabilize color by clamping with the stable half res neighborhood. */
+ vec3 neighbor_min, neighbor_max;
+ const vec2 corners[4] = vec2[4](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1));
+ for (int i = 0; i < 4; i++) {
+ /**
+ * Visit the 4 half-res texels around (and containing) the fullres texel.
+ * Here a diagram of a fullscreen texel (f) in the bottom left corner of a half res texel.
+ * We sample the stable half-resolution texture at the 4 location denoted by (h).
+ * ┌───────┬───────┐
+ * │ h │ h │
+ * │ │ │
+ * │ │ f │
+ * ├───────┼───────┤
+ * │ h │ h │
+ * │ │ │
+ * │ │ │
+ * └───────┴───────┘
+ */
+ vec2 uv_sample = ((frag_coord + corners[i]) * 0.5) / vec2(textureSize(stable_color_tx, 0));
+ /* Reminder: The content of this buffer is YCoCg + CoC. */
+ vec3 ycocg_sample = textureLod(stable_color_tx, uv_sample, 0.0).rgb;
+ neighbor_min = (i == 0) ? ycocg_sample : min(neighbor_min, ycocg_sample);
+ neighbor_max = (i == 0) ? ycocg_sample : max(neighbor_max, ycocg_sample);
+ }
+ /* Pad the bounds in the near in focus region to get back a bit of detail. */
+ float padding = 0.125 * saturate(1.0 - sqr(center_coc) / sqr(8.0));
+ neighbor_max += abs(neighbor_min) * padding;
+ neighbor_min -= abs(neighbor_min) * padding;
+ /* Progressively apply the clamp to avoid harsh transition. Also mask by weight. */
+ float fac = saturate(sqr(center_coc) * 4.0) * weight;
+ /* Clamp in YCoCg space to avoid too much color drift. */
+ color = colorspace_YCoCg_from_scene_linear(color);
+ color = mix(color, clamp(color, neighbor_min, neighbor_max), fac);
+ color = colorspace_scene_linear_from_YCoCg(color);
+ return color;
+}
+
+void main()
+{
+ vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5;
+ ivec2 tile_co = ivec2(frag_coord / float(DOF_TILES_SIZE * 2));
+
+ CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co);
+ CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile);
+
+ vec2 uv = frag_coord / vec2(textureSize(color_tx, 0));
+ vec2 uv_halfres = (frag_coord * 0.5) / vec2(textureSize(color_bg_tx, 0));
+
+ float slight_focus_max_coc = 0.0;
+ if (prediction.do_slight_focus) {
+ slight_focus_max_coc = dof_slight_focus_coc_tile_get(frag_coord);
+ prediction.do_slight_focus = slight_focus_max_coc >= 0.5;
+ if (prediction.do_slight_focus) {
+ prediction.do_focus = false;
+ }
+ }
+
+ if (prediction.do_focus) {
+ float center_coc = (dof_coc_from_depth(dof_buf, uv, textureLod(depth_tx, uv, 0.0).r));
+ prediction.do_focus = abs(center_coc) <= 0.5;
+ }
+
+ vec4 out_color = vec4(0.0);
+ float weight = 0.0;
+
+ vec4 layer_color;
+ float layer_weight;
+
+ if (!no_hole_fill_pass && prediction.do_hole_fill) {
+ layer_color = textureLod(color_hole_fill_tx, uv_halfres, 0.0);
+ layer_weight = textureLod(weight_hole_fill_tx, uv_halfres, 0.0).r;
+ out_color = layer_color * safe_rcp(layer_weight);
+ weight = float(layer_weight > 0.0);
+ }
+
+ if (!no_background_pass && prediction.do_background) {
+ layer_color = textureLod(color_bg_tx, uv_halfres, 0.0);
+ layer_weight = textureLod(weight_bg_tx, uv_halfres, 0.0).r;
+ /* Always prefer background to hole_fill pass. */
+ layer_color *= safe_rcp(layer_weight);
+ layer_weight = float(layer_weight > 0.0);
+ /* Composite background. */
+ out_color = out_color * (1.0 - layer_weight) + layer_color;
+ weight = weight * (1.0 - layer_weight) + layer_weight;
+ /* Fill holes with the composited background. */
+ out_color *= safe_rcp(weight);
+ weight = float(weight > 0.0);
+ }
+
+ if (!no_slight_focus_pass && prediction.do_slight_focus) {
+ float center_coc;
+ dof_slight_focus_gather(depth_tx,
+ color_tx,
+ bokeh_lut_tx,
+ slight_focus_max_coc,
+ layer_color,
+ layer_weight,
+ center_coc);
+
+ /* Composite slight defocus. */
+ out_color = out_color * (1.0 - layer_weight) + layer_color;
+ weight = weight * (1.0 - layer_weight) + layer_weight;
+
+ out_color.rgb = dof_neighborhood_clamp(frag_coord, out_color.rgb, center_coc, layer_weight);
+ }
+
+ if (!no_focus_pass && prediction.do_focus) {
+ layer_color = safe_color(textureLod(color_tx, uv, 0.0));
+ layer_weight = 1.0;
+ /* Composite in focus. */
+ out_color = out_color * (1.0 - layer_weight) + layer_color;
+ weight = weight * (1.0 - layer_weight) + layer_weight;
+ }
+
+ if (!no_foreground_pass && prediction.do_foreground) {
+ layer_color = textureLod(color_fg_tx, uv_halfres, 0.0);
+ layer_weight = textureLod(weight_fg_tx, uv_halfres, 0.0).r;
+ /* Composite foreground. */
+ out_color = out_color * (1.0 - layer_weight) + layer_color;
+ }
+
+ /* Fix float precision issue in alpha compositing. */
+ if (out_color.a > 0.99) {
+ out_color.a = 1.0;
+ }
+
+ if (debug_resolve_perf && prediction.do_slight_focus) {
+ out_color.rgb *= vec3(1.0, 0.1, 0.1);
+ }
+
+ imageStore(out_color_img, ivec2(gl_GlobalInvocationID.xy), out_color);
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl
new file mode 100644
index 00000000000..cfb7fd2568b
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl
@@ -0,0 +1,62 @@
+
+/**
+ * Scatter pass: Use sprites to scatter the color of very bright pixel to have higher quality blur.
+ *
+ * We only scatter one quad per sprite and one sprite per 4 pixels to reduce vertex shader
+ * invocations and overdraw.
+ */
+
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+
+#define linearstep(p0, p1, v) (clamp(((v) - (p0)) / abs((p1) - (p0)), 0.0, 1.0))
+
+void main()
+{
+ vec4 coc4 = vec4(interp.color_and_coc1.w,
+ interp.color_and_coc2.w,
+ interp.color_and_coc3.w,
+ interp.color_and_coc4.w);
+ vec4 shapes;
+ if (use_bokeh_lut) {
+ shapes = vec4(texture(bokeh_lut_tx, interp.rect_uv1).r,
+ texture(bokeh_lut_tx, interp.rect_uv2).r,
+ texture(bokeh_lut_tx, interp.rect_uv3).r,
+ texture(bokeh_lut_tx, interp.rect_uv4).r);
+ }
+ else {
+ shapes = vec4(length(interp.rect_uv1),
+ length(interp.rect_uv2),
+ length(interp.rect_uv3),
+ length(interp.rect_uv4));
+ }
+ shapes *= interp.distance_scale;
+ /* Becomes signed distance field in pixel units. */
+ shapes -= coc4;
+ /* Smooth the edges a bit to fade out the undersampling artifacts. */
+ shapes = saturate(1.0 - linearstep(-0.8, 0.8, shapes));
+ /* Outside of bokeh shape. Try to avoid overloading ROPs. */
+ if (max_v4(shapes) == 0.0) {
+ discard;
+ }
+
+ if (!no_scatter_occlusion) {
+ /* Works because target is the same size as occlusion_tx. */
+ vec2 uv = gl_FragCoord.xy / vec2(textureSize(occlusion_tx, 0).xy);
+ vec2 occlusion_data = texture(occlusion_tx, uv).rg;
+ /* Fix tilling artifacts. (Slide 90) */
+ const float correction_fac = 1.0 - DOF_FAST_GATHER_COC_ERROR;
+ /* Occlude the sprite with geometry from the same field using a chebychev test (slide 85). */
+ float mean = occlusion_data.x;
+ float variance = occlusion_data.y;
+ shapes *= variance * safe_rcp(variance + sqr(max(coc4 * correction_fac - mean, 0.0)));
+ }
+
+ out_color = (interp.color_and_coc1 * shapes[0] + interp.color_and_coc2 * shapes[1] +
+ interp.color_and_coc3 * shapes[2] + interp.color_and_coc4 * shapes[3]);
+ /* Do not accumulate alpha. This has already been accumulated by the gather pass. */
+ out_color.a = 0.0;
+
+ if (debug_scatter_perf) {
+ out_color.rgb = avg(out_color.rgb) * vec3(1.0, 0.0, 0.0);
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl
new file mode 100644
index 00000000000..d870496a06c
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl
@@ -0,0 +1,45 @@
+
+/**
+ * Scatter pass: Use sprites to scatter the color of very bright pixel to have higher quality blur.
+ *
+ * We only scatter one triangle per sprite and one sprite per 4 pixels to reduce vertex shader
+ * invocations and overdraw.
+ **/
+
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+
+void main()
+{
+ ScatterRect rect = scatter_list_buf[gl_InstanceID];
+
+ interp.color_and_coc1 = rect.color_and_coc[0];
+ interp.color_and_coc2 = rect.color_and_coc[1];
+ interp.color_and_coc3 = rect.color_and_coc[2];
+ interp.color_and_coc4 = rect.color_and_coc[3];
+
+ vec2 uv = vec2(gl_VertexID & 1, gl_VertexID >> 1) * 2.0 - 1.0;
+ uv = uv * rect.half_extent;
+
+ gl_Position = vec4(uv + rect.offset, 0.0, 1.0);
+ /* NDC range [-1..1]. */
+ gl_Position.xy = (gl_Position.xy / vec2(textureSize(occlusion_tx, 0).xy)) * 2.0 - 1.0;
+
+ if (use_bokeh_lut) {
+ /* Bias scale to avoid sampling at the texture's border. */
+ interp.distance_scale = (float(DOF_BOKEH_LUT_SIZE) / float(DOF_BOKEH_LUT_SIZE - 1));
+ vec2 uv_div = 1.0 / (interp.distance_scale * abs(rect.half_extent));
+ interp.rect_uv1 = ((uv + quad_offsets[0]) * uv_div) * 0.5 + 0.5;
+ interp.rect_uv2 = ((uv + quad_offsets[1]) * uv_div) * 0.5 + 0.5;
+ interp.rect_uv3 = ((uv + quad_offsets[2]) * uv_div) * 0.5 + 0.5;
+ interp.rect_uv4 = ((uv + quad_offsets[3]) * uv_div) * 0.5 + 0.5;
+ /* Only for sampling. */
+ interp.distance_scale *= max_v2(abs(rect.half_extent));
+ }
+ else {
+ interp.distance_scale = 1.0;
+ interp.rect_uv1 = uv + quad_offsets[0];
+ interp.rect_uv2 = uv + quad_offsets[1];
+ interp.rect_uv3 = uv + quad_offsets[2];
+ interp.rect_uv4 = uv + quad_offsets[3];
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl
new file mode 100644
index 00000000000..c017a5aa965
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl
@@ -0,0 +1,46 @@
+
+/**
+ * Setup pass: CoC and luma aware downsample to half resolution of the input scene color buffer.
+ *
+ * An addition to the downsample CoC, we output the maximum slight out of focus CoC to be
+ * sure we don't miss a pixel.
+ *
+ * Input:
+ * Full-resolution color & depth buffer
+ * Output:
+ * Half-resolution Color, signed CoC (out_coc.x), and max slight focus abs CoC (out_coc.y).
+ **/
+
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+
+void main()
+{
+ vec2 fullres_texel_size = 1.0 / vec2(textureSize(color_tx, 0).xy);
+ /* Center uv around the 4 fullres pixels. */
+ vec2 quad_center = vec2(gl_GlobalInvocationID.xy * 2 + 1) * fullres_texel_size;
+
+ vec4 colors[4];
+ vec4 cocs;
+ for (int i = 0; i < 4; i++) {
+ vec2 sample_uv = quad_center + quad_offsets[i] * fullres_texel_size;
+ /* NOTE: We use samplers without filtering. */
+ colors[i] = safe_color(textureLod(color_tx, sample_uv, 0.0));
+ cocs[i] = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r);
+ }
+
+ cocs = clamp(cocs, -dof_buf.coc_abs_max, dof_buf.coc_abs_max);
+
+ vec4 weights = dof_bilateral_coc_weights(cocs);
+ weights *= dof_bilateral_color_weights(colors);
+ /* Normalize so that the sum is 1. */
+ weights *= safe_rcp(sum(weights));
+
+ ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
+ vec4 out_color = weighted_sum_array(colors, weights);
+ imageStore(out_color_img, out_texel, out_color);
+
+ float out_coc = dot(cocs, weights);
+ imageStore(out_coc_img, out_texel, vec4(out_coc));
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
new file mode 100644
index 00000000000..46a25b84840
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
@@ -0,0 +1,367 @@
+
+/**
+ * Temporal Stabilization of the Depth of field input.
+ * Corresponds to the TAA pass in the paper.
+ * We actually duplicate the TAA logic but with a few changes:
+ * - We run this pass at half resolution.
+ * - We store CoC instead of Opacity in the alpha channel of the history.
+ *
+ * This is and adaption of the code found in eevee_film_lib.glsl
+ *
+ * Inputs:
+ * - Output of setup pass (halfres).
+ * Outputs:
+ * - Stabilized Color and CoC (halfres).
+ **/
+
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
+
+struct DofSample {
+ vec4 color;
+ float coc;
+};
+
+/* -------------------------------------------------------------------- */
+/** \name LDS Cache
+ * \{ */
+
+const uint cache_size = gl_WorkGroupSize.x + 2;
+shared vec4 color_cache[cache_size][cache_size];
+shared float coc_cache[cache_size][cache_size];
+/* Need 2 pixel border for depth. */
+const uint cache_depth_size = gl_WorkGroupSize.x + 4;
+shared float depth_cache[cache_depth_size][cache_depth_size];
+
+void dof_cache_init()
+{
+ /**
+ * Load enough values into LDS to perform the filter.
+ *
+ * ┌──────────────────────────────┐
+ * │ │ < Border texels that needs to be loaded.
+ * │ x x x x x x x x │ ─┐
+ * │ x x x x x x x x │ │
+ * │ x x x x x x x x │ │
+ * │ x x x x x x x x │ │ Thread Group Size 8x8.
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ │
+ * │ L L L L L x x x x │ ─┘
+ * │ L L L L L │ < Border texels that needs to be loaded.
+ * └──────────────────────────────┘
+ * └───────────┘
+ * Load using 5x5 threads.
+ */
+
+ ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+ for (int y = 0; y < 2; y++) {
+ for (int x = 0; x < 2; x++) {
+ /* 1 Pixel border. */
+ if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) {
+ ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u);
+ ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+ ivec2 load_texel = clamp(texel + offset - 1, ivec2(0), textureSize(color_tx, 0) - 1);
+
+ vec4 color = texelFetch(color_tx, load_texel, 0);
+ color_cache[cache_texel.y][cache_texel.x] = colorspace_YCoCg_from_scene_linear(color);
+ coc_cache[cache_texel.y][cache_texel.x] = texelFetch(coc_tx, load_texel, 0).x;
+ }
+ /* 2 Pixels border. */
+ if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_depth_size / 2u)))) {
+ ivec2 offset = ivec2(x, y) * ivec2(cache_depth_size / 2u);
+ ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset;
+ /* Depth is fullres. Load every 2 pixels. */
+ ivec2 load_texel = clamp((texel + offset - 2) * 2, ivec2(0), textureSize(depth_tx, 0) - 1);
+
+ depth_cache[cache_texel.y][cache_texel.x] = texelFetch(depth_tx, load_texel, 0).x;
+ }
+ }
+ }
+ barrier();
+}
+
+/* NOTE: Sample color space is already in YCoCg space. */
+DofSample dof_fetch_input_sample(ivec2 offset)
+{
+ ivec2 coord = offset + 1 + ivec2(gl_LocalInvocationID.xy);
+ return DofSample(color_cache[coord.y][coord.x], coc_cache[coord.y][coord.x]);
+}
+
+float dof_fetch_half_depth(ivec2 offset)
+{
+ ivec2 coord = offset + 2 + ivec2(gl_LocalInvocationID.xy);
+ return depth_cache[coord.y][coord.x];
+}
+
+/** \} */
+
+float dof_luma_weight(float luma)
+{
+ /* Slide 20 of "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014. */
+ /* To preserve more details in dark areas, we use a bigger bias. */
+ const float exposure_scale = 1.0; /* TODO. */
+ return 1.0 / (4.0 + luma * exposure_scale);
+}
+
+float dof_bilateral_weight(float reference_coc, float sample_coc)
+{
+ /* NOTE: The difference between the cocs should be inside a abs() function,
+ * but we follow UE4 implementation to improve how dithered transparency looks (see slide 19).
+ * Effectively bleed background into foreground.
+ * Compared to dof_bilateral_coc_weights() this saturates as 2x the reference CoC. */
+ return saturate(1.0 - (sample_coc - reference_coc) / max(1.0, abs(reference_coc)));
+}
+
+DofSample dof_spatial_filtering()
+{
+ /* Plus (+) shape offsets. */
+ const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
+ DofSample center = dof_fetch_input_sample(ivec2(0));
+ DofSample accum = DofSample(vec4(0.0), 0.0);
+ float accum_weight = 0.0;
+ for (int i = 0; i < 4; i++) {
+ DofSample samp = dof_fetch_input_sample(plus_offsets[i]);
+ float weight = dof_buf.filter_samples_weight[i] * dof_luma_weight(samp.color.x) *
+ dof_bilateral_weight(center.coc, samp.coc);
+
+ accum.color += samp.color * weight;
+ accum.coc += samp.coc * weight;
+ accum_weight += weight;
+ }
+ /* Accumulate center sample last as it does not need bilateral_weights. */
+ float weight = dof_buf.filter_center_weight * dof_luma_weight(center.color.x);
+ accum.color += center.color * weight;
+ accum.coc += center.coc * weight;
+ accum_weight += weight;
+
+ float rcp_weight = 1.0 / accum_weight;
+ accum.color *= rcp_weight;
+ accum.coc *= rcp_weight;
+ return accum;
+}
+
+struct DofNeighborhoodMinMax {
+ DofSample min;
+ DofSample max;
+};
+
+/* Return history clipping bounding box in YCoCg color space. */
+DofNeighborhoodMinMax dof_neighbor_boundbox()
+{
+ /* Plus (+) shape offsets. */
+ const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
+ /**
+ * Simple bounding box calculation in YCoCg as described in:
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014
+ */
+ DofSample min_c = dof_fetch_input_sample(ivec2(0));
+ DofSample max_c = min_c;
+ for (int i = 0; i < 4; i++) {
+ DofSample samp = dof_fetch_input_sample(plus_offsets[i]);
+ min_c.color = min(min_c.color, samp.color);
+ max_c.color = max(max_c.color, samp.color);
+ min_c.coc = min(min_c.coc, samp.coc);
+ max_c.coc = max(max_c.coc, samp.coc);
+ }
+ /* (Slide 32) Simple clamp to min/max of 8 neighbors results in 3x3 box artifacts.
+ * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. */
+ DofSample min_c_3x3 = min_c;
+ DofSample max_c_3x3 = max_c;
+ const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
+ for (int i = 0; i < 4; i++) {
+ DofSample samp = dof_fetch_input_sample(corners[i]);
+ min_c_3x3.color = min(min_c_3x3.color, samp.color);
+ max_c_3x3.color = max(max_c_3x3.color, samp.color);
+ min_c_3x3.coc = min(min_c_3x3.coc, samp.coc);
+ max_c_3x3.coc = max(max_c_3x3.coc, samp.coc);
+ }
+ min_c.color = (min_c.color + min_c_3x3.color) * 0.5;
+ max_c.color = (max_c.color + max_c_3x3.color) * 0.5;
+ min_c.coc = (min_c.coc + min_c_3x3.coc) * 0.5;
+ max_c.coc = (max_c.coc + max_c_3x3.coc) * 0.5;
+
+ return DofNeighborhoodMinMax(min_c, max_c);
+}
+
+/* Returns motion in pixel space to retrieve the pixel history. */
+vec2 dof_pixel_history_motion_vector(ivec2 texel_sample)
+{
+ /**
+ * Dilate velocity by using the nearest pixel in a cross pattern.
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 27)
+ */
+ const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
+ float min_depth = dof_fetch_half_depth(ivec2(0));
+ ivec2 nearest_texel = ivec2(0);
+ for (int i = 0; i < 4; i++) {
+ float depth = dof_fetch_half_depth(corners[i]);
+ if (min_depth > depth) {
+ min_depth = depth;
+ nearest_texel = corners[i];
+ }
+ }
+ /* Convert to full resolution buffer pixel. */
+ ivec2 velocity_texel = (texel_sample + nearest_texel) * 2;
+ velocity_texel = clamp(velocity_texel, ivec2(0), textureSize(velocity_tx, 0).xy - 1);
+ vec4 vector = velocity_resolve(velocity_tx, velocity_texel, min_depth);
+ /* Transform to **half** pixel space. */
+ return vector.xy * vec2(textureSize(color_tx, 0));
+}
+
+/* Load color using a special filter to avoid losing detail.
+ * \a texel is sample position with subpixel accuracy. */
+DofSample dof_sample_history(vec2 input_texel)
+{
+#if 1 /* Bilinar. */
+ vec2 uv = vec2(input_texel + 0.5) / textureSize(in_history_tx, 0);
+ vec4 color = textureLod(in_history_tx, uv, 0.0);
+
+#else /* Catmull Rom interpolation. 5 Bilinear Taps. */
+ vec2 center_texel;
+ vec2 inter_texel = modf(input_texel, center_texel);
+ vec2 weights[4];
+ film_get_catmull_rom_weights(inter_texel, weights);
+
+ /**
+ * Use optimized version by leveraging bilinear filtering from hardware sampler and by removing
+ * corner taps.
+ * From "Filmic SMAA" by Jorge Jimenez at Siggraph 2016
+ * http://advances.realtimerendering.com/s2016/Filmic%20SMAA%20v7.pptx
+ */
+ center_texel += 0.5;
+
+ /* Slide 92. */
+ vec2 weight_12 = weights[1] + weights[2];
+ vec2 uv_12 = (center_texel + weights[2] / weight_12) * film_buf.extent_inv;
+ vec2 uv_0 = (center_texel - 1.0) * film_buf.extent_inv;
+ vec2 uv_3 = (center_texel + 2.0) * film_buf.extent_inv;
+
+ vec4 color;
+ vec4 weight_cross = weight_12.xyyx * vec4(weights[0].yx, weights[3].xy);
+ float weight_center = weight_12.x * weight_12.y;
+
+ color = textureLod(in_history_tx, uv_12, 0.0) * weight_center;
+ color += textureLod(in_history_tx, vec2(uv_12.x, uv_0.y), 0.0) * weight_cross.x;
+ color += textureLod(in_history_tx, vec2(uv_0.x, uv_12.y), 0.0) * weight_cross.y;
+ color += textureLod(in_history_tx, vec2(uv_3.x, uv_12.y), 0.0) * weight_cross.z;
+ color += textureLod(in_history_tx, vec2(uv_12.x, uv_3.y), 0.0) * weight_cross.w;
+ /* Re-normalize for the removed corners. */
+ color /= (weight_center + sum(weight_cross));
+#endif
+ /* NOTE(fclem): Opacity is wrong on purpose. Final Opacity does not rely on history. */
+ return DofSample(color.xyzz, color.w);
+}
+
+/* Modulate the history color to avoid ghosting artifact. */
+DofSample dof_amend_history(DofNeighborhoodMinMax bbox, DofSample history, DofSample src)
+{
+#if 0
+ /* Clip instead of clamping to avoid color accumulating in the AABB corners. */
+ vec3 clip_dir = src.color.rgb - history.color.rgb;
+
+ float t = line_aabb_clipping_dist(
+ history.color.rgb, clip_dir, bbox.min.color.rgb, bbox.max.color.rgb);
+ history.color.rgb += clip_dir * saturate(t);
+#else
+ /* More responsive. */
+ history.color = clamp(history.color, bbox.min.color, bbox.max.color);
+#endif
+ /* Clamp CoC to reduce convergence time. Otherwise the result is laggy. */
+ history.coc = clamp(history.coc, bbox.min.coc, bbox.max.coc);
+
+ return history;
+}
+
+float dof_history_blend_factor(
+ float velocity, vec2 texel, DofNeighborhoodMinMax bbox, DofSample src, DofSample dst)
+{
+ float luma_min = bbox.min.color.x;
+ float luma_max = bbox.max.color.x;
+ float luma_incoming = src.color.x;
+ float luma_history = dst.color.x;
+
+ /* 5% of incoming color by default. */
+ float blend = 0.05;
+ /* Blend less history if the pixel has substantial velocity. */
+ /* NOTE(fclem): velocity threshold multiplied by 2 because of half resolution. */
+ blend = mix(blend, 0.20, saturate(velocity * 0.02 * 2.0));
+ /**
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 43)
+ * Bias towards history if incoming pixel is near clamping. Reduces flicker.
+ */
+ float distance_to_luma_clip = min_v2(vec2(luma_history - luma_min, luma_max - luma_history));
+ /* Divide by bbox size to get a factor. 2 factor to compensate the line above. */
+ distance_to_luma_clip *= 2.0 * safe_rcp(luma_max - luma_min);
+ /* Linearly blend when history gets below to 25% of the bbox size. */
+ blend *= saturate(distance_to_luma_clip * 4.0 + 0.1);
+ /* Progressively discard history until history CoC is twice as big as the filtered CoC.
+ * Note we use absolute diff here because we are not comparing neighbors and thus do not risk to
+ * dilate thin features like hair (slide 19). */
+ float coc_diff_ratio = saturate(abs(src.coc - dst.coc) / max(1.0, abs(src.coc)));
+ blend = mix(blend, 1.0, coc_diff_ratio);
+ /* Discard out of view history. */
+ if (any(lessThan(texel, vec2(0))) ||
+ any(greaterThanEqual(texel, vec2(imageSize(out_history_img))))) {
+ blend = 1.0;
+ }
+ /* Discard history if invalid. */
+ if (use_history == false) {
+ blend = 1.0;
+ }
+ return blend;
+}
+
+void main()
+{
+ dof_cache_init();
+
+ ivec2 src_texel = ivec2(gl_GlobalInvocationID.xy);
+
+ /**
+ * Naming convention is taken from the film implementation.
+ * SRC is incoming new data.
+ * DST is history data.
+ */
+ DofSample src = dof_spatial_filtering();
+
+ /* Reproject by finding where this pixel was in the previous frame. */
+ vec2 motion = dof_pixel_history_motion_vector(src_texel);
+ vec2 history_texel = vec2(src_texel) + motion;
+
+ float velocity = length(motion);
+
+ DofSample dst = dof_sample_history(history_texel);
+
+ /* Get local color bounding box of source neighborhood. */
+ DofNeighborhoodMinMax bbox = dof_neighbor_boundbox();
+
+ float blend = dof_history_blend_factor(velocity, history_texel, bbox, src, dst);
+
+ dst = dof_amend_history(bbox, dst, src);
+
+ /* Luma weighted blend to reduce flickering. */
+ float weight_dst = dof_luma_weight(dst.color.x) * (1.0 - blend);
+ float weight_src = dof_luma_weight(src.color.x) * (blend);
+
+ DofSample result;
+ /* Weighted blend. */
+ result.color = vec4(dst.color.rgb, dst.coc) * weight_dst +
+ vec4(src.color.rgb, src.coc) * weight_src;
+ result.color /= weight_src + weight_dst;
+
+ /* Save history for next iteration. Still in YCoCg space with CoC in alpha. */
+ imageStore(out_history_img, src_texel, result.color);
+
+ /* Un-swizzle. */
+ result.coc = result.color.a;
+ /* Clamp opacity since we don't store it in history. */
+ result.color.a = clamp(src.color.a, bbox.min.color.a, bbox.max.color.a);
+
+ result.color = colorspace_scene_linear_from_YCoCg(result.color);
+
+ imageStore(out_color_img, src_texel, result.color);
+ imageStore(out_coc_img, src_texel, vec4(result.coc));
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl
new file mode 100644
index 00000000000..dba8b5fd79d
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl
@@ -0,0 +1,97 @@
+
+/**
+ * Tile dilate pass: Takes the 8x8 Tiles buffer and converts dilates the tiles with large CoC to
+ * their neighborhood. This pass is repeated multiple time until the maximum CoC can be covered.
+ *
+ * Input & Output:
+ * - Separated foreground and background CoC. 1/8th of half-res resolution. So 1/16th of full-res.
+ **/
+
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+
+/* Error introduced by the random offset of the gathering kernel's center. */
+const float bluring_radius_error = 1.0 + 1.0 / (float(DOF_GATHER_RING_COUNT) + 0.5);
+const float tile_to_fullres_factor = float(DOF_TILES_SIZE * 2);
+
+void main()
+{
+ ivec2 center_tile_pos = ivec2(gl_GlobalInvocationID.xy);
+
+ CocTile ring_buckets[DOF_DILATE_RING_COUNT];
+
+ for (int ring = 0; ring < ring_count && ring < DOF_DILATE_RING_COUNT; ring++) {
+ ring_buckets[ring] = dof_coc_tile_init();
+
+ int ring_distance = ring + 1;
+ for (int sample_id = 0; sample_id < 4 * ring_distance; sample_id++) {
+ ivec2 offset = dof_square_ring_sample_offset(ring_distance, sample_id);
+
+ offset *= ring_width_multiplier;
+
+ for (int i = 0; i < 2; i++) {
+ ivec2 adj_tile_pos = center_tile_pos + ((i == 0) ? offset : -offset);
+
+ CocTile adj_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, adj_tile_pos);
+
+ if (DILATE_MODE_MIN_MAX) {
+ /* Actually gather the "absolute" biggest coc but keeping the sign. */
+ ring_buckets[ring].fg_min_coc = min(ring_buckets[ring].fg_min_coc, adj_tile.fg_min_coc);
+ ring_buckets[ring].bg_max_coc = max(ring_buckets[ring].bg_max_coc, adj_tile.bg_max_coc);
+ }
+ else { /* DILATE_MODE_MIN_ABS */
+ ring_buckets[ring].fg_max_coc = max(ring_buckets[ring].fg_max_coc, adj_tile.fg_max_coc);
+ ring_buckets[ring].bg_min_coc = min(ring_buckets[ring].bg_min_coc, adj_tile.bg_min_coc);
+
+ /* Should be tight as possible to reduce gather overhead (see slide 61). */
+ float closest_neighbor_distance = length(max(abs(vec2(offset)) - 1.0, 0.0)) *
+ tile_to_fullres_factor;
+
+ ring_buckets[ring].fg_max_intersectable_coc = max(
+ ring_buckets[ring].fg_max_intersectable_coc,
+ adj_tile.fg_max_intersectable_coc + closest_neighbor_distance);
+ ring_buckets[ring].bg_min_intersectable_coc = min(
+ ring_buckets[ring].bg_min_intersectable_coc,
+ adj_tile.bg_min_intersectable_coc + closest_neighbor_distance);
+ }
+ }
+ }
+ }
+
+ /* Load center tile. */
+ CocTile out_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, center_tile_pos);
+
+ for (int ring = 0; ring < ring_count && ring < DOF_DILATE_RING_COUNT; ring++) {
+ float ring_distance = float(ring + 1);
+
+ ring_distance = (ring_distance * ring_width_multiplier - 1) * tile_to_fullres_factor;
+
+ if (DILATE_MODE_MIN_MAX) {
+ /* NOTE(fclem): Unsure if both sides of the inequalities have the same unit. */
+ if (-ring_buckets[ring].fg_min_coc * bluring_radius_error > ring_distance) {
+ out_tile.fg_min_coc = min(out_tile.fg_min_coc, ring_buckets[ring].fg_min_coc);
+ }
+
+ if (ring_buckets[ring].bg_max_coc * bluring_radius_error > ring_distance) {
+ out_tile.bg_max_coc = max(out_tile.bg_max_coc, ring_buckets[ring].bg_max_coc);
+ }
+ }
+ else { /* DILATE_MODE_MIN_ABS */
+ /* Find minimum absolute CoC radii that will be intersected for the previously
+ * computed maximum CoC values. */
+ if (-out_tile.fg_min_coc * bluring_radius_error > ring_distance) {
+ out_tile.fg_max_coc = max(out_tile.fg_max_coc, ring_buckets[ring].fg_max_coc);
+ out_tile.fg_max_intersectable_coc = max(out_tile.fg_max_intersectable_coc,
+ ring_buckets[ring].fg_max_intersectable_coc);
+ }
+
+ if (out_tile.bg_max_coc * bluring_radius_error > ring_distance) {
+ out_tile.bg_min_coc = min(out_tile.bg_min_coc, ring_buckets[ring].bg_min_coc);
+ out_tile.bg_min_intersectable_coc = min(out_tile.bg_min_intersectable_coc,
+ ring_buckets[ring].bg_min_intersectable_coc);
+ }
+ }
+ }
+
+ ivec2 texel_out = ivec2(gl_GlobalInvocationID.xy);
+ dof_coc_tile_store(out_tiles_fg_img, out_tiles_bg_img, texel_out, out_tile);
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl
new file mode 100644
index 00000000000..88737ade386
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl
@@ -0,0 +1,78 @@
+
+/**
+ * Tile flatten pass: Takes the halfres CoC buffer and converts it to 8x8 tiles.
+ *
+ * Output min and max values for each tile and for both foreground & background.
+ * Also outputs min intersectable CoC for the background, which is the minimum CoC
+ * that comes from the background pixels.
+ *
+ * Input:
+ * - Half-resolution Circle of confusion. Out of setup pass.
+ * Output:
+ * - Separated foreground and background CoC. 1/8th of half-res resolution. So 1/16th of full-res.
+ */
+
+#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
+
+/**
+ * In order to use atomic operations, we have to use uints. But this means having to deal with the
+ * negative number ourselves. Luckily, each ground have a nicely defined range of values we can
+ * remap to positive float.
+ */
+shared uint fg_min_coc;
+shared uint fg_max_coc;
+shared uint fg_max_intersectable_coc;
+shared uint bg_min_coc;
+shared uint bg_max_coc;
+shared uint bg_min_intersectable_coc;
+
+const uint dof_tile_large_coc_uint = floatBitsToUint(dof_tile_large_coc);
+
+void main()
+{
+ if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) {
+ /* NOTE: Min/Max flipped because of inverted fg_coc sign. */
+ fg_min_coc = floatBitsToUint(0.0);
+ fg_max_coc = dof_tile_large_coc_uint;
+ fg_max_intersectable_coc = dof_tile_large_coc_uint;
+ bg_min_coc = dof_tile_large_coc_uint;
+ bg_max_coc = floatBitsToUint(0.0);
+ bg_min_intersectable_coc = dof_tile_large_coc_uint;
+ }
+ barrier();
+
+ ivec2 sample_texel = min(ivec2(gl_GlobalInvocationID.xy), textureSize(coc_tx, 0).xy - 1);
+ vec2 sample_data = texelFetch(coc_tx, sample_texel, 0).rg;
+
+ float sample_coc = sample_data.x;
+ uint fg_coc = floatBitsToUint(max(-sample_coc, 0.0));
+ /* NOTE: atomicMin/Max flipped because of inverted fg_coc sign. */
+ atomicMax(fg_min_coc, fg_coc);
+ atomicMin(fg_max_coc, fg_coc);
+ atomicMin(fg_max_intersectable_coc, (sample_coc < 0.0) ? fg_coc : dof_tile_large_coc_uint);
+
+ uint bg_coc = floatBitsToUint(max(sample_coc, 0.0));
+ atomicMin(bg_min_coc, bg_coc);
+ atomicMax(bg_max_coc, bg_coc);
+ atomicMin(bg_min_intersectable_coc, (sample_coc > 0.0) ? bg_coc : dof_tile_large_coc_uint);
+
+ barrier();
+
+ if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) {
+ if (fg_max_intersectable_coc == dof_tile_large_coc_uint) {
+ fg_max_intersectable_coc = floatBitsToUint(0.0);
+ }
+
+ CocTile tile;
+ /* Foreground sign is flipped since we compare unsigned representation. */
+ tile.fg_min_coc = -uintBitsToFloat(fg_min_coc);
+ tile.fg_max_coc = -uintBitsToFloat(fg_max_coc);
+ tile.fg_max_intersectable_coc = -uintBitsToFloat(fg_max_intersectable_coc);
+ tile.bg_min_coc = uintBitsToFloat(bg_min_coc);
+ tile.bg_max_coc = uintBitsToFloat(bg_max_coc);
+ tile.bg_min_intersectable_coc = uintBitsToFloat(bg_min_intersectable_coc);
+
+ ivec2 tile_co = ivec2(gl_WorkGroupID.xy);
+ dof_coc_tile_store(out_tiles_fg_img, out_tiles_bg_img, tile_co, tile);
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_comp.glsl
new file mode 100644
index 00000000000..ce1f19edf53
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_comp.glsl
@@ -0,0 +1,13 @@
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_film_lib.glsl)
+
+void main()
+{
+ ivec2 texel_film = ivec2(gl_GlobalInvocationID.xy);
+ /* Not used. */
+ vec4 out_color;
+ float out_depth;
+
+ film_process_data(texel_film, out_color, out_depth);
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_frag.glsl
new file mode 100644
index 00000000000..26040234fd0
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_frag.glsl
@@ -0,0 +1,31 @@
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_film_lib.glsl)
+
+void main()
+{
+ ivec2 texel_film = ivec2(gl_FragCoord.xy) - film_buf.offset;
+ float out_depth;
+
+ if (film_buf.display_only) {
+ out_depth = imageLoad(depth_img, texel_film).r;
+
+ if (film_buf.display_id == -1) {
+ out_color = texelFetch(in_combined_tx, texel_film, 0);
+ }
+ else if (film_buf.display_is_value) {
+ out_color.rgb = imageLoad(value_accum_img, ivec3(texel_film, film_buf.display_id)).rrr;
+ out_color.a = 1.0;
+ }
+ else {
+ out_color = imageLoad(color_accum_img, ivec3(texel_film, film_buf.display_id));
+ }
+ }
+ else {
+ film_process_data(texel_film, out_color, out_depth);
+ }
+
+ gl_FragDepth = get_depth_from_view_z(-out_depth);
+
+ gl_FragDepth = film_display_depth_ammend(texel_film, gl_FragDepth);
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
new file mode 100644
index 00000000000..087efa9100d
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl
@@ -0,0 +1,701 @@
+
+/**
+ * Film accumulation utils functions.
+ **/
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_camera_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl)
+
+/* Return scene linear Z depth from the camera or radial depth for panoramic cameras. */
+float film_depth_convert_to_scene(float depth)
+{
+ if (false /* Panoramic */) {
+ /* TODO */
+ return 1.0;
+ }
+ return abs(get_view_z_from_depth(depth));
+}
+
+/* Load a texture sample in a specific format. Combined pass needs to use this. */
+vec4 film_texelfetch_as_YCoCg_opacity(sampler2D tx, ivec2 texel)
+{
+ vec4 color = texelFetch(combined_tx, texel, 0);
+ /* Convert transmittance to opacity. */
+ color.a = saturate(1.0 - color.a);
+ /* Transform to YCoCg for accumulation. */
+ color.rgb = colorspace_YCoCg_from_scene_linear(color.rgb);
+ return color;
+}
+
+/* Returns a weight based on Luma to reduce the flickering introduced by high energy pixels. */
+float film_luma_weight(float luma)
+{
+ /* Slide 20 of "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014. */
+ /* To preserve more details in dark areas, we use a bigger bias. */
+ return 1.0 / (4.0 + luma * film_buf.exposure_scale);
+}
+
+/* -------------------------------------------------------------------- */
+/** \name Filter
+ * \{ */
+
+FilmSample film_sample_get(int sample_n, ivec2 texel_film)
+{
+#ifdef PANORAMIC
+ /* TODO(fclem): Panoramic projection will be more complex. The samples will have to be retrieve
+ * at runtime, maybe by scanning a whole region. Offset and weight will have to be computed by
+ * reprojecting the incoming pixel data into film pixel space. */
+#else
+
+# ifdef SCALED_RENDERING
+ texel_film /= film_buf.scaling_factor;
+# endif
+
+ FilmSample film_sample = film_buf.samples[sample_n];
+ film_sample.texel += texel_film + film_buf.offset;
+ /* Use extend on borders. */
+ film_sample.texel = clamp(film_sample.texel, ivec2(0, 0), film_buf.render_extent - 1);
+
+ /* TODO(fclem): Panoramic projection will need to compute the sample weight in the shader
+ * instead of precomputing it on CPU. */
+# ifdef SCALED_RENDERING
+ /* We need to compute the real distance and weight since a sample
+ * can be used by many final pixel. */
+ vec2 offset = film_buf.subpixel_offset - vec2(texel_film % film_buf.scaling_factor);
+ film_sample.weight = film_filter_weight(film_buf.filter_size, len_squared(offset));
+# endif
+
+#endif /* PANORAMIC */
+
+ /* Always return a weight above 0 to avoid blind spots between samples. */
+ film_sample.weight = max(film_sample.weight, 1e-6);
+
+ return film_sample;
+}
+
+/* Returns the combined weights of all samples affecting this film pixel. */
+float film_weight_accumulation(ivec2 texel_film)
+{
+#if 0 /* TODO(fclem): Reference implementation, also needed for panoramic cameras. */
+ float weight = 0.0;
+ for (int i = 0; i < film_buf.samples_len; i++) {
+ weight += film_sample_get(i, texel_film).weight;
+ }
+ return weight;
+#endif
+ return film_buf.samples_weight_total;
+}
+
+void film_sample_accum(FilmSample samp, int pass_id, sampler2D tex, inout vec4 accum)
+{
+ if (pass_id == -1) {
+ return;
+ }
+ accum += texelFetch(tex, samp.texel, 0) * samp.weight;
+}
+
+void film_sample_accum(FilmSample samp, int pass_id, sampler2D tex, inout float accum)
+{
+ if (pass_id == -1) {
+ return;
+ }
+ accum += texelFetch(tex, samp.texel, 0).x * samp.weight;
+}
+
+void film_sample_accum(
+ FilmSample samp, int pass_id, uint layer, sampler2DArray tex, inout vec4 accum)
+{
+ if (pass_id == -1) {
+ return;
+ }
+ accum += texelFetch(tex, ivec3(samp.texel, layer), 0) * samp.weight;
+}
+
+void film_sample_accum(FilmSample samp, int pass_id, sampler2DArray tex, inout vec4 accum)
+{
+ film_sample_accum(samp, pass_id, pass_id, tex, accum);
+}
+
+void film_sample_accum(FilmSample samp, int pass_id, sampler2DArray tex, inout float accum)
+{
+ if (pass_id == -1) {
+ return;
+ }
+ accum += texelFetch(tex, ivec3(samp.texel, pass_id), 0).x * samp.weight;
+}
+
+void film_sample_accum_mist(FilmSample samp, inout float accum)
+{
+ if (film_buf.mist_id == -1) {
+ return;
+ }
+ float depth = texelFetch(depth_tx, samp.texel, 0).x;
+ vec2 uv = (vec2(samp.texel) + 0.5) / textureSize(depth_tx, 0).xy;
+ vec3 vP = get_view_space_from_depth(uv, depth);
+ bool is_persp = ProjectionMatrix[3][3] == 0.0;
+ float mist = (is_persp) ? length(vP) : abs(vP.z);
+ /* Remap to 0..1 range. */
+ mist = saturate(mist * film_buf.mist_scale + film_buf.mist_bias);
+ /* Falloff. */
+ mist = pow(mist, film_buf.mist_exponent);
+ accum += mist * samp.weight;
+}
+
+void film_sample_accum_combined(FilmSample samp, inout vec4 accum, inout float weight_accum)
+{
+ if (film_buf.combined_id == -1) {
+ return;
+ }
+ vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, samp.texel);
+
+ /* Weight by luma to remove fireflies. */
+ float weight = film_luma_weight(color.x) * samp.weight;
+
+ accum += color * weight;
+ weight_accum += weight;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Load/Store Data
+ * \{ */
+
+/* Returns the distance used to store nearest interpolation data. */
+float film_distance_load(ivec2 texel)
+{
+ /* Repeat texture coordinates as the weight can be optimized to a small portion of the film. */
+ texel = texel % imageSize(in_weight_img).xy;
+
+ if (!film_buf.use_history || film_buf.use_reprojection) {
+ return 1.0e16;
+ }
+ return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE)).x;
+}
+
+float film_weight_load(ivec2 texel)
+{
+ /* Repeat texture coordinates as the weight can be optimized to a small portion of the film. */
+ texel = texel % imageSize(in_weight_img).xy;
+
+ if (!film_buf.use_history || film_buf.use_reprojection) {
+ return 0.0;
+ }
+ return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION)).x;
+}
+
+/* Returns motion in pixel space to retrieve the pixel history. */
+vec2 film_pixel_history_motion_vector(ivec2 texel_sample)
+{
+ /**
+ * Dilate velocity by using the nearest pixel in a cross pattern.
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 27)
+ */
+ const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
+ float min_depth = texelFetch(depth_tx, texel_sample, 0).x;
+ ivec2 nearest_texel = texel_sample;
+ for (int i = 0; i < 4; i++) {
+ ivec2 texel = clamp(texel_sample + corners[i], ivec2(0), textureSize(depth_tx, 0).xy - 1);
+ float depth = texelFetch(depth_tx, texel, 0).x;
+ if (min_depth > depth) {
+ min_depth = depth;
+ nearest_texel = texel;
+ }
+ }
+
+ vec4 vector = velocity_resolve(vector_tx, nearest_texel, min_depth);
+
+ /* Transform to pixel space. */
+ vector.xy *= vec2(film_buf.extent);
+
+ return vector.xy;
+}
+
+/* \a t is inter-pixel position. 0 means perfectly on a pixel center.
+ * Returns weights in both dimensions.
+ * Multiply each dimension weights to get final pixel weights. */
+void film_get_catmull_rom_weights(vec2 t, out vec2 weights[4])
+{
+ vec2 t2 = t * t;
+ vec2 t3 = t2 * t;
+ float fc = 0.5; /* Catmull-Rom. */
+
+ vec2 fct = t * fc;
+ vec2 fct2 = t2 * fc;
+ vec2 fct3 = t3 * fc;
+ weights[0] = (fct2 * 2.0 - fct3) - fct;
+ weights[1] = (t3 * 2.0 - fct3) + (-t2 * 3.0 + fct2) + 1.0;
+ weights[2] = (-t3 * 2.0 + fct3) + (t2 * 3.0 - (2.0 * fct2)) + fct;
+ weights[3] = fct3 - fct2;
+}
+
+/* Load color using a special filter to avoid losing detail.
+ * \a texel is sample position with subpixel accuracy. */
+vec4 film_sample_catmull_rom(sampler2D color_tx, vec2 input_texel)
+{
+ vec2 center_texel;
+ vec2 inter_texel = modf(input_texel, center_texel);
+ vec2 weights[4];
+ film_get_catmull_rom_weights(inter_texel, weights);
+
+#if 0 /* Reference. 16 Taps. */
+ vec4 color = vec4(0.0);
+ for (int y = 0; y < 4; y++) {
+ for (int x = 0; x < 4; x++) {
+ ivec2 texel = ivec2(center_texel) + ivec2(x, y) - 1;
+ texel = clamp(texel, ivec2(0), textureSize(color_tx, 0).xy - 1);
+ color += texelFetch(color_tx, texel, 0) * weights[x].x * weights[y].y;
+ }
+ }
+ return color;
+
+#elif 1 /* Optimize version. 5 Bilinear Taps. */
+ /**
+ * Use optimized version by leveraging bilinear filtering from hardware sampler and by removing
+ * corner taps.
+ * From "Filmic SMAA" by Jorge Jimenez at Siggraph 2016
+ * http://advances.realtimerendering.com/s2016/Filmic%20SMAA%20v7.pptx
+ */
+ center_texel += 0.5;
+
+ /* Slide 92. */
+ vec2 weight_12 = weights[1] + weights[2];
+ vec2 uv_12 = (center_texel + weights[2] / weight_12) * film_buf.extent_inv;
+ vec2 uv_0 = (center_texel - 1.0) * film_buf.extent_inv;
+ vec2 uv_3 = (center_texel + 2.0) * film_buf.extent_inv;
+
+ vec4 color;
+ vec4 weight_cross = weight_12.xyyx * vec4(weights[0].yx, weights[3].xy);
+ float weight_center = weight_12.x * weight_12.y;
+
+ color = textureLod(color_tx, uv_12, 0.0) * weight_center;
+ color += textureLod(color_tx, vec2(uv_12.x, uv_0.y), 0.0) * weight_cross.x;
+ color += textureLod(color_tx, vec2(uv_0.x, uv_12.y), 0.0) * weight_cross.y;
+ color += textureLod(color_tx, vec2(uv_3.x, uv_12.y), 0.0) * weight_cross.z;
+ color += textureLod(color_tx, vec2(uv_12.x, uv_3.y), 0.0) * weight_cross.w;
+ /* Re-normalize for the removed corners. */
+ return color / (weight_center + sum(weight_cross));
+
+#else /* Nearest interpolation for debugging. 1 Tap. */
+ ivec2 texel = ivec2(center_texel) + ivec2(greaterThan(inter_texel, vec2(0.5)));
+ texel = clamp(texel, ivec2(0), textureSize(color_tx, 0).xy - 1);
+ return texelFetch(color_tx, texel, 0);
+#endif
+}
+
+/* Return history clipping bounding box in YCoCg color space. */
+void film_combined_neighbor_boundbox(ivec2 texel, out vec4 min_c, out vec4 max_c)
+{
+ /* Plus (+) shape offsets. */
+ const ivec2 plus_offsets[5] = ivec2[5](ivec2(0, 0), /* Center */
+ ivec2(-1, 0),
+ ivec2(0, -1),
+ ivec2(1, 0),
+ ivec2(0, 1));
+#if 0
+ /**
+ * Compute Variance of neighborhood as described in:
+ * "An Excursion in Temporal Supersampling" by Marco Salvi at GDC 2016.
+ * and:
+ * "A Survey of Temporal Antialiasing Techniques" by Yang et al.
+ */
+
+ /* First 2 moments. */
+ vec4 mu1 = vec4(0), mu2 = vec4(0);
+ for (int i = 0; i < 5; i++) {
+ vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + plus_offsets[i]);
+ mu1 += color;
+ mu2 += sqr(color);
+ }
+ mu1 *= (1.0 / 5.0);
+ mu2 *= (1.0 / 5.0);
+
+ /* Extent scaling. Range [0.75..1.25].
+ * Balance between more flickering (0.75) or more ghosting (1.25). */
+ const float gamma = 1.25;
+ /* Standard deviation. */
+ vec4 sigma = sqrt(abs(mu2 - sqr(mu1)));
+ /* eq. 6 in "A Survey of Temporal Antialiasing Techniques". */
+ min_c = mu1 - gamma * sigma;
+ max_c = mu1 + gamma * sigma;
+#else
+ /**
+ * Simple bounding box calculation in YCoCg as described in:
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014
+ */
+ min_c = vec4(1e16);
+ max_c = vec4(-1e16);
+ for (int i = 0; i < 5; i++) {
+ vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + plus_offsets[i]);
+ min_c = min(min_c, color);
+ max_c = max(max_c, color);
+ }
+ /* (Slide 32) Simple clamp to min/max of 8 neighbors results in 3x3 box artifacts.
+ * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. */
+ vec4 min_c_3x3 = min_c;
+ vec4 max_c_3x3 = max_c;
+ const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
+ for (int i = 0; i < 4; i++) {
+ vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + corners[i]);
+ min_c_3x3 = min(min_c_3x3, color);
+ max_c_3x3 = max(max_c_3x3, color);
+ }
+ min_c = (min_c + min_c_3x3) * 0.5;
+ max_c = (max_c + max_c_3x3) * 0.5;
+#endif
+}
+
+/* 1D equivalent of line_aabb_clipping_dist(). */
+float film_aabb_clipping_dist_alpha(float origin, float direction, float aabb_min, float aabb_max)
+{
+ if (abs(direction) < 1e-5) {
+ return 0.0;
+ }
+ float nearest_plane = (direction > 0.0) ? aabb_min : aabb_max;
+ return (nearest_plane - origin) / direction;
+}
+
+/* Modulate the history color to avoid ghosting artifact. */
+vec4 film_amend_combined_history(
+ vec4 min_color, vec4 max_color, vec4 color_history, vec4 src_color, ivec2 src_texel)
+{
+ /* Clip instead of clamping to avoid color accumulating in the AABB corners. */
+ vec4 clip_dir = src_color - color_history;
+
+ float t = line_aabb_clipping_dist(color_history.rgb, clip_dir.rgb, min_color.rgb, max_color.rgb);
+ color_history.rgb += clip_dir.rgb * saturate(t);
+
+ /* Clip alpha on its own to avoid interference with other channels. */
+ float t_a = film_aabb_clipping_dist_alpha(color_history.a, clip_dir.a, min_color.a, max_color.a);
+ color_history.a += clip_dir.a * saturate(t_a);
+
+ return color_history;
+}
+
+float film_history_blend_factor(float velocity,
+ vec2 texel,
+ float luma_min,
+ float luma_max,
+ float luma_incoming,
+ float luma_history)
+{
+ /* 5% of incoming color by default. */
+ float blend = 0.05;
+ /* Blend less history if the pixel has substantial velocity. */
+ blend = mix(blend, 0.20, saturate(velocity * 0.02));
+ /**
+ * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 43)
+ * Bias towards history if incoming pixel is near clamping. Reduces flicker.
+ */
+ float distance_to_luma_clip = min_v2(vec2(luma_history - luma_min, luma_max - luma_history));
+ /* Divide by bbox size to get a factor. 2 factor to compensate the line above. */
+ distance_to_luma_clip *= 2.0 * safe_rcp(luma_max - luma_min);
+ /* Linearly blend when history gets below to 25% of the bbox size. */
+ blend *= saturate(distance_to_luma_clip * 4.0 + 0.1);
+ /* Discard out of view history. */
+ if (any(lessThan(texel, vec2(0))) || any(greaterThanEqual(texel, film_buf.extent))) {
+ blend = 1.0;
+ }
+ /* Discard history if invalid. */
+ if (film_buf.use_history == false) {
+ blend = 1.0;
+ }
+ return blend;
+}
+
+/* Returns resolved final color. */
+void film_store_combined(
+ FilmSample dst, ivec2 src_texel, vec4 color, float color_weight, inout vec4 display)
+{
+ if (film_buf.combined_id == -1) {
+ return;
+ }
+
+ vec4 color_src, color_dst;
+ float weight_src, weight_dst;
+
+ /* Undo the weighting to get final spatialy-filtered color. */
+ color_src = color / color_weight;
+
+ if (film_buf.use_reprojection) {
+ /* Interactive accumulation. Do reprojection and Temporal Anti-Aliasing. */
+
+ /* Reproject by finding where this pixel was in the previous frame. */
+ vec2 motion = film_pixel_history_motion_vector(src_texel);
+ vec2 history_texel = vec2(dst.texel) + motion;
+
+ float velocity = length(motion);
+
+ /* Load weight if it is not uniform across the whole buffer (i.e: upsampling, panoramic). */
+ // dst.weight = film_weight_load(texel_combined);
+
+ color_dst = film_sample_catmull_rom(in_combined_tx, history_texel);
+ color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb);
+
+ /* Get local color bounding box of source neighborhood. */
+ vec4 min_color, max_color;
+ film_combined_neighbor_boundbox(src_texel, min_color, max_color);
+
+ float blend = film_history_blend_factor(
+ velocity, history_texel, min_color.x, max_color.x, color_src.x, color_dst.x);
+
+ color_dst = film_amend_combined_history(min_color, max_color, color_dst, color_src, src_texel);
+
+ /* Luma weighted blend to avoid flickering. */
+ weight_dst = film_luma_weight(color_dst.x) * (1.0 - blend);
+ weight_src = film_luma_weight(color_src.x) * (blend);
+ }
+ else {
+ /* Everything is static. Use render accumulation. */
+ color_dst = texelFetch(in_combined_tx, dst.texel, 0);
+ color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb);
+
+ /* Luma weighted blend to avoid flickering. */
+ weight_dst = film_luma_weight(color_dst.x) * dst.weight;
+ weight_src = color_weight;
+ }
+ /* Weighted blend. */
+ color = color_dst * weight_dst + color_src * weight_src;
+ color /= weight_src + weight_dst;
+
+ color.rgb = colorspace_scene_linear_from_YCoCg(color.rgb);
+
+ /* Fix alpha not accumulating to 1 because of float imprecision. */
+ if (color.a > 0.995) {
+ color.a = 1.0;
+ }
+
+ /* Filter NaNs. */
+ if (any(isnan(color))) {
+ color = vec4(0.0, 0.0, 0.0, 1.0);
+ }
+
+ if (film_buf.display_id == -1) {
+ display = color;
+ }
+ imageStore(out_combined_img, dst.texel, color);
+}
+
+void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 display)
+{
+ if (pass_id == -1) {
+ return;
+ }
+
+ vec4 data_film = imageLoad(color_accum_img, ivec3(dst.texel, pass_id));
+
+ color = (data_film * dst.weight + color) * dst.weight_sum_inv;
+
+ /* Filter NaNs. */
+ if (any(isnan(color))) {
+ color = vec4(0.0, 0.0, 0.0, 1.0);
+ }
+
+ if (film_buf.display_id == pass_id) {
+ display = color;
+ }
+ imageStore(color_accum_img, ivec3(dst.texel, pass_id), color);
+}
+
+void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 display)
+{
+ if (pass_id == -1) {
+ return;
+ }
+
+ float data_film = imageLoad(value_accum_img, ivec3(dst.texel, pass_id)).x;
+
+ value = (data_film * dst.weight + value) * dst.weight_sum_inv;
+
+ /* Filter NaNs. */
+ if (isnan(value)) {
+ value = 0.0;
+ }
+
+ if (film_buf.display_id == pass_id) {
+ display = vec4(value, value, value, 1.0);
+ }
+ imageStore(value_accum_img, ivec3(dst.texel, pass_id), vec4(value));
+}
+
+/* Nearest sample variant. Always stores the data. */
+void film_store_data(ivec2 texel_film, int pass_id, vec4 data_sample, inout vec4 display)
+{
+ if (pass_id == -1) {
+ return;
+ }
+
+ if (film_buf.display_id == pass_id) {
+ display = data_sample;
+ }
+ imageStore(color_accum_img, ivec3(texel_film, pass_id), data_sample);
+}
+
+void film_store_depth(ivec2 texel_film, float value, out float out_depth)
+{
+ if (film_buf.depth_id == -1) {
+ return;
+ }
+
+ out_depth = film_depth_convert_to_scene(value);
+
+ imageStore(depth_img, texel_film, vec4(out_depth));
+}
+
+void film_store_distance(ivec2 texel, float value)
+{
+ imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE), vec4(value));
+}
+
+void film_store_weight(ivec2 texel, float value)
+{
+ imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION), vec4(value));
+}
+
+float film_display_depth_ammend(ivec2 texel, float depth)
+{
+ /* This effectively offsets the depth of the whole 2x2 region to the lowest value of the region
+ * twice. One for X and one for Y direction. */
+ /* TODO(fclem): This could be improved as it gives flickering result at depth discontinuity.
+ * But this is the quickest stable result I could come with for now. */
+#ifdef GPU_FRAGMENT_SHADER
+ depth += fwidth(depth);
+#endif
+ /* Small offset to avoid depth test lessEqual failing because of all the conversions loss. */
+ depth += 2.4e-7 * 4.0;
+ return saturate(depth);
+}
+
+/** \} */
+
+/** NOTE: out_depth is scene linear depth from the camera origin. */
+void film_process_data(ivec2 texel_film, out vec4 out_color, out float out_depth)
+{
+ out_color = vec4(0.0);
+ out_depth = 0.0;
+
+ float weight_accum = film_weight_accumulation(texel_film);
+ float film_weight = film_weight_load(texel_film);
+ float weight_sum = film_weight + weight_accum;
+ film_store_weight(texel_film, weight_sum);
+
+ FilmSample dst;
+ dst.texel = texel_film;
+ dst.weight = film_weight;
+ dst.weight_sum_inv = 1.0 / weight_sum;
+
+ /* NOTE: We split the accumulations into separate loops to avoid using too much registers and
+ * maximize occupancy. */
+
+ if (film_buf.combined_id != -1) {
+ /* NOTE: Do weight accumulation again since we use custom weights. */
+ float weight_accum = 0.0;
+ vec4 combined_accum = vec4(0.0);
+
+ FilmSample src;
+ for (int i = film_buf.samples_len - 1; i >= 0; i--) {
+ src = film_sample_get(i, texel_film);
+ film_sample_accum_combined(src, combined_accum, weight_accum);
+ }
+ /* NOTE: src.texel is center texel in incoming data buffer. */
+ film_store_combined(dst, src.texel, combined_accum, weight_accum, out_color);
+ }
+
+ if (film_buf.has_data) {
+ float film_distance = film_distance_load(texel_film);
+
+ /* Get sample closest to target texel. It is always sample 0. */
+ FilmSample film_sample = film_sample_get(0, texel_film);
+
+ if (film_buf.use_reprojection || film_sample.weight < film_distance) {
+ vec4 normal = texelFetch(normal_tx, film_sample.texel, 0);
+ float depth = texelFetch(depth_tx, film_sample.texel, 0).x;
+ vec4 vector = velocity_resolve(vector_tx, film_sample.texel, depth);
+ /* Transform to pixel space. */
+ vector *= vec4(film_buf.render_extent, -film_buf.render_extent);
+
+ film_store_depth(texel_film, depth, out_depth);
+ film_store_data(texel_film, film_buf.normal_id, normal, out_color);
+ film_store_data(texel_film, film_buf.vector_id, vector, out_color);
+ film_store_distance(texel_film, film_sample.weight);
+ }
+ else {
+ out_depth = imageLoad(depth_img, texel_film).r;
+ }
+ }
+
+ if (film_buf.any_render_pass_1) {
+ vec4 diffuse_light_accum = vec4(0.0);
+ vec4 specular_light_accum = vec4(0.0);
+ vec4 volume_light_accum = vec4(0.0);
+ vec4 emission_accum = vec4(0.0);
+
+ for (int i = 0; i < film_buf.samples_len; i++) {
+ FilmSample src = film_sample_get(i, texel_film);
+ film_sample_accum(src,
+ film_buf.diffuse_light_id,
+ RENDER_PASS_LAYER_DIFFUSE_LIGHT,
+ light_tx,
+ diffuse_light_accum);
+ film_sample_accum(src,
+ film_buf.specular_light_id,
+ RENDER_PASS_LAYER_SPECULAR_LIGHT,
+ light_tx,
+ specular_light_accum);
+ film_sample_accum(src, film_buf.volume_light_id, volume_light_tx, volume_light_accum);
+ film_sample_accum(src, film_buf.emission_id, emission_tx, emission_accum);
+ }
+ film_store_color(dst, film_buf.diffuse_light_id, diffuse_light_accum, out_color);
+ film_store_color(dst, film_buf.specular_light_id, specular_light_accum, out_color);
+ film_store_color(dst, film_buf.volume_light_id, volume_light_accum, out_color);
+ film_store_color(dst, film_buf.emission_id, emission_accum, out_color);
+ }
+
+ if (film_buf.any_render_pass_2) {
+ vec4 diffuse_color_accum = vec4(0.0);
+ vec4 specular_color_accum = vec4(0.0);
+ vec4 environment_accum = vec4(0.0);
+ float mist_accum = 0.0;
+ float shadow_accum = 0.0;
+ float ao_accum = 0.0;
+
+ for (int i = 0; i < film_buf.samples_len; i++) {
+ FilmSample src = film_sample_get(i, texel_film);
+ film_sample_accum(src, film_buf.diffuse_color_id, diffuse_color_tx, diffuse_color_accum);
+ film_sample_accum(src, film_buf.specular_color_id, specular_color_tx, specular_color_accum);
+ film_sample_accum(src, film_buf.environment_id, environment_tx, environment_accum);
+ film_sample_accum(src, film_buf.shadow_id, shadow_tx, shadow_accum);
+ film_sample_accum(src, film_buf.ambient_occlusion_id, ambient_occlusion_tx, ao_accum);
+ film_sample_accum_mist(src, mist_accum);
+ }
+ film_store_color(dst, film_buf.diffuse_color_id, diffuse_color_accum, out_color);
+ film_store_color(dst, film_buf.specular_color_id, specular_color_accum, out_color);
+ film_store_color(dst, film_buf.environment_id, environment_accum, out_color);
+ film_store_value(dst, film_buf.shadow_id, shadow_accum, out_color);
+ film_store_value(dst, film_buf.ambient_occlusion_id, ao_accum, out_color);
+ film_store_value(dst, film_buf.mist_id, mist_accum, out_color);
+ }
+
+ for (int aov = 0; aov < film_buf.aov_color_len; aov++) {
+ vec4 aov_accum = vec4(0.0);
+
+ for (int i = 0; i < film_buf.samples_len; i++) {
+ FilmSample src = film_sample_get(i, texel_film);
+ film_sample_accum(src, aov, aov_color_tx, aov_accum);
+ }
+ film_store_color(dst, film_buf.aov_color_id + aov, aov_accum, out_color);
+ }
+
+ for (int aov = 0; aov < film_buf.aov_value_len; aov++) {
+ float aov_accum = 0.0;
+
+ for (int i = 0; i < film_buf.samples_len; i++) {
+ FilmSample src = film_sample_get(i, texel_film);
+ film_sample_accum(src, aov, aov_value_tx, aov_accum);
+ }
+ film_store_value(dst, film_buf.aov_value_id + aov, aov_accum, out_color);
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl
new file mode 100644
index 00000000000..e93d0f472fa
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl
@@ -0,0 +1,24 @@
+
+/**
+ * Debug hiz down sampling pass.
+ * Output red if above any max pixels, blue otherwise.
+ */
+
+void main()
+{
+ ivec2 texel = ivec2(gl_FragCoord.xy);
+
+ float depth0 = texelFetch(hiz_tx, texel, 0).r;
+
+ vec4 color = vec4(0.1, 0.1, 1.0, 1.0);
+ for (int i = 1; i < HIZ_MIP_COUNT; i++) {
+ ivec2 lvl_texel = texel / ivec2(uvec2(1) << uint(i));
+ lvl_texel = min(lvl_texel, textureSize(hiz_tx, i) - 1);
+ if (texelFetch(hiz_tx, lvl_texel, i).r < depth0) {
+ color = vec4(1.0, 0.1, 0.1, 1.0);
+ break;
+ }
+ }
+ out_debug_color_add = vec4(color.rgb, 0.0) * 0.2;
+ out_debug_color_mul = color;
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl
new file mode 100644
index 00000000000..597bc73e2ad
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl
@@ -0,0 +1,121 @@
+
+/**
+ * Shader that down-sample depth buffer, creating a Hierarchical-Z buffer.
+ * Saves max value of each 2x2 texel in the mipmap above the one we are
+ * rendering to. Adapted from
+ * http://rastergrid.com/blog/2010/10/hierarchical-z-map-based-occlusion-culling/
+ *
+ * Major simplification has been made since we pad the buffer to always be
+ * bigger than input to avoid mipmapping misalignement.
+ *
+ * Start by copying the base level by quad loading the depth.
+ * Then each thread compute it's local depth for level 1.
+ * After that we use shared variables to do inter thread comunication and
+ * downsample to max level.
+ */
+
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+
+shared float local_depths[gl_WorkGroupSize.y][gl_WorkGroupSize.x];
+
+/* Load values from the previous lod level. */
+vec4 load_local_depths(ivec2 pixel)
+{
+ pixel *= 2;
+ return vec4(local_depths[pixel.y + 1][pixel.x + 0],
+ local_depths[pixel.y + 1][pixel.x + 1],
+ local_depths[pixel.y + 0][pixel.x + 1],
+ local_depths[pixel.y + 0][pixel.x + 0]);
+}
+
+void store_local_depth(ivec2 pixel, float depth)
+{
+ local_depths[pixel.y][pixel.x] = depth;
+}
+
+void main()
+{
+ ivec2 local_px = ivec2(gl_LocalInvocationID.xy);
+ /* Bottom left corner of the kernel. */
+ ivec2 kernel_origin = ivec2(gl_WorkGroupSize.xy * gl_WorkGroupID.xy);
+
+ /* Copy level 0. */
+ ivec2 src_px = ivec2(kernel_origin + local_px) * 2;
+ vec2 samp_co = (vec2(src_px) + 0.5) / vec2(textureSize(depth_tx, 0));
+ vec4 samp = textureGather(depth_tx, samp_co);
+
+ if (update_mip_0) {
+ imageStore(out_mip_0, src_px + ivec2(0, 1), samp.xxxx);
+ imageStore(out_mip_0, src_px + ivec2(1, 1), samp.yyyy);
+ imageStore(out_mip_0, src_px + ivec2(1, 0), samp.zzzz);
+ imageStore(out_mip_0, src_px + ivec2(0, 0), samp.wwww);
+ }
+
+ /* Level 1. (No load) */
+ float max_depth = max_v4(samp);
+ ivec2 dst_px = ivec2(kernel_origin + local_px);
+ imageStore(out_mip_1, dst_px, vec4(max_depth));
+ store_local_depth(local_px, max_depth);
+
+ /* Level 2-5. */
+ bool active_thread;
+ int mask_shift = 1;
+
+#define downsample_level(out_mip__, lod_) \
+ active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \
+ barrier(); /* Wait for previous writes to finish. */ \
+ if (active_thread) { \
+ max_depth = max_v4(load_local_depths(local_px)); \
+ dst_px = ivec2((kernel_origin >> mask_shift) + local_px); \
+ imageStore(out_mip__, dst_px, vec4(max_depth)); \
+ } \
+ barrier(); /* Wait for previous reads to finish. */ \
+ if (active_thread) { \
+ store_local_depth(local_px, max_depth); \
+ } \
+ mask_shift++;
+
+ downsample_level(out_mip_2, 2);
+ downsample_level(out_mip_3, 3);
+ downsample_level(out_mip_4, 4);
+ downsample_level(out_mip_5, 5);
+
+ /* Since we pad the destination texture, the mip size is equal to the dispatch size. */
+ uint tile_count = uint(imageSize(out_mip_5).x * imageSize(out_mip_5).y);
+ /* Let the last tile handle the remaining LOD. */
+ bool last_tile = atomicAdd(finished_tile_counter, 1u) + 1u < tile_count;
+ if (last_tile == false) {
+ return;
+ }
+ finished_tile_counter = 0u;
+
+ ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u));
+ ivec2 image_border = imageSize(out_mip_5) - 1;
+ for (int y = 0; y < iter.y; y++) {
+ for (int x = 0; x < iter.x; x++) {
+ /* Load result of the other work groups. */
+ kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y);
+ src_px = ivec2(kernel_origin + local_px) * 2;
+ vec4 samp;
+ samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x;
+ samp.y = imageLoad(out_mip_5, min(src_px + ivec2(1, 1), image_border)).x;
+ samp.z = imageLoad(out_mip_5, min(src_px + ivec2(1, 0), image_border)).x;
+ samp.w = imageLoad(out_mip_5, min(src_px + ivec2(0, 0), image_border)).x;
+ /* Level 6. */
+ float max_depth = max_v4(samp);
+ ivec2 dst_px = ivec2(kernel_origin + local_px);
+ imageStore(out_mip_6, dst_px, vec4(max_depth));
+ store_local_depth(local_px, max_depth);
+
+ mask_shift = 1;
+
+ /* Level 7. */
+ downsample_level(out_mip_7, 7);
+
+ /* Limited by OpenGL maximum of 8 image slot. */
+ // downsample_level(out_mip_8, 8);
+ // downsample_level(out_mip_9, 9);
+ // downsample_level(out_mip_10, 10);
+ }
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl
new file mode 100644
index 00000000000..eefc024d0b8
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl
@@ -0,0 +1,54 @@
+
+/**
+ * Debug Shader outputing a gradient of orange - white - blue to mark culling hotspots.
+ * Green pixels are error pixels that are missing lights from the culling pass (i.e: when culling
+ * pass is not conservative enough).
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl)
+
+void main()
+{
+ ivec2 texel = ivec2(gl_FragCoord.xy);
+
+ float depth = texelFetch(hiz_tx, texel, 0).r;
+ float vP_z = get_view_z_from_depth(depth);
+ vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth);
+
+ float light_count = 0.0;
+ uint light_cull = 0u;
+ vec2 px = gl_FragCoord.xy;
+ LIGHT_FOREACH_BEGIN_LOCAL(light_cull_buf, light_zbin_buf, light_tile_buf, px, vP_z, l_idx)
+ {
+ LightData light = light_buf[l_idx];
+ light_cull |= 1u << l_idx;
+ light_count += 1.0;
+ }
+ LIGHT_FOREACH_END
+
+ uint light_nocull = 0u;
+ LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(light_cull_buf, l_idx)
+ {
+ LightData light = light_buf[l_idx];
+ vec3 L;
+ float dist;
+ light_vector_get(light, P, L, dist);
+ if (light_attenuation(light, L, dist) > 0.0) {
+ light_nocull |= 1u << l_idx;
+ }
+ }
+ LIGHT_FOREACH_END
+
+ vec4 color = vec4(heatmap_gradient(light_count / 4.0), 1.0);
+
+ if ((light_cull & light_nocull) != light_nocull) {
+ /* ERROR. Some lights were culled incorrectly. */
+ color = vec4(0.0, 1.0, 0.0, 1.0);
+ }
+
+ out_debug_color_add = vec4(color.rgb, 0.0) * 0.2;
+ out_debug_color_mul = color;
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl
new file mode 100644
index 00000000000..9c12b0e50e6
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl
@@ -0,0 +1,62 @@
+
+/**
+ * Select the visible items inside the active view and put them inside the sorting buffer.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(common_intersect_lib.glsl)
+
+void main()
+{
+ uint l_idx = gl_GlobalInvocationID.x;
+ if (l_idx >= light_cull_buf.items_count) {
+ return;
+ }
+
+ LightData light = in_light_buf[l_idx];
+
+ /* Do not select 0 power lights. */
+ if (light.influence_radius_max < 1e-8) {
+ return;
+ }
+
+ /* Sun lights are packed at the end of the array. Perform early copy. */
+ if (light.type == LIGHT_SUN) {
+ /* NOTE: We know the index because sun lights are packed at the start of the input buffer. */
+ out_light_buf[light_cull_buf.local_lights_len + l_idx] = light;
+ return;
+ }
+
+ Sphere sphere;
+ switch (light.type) {
+ case LIGHT_SPOT:
+ /* Only for < ~170° Cone due to plane extraction precision. */
+ if (light.spot_tan < 10.0) {
+ Pyramid pyramid = shape_pyramid_non_oblique(
+ light._position,
+ light._position - light._back * light.influence_radius_max,
+ light._right * light.influence_radius_max * light.spot_tan / light.spot_size_inv.x,
+ light._up * light.influence_radius_max * light.spot_tan / light.spot_size_inv.y);
+ if (!intersect_view(pyramid)) {
+ return;
+ }
+ }
+ case LIGHT_RECT:
+ case LIGHT_ELLIPSE:
+ case LIGHT_POINT:
+ sphere = Sphere(light._position, light.influence_radius_max);
+ break;
+ }
+
+ /* TODO(fclem): HiZ culling? Could be quite beneficial given the nature of the 2.5D culling. */
+
+ /* TODO(fclem): Small light culling / fading? */
+
+ if (intersect_view(sphere)) {
+ uint index = atomicAdd(light_cull_buf.visible_count, 1u);
+
+ out_zdist_buf[index] = dot(cameraForward, light._position) - dot(cameraForward, cameraPos);
+ out_key_buf[index] = l_idx;
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl
new file mode 100644
index 00000000000..e98b170cd4c
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl
@@ -0,0 +1,57 @@
+
+/**
+ * Sort the lights by their Z distance to the camera.
+ * Outputs ordered light buffer.
+ * One thread processes one Light entity.
+ */
+
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+
+shared float zdists_cache[gl_WorkGroupSize.x];
+
+void main()
+{
+ uint src_index = gl_GlobalInvocationID.x;
+ bool valid_thread = true;
+
+ if (src_index >= light_cull_buf.visible_count) {
+ /* Do not return because we use barriers later on (which need uniform control flow).
+ * Just process the same last item but avoid insertion. */
+ src_index = light_cull_buf.visible_count - 1;
+ valid_thread = false;
+ }
+
+ float local_zdist = in_zdist_buf[src_index];
+
+ int prefix_sum = 0;
+ /* Iterate over the whole key buffer. */
+ uint iter = divide_ceil(light_cull_buf.visible_count, gl_WorkGroupSize.x);
+ for (uint i = 0u; i < iter; i++) {
+ uint index = gl_WorkGroupSize.x * i + gl_LocalInvocationID.x;
+ /* NOTE: This will load duplicated values, but they will be discarded. */
+ index = min(index, light_cull_buf.visible_count - 1);
+ zdists_cache[gl_LocalInvocationID.x] = in_zdist_buf[index];
+
+ barrier();
+
+ /* Iterate over the cache line. */
+ uint line_end = min(gl_WorkGroupSize.x, light_cull_buf.visible_count - gl_WorkGroupSize.x * i);
+ for (uint j = 0u; j < line_end; j++) {
+ if (zdists_cache[j] < local_zdist) {
+ prefix_sum++;
+ }
+ else if (zdists_cache[j] == local_zdist) {
+ /* Same depth, use index to order and avoid same prefix for 2 different lights. */
+ if ((gl_WorkGroupSize.x * i + j) < src_index) {
+ prefix_sum++;
+ }
+ }
+ }
+ }
+
+ if (valid_thread) {
+ /* Copy sorted light to render light buffer. */
+ uint input_index = in_key_buf[src_index];
+ out_light_buf[prefix_sum] = in_light_buf[input_index];
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl
new file mode 100644
index 00000000000..37705e22b22
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl
@@ -0,0 +1,188 @@
+
+/**
+ * 2D Culling pass for lights.
+ * We iterate over all items and check if they intersect with the tile frustum.
+ * Dispatch one thread per word.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_intersect_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl)
+
+/* ---------------------------------------------------------------------- */
+/** \name Culling shapes extraction
+ * \{ */
+
+struct CullingTile {
+ IsectFrustum frustum;
+ vec4 bounds;
+};
+
+/* Corners are expected to be in viewspace so that the cone is starting from the origin.
+ * Corner order does not matter. */
+vec4 tile_bound_cone(vec3 v00, vec3 v01, vec3 v10, vec3 v11)
+{
+ v00 = normalize(v00);
+ v01 = normalize(v01);
+ v10 = normalize(v10);
+ v11 = normalize(v11);
+ vec3 center = normalize(v00 + v01 + v10 + v11);
+ float angle_cosine = dot(center, v00);
+ angle_cosine = max(angle_cosine, dot(center, v01));
+ angle_cosine = max(angle_cosine, dot(center, v10));
+ angle_cosine = max(angle_cosine, dot(center, v11));
+ return vec4(center, angle_cosine);
+}
+
+/* Corners are expected to be in viewspace. Returns Z-aligned bounding cylinder.
+ * Corner order does not matter. */
+vec4 tile_bound_cylinder(vec3 v00, vec3 v01, vec3 v10, vec3 v11)
+{
+ vec3 center = (v00 + v01 + v10 + v11) * 0.25;
+ vec4 corners_dist;
+ float dist_sqr = distance_squared(center, v00);
+ dist_sqr = max(dist_sqr, distance_squared(center, v01));
+ dist_sqr = max(dist_sqr, distance_squared(center, v10));
+ dist_sqr = max(dist_sqr, distance_squared(center, v11));
+ /* Return a cone. Later converted to cylinder. */
+ return vec4(center, sqrt(dist_sqr));
+}
+
+vec2 tile_to_ndc(vec2 tile_co, vec2 offset)
+{
+ /* Add a margin to prevent culling too much if the frustum becomes too much unstable. */
+ const float margin = 0.02;
+ tile_co += margin * (offset * 2.0 - 1.0);
+
+ tile_co += offset;
+ return tile_co * light_cull_buf.tile_to_uv_fac * 2.0 - 1.0;
+}
+
+CullingTile tile_culling_get(uvec2 tile_co)
+{
+ vec2 ftile = vec2(tile_co);
+ /* Culling frustum corners for this tile. */
+ vec3 corners[8];
+ /* Follow same corners order as view frustum. */
+ corners[1].xy = corners[0].xy = tile_to_ndc(ftile, vec2(0, 0));
+ corners[5].xy = corners[4].xy = tile_to_ndc(ftile, vec2(1, 0));
+ corners[6].xy = corners[7].xy = tile_to_ndc(ftile, vec2(1, 1));
+ corners[2].xy = corners[3].xy = tile_to_ndc(ftile, vec2(0, 1));
+ corners[1].z = corners[5].z = corners[6].z = corners[2].z = -1.0;
+ corners[0].z = corners[4].z = corners[7].z = corners[3].z = 1.0;
+
+ for (int i = 0; i < 8; i++) {
+ /* Culling in view space for precision. */
+ corners[i] = project_point(ProjectionMatrixInverse, corners[i]);
+ }
+
+ bool is_persp = ProjectionMatrix[3][3] == 0.0;
+ CullingTile tile;
+ tile.bounds = (is_persp) ? tile_bound_cone(corners[0], corners[4], corners[7], corners[3]) :
+ tile_bound_cylinder(corners[0], corners[4], corners[7], corners[3]);
+
+ tile.frustum = isect_data_setup(shape_frustum(corners));
+ return tile;
+}
+
+/** \} */
+
+/* ---------------------------------------------------------------------- */
+/** \name Intersection Tests
+ * \{ */
+
+bool intersect(CullingTile tile, Sphere sphere)
+{
+ bool isect = true;
+ /* Test tile intersection using bounding cone or bounding cylinder.
+ * This has less false positive cases when the sphere is large. */
+ if (ProjectionMatrix[3][3] == 0.0) {
+ isect = intersect(shape_cone(tile.bounds.xyz, tile.bounds.w), sphere);
+ }
+ else {
+ /* Simplify to a 2D circle test on the view Z axis plane. */
+ isect = intersect(shape_circle(tile.bounds.xy, tile.bounds.w),
+ shape_circle(sphere.center.xy, sphere.radius));
+ }
+ /* Refine using frustum test. If the sphere is small it avoids intersection
+ * with a neighbor tile. */
+ if (isect) {
+ isect = intersect(tile.frustum, sphere);
+ }
+ return isect;
+}
+
+bool intersect(CullingTile tile, Box bbox)
+{
+ return intersect(tile.frustum, bbox);
+}
+
+bool intersect(CullingTile tile, Pyramid pyramid)
+{
+ return intersect(tile.frustum, pyramid);
+}
+
+/** \} */
+
+void main()
+{
+ uint word_idx = gl_GlobalInvocationID.x % light_cull_buf.tile_word_len;
+ uint tile_idx = gl_GlobalInvocationID.x / light_cull_buf.tile_word_len;
+ uvec2 tile_co = uvec2(tile_idx % light_cull_buf.tile_x_len,
+ tile_idx / light_cull_buf.tile_x_len);
+
+ if (tile_co.y >= light_cull_buf.tile_y_len) {
+ return;
+ }
+
+ /* TODO(fclem): We could stop the tile at the HiZ depth. */
+ CullingTile tile = tile_culling_get(tile_co);
+
+ uint l_idx = word_idx * 32u;
+ uint l_end = min(l_idx + 32u, light_cull_buf.visible_count);
+ uint word = 0u;
+ for (; l_idx < l_end; l_idx++) {
+ LightData light = light_buf[l_idx];
+
+ /* Culling in view space for precision and simplicity. */
+ vec3 vP = transform_point(ViewMatrix, light._position);
+ vec3 v_right = transform_direction(ViewMatrix, light._right);
+ vec3 v_up = transform_direction(ViewMatrix, light._up);
+ vec3 v_back = transform_direction(ViewMatrix, light._back);
+ float radius = light.influence_radius_max;
+
+ Sphere sphere = shape_sphere(vP, radius);
+ bool intersect_tile = intersect(tile, sphere);
+
+ switch (light.type) {
+ case LIGHT_SPOT:
+ /* Only for < ~170° Cone due to plane extraction precision. */
+ if (light.spot_tan < 10.0) {
+ Pyramid pyramid = shape_pyramid_non_oblique(
+ vP,
+ vP - v_back * radius,
+ v_right * radius * light.spot_tan / light.spot_size_inv.x,
+ v_up * radius * light.spot_tan / light.spot_size_inv.y);
+ intersect_tile = intersect_tile && intersect(tile, pyramid);
+ break;
+ }
+ /* Fallthrough to the hemispheric case. */
+ case LIGHT_RECT:
+ case LIGHT_ELLIPSE:
+ vec3 v000 = vP - v_right * radius - v_up * radius;
+ vec3 v100 = v000 + v_right * (radius * 2.0);
+ vec3 v010 = v000 + v_up * (radius * 2.0);
+ vec3 v001 = v000 - v_back * radius;
+ Box bbox = shape_box(v000, v100, v010, v001);
+ intersect_tile = intersect_tile && intersect(tile, bbox);
+ default:
+ break;
+ }
+
+ if (intersect_tile) {
+ word |= 1u << (l_idx % 32u);
+ }
+ }
+
+ out_light_tile_buf[gl_GlobalInvocationID.x] = word;
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl
new file mode 100644
index 00000000000..ae20153f26c
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl
@@ -0,0 +1,56 @@
+
+/**
+ * Create the Zbins from Z-sorted lights.
+ * Perform min-max operation in LDS memory for speed.
+ * For this reason, we only dispatch 1 thread group.
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl)
+
+/* Fits the limit of 32KB. */
+shared uint zbin_max[CULLING_ZBIN_COUNT];
+shared uint zbin_min[CULLING_ZBIN_COUNT];
+
+void main()
+{
+ const uint zbin_iter = CULLING_ZBIN_COUNT / gl_WorkGroupSize.x;
+ const uint zbin_local = gl_LocalInvocationID.x * zbin_iter;
+
+ uint src_index = gl_GlobalInvocationID.x;
+
+ for (uint i = 0u, l = zbin_local; i < zbin_iter; i++, l++) {
+ zbin_max[l] = 0x0u;
+ zbin_min[l] = ~0x0u;
+ }
+ barrier();
+
+ uint light_iter = divide_ceil(light_cull_buf.visible_count, gl_WorkGroupSize.x);
+ for (uint i = 0u; i < light_iter; i++) {
+ uint index = i * gl_WorkGroupSize.x + gl_LocalInvocationID.x;
+ if (index >= light_cull_buf.visible_count) {
+ continue;
+ }
+ vec3 P = light_buf[index]._position;
+ /* TODO(fclem): Could have better bounds for spot and area lights. */
+ float radius = light_buf[index].influence_radius_max;
+ float z_dist = dot(cameraForward, P) - dot(cameraForward, cameraPos);
+ int z_min = culling_z_to_zbin(
+ light_cull_buf.zbin_scale, light_cull_buf.zbin_bias, z_dist + radius);
+ int z_max = culling_z_to_zbin(
+ light_cull_buf.zbin_scale, light_cull_buf.zbin_bias, z_dist - radius);
+ z_min = clamp(z_min, 0, CULLING_ZBIN_COUNT - 1);
+ z_max = clamp(z_max, 0, CULLING_ZBIN_COUNT - 1);
+ /* Register to Z bins. */
+ for (int z = z_min; z <= z_max; z++) {
+ atomicMin(zbin_min[z], index);
+ atomicMax(zbin_max[z], index);
+ }
+ }
+ barrier();
+
+ /* Write result to zbins buffer. Pack min & max into 1 uint. */
+ for (uint i = 0u, l = zbin_local; i < zbin_iter; i++, l++) {
+ out_zbin_buf[l] = (zbin_max[l] << 16u) | (zbin_min[l] & 0xFFFFu);
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl
new file mode 100644
index 00000000000..d4abdd43aa4
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl
@@ -0,0 +1,129 @@
+
+/**
+ * The resources expected to be defined are:
+ * - light_buf
+ * - light_zbin_buf
+ * - light_cull_buf
+ * - light_tile_buf
+ * - shadow_atlas_tx
+ * - shadow_tilemaps_tx
+ * - sss_transmittance_tx
+ * - utility_tx
+ */
+
+#pragma BLENDER_REQUIRE(eevee_light_lib.glsl)
+#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl)
+
+/* TODO(fclem): We could reduce register pressure by only having static branches for sun lights. */
+void light_eval_ex(ClosureDiffuse diffuse,
+ ClosureReflection reflection,
+ const bool is_directional,
+ vec3 P,
+ vec3 V,
+ float vP_z,
+ float thickness,
+ vec4 ltc_mat,
+ uint l_idx,
+ inout vec3 out_diffuse,
+ inout vec3 out_specular)
+{
+ LightData light = light_buf[l_idx];
+ vec3 L;
+ float dist;
+ light_vector_get(light, P, L, dist);
+
+ float visibility = light_attenuation(light, L, dist);
+
+#if 0 /* TODO(fclem): Shadows */
+ if ((light.shadow_id != LIGHT_NO_SHADOW) && (visibility > 0.0)) {
+ vec3 lL = light_world_to_local(light, -L) * dist;
+
+ float shadow_delta = shadow_delta_get(
+ shadow_atlas_tx, shadow_tilemaps_tx, light, light.shadow_data, lL, dist, P);
+
+# ifdef SSS_TRANSMITTANCE
+ /* Transmittance evaluation first to use initial visibility. */
+ if (diffuse.sss_id != 0u && light.diffuse_power > 0.0) {
+ float delta = max(thickness, shadow_delta);
+
+ vec3 intensity = visibility * light.transmit_power *
+ light_translucent(sss_transmittance_tx,
+ is_directional,
+ light,
+ diffuse.N,
+ L,
+ dist,
+ diffuse.sss_radius,
+ delta);
+ out_diffuse += light.color * intensity;
+ }
+# endif
+
+ visibility *= float(shadow_delta - light.shadow_data.bias <= 0.0);
+ }
+#endif
+
+ if (visibility < 1e-6) {
+ return;
+ }
+
+ if (light.diffuse_power > 0.0) {
+ float intensity = visibility * light.diffuse_power *
+ light_diffuse(utility_tx, is_directional, light, diffuse.N, V, L, dist);
+ out_diffuse += light.color * intensity;
+ }
+
+ if (light.specular_power > 0.0) {
+ float intensity = visibility * light.specular_power *
+ light_ltc(
+ utility_tx, is_directional, light, reflection.N, V, L, dist, ltc_mat);
+ out_specular += light.color * intensity;
+ }
+}
+
+void light_eval(ClosureDiffuse diffuse,
+ ClosureReflection reflection,
+ vec3 P,
+ vec3 V,
+ float vP_z,
+ float thickness,
+ inout vec3 out_diffuse,
+ inout vec3 out_specular)
+{
+ vec2 uv = vec2(reflection.roughness, safe_sqrt(1.0 - dot(reflection.N, V)));
+ uv = uv * UTIL_TEX_UV_SCALE + UTIL_TEX_UV_BIAS;
+ vec4 ltc_mat = utility_tx_sample(utility_tx, uv, UTIL_LTC_MAT_LAYER);
+
+ LIGHT_FOREACH_BEGIN_DIRECTIONAL(light_cull_buf, l_idx)
+ {
+ light_eval_ex(diffuse,
+ reflection,
+ true,
+ P,
+ V,
+ vP_z,
+ thickness,
+ ltc_mat,
+ l_idx,
+ out_diffuse,
+ out_specular);
+ }
+ LIGHT_FOREACH_END
+
+ vec2 px = gl_FragCoord.xy;
+ LIGHT_FOREACH_BEGIN_LOCAL(light_cull_buf, light_zbin_buf, light_tile_buf, px, vP_z, l_idx)
+ {
+ light_eval_ex(diffuse,
+ reflection,
+ false,
+ P,
+ V,
+ vP_z,
+ thickness,
+ ltc_mat,
+ l_idx,
+ out_diffuse,
+ out_specular);
+ }
+ LIGHT_FOREACH_END
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl
new file mode 100644
index 00000000000..22a5f98e6c3
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl
@@ -0,0 +1,72 @@
+
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+
+uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max)
+{
+ uint word_start = word_index * 32u;
+ uint word_end = word_start + 31u;
+ uint local_min = max(zbin_min, word_start);
+ uint local_max = min(zbin_max, word_end);
+ uint mask_width = local_max - local_min + 1;
+ return bit_field_mask(mask_width, local_min);
+}
+
+int culling_z_to_zbin(float scale, float bias, float z)
+{
+ return int(z * scale + bias);
+}
+
+/* Waiting to implement extensions support. We need:
+ * - GL_KHR_shader_subgroup_ballot
+ * - GL_KHR_shader_subgroup_arithmetic
+ * or
+ * - Vulkan 1.1
+ */
+#if 1
+# define subgroupMin(a) a
+# define subgroupMax(a) a
+# define subgroupOr(a) a
+# define subgroupBroadcastFirst(a) a
+#endif
+
+#define LIGHT_FOREACH_BEGIN_DIRECTIONAL(_culling, _index) \
+ { \
+ { \
+ for (uint _index = _culling.local_lights_len; _index < _culling.items_count; _index++) {
+
+#define LIGHT_FOREACH_BEGIN_LOCAL(_culling, _zbins, _words, _pixel, _linearz, _item_index) \
+ { \
+ uvec2 tile_co = uvec2(_pixel / _culling.tile_size); \
+ uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \
+ _culling.tile_word_len; \
+ int zbin_index = culling_z_to_zbin(_culling.zbin_scale, _culling.zbin_bias, _linearz); \
+ zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \
+ uint zbin_data = _zbins[zbin_index]; \
+ uint min_index = zbin_data & 0xFFFFu; \
+ uint max_index = zbin_data >> 16u; \
+ /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
+ min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \
+ max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \
+ /* Same as divide by 32 but avoid interger division. */ \
+ uint word_min = min_index >> 5u; \
+ uint word_max = max_index >> 5u; \
+ for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \
+ uint word = _words[tile_word_offset + word_idx]; \
+ word &= zbin_mask(word_idx, min_index, max_index); \
+ /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \
+ word = subgroupBroadcastFirst(subgroupOr(word)); \
+ int bit_index; \
+ while ((bit_index = findLSB(word)) != -1) { \
+ word &= ~1u << uint(bit_index); \
+ uint _item_index = word_idx * 32u + bit_index;
+
+/* No culling. Iterate over all items. */
+#define LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(_culling, _item_index) \
+ { \
+ { \
+ for (uint _item_index = 0; _item_index < _culling.visible_count; _item_index++) {
+
+#define LIGHT_FOREACH_END \
+ } \
+ } \
+ }
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl
new file mode 100644
index 00000000000..58608f6e1f0
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl
@@ -0,0 +1,209 @@
+
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_ltc_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl)
+
+/* ---------------------------------------------------------------------- */
+/** \name Light Functions
+ * \{ */
+
+void light_vector_get(LightData ld, vec3 P, out vec3 L, out float dist)
+{
+ if (ld.type == LIGHT_SUN) {
+ L = ld._back;
+ dist = 1.0;
+ }
+ else {
+ L = ld._position - P;
+ dist = inversesqrt(len_squared(L));
+ L *= dist;
+ dist = 1.0 / dist;
+ }
+}
+
+/* Rotate vector to light's local space. Does not translate. */
+vec3 light_world_to_local(LightData ld, vec3 L)
+{
+ /* Avoid relying on compiler to optimize this.
+ * vec3 lL = transpose(mat3(ld.object_mat)) * L; */
+ vec3 lL;
+ lL.x = dot(ld.object_mat[0].xyz, L);
+ lL.y = dot(ld.object_mat[1].xyz, L);
+ lL.z = dot(ld.object_mat[2].xyz, L);
+ return lL;
+}
+
+/* From Frostbite PBR Course
+ * Distance based attenuation
+ * http://www.frostbite.com/wp-content/uploads/2014/11/course_notes_moving_frostbite_to_pbr.pdf */
+float light_influence_attenuation(float dist, float inv_sqr_influence)
+{
+ float factor = sqr(dist) * inv_sqr_influence;
+ float fac = saturate(1.0 - sqr(factor));
+ return sqr(fac);
+}
+
+float light_spot_attenuation(LightData ld, vec3 L)
+{
+ vec3 lL = light_world_to_local(ld, L);
+ float ellipse = inversesqrt(1.0 + len_squared(lL.xy * ld.spot_size_inv / lL.z));
+ float spotmask = smoothstep(0.0, 1.0, ellipse * ld._spot_mul + ld._spot_bias);
+ return spotmask;
+}
+
+float light_attenuation(LightData ld, vec3 L, float dist)
+{
+ float vis = 1.0;
+ if (ld.type == LIGHT_SPOT) {
+ vis *= light_spot_attenuation(ld, L);
+ }
+ if (ld.type >= LIGHT_SPOT) {
+ vis *= step(0.0, -dot(L, -ld._back));
+ }
+ if (ld.type != LIGHT_SUN) {
+#ifdef VOLUME_LIGHTING
+ vis *= light_influence_attenuation(dist, ld.influence_radius_invsqr_volume);
+#else
+ vis *= light_influence_attenuation(dist, ld.influence_radius_invsqr_surface);
+#endif
+ }
+ return vis;
+}
+
+/* Cheaper alternative than evaluating the LTC.
+ * The result needs to be multiplied by BSDF or Phase Function. */
+float light_point_light(LightData ld, const bool is_directional, vec3 L, float dist)
+{
+ if (is_directional) {
+ return 1.0;
+ }
+ /**
+ * Using "Point Light Attenuation Without Singularity" from Cem Yuksel
+ * http://www.cemyuksel.com/research/pointlightattenuation/pointlightattenuation.pdf
+ * http://www.cemyuksel.com/research/pointlightattenuation/
+ **/
+ float d_sqr = sqr(dist);
+ float r_sqr = ld.radius_squared;
+ /* Using reformulation that has better numerical percision. */
+ float power = 2.0 / (d_sqr + r_sqr + dist * sqrt(d_sqr + r_sqr));
+
+ if (is_area_light(ld.type)) {
+ /* Modulate by light plane orientation / solid angle. */
+ power *= saturate(dot(ld._back, L));
+ }
+ return power;
+}
+
+float light_diffuse(sampler2DArray utility_tx,
+ const bool is_directional,
+ LightData ld,
+ vec3 N,
+ vec3 V,
+ vec3 L,
+ float dist)
+{
+ if (is_directional || !is_area_light(ld.type)) {
+ float radius = ld._radius / dist;
+ return ltc_evaluate_disk_simple(utility_tx, radius, dot(N, L));
+ }
+ else if (ld.type == LIGHT_RECT) {
+ vec3 corners[4];
+ corners[0] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y;
+ corners[1] = ld._right * ld._area_size_x + ld._up * ld._area_size_y;
+ corners[2] = -corners[0];
+ corners[3] = -corners[1];
+
+ corners[0] = normalize(L * dist + corners[0]);
+ corners[1] = normalize(L * dist + corners[1]);
+ corners[2] = normalize(L * dist + corners[2]);
+ corners[3] = normalize(L * dist + corners[3]);
+
+ return ltc_evaluate_quad(utility_tx, corners, N);
+ }
+ else /* (ld.type == LIGHT_ELLIPSE) */ {
+ vec3 points[3];
+ points[0] = ld._right * -ld._area_size_x + ld._up * -ld._area_size_y;
+ points[1] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y;
+ points[2] = -points[0];
+
+ points[0] += L * dist;
+ points[1] += L * dist;
+ points[2] += L * dist;
+
+ return ltc_evaluate_disk(utility_tx, N, V, mat3(1.0), points);
+ }
+}
+
+float light_ltc(sampler2DArray utility_tx,
+ const bool is_directional,
+ LightData ld,
+ vec3 N,
+ vec3 V,
+ vec3 L,
+ float dist,
+ vec4 ltc_mat)
+{
+ if (is_directional || ld.type != LIGHT_RECT) {
+ vec3 Px = ld._right;
+ vec3 Py = ld._up;
+
+ if (is_directional || !is_area_light(ld.type)) {
+ make_orthonormal_basis(L, Px, Py);
+ }
+
+ vec3 points[3];
+ points[0] = Px * -ld._area_size_x + Py * -ld._area_size_y;
+ points[1] = Px * ld._area_size_x + Py * -ld._area_size_y;
+ points[2] = -points[0];
+
+ points[0] += L * dist;
+ points[1] += L * dist;
+ points[2] += L * dist;
+
+ return ltc_evaluate_disk(utility_tx, N, V, ltc_matrix(ltc_mat), points);
+ }
+ else {
+ vec3 corners[4];
+ corners[0] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y;
+ corners[1] = ld._right * ld._area_size_x + ld._up * ld._area_size_y;
+ corners[2] = -corners[0];
+ corners[3] = -corners[1];
+
+ corners[0] += L * dist;
+ corners[1] += L * dist;
+ corners[2] += L * dist;
+ corners[3] += L * dist;
+
+ ltc_transform_quad(N, V, ltc_matrix(ltc_mat), corners);
+
+ return ltc_evaluate_quad(utility_tx, corners, vec3(0.0, 0.0, 1.0));
+ }
+}
+
+vec3 light_translucent(sampler1D transmittance_tx,
+ const bool is_directional,
+ LightData ld,
+ vec3 N,
+ vec3 L,
+ float dist,
+ vec3 sss_radius,
+ float delta)
+{
+ /* TODO(fclem): We should compute the power at the entry point. */
+ /* NOTE(fclem): we compute the light attenuation using the light vector but the transmittance
+ * using the shadow depth delta. */
+ float power = light_point_light(ld, is_directional, L, dist);
+ /* Do not add more energy on front faces. Also apply lambertian BSDF. */
+ power *= max(0.0, dot(-N, L)) * M_1_PI;
+
+ sss_radius *= SSS_TRANSMIT_LUT_RADIUS;
+ vec3 channels_co = saturate(delta / sss_radius) * SSS_TRANSMIT_LUT_SCALE + SSS_TRANSMIT_LUT_BIAS;
+
+ vec3 translucency;
+ translucency.x = (sss_radius.x > 0.0) ? texture(transmittance_tx, channels_co.x).r : 0.0;
+ translucency.y = (sss_radius.y > 0.0) ? texture(transmittance_tx, channels_co.y).r : 0.0;
+ translucency.z = (sss_radius.z > 0.0) ? texture(transmittance_tx, channels_co.z).r : 0.0;
+ return translucency * power;
+}
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl
new file mode 100644
index 00000000000..57e92b0b9b4
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl
@@ -0,0 +1,299 @@
+
+/**
+ * Adapted from :
+ * Real-Time Polygonal-Light Shading with Linearly Transformed Cosines.
+ * Eric Heitz, Jonathan Dupuy, Stephen Hill and David Neubelt.
+ * ACM Transactions on Graphics (Proceedings of ACM SIGGRAPH 2016) 35(4), 2016.
+ * Project page: https://eheitzresearch.wordpress.com/415-2/
+ */
+
+/* Diffuse *clipped* sphere integral. */
+float ltc_diffuse_sphere_integral(sampler2DArray utility_tx, float avg_dir_z, float form_factor)
+{
+#if 1
+ /* use tabulated horizon-clipped sphere */
+ vec2 uv = vec2(avg_dir_z * 0.5 + 0.5, form_factor);
+ uv = uv * UTIL_TEX_UV_SCALE + UTIL_TEX_UV_BIAS;
+
+ return texture(utility_tx, vec3(uv, UTIL_DISK_INTEGRAL_LAYER))[UTIL_DISK_INTEGRAL_COMP];
+#else
+ /* Cheap approximation. Less smooth and have energy issues. */
+ return max((form_factor * form_factor + avg_dir_z) / (form_factor + 1.0), 0.0);
+#endif
+}
+
+/**
+ * An extended version of the implementation from
+ * "How to solve a cubic equation, revisited"
+ * http://momentsingraphics.de/?p=105
+ */
+vec3 ltc_solve_cubic(vec4 coefs)
+{
+ /* Normalize the polynomial */
+ coefs.xyz /= coefs.w;
+ /* Divide middle coefficients by three */
+ coefs.yz /= 3.0;
+
+ float A = coefs.w;
+ float B = coefs.z;
+ float C = coefs.y;
+ float D = coefs.x;
+
+ /* Compute the Hessian and the discriminant */
+ vec3 delta = vec3(-coefs.zy * coefs.zz + coefs.yx, dot(vec2(coefs.z, -coefs.y), coefs.xy));
+
+ /* Discriminant */
+ float discr = dot(vec2(4.0 * delta.x, -delta.y), delta.zy);
+
+ /* Clamping avoid NaN output on some platform. (see T67060) */
+ float sqrt_discr = sqrt(clamp(discr, 0.0, FLT_MAX));
+
+ vec2 xlc, xsc;
+
+ /* Algorithm A */
+ {
+ float A_a = 1.0;
+ float C_a = delta.x;
+ float D_a = -2.0 * B * delta.x + delta.y;
+
+ /* Take the cubic root of a normalized complex number */
+ float theta = atan(sqrt_discr, -D_a) / 3.0;
+
+ float _2_sqrt_C_a = 2.0 * sqrt(-C_a);
+ float x_1a = _2_sqrt_C_a * cos(theta);
+ float x_3a = _2_sqrt_C_a * cos(theta + (2.0 / 3.0) * M_PI);
+
+ float xl;
+ if ((x_1a + x_3a) > 2.0 * B) {
+ xl = x_1a;
+ }
+ else {
+ xl = x_3a;
+ }
+
+ xlc = vec2(xl - B, A);
+ }
+
+ /* Algorithm D */
+ {
+ float A_d = D;
+ float C_d = delta.z;
+ float D_d = -D * delta.y + 2.0 * C * delta.z;
+
+ /* Take the cubic root of a normalized complex number */
+ float theta = atan(D * sqrt_discr, -D_d) / 3.0;
+
+ float _2_sqrt_C_d = 2.0 * sqrt(-C_d);
+ float x_1d = _2_sqrt_C_d * cos(theta);
+ float x_3d = _2_sqrt_C_d * cos(theta + (2.0 / 3.0) * M_PI);
+
+ float xs;
+ if (x_1d + x_3d < 2.0 * C) {
+ xs = x_1d;
+ }
+ else {
+ xs = x_3d;
+ }
+
+ xsc = vec2(-D, xs + C);
+ }
+
+ float E = xlc.y * xsc.y;
+ float F = -xlc.x * xsc.y - xlc.y * xsc.x;
+ float G = xlc.x * xsc.x;
+
+ vec2 xmc = vec2(C * F - B * G, -B * F + C * E);
+
+ vec3 root = vec3(xsc.x / xsc.y, xmc.x / xmc.y, xlc.x / xlc.y);
+
+ if (root.x < root.y && root.x < root.z) {
+ root.xyz = root.yxz;
+ }
+ else if (root.z < root.x && root.z < root.y) {
+ root.xyz = root.xzy;
+ }
+
+ return root;
+}
+
+/* from Real-Time Area Lighting: a Journey from Research to Production
+ * Stephen Hill and Eric Heitz */
+vec3 ltc_edge_integral_vec(vec3 v1, vec3 v2)
+{
+ float x = dot(v1, v2);
+ float y = abs(x);
+
+ float a = 0.8543985 + (0.4965155 + 0.0145206 * y) * y;
+ float b = 3.4175940 + (4.1616724 + y) * y;
+ float v = a / b;
+
+ float theta_sintheta = (x > 0.0) ? v : 0.5 * inversesqrt(max(1.0 - x * x, 1e-7)) - v;
+
+ return cross(v1, v2) * theta_sintheta;
+}
+
+mat3 ltc_matrix(vec4 lut)
+{
+ /* Load inverse matrix. */
+ return mat3(vec3(lut.x, 0, lut.y), vec3(0, 1, 0), vec3(lut.z, 0, lut.w));
+}
+
+void ltc_transform_quad(vec3 N, vec3 V, mat3 Minv, inout vec3 corners[4])
+{
+ /* Avoid dot(N, V) == 1 in ortho mode, leading T1 normalize to fail. */
+ V = normalize(V + 1e-8);
+
+ /* Construct orthonormal basis around N. */
+ vec3 T1, T2;
+ T1 = normalize(V - N * dot(N, V));
+ T2 = cross(N, T1);
+
+ /* Rotate area light in (T1, T2, R) basis. */
+ Minv = Minv * transpose(mat3(T1, T2, N));
+
+ /* Apply LTC inverse matrix. */
+ corners[0] = normalize(Minv * corners[0]);
+ corners[1] = normalize(Minv * corners[1]);
+ corners[2] = normalize(Minv * corners[2]);
+ corners[3] = normalize(Minv * corners[3]);
+}
+
+/* If corners have already pass through ltc_transform_quad(),
+ * then N **MUST** be vec3(0.0, 0.0, 1.0), corresponding to the Up axis of the shading basis. */
+float ltc_evaluate_quad(sampler2DArray utility_tx, vec3 corners[4], vec3 N)
+{
+ /* Approximation using a sphere of the same solid angle than the quad.
+ * Finding the clipped sphere diffuse integral is easier than clipping the quad. */
+ vec3 avg_dir;
+ avg_dir = ltc_edge_integral_vec(corners[0], corners[1]);
+ avg_dir += ltc_edge_integral_vec(corners[1], corners[2]);
+ avg_dir += ltc_edge_integral_vec(corners[2], corners[3]);
+ avg_dir += ltc_edge_integral_vec(corners[3], corners[0]);
+
+ float form_factor = length(avg_dir);
+ float avg_dir_z = dot(N, avg_dir / form_factor);
+ return form_factor * ltc_diffuse_sphere_integral(utility_tx, avg_dir_z, form_factor);
+}
+
+/* If disk does not need to be transformed and is already front facing. */
+float ltc_evaluate_disk_simple(sampler2DArray utility_tx, float disk_radius, float NL)
+{
+ float r_sqr = disk_radius * disk_radius;
+ float one_r_sqr = 1.0 + r_sqr;
+ float form_factor = r_sqr * inversesqrt(one_r_sqr * one_r_sqr);
+ return form_factor * ltc_diffuse_sphere_integral(utility_tx, NL, form_factor);
+}
+
+/* disk_points are WS vectors from the shading point to the disk "bounding domain" */
+float ltc_evaluate_disk(sampler2DArray utility_tx, vec3 N, vec3 V, mat3 Minv, vec3 disk_points[3])
+{
+ /* Avoid dot(N, V) == 1 in ortho mode, leading T1 normalize to fail. */
+ V = normalize(V + 1e-8);
+
+ /* construct orthonormal basis around N */
+ vec3 T1, T2;
+ T1 = normalize(V - N * dot(V, N));
+ T2 = cross(N, T1);
+
+ /* rotate area light in (T1, T2, R) basis */
+ mat3 R = transpose(mat3(T1, T2, N));
+
+ /* Intermediate step: init ellipse. */
+ vec3 L_[3];
+ L_[0] = mul(R, disk_points[0]);
+ L_[1] = mul(R, disk_points[1]);
+ L_[2] = mul(R, disk_points[2]);
+
+ vec3 C = 0.5 * (L_[0] + L_[2]);
+ vec3 V1 = 0.5 * (L_[1] - L_[2]);
+ vec3 V2 = 0.5 * (L_[1] - L_[0]);
+
+ /* Transform ellipse by Minv. */
+ C = Minv * C;
+ V1 = Minv * V1;
+ V2 = Minv * V2;
+
+ /* Compute eigenvectors of new ellipse. */
+
+ float d11 = dot(V1, V1);
+ float d22 = dot(V2, V2);
+ float d12 = dot(V1, V2);
+ float a, b; /* Eigenvalues */
+ const float threshold = 0.0007; /* Can be adjusted. Fix artifacts. */
+ if (abs(d12) / sqrt(d11 * d22) > threshold) {
+ float tr = d11 + d22;
+ float det = -d12 * d12 + d11 * d22;
+
+ /* use sqrt matrix to solve for eigenvalues */
+ det = sqrt(det);
+ float u = 0.5 * sqrt(tr - 2.0 * det);
+ float v = 0.5 * sqrt(tr + 2.0 * det);
+ float e_max = (u + v);
+ float e_min = (u - v);
+ e_max *= e_max;
+ e_min *= e_min;
+
+ vec3 V1_, V2_;
+ if (d11 > d22) {
+ V1_ = d12 * V1 + (e_max - d11) * V2;
+ V2_ = d12 * V1 + (e_min - d11) * V2;
+ }
+ else {
+ V1_ = d12 * V2 + (e_max - d22) * V1;
+ V2_ = d12 * V2 + (e_min - d22) * V1;
+ }
+
+ a = 1.0 / e_max;
+ b = 1.0 / e_min;
+ V1 = normalize(V1_);
+ V2 = normalize(V2_);
+ }
+ else {
+ a = 1.0 / d11;
+ b = 1.0 / d22;
+ V1 *= sqrt(a);
+ V2 *= sqrt(b);
+ }
+
+ /* Now find front facing ellipse with same solid angle. */
+
+ vec3 V3 = normalize(cross(V1, V2));
+ if (dot(C, V3) < 0.0) {
+ V3 *= -1.0;
+ }
+
+ float L = dot(V3, C);
+ float inv_L = 1.0 / L;
+ float x0 = dot(V1, C) * inv_L;
+ float y0 = dot(V2, C) * inv_L;
+
+ float L_sqr = L * L;
+ a *= L_sqr;
+ b *= L_sqr;
+
+ float t = 1.0 + x0 * x0;
+ float c0 = a * b;
+ float c1 = c0 * (t + y0 * y0) - a - b;
+ float c2 = (1.0 - a * t) - b * (1.0 + y0 * y0);
+ float c3 = 1.0;
+
+ vec3 roots = ltc_solve_cubic(vec4(c0, c1, c2, c3));
+ float e1 = roots.x;
+ float e2 = roots.y;
+ float e3 = roots.z;
+
+ vec3 avg_dir = vec3(a * x0 / (a - e2), b * y0 / (b - e2), 1.0);
+
+ mat3 rotate = mat3(V1, V2, V3);
+
+ avg_dir = rotate * avg_dir;
+ avg_dir = normalize(avg_dir);
+
+ /* L1, L2 are the extends of the front facing ellipse. */
+ float L1 = sqrt(-e2 / e3);
+ float L2 = sqrt(-e2 / e1);
+
+ /* Find the sphere and compute lighting. */
+ float form_factor = max(0.0, L1 * L2 * inversesqrt((1.0 + L1 * L1) * (1.0 + L2 * L2)));
+ return form_factor * ltc_diffuse_sphere_integral(utility_tx, avg_dir.z, form_factor);
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl
new file mode 100644
index 00000000000..07139ea6a09
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl
@@ -0,0 +1,115 @@
+
+/**
+ * Dilate motion vector tiles until we covered maximum velocity.
+ * Outputs the largest intersecting motion vector in the neighborhood.
+ */
+
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_motion_blur_lib.glsl)
+
+#define DEBUG_BYPASS_DILATION 0
+
+struct MotionRect {
+ ivec2 bottom_left;
+ ivec2 extent;
+};
+
+MotionRect compute_motion_rect(ivec2 tile, vec2 motion)
+{
+#if DEBUG_BYPASS_DILATION
+ return MotionRect(tile, ivec2(1));
+#endif
+ /* Ceil to number of tile touched. */
+ ivec2 point1 = tile + ivec2(sign(motion) * ceil(abs(motion) / float(MOTION_BLUR_TILE_SIZE)));
+ ivec2 point2 = tile;
+
+ ivec2 max_point = max(point1, point2);
+ ivec2 min_point = min(point1, point2);
+ /* Clamp to bounds. */
+ max_point = min(max_point, imageSize(in_tiles_img) - 1);
+ min_point = max(min_point, ivec2(0));
+
+ MotionRect rect;
+ rect.bottom_left = min_point;
+ rect.extent = 1 + max_point - min_point;
+ return rect;
+}
+
+struct MotionLine {
+ /** Origin of the line. */
+ vec2 origin;
+ /** Normal to the line direction. */
+ vec2 normal;
+};
+
+MotionLine compute_motion_line(ivec2 tile, vec2 motion)
+{
+ vec2 dir = safe_normalize(motion);
+
+ MotionLine line;
+ line.origin = vec2(tile);
+ /* Rotate 90° Counter-Clockwise. */
+ line.normal = vec2(-dir.y, dir.x);
+ return line;
+}
+
+bool is_inside_motion_line(ivec2 tile, MotionLine motion_line)
+{
+#if DEBUG_BYPASS_DILATION
+ return true;
+#endif
+ /* NOTE: Everything in is tile unit. */
+ float dist = point_line_projection_dist(vec2(tile), motion_line.origin, motion_line.normal);
+ /* In order to be conservative and for simplicity, we use the tiles bounding circles.
+ * Consider that both the tile and the line have bounding radius of M_SQRT1_2. */
+ return abs(dist) < M_SQRT2;
+}
+
+void main()
+{
+ ivec2 src_tile = ivec2(gl_GlobalInvocationID.xy);
+ if (any(greaterThanEqual(src_tile, imageSize(in_tiles_img)))) {
+ return;
+ }
+
+ vec4 max_motion = imageLoad(in_tiles_img, src_tile);
+
+ MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, src_tile);
+ MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile);
+ if (true) {
+ /* Rectangular area (in tiles) where the motion vector spreads. */
+ MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy);
+ MotionLine motion_line = compute_motion_line(src_tile, max_motion.xy);
+ /* Do a conservative rasterization of the line of the motion vector line. */
+ for (int x = 0; x < motion_rect.extent.x; x++) {
+ for (int y = 0; y < motion_rect.extent.y; y++) {
+ ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
+ if (is_inside_motion_line(tile, motion_line)) {
+ motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
+ /* FIXME: This is a bit weird, but for some reason, we need the store the same vector in
+ * the motion next so that weighting in gather pass is better. */
+ motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
+ }
+ }
+ }
+ }
+
+ if (true) {
+ MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile);
+ /* Rectangular area (in tiles) where the motion vector spreads. */
+ MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw);
+ MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw);
+ /* Do a conservative rasterization of the line of the motion vector line. */
+ for (int x = 0; x < motion_rect.extent.x; x++) {
+ for (int y = 0; y < motion_rect.extent.y; y++) {
+ ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
+ if (is_inside_motion_line(tile, motion_line)) {
+ motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
+ /* FIXME: This is a bit weird, but for some reason, we need the store the same vector in
+ * the motion next so that weighting in gather pass is better. */
+ motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
+ }
+ }
+ }
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl
new file mode 100644
index 00000000000..cbbeea25d20
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl
@@ -0,0 +1,103 @@
+
+/**
+ * Shaders that down-sample velocity buffer into squared tile of MB_TILE_DIVISOR pixels wide.
+ * Outputs the largest motion vector in the tile area.
+ * Also perform velocity resolve to speedup the convolution pass.
+ *
+ * Based on:
+ * A Fast and Stable Feature-Aware Motion Blur Filter
+ * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai
+ *
+ * Adapted from G3D Innovation Engine implementation.
+ */
+
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
+
+shared uint payload_prev;
+shared uint payload_next;
+shared vec2 max_motion_prev;
+shared vec2 max_motion_next;
+
+/* Store velocity magnitude in the MSB and thread id in the LSB. */
+uint pack_payload(vec2 motion, uvec2 thread_id)
+{
+ /* NOTE: We clamp max velocity to 16k pixels. */
+ return (min(uint(ceil(length(motion))), 0xFFFFu) << 16u) | (thread_id.y << 8) | thread_id.x;
+}
+
+/* Return thread index from the payload. */
+uvec2 unpack_payload(uint payload)
+{
+ return uvec2(payload & 0xFFu, (payload >> 8) & 0xFFu);
+}
+
+void main()
+{
+ if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) {
+ payload_prev = 0u;
+ payload_next = 0u;
+ }
+ barrier();
+
+ uint local_payload_prev = 0u;
+ uint local_payload_next = 0u;
+ vec2 local_max_motion_prev;
+ vec2 local_max_motion_next;
+
+ ivec2 texel = min(ivec2(gl_GlobalInvocationID.xy), imageSize(velocity_img) - 1);
+
+ vec2 render_size = vec2(imageSize(velocity_img).xy);
+ vec2 uv = (vec2(texel) + 0.5) / render_size;
+ float depth = texelFetch(depth_tx, texel, 0).r;
+ vec4 motion = velocity_resolve(imageLoad(velocity_img, texel), uv, depth);
+#ifdef FLATTEN_VIEWPORT
+ /* imageLoad does not perform the swizzling like sampler does. Do it manually. */
+ motion = motion.xyxy;
+#endif
+
+ /* Store resolved velocity to speedup the gather pass. Out of bounds writes are ignored.
+ * Unfortunately, we cannot convert to pixel space here since it is also used by TAA and the
+ * motion blur needs to remain optional. */
+ imageStore(velocity_img, ivec2(gl_GlobalInvocationID.xy), velocity_pack(motion));
+ /* Clip velocity to viewport bounds (in NDC space). */
+ vec2 line_clip;
+ line_clip.x = line_unit_square_intersect_dist_safe(uv * 2.0 - 1.0, motion.xy * 2.0);
+ line_clip.y = line_unit_square_intersect_dist_safe(uv * 2.0 - 1.0, -motion.zw * 2.0);
+ motion *= min(line_clip, vec2(1.0)).xxyy;
+ /* Convert to pixel space. Note this is only for velocity tiles. */
+ motion *= render_size.xyxy;
+ /* Rescale to shutter relative motion for viewport. */
+ motion *= motion_blur_buf.motion_scale.xxyy;
+
+ uint sample_payload_prev = pack_payload(motion.xy, gl_LocalInvocationID.xy);
+ if (local_payload_prev < sample_payload_prev) {
+ local_payload_prev = sample_payload_prev;
+ local_max_motion_prev = motion.xy;
+ }
+
+ uint sample_payload_next = pack_payload(motion.zw, gl_LocalInvocationID.xy);
+ if (local_payload_next < sample_payload_next) {
+ local_payload_next = sample_payload_next;
+ local_max_motion_next = motion.zw;
+ }
+
+ /* Compare the local payload with the other threads. */
+ atomicMax(payload_prev, local_payload_prev);
+ atomicMax(payload_next, local_payload_next);
+ barrier();
+
+ /* Need to broadcast the result to another thread in order to issue a unique write. */
+ if (all(equal(unpack_payload(payload_prev), gl_LocalInvocationID.xy))) {
+ max_motion_prev = local_max_motion_prev;
+ }
+ if (all(equal(unpack_payload(payload_next), gl_LocalInvocationID.xy))) {
+ max_motion_next = local_max_motion_next;
+ }
+ barrier();
+
+ if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) {
+ ivec2 tile_co = ivec2(gl_WorkGroupID.xy);
+ imageStore(out_tiles_img, tile_co, vec4(max_motion_prev, max_motion_next));
+ }
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl
new file mode 100644
index 00000000000..5249e6637b6
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl
@@ -0,0 +1,221 @@
+
+/**
+ * Perform two gather blur in the 2 motion blur directions
+ * Based on:
+ * A Fast and Stable Feature-Aware Motion Blur Filter
+ * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai
+ *
+ * With modification from the presentation:
+ * Next Generation Post Processing in Call of Duty Advanced Warfare
+ * by Jorge Jimenez
+ */
+
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_motion_blur_lib.glsl)
+
+const int gather_sample_count = 8;
+
+/* Converts uv velocity into pixel space. Assumes velocity_tx is the same resolution as the
+ * target post-fx framebuffer. */
+vec4 motion_blur_sample_velocity(sampler2D velocity_tx, vec2 uv)
+{
+ /* We can load velocity without velocity_resolve() since we resolved during the flatten pass. */
+ vec4 velocity = velocity_unpack(texture(velocity_tx, uv));
+ return velocity * vec2(textureSize(velocity_tx, 0)).xyxy * motion_blur_buf.motion_scale.xxyy;
+}
+
+vec2 spread_compare(float center_motion_length, float sample_motion_length, float offset_length)
+{
+ return saturate(vec2(center_motion_length, sample_motion_length) - offset_length + 1.0);
+}
+
+vec2 depth_compare(float center_depth, float sample_depth)
+{
+ vec2 depth_scale = vec2(-motion_blur_buf.depth_scale, motion_blur_buf.depth_scale);
+ return saturate(0.5 + depth_scale * (sample_depth - center_depth));
+}
+
+/* Kill contribution if not going the same direction. */
+float dir_compare(vec2 offset, vec2 sample_motion, float sample_motion_length)
+{
+ if (sample_motion_length < 0.5) {
+ return 1.0;
+ }
+ return (dot(offset, sample_motion) > 0.0) ? 1.0 : 0.0;
+}
+
+/* Return background (x) and foreground (y) weights. */
+vec2 sample_weights(float center_depth,
+ float sample_depth,
+ float center_motion_length,
+ float sample_motion_length,
+ float offset_length)
+{
+ /* Classify foreground/background. */
+ vec2 depth_weight = depth_compare(center_depth, sample_depth);
+ /* Weight if sample is overlapping or under the center pixel. */
+ vec2 spread_weight = spread_compare(center_motion_length, sample_motion_length, offset_length);
+ return depth_weight * spread_weight;
+}
+
+struct Accumulator {
+ vec4 fg;
+ vec4 bg;
+ /** x: Background, y: Foreground, z: dir. */
+ vec3 weight;
+};
+
+void gather_sample(vec2 screen_uv,
+ float center_depth,
+ float center_motion_len,
+ vec2 offset,
+ float offset_len,
+ const bool next,
+ inout Accumulator accum)
+{
+ vec2 sample_uv = screen_uv - offset * motion_blur_buf.target_size_inv;
+ vec4 sample_vectors = motion_blur_sample_velocity(velocity_tx, sample_uv);
+ vec2 sample_motion = (next) ? sample_vectors.zw : sample_vectors.xy;
+ float sample_motion_len = length(sample_motion);
+ float sample_depth = texture(depth_tx, sample_uv).r;
+ vec4 sample_color = textureLod(in_color_tx, sample_uv, 0.0);
+
+ sample_depth = get_view_z_from_depth(sample_depth);
+
+ vec3 weights;
+ weights.xy = sample_weights(
+ center_depth, sample_depth, center_motion_len, sample_motion_len, offset_len);
+ weights.z = dir_compare(offset, sample_motion, sample_motion_len);
+ weights.xy *= weights.z;
+
+ accum.fg += sample_color * weights.y;
+ accum.bg += sample_color * weights.x;
+ accum.weight += weights;
+}
+
+void gather_blur(vec2 screen_uv,
+ vec2 center_motion,
+ float center_depth,
+ vec2 max_motion,
+ float ofs,
+ const bool next,
+ inout Accumulator accum)
+{
+ float center_motion_len = length(center_motion);
+ float max_motion_len = length(max_motion);
+
+ /* Tile boundaries randomization can fetch a tile where there is less motion than this pixel.
+ * Fix this by overriding the max_motion. */
+ if (max_motion_len < center_motion_len) {
+ max_motion_len = center_motion_len;
+ max_motion = center_motion;
+ }
+
+ if (max_motion_len < 0.5) {
+ return;
+ }
+
+ int i;
+ float t, inc = 1.0 / float(gather_sample_count);
+ for (i = 0, t = ofs * inc; i < gather_sample_count; i++, t += inc) {
+ gather_sample(screen_uv,
+ center_depth,
+ center_motion_len,
+ max_motion * t,
+ max_motion_len * t,
+ next,
+ accum);
+ }
+
+ if (center_motion_len < 0.5) {
+ return;
+ }
+
+ for (i = 0, t = ofs * inc; i < gather_sample_count; i++, t += inc) {
+ /* Also sample in center motion direction.
+ * Allow recovering motion where there is conflicting
+ * motion between foreground and background. */
+ gather_sample(screen_uv,
+ center_depth,
+ center_motion_len,
+ center_motion * t,
+ center_motion_len * t,
+ next,
+ accum);
+ }
+}
+
+void main()
+{
+ ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+ vec2 uv = (vec2(texel) + 0.5) / vec2(textureSize(depth_tx, 0).xy);
+
+ if (!in_texture_range(texel, depth_tx)) {
+ return;
+ }
+
+ /* Data of the center pixel of the gather (target). */
+ float center_depth = get_view_z_from_depth(texelFetch(depth_tx, texel, 0).r);
+ vec4 center_motion = motion_blur_sample_velocity(velocity_tx, uv);
+
+ vec4 center_color = textureLod(in_color_tx, uv, 0.0);
+
+ float noise_offset = sampling_rng_1D_get(SAMPLING_TIME);
+ /** TODO(fclem) Blue noise. */
+ vec2 rand = vec2(interlieved_gradient_noise(vec2(gl_GlobalInvocationID.xy), 0, noise_offset),
+ interlieved_gradient_noise(vec2(gl_GlobalInvocationID.xy), 1, noise_offset));
+
+ /* Randomize tile boundary to avoid ugly discontinuities. Randomize 1/4th of the tile.
+ * Note this randomize only in one direction but in practice it's enough. */
+ rand.x = rand.x * 2.0 - 1.0;
+ ivec2 tile = (texel + ivec2(rand.x * float(MOTION_BLUR_TILE_SIZE) * 0.25)) /
+ MOTION_BLUR_TILE_SIZE;
+ tile = clamp(tile, ivec2(0), imageSize(in_tiles_img) - 1);
+ /* NOTE: Tile velocity is already in pixel space and with correct zw sign. */
+ vec4 max_motion;
+ /* Load dilation result from the indirection table. */
+ ivec2 tile_prev;
+ motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, tile, tile_prev);
+ max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy;
+ ivec2 tile_next;
+ motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, tile, tile_next);
+ max_motion.zw = imageLoad(in_tiles_img, tile_next).zw;
+
+ Accumulator accum;
+ accum.weight = vec3(0.0, 0.0, 1.0);
+ accum.bg = vec4(0.0);
+ accum.fg = vec4(0.0);
+ /* First linear gather. time = [T - delta, T] */
+ gather_blur(uv, center_motion.xy, center_depth, max_motion.xy, rand.y, false, accum);
+ /* Second linear gather. time = [T, T + delta] */
+ gather_blur(uv, center_motion.zw, center_depth, max_motion.zw, rand.y, true, accum);
+
+#if 1 /* Own addition. Not present in reference implementation. */
+ /* Avoid division by 0.0. */
+ float w = 1.0 / (50.0 * float(gather_sample_count) * 4.0);
+ accum.bg += center_color * w;
+ accum.weight.x += w;
+ /* NOTE: In Jimenez's presentation, they used center sample.
+ * We use background color as it contains more information for foreground
+ * elements that have not enough weights.
+ * Yield better blur in complex motion. */
+ center_color = accum.bg / accum.weight.x;
+#endif
+ /* Merge background. */
+ accum.fg += accum.bg;
+ accum.weight.y += accum.weight.x;
+ /* Balance accumulation for failed samples.
+ * We replace the missing foreground by the background. */
+ float blend_fac = saturate(1.0 - accum.weight.y / accum.weight.z);
+ vec4 out_color = (accum.fg / accum.weight.z) + center_color * blend_fac;
+
+#if 0 /* For debugging. */
+ out_color.rgb = out_color.ggg;
+ out_color.rg += max_motion.xy;
+#endif
+
+ imageStore(out_color_img, texel, out_color);
+}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl
new file mode 100644
index 00000000000..436fd01795a
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl
@@ -0,0 +1,48 @@
+
+
+/* -------------------------------------------------------------------- */
+/** \name Tile indirection packing
+ * \{ */
+
+#define MotionPayload uint
+
+/* Store velocity magnitude in the MSB to be able to use it with atomicMax operations. */
+MotionPayload motion_blur_tile_indirection_pack_payload(vec2 motion, uvec2 payload)
+{
+ /* NOTE: Clamp to 16383 pixel velocity. After that, it is tile position that determine the tile
+ * to dilate over. */
+ uint velocity = min(uint(ceil(length(motion))), 0x3FFFu);
+ /* Designed for 512x512 tiles max. */
+ return (velocity << 18u) | ((payload.x & 0x1FFu) << 9u) | (payload.y & 0x1FFu);
+}
+
+/* Return thread index. */
+ivec2 motion_blur_tile_indirection_pack_payload(uint data)
+{
+ return ivec2((data >> 9u) & 0x1FFu, data & 0x1FFu);
+}
+
+uint motion_blur_tile_indirection_index(uint motion_step, uvec2 tile)
+{
+ uint index = tile.x;
+ index += tile.y * MOTION_BLUR_MAX_TILE;
+ index += motion_step * MOTION_BLUR_MAX_TILE * MOTION_BLUR_MAX_TILE;
+ return index;
+}
+
+#define MOTION_PREV 0u
+#define MOTION_NEXT 1u
+
+#define motion_blur_tile_indirection_store(table_, step_, tile, payload_) \
+ if (true) { \
+ uint index = motion_blur_tile_indirection_index(step_, tile); \
+ atomicMax(table_[index], payload_); \
+ }
+
+#define motion_blur_tile_indirection_load(table_, step_, tile_, result_) \
+ if (true) { \
+ uint index = motion_blur_tile_indirection_index(step_, tile_); \
+ result_ = motion_blur_tile_indirection_pack_payload(table_[index]); \
+ }
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl
index 0ccf06a9e14..dd047709afd 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl
@@ -39,6 +39,8 @@ bool closure_select(float weight, inout float total_weight, inout float r)
destination = candidate; \
}
+float g_closure_rand;
+
void closure_weights_reset()
{
g_diffuse_data.weight = 0.0;
@@ -58,18 +60,8 @@ void closure_weights_reset()
g_refraction_data.roughness = 0.0;
g_refraction_data.ior = 0.0;
- /* TEMP */
-#define P(x) ((x + 0.5) / 16.0)
- const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
- vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
- vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
- vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
-#undef P
#if defined(GPU_FRAGMENT_SHADER)
- ivec2 pix = ivec2(gl_FragCoord.xy) % ivec2(4);
- g_diffuse_rand = dither_mat4x4[pix.x][pix.y];
- g_reflection_rand = dither_mat4x4[pix.x][pix.y];
- g_refraction_rand = dither_mat4x4[pix.x][pix.y];
+ g_diffuse_rand = g_reflection_rand = g_refraction_rand = g_closure_rand;
#else
g_diffuse_rand = 0.0;
g_reflection_rand = 0.0;
@@ -245,6 +237,20 @@ float F_eta(float a, float b)
}
void output_aov(vec4 color, float value, uint hash)
{
+#if defined(MAT_AOV_SUPPORT) && defined(GPU_FRAGMENT_SHADER)
+ for (int i = 0; i < AOV_MAX && i < aov_buf.color_len; i++) {
+ if (aov_buf.hash_color[i] == hash) {
+ imageStore(aov_color_img, ivec3(gl_FragCoord.xy, i), color);
+ return;
+ }
+ }
+ for (int i = 0; i < AOV_MAX && i < aov_buf.value_len; i++) {
+ if (aov_buf.hash_value[i] == hash) {
+ imageStore(aov_value_img, ivec3(gl_FragCoord.xy, i), vec4(value));
+ return;
+ }
+ }
+#endif
}
#ifdef EEVEE_MATERIAL_STUBS
@@ -255,6 +261,10 @@ void output_aov(vec4 color, float value, uint hash)
# define nodetree_thickness() 0.1
#endif
+#ifdef GPU_VERTEX_SHADER
+# define closure_to_rgba(a) vec4(0.0)
+#endif
+
/* -------------------------------------------------------------------- */
/** \name Fragment Displacement
*
@@ -359,3 +369,71 @@ vec3 coordinate_incoming(vec3 P)
}
/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Volume Attribute post
+ *
+ * TODO(@fclem): These implementation details should concern the DRWManager and not be a fix on
+ * the engine side. But as of now, the engines are responsible for loading the attributes.
+ *
+ * \{ */
+
+#if defined(MAT_GEOM_VOLUME)
+
+float attr_load_temperature_post(float attr)
+{
+ /* Bring the into standard range without having to modify the grid values */
+ attr = (attr > 0.01) ? (attr * drw_volume.temperature_mul + drw_volume.temperature_bias) : 0.0;
+ return attr;
+}
+vec4 attr_load_color_post(vec4 attr)
+{
+ /* Density is premultiplied for interpolation, divide it out here. */
+ attr.rgb *= safe_rcp(attr.a);
+ attr.rgb *= drw_volume.color_mul.rgb;
+ attr.a = 1.0;
+ return attr;
+}
+
+#else /* Noop for any other surface. */
+
+float attr_load_temperature_post(float attr)
+{
+ return attr;
+}
+vec4 attr_load_color_post(vec4 attr)
+{
+ return attr;
+}
+
+#endif
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Uniform Attributes
+ *
+ * TODO(@fclem): These implementation details should concern the DRWManager and not be a fix on
+ * the engine side. But as of now, the engines are responsible for loading the attributes.
+ *
+ * \{ */
+
+vec4 attr_load_uniform(vec4 attr, const uint attr_hash)
+{
+#if defined(OBATTR_LIB)
+ uint index = floatBitsToUint(ObjectAttributeStart);
+ for (uint i = 0; i < floatBitsToUint(ObjectAttributeLen); i++, index++) {
+ if (drw_attrs[index].hash_code == attr_hash) {
+ return vec4(drw_attrs[index].data_x,
+ drw_attrs[index].data_y,
+ drw_attrs[index].data_z,
+ drw_attrs[index].data_w);
+ }
+ }
+ return vec4(0.0);
+#else
+ return attr;
+#endif
+}
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl
new file mode 100644
index 00000000000..0eea4a5ff33
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl
@@ -0,0 +1,104 @@
+
+/**
+ * Sampling data accessors and random number generators.
+ * Also contains some sample mapping functions.
+ **/
+
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+
+/* -------------------------------------------------------------------- */
+/** \name Sampling data.
+ *
+ * Return a random values from Low Discrepancy Sequence in [0..1) range.
+ * This value is uniform (constant) for the whole scene sample.
+ * You might want to couple it with a noise function.
+ * \{ */
+
+#ifdef EEVEE_SAMPLING_DATA
+
+float sampling_rng_1D_get(const eSamplingDimension dimension)
+{
+ return sampling_buf.dimensions[dimension];
+}
+
+vec2 sampling_rng_2D_get(const eSamplingDimension dimension)
+{
+ return vec2(sampling_buf.dimensions[dimension], sampling_buf.dimensions[dimension + 1u]);
+}
+
+vec3 sampling_rng_3D_get(const eSamplingDimension dimension)
+{
+ return vec3(sampling_buf.dimensions[dimension],
+ sampling_buf.dimensions[dimension + 1u],
+ sampling_buf.dimensions[dimension + 2u]);
+}
+
+#endif
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Random Number Generators.
+ * \{ */
+
+/* Interlieved gradient noise by Jorge Jimenez
+ * http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
+ * Seeding found by Epic Game. */
+float interlieved_gradient_noise(vec2 pixel, float seed, float offset)
+{
+ pixel += seed * (vec2(47, 17) * 0.695);
+ return fract(offset + 52.9829189 * fract(0.06711056 * pixel.x + 0.00583715 * pixel.y));
+}
+
+/* From: http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html */
+float van_der_corput_radical_inverse(uint bits)
+{
+#if 0 /* Reference */
+ bits = (bits << 16u) | (bits >> 16u);
+ bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
+ bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
+ bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
+ bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
+#else
+ bits = bitfieldReverse(bits);
+#endif
+ /* Same as dividing by 0x100000000. */
+ return float(bits) * 2.3283064365386963e-10;
+}
+
+vec2 hammersley_2d(float i, float sample_count)
+{
+ vec2 rand;
+ rand.x = i / sample_count;
+ rand.y = van_der_corput_radical_inverse(uint(i));
+ return rand;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Distribution mapping.
+ *
+ * Functions mapping input random numbers to sampling shapes (i.e: hemisphere).
+ * \{ */
+
+/* Given 2 random number in [0..1] range, return a random unit disk sample. */
+vec2 sample_disk(vec2 noise)
+{
+ float angle = noise.x * M_2PI;
+ return vec2(cos(angle), sin(angle)) * sqrt(noise.y);
+}
+
+/* This transform a 2d random sample (in [0..1] range) to a sample located on a cylinder of the
+ * same range. This is because the sampling functions expect such a random sample which is
+ * normally precomputed. */
+vec3 sample_cylinder(vec2 rand)
+{
+ float theta = rand.x;
+ float phi = (rand.y - 0.5) * M_2PI;
+ float cos_phi = cos(phi);
+ float sin_phi = sqrt(1.0 - sqr(cos_phi)) * sign(phi);
+ return vec3(theta, cos_phi, sin_phi);
+}
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl
index 7ddf941df7c..183aac1e546 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl
@@ -6,10 +6,23 @@
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(common_hair_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_nodetree_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_surf_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
+vec4 closure_to_rgba(Closure cl)
+{
+ vec4 out_color;
+ out_color.rgb = g_emission;
+ out_color.a = saturate(1.0 - avg(g_transmittance));
+
+ /* Reset for the next closure tree. */
+ closure_weights_reset();
+
+ return out_color;
+}
+
/* From the paper "Hashed Alpha Testing" by Chris Wyman and Morgan McGuire. */
float hash(vec2 a)
{
@@ -61,8 +74,7 @@ void main()
nodetree_surface();
- // float noise_offset = sampling_rng_1D_get(sampling_buf, SAMPLING_TRANSPARENCY);
- float noise_offset = 0.5;
+ float noise_offset = sampling_rng_1D_get(SAMPLING_TRANSPARENCY);
float random_threshold = hashed_alpha_threshold(1.0, noise_offset, g_data.P);
float transparency = avg(g_transmittance);
@@ -72,14 +84,7 @@ void main()
#endif
#ifdef MAT_VELOCITY
- vec4 out_velocity_camera; /* TODO(fclem): Panoramic cameras. */
- velocity_camera(interp.P + motion.prev,
- interp.P,
- interp.P - motion.next,
- out_velocity_camera,
- out_velocity_view);
-
- /* For testing in viewport. */
- out_velocity_view.zw = vec2(0.0);
+ out_velocity = velocity_surface(interp.P + motion.prev, interp.P, interp.P + motion.next);
+ out_velocity = velocity_pack(out_velocity);
#endif
}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl
index 143e88dbe68..39758c0dfc1 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl
@@ -5,35 +5,36 @@
* This is used by alpha blended materials and materials using Shader to RGB nodes.
**/
-#pragma BLENDER_REQUIRE(common_view_lib.glsl)
-#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(common_hair_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_light_eval_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_nodetree_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_surf_lib.glsl)
-float spec_light(ClosureReflection ref)
-{
- float gloss = saturate(1.0 - ref.roughness);
- float shininess = exp2(10.0 * gloss + 1.0);
- vec3 N = ref.N;
- vec3 L = vec3(0.0, 0.0, 1.0);
- vec3 H = normalize(L + cameraVec(g_data.P));
- float spec_angle = saturate(dot(N, H));
- float normalization_factor = shininess * 0.125 + 1.0;
- float spec_light = pow(spec_angle, shininess) * saturate(dot(N, L)) * normalization_factor;
- return spec_light;
-}
-
vec4 closure_to_rgba(Closure cl)
{
+ vec3 diffuse_light = vec3(0.0);
+ vec3 reflection_light = vec3(0.0);
+ vec3 refraction_light = vec3(0.0);
+
+ float vP_z = dot(cameraForward, g_data.P) - dot(cameraForward, cameraPos);
+
+ light_eval(g_diffuse_data,
+ g_reflection_data,
+ g_data.P,
+ cameraVec(g_data.P),
+ vP_z,
+ 0.01 /* TODO(fclem) thickness. */,
+ diffuse_light,
+ reflection_light);
+
vec4 out_color;
out_color.rgb = g_emission;
- out_color.rgb += g_diffuse_data.color * g_diffuse_data.weight *
- saturate(g_diffuse_data.N.z * 0.5 + 0.5);
- out_color.rgb += g_reflection_data.color * g_reflection_data.weight *
- spec_light(g_reflection_data);
- out_color.rgb += g_refraction_data.color * g_refraction_data.weight *
- saturate(g_refraction_data.N.z * 0.5 + 0.5);
+ out_color.rgb += g_diffuse_data.color * g_diffuse_data.weight * diffuse_light;
+ out_color.rgb += g_reflection_data.color * g_reflection_data.weight * reflection_light;
+ out_color.rgb += g_refraction_data.color * g_refraction_data.weight * refraction_light;
out_color.a = saturate(1.0 - avg(g_transmittance));
@@ -47,27 +48,69 @@ void main()
{
init_globals();
+ float noise = utility_tx_fetch(utility_tx, gl_FragCoord.xy, UTIL_BLUE_NOISE_LAYER).r;
+ g_closure_rand = fract(noise + sampling_rng_1D_get(SAMPLING_CLOSURE));
+
fragment_displacement();
nodetree_surface();
g_holdout = saturate(g_holdout);
+ vec3 diffuse_light = vec3(0.0);
+ vec3 reflection_light = vec3(0.0);
+ vec3 refraction_light = vec3(0.0);
+
+ float vP_z = dot(cameraForward, g_data.P) - dot(cameraForward, cameraPos);
+
+ light_eval(g_diffuse_data,
+ g_reflection_data,
+ g_data.P,
+ cameraVec(g_data.P),
+ vP_z,
+ 0.01 /* TODO(fclem) thickness. */,
+ diffuse_light,
+ reflection_light);
+
+ g_diffuse_data.color *= g_diffuse_data.weight;
+ g_reflection_data.color *= g_reflection_data.weight;
+ g_refraction_data.color *= g_refraction_data.weight;
+ diffuse_light *= step(1e-5, g_diffuse_data.weight);
+ reflection_light *= step(1e-5, g_reflection_data.weight);
+ refraction_light *= step(1e-5, g_refraction_data.weight);
+
out_radiance.rgb = g_emission;
- out_radiance.rgb += g_diffuse_data.color * g_diffuse_data.weight *
- saturate(g_diffuse_data.N.z * 0.5 + 0.5);
- out_radiance.rgb += g_reflection_data.color * g_reflection_data.weight *
- spec_light(g_reflection_data);
- out_radiance.rgb += g_refraction_data.color * g_refraction_data.weight *
- saturate(g_refraction_data.N.z * 0.5 + 0.5);
+ out_radiance.rgb += g_diffuse_data.color * diffuse_light;
+ out_radiance.rgb += g_reflection_data.color * reflection_light;
+ out_radiance.rgb += g_refraction_data.color * refraction_light;
out_radiance.a = 0.0;
+ vec3 specular_light = reflection_light + refraction_light;
+ vec3 specular_color = g_reflection_data.color + g_refraction_data.color;
+
+ /* TODO(fclem): This feels way too complex for what is it. */
+ bool has_any_bsdf_weight = g_diffuse_data.weight != 0.0 || g_reflection_data.weight != 0.0 ||
+ g_refraction_data.weight != 0.0;
+ vec3 out_normal = has_any_bsdf_weight ? vec3(0.0) : g_data.N;
+ out_normal += g_diffuse_data.N * g_diffuse_data.weight;
+ out_normal += g_reflection_data.N * g_reflection_data.weight;
+ out_normal += g_refraction_data.N * g_refraction_data.weight;
+ out_normal = safe_normalize(out_normal);
+
+#ifdef MAT_RENDER_PASS_SUPPORT
+ ivec2 out_texel = ivec2(gl_FragCoord.xy);
+ imageStore(rp_normal_img, out_texel, vec4(out_normal, 1.0));
+ imageStore(
+ rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_DIFFUSE_LIGHT), vec4(diffuse_light, 1.0));
+ imageStore(
+ rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_SPECULAR_LIGHT), vec4(specular_light, 1.0));
+ imageStore(rp_diffuse_color_img, out_texel, vec4(g_diffuse_data.color, 1.0));
+ imageStore(rp_specular_color_img, out_texel, vec4(specular_color, 1.0));
+ imageStore(rp_emission_img, out_texel, vec4(g_emission, 1.0));
+#endif
+
out_radiance.rgb *= 1.0 - g_holdout;
out_transmittance.rgb = g_transmittance;
out_transmittance.a = saturate(avg(g_transmittance));
-
- /* Test */
- out_transmittance.a = 1.0 - out_transmittance.a;
- out_radiance.a = 1.0 - out_radiance.a;
}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl
index 30b48edaa78..6c1fc818f41 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl
@@ -40,7 +40,7 @@ void init_globals_curves()
/* Shade as a cylinder. */
float cos_theta = interp.curves_time_width / interp.curves_thickness;
float sin_theta = sqrt(max(0.0, 1.0 - cos_theta * cos_theta));
- g_data.N = normalize(interp.N * sin_theta + interp.curves_binormal * cos_theta);
+ g_data.N = g_data.Ni = normalize(interp.N * sin_theta + interp.curves_binormal * cos_theta);
/* Costly, but follows cycles per pixel tangent space (not following curve shape). */
vec3 V = cameraVec(g_data.P);
@@ -60,13 +60,14 @@ void init_globals_curves()
void init_globals_gpencil()
{
/* Undo backface flip as the gpencil normal is already pointing towards the camera. */
- g_data.N = interp.N;
+ g_data.N = g_data.Ni = interp.N;
}
void init_globals()
{
/* Default values. */
g_data.P = interp.P;
+ g_data.Ni = interp.N;
g_data.N = safe_normalize(interp.N);
g_data.Ng = g_data.N;
g_data.is_strand = false;
@@ -81,6 +82,7 @@ void init_globals()
#ifdef GPU_FRAGMENT_SHADER
g_data.N = (FrontFacing) ? g_data.N : -g_data.N;
+ g_data.Ni = (FrontFacing) ? g_data.Ni : -g_data.Ni;
g_data.Ng = safe_normalize(cross(dFdx(g_data.P), dFdy(g_data.P)));
#endif
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl
index ac657afc922..1ef1c1f84b8 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl
@@ -24,6 +24,19 @@ void main()
g_holdout = saturate(g_holdout);
+ ivec2 out_texel = ivec2(gl_FragCoord.xy);
+ imageStore(rp_normal_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0));
+ imageStore(
+ rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_DIFFUSE_LIGHT), vec4(0.0, 0.0, 0.0, 1.0));
+ imageStore(
+ rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_SPECULAR_LIGHT), vec4(0.0, 0.0, 0.0, 1.0));
+ imageStore(rp_diffuse_color_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0));
+ imageStore(rp_specular_color_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0));
+ imageStore(rp_emission_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0));
+
out_background.rgb = safe_color(g_emission) * (1.0 - g_holdout);
out_background.a = saturate(avg(g_transmittance)) * g_holdout;
+
+ /* World opacity. */
+ out_background = mix(vec4(0.0, 0.0, 0.0, 1.0), out_background, world_opacity_fade);
}
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl
index 435ae6658c9..8d02609fedc 100644
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl
@@ -2,23 +2,38 @@
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_camera_lib.glsl)
+vec4 velocity_pack(vec4 data)
+{
+ return data * 0.01;
+}
+
+vec4 velocity_unpack(vec4 data)
+{
+ return data * 100.0;
+}
+
#ifdef VELOCITY_CAMERA
/**
* Given a triple of position, compute the previous and next motion vectors.
- * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy)
+ * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy).
*/
-vec4 velocity_view(vec3 P_prev, vec3 P, vec3 P_next)
+vec4 velocity_surface(vec3 P_prv, vec3 P, vec3 P_nxt)
{
- vec2 prev_uv, curr_uv, next_uv;
-
- prev_uv = transform_point(ProjectionMatrix, transform_point(camera_prev.viewmat, P_prev)).xy;
- curr_uv = transform_point(ViewProjectionMatrix, P).xy;
- next_uv = transform_point(ProjectionMatrix, transform_point(camera_next.viewmat, P_next)).xy;
-
- vec4 motion;
- motion.xy = prev_uv - curr_uv;
- motion.zw = curr_uv - next_uv;
+ /* NOTE: We don't use the drw_view.persmat to avoid adding the TAA jitter to the velocity. */
+ vec2 prev_uv = project_point(camera_prev.persmat, P_prv).xy;
+ vec2 curr_uv = project_point(camera_curr.persmat, P).xy;
+ vec2 next_uv = project_point(camera_next.persmat, P_nxt).xy;
+ /* Fix issue with perspective division. */
+ if (any(isnan(prev_uv))) {
+ prev_uv = curr_uv;
+ }
+ if (any(isnan(next_uv))) {
+ next_uv = curr_uv;
+ }
+ /* NOTE: We output both vectors in the same direction so we can reuse the same vector
+ * with rgrg swizzle in viewport. */
+ vec4 motion = vec4(prev_uv - curr_uv, curr_uv - next_uv);
/* Convert NDC velocity to UV velocity */
motion *= 0.5;
@@ -26,37 +41,55 @@ vec4 velocity_view(vec3 P_prev, vec3 P, vec3 P_next)
}
/**
- * Given a triple of position, compute the previous and next motion vectors.
- * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy)
- * \a velocity_camera is the motion in film UV space after camera projection.
- * \a velocity_view is the motion in ShadingView UV space. It is different
- * from velocity_camera for multi-view rendering.
+ * Given a view space view vector \a vV, compute the previous and next motion vectors for
+ * background pixels.
+ * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy).
*/
-void velocity_camera(vec3 P_prev, vec3 P, vec3 P_next, out vec4 vel_camera, out vec4 vel_view)
+vec4 velocity_background(vec3 vV)
{
- vec2 prev_uv, curr_uv, next_uv;
- prev_uv = camera_uv_from_world(camera_prev, P_prev);
- curr_uv = camera_uv_from_world(camera_curr, P);
- next_uv = camera_uv_from_world(camera_next, P_next);
-
- vel_camera.xy = prev_uv - curr_uv;
- vel_camera.zw = curr_uv - next_uv;
+ /* Only transform direction to avoid losing precision. */
+ vec3 V = transform_direction(camera_curr.viewinv, vV);
+ /* NOTE: We don't use the drw_view.winmat to avoid adding the TAA jitter to the velocity. */
+ vec2 prev_uv = project_point(camera_prev.winmat, V).xy;
+ vec2 curr_uv = project_point(camera_curr.winmat, V).xy;
+ vec2 next_uv = project_point(camera_next.winmat, V).xy;
+ /* NOTE: We output both vectors in the same direction so we can reuse the same vector
+ * with rgrg swizzle in viewport. */
+ vec4 motion = vec4(prev_uv - curr_uv, curr_uv - next_uv);
+ /* Convert NDC velocity to UV velocity */
+ motion *= 0.5;
- if (is_panoramic(camera_curr.type)) {
- /* This path is only used if using using panoramic projections. Since the views always have
- * the same 45° aperture angle, we can safely reuse the projection matrix. */
- prev_uv = transform_point(ProjectionMatrix, transform_point(camera_prev.viewmat, P_prev)).xy;
- curr_uv = transform_point(ViewProjectionMatrix, P).xy;
- next_uv = transform_point(ProjectionMatrix, transform_point(camera_next.viewmat, P_next)).xy;
+ return motion;
+}
- vel_view.xy = prev_uv - curr_uv;
- vel_view.zw = curr_uv - next_uv;
- /* Convert NDC velocity to UV velocity */
- vel_view *= 0.5;
- }
- else {
- vel_view = vel_camera;
+vec4 velocity_resolve(vec4 vector, vec2 uv, float depth)
+{
+ if (vector.x == VELOCITY_INVALID) {
+ bool is_background = (depth == 1.0);
+ if (is_background) {
+ /* NOTE: Use viewCameraVec to avoid imprecision if camera is far from origin. */
+ vec3 vV = viewCameraVec(get_view_space_from_depth(uv, 1.0));
+ return velocity_background(vV);
+ }
+ else {
+ /* Static geometry. No translation in world space. */
+ vec3 P = get_world_space_from_depth(uv, depth);
+ return velocity_surface(P, P, P);
+ }
}
+ return velocity_unpack(vector);
+}
+
+/**
+ * Load and resolve correct velocity as some pixels might still not have correct
+ * motion data for performance reasons.
+ * Returns motion vector in render UV space.
+ */
+vec4 velocity_resolve(sampler2D vector_tx, ivec2 texel, float depth)
+{
+ vec2 uv = (vec2(texel) + 0.5) / vec2(textureSize(vector_tx, 0).xy);
+ vec4 vector = texelFetch(vector_tx, texel, 0);
+ return velocity_resolve(vector, uv, depth);
}
#endif
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl
deleted file mode 100644
index b68b2eaf117..00000000000
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl
+++ /dev/null
@@ -1,58 +0,0 @@
-
-/**
- * Fullscreen pass that compute motion vector for static geometry.
- * Animated geometry has already written correct motion vectors.
- */
-
-#pragma BLENDER_REQUIRE(common_view_lib.glsl)
-#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
-
-#define is_valid_output(img_) (imageSize(img_).x > 1)
-
-void main()
-{
- ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
- vec4 motion = imageLoad(velocity_view_img, texel);
-
- bool pixel_has_valid_motion = (motion.x != VELOCITY_INVALID);
- float depth = texelFetch(depth_tx, texel, 0).r;
- bool is_background = (depth == 1.0f);
-
- vec2 uv = vec2(texel) * drw_view.viewport_size_inverse;
- vec3 P_next, P_prev, P_curr;
-
- if (pixel_has_valid_motion) {
- /* Animated geometry. View motion already computed during prepass. Convert only to camera. */
- // P_prev = get_world_space_from_depth(uv + motion.xy, 0.5);
- // P_curr = get_world_space_from_depth(uv, 0.5);
- // P_next = get_world_space_from_depth(uv + motion.zw, 0.5);
- return;
- }
- else if (is_background) {
- /* NOTE: Use viewCameraVec to avoid imprecision if camera is far from origin. */
- vec3 vV = viewCameraVec(get_view_space_from_depth(uv, 1.0));
- vec3 V = transform_direction(ViewMatrixInverse, vV);
- /* Background has no motion under camera translation. Translate view vector with the camera. */
- /* WATCH(fclem): Might create precision issues. */
- P_next = camera_next.viewinv[3].xyz + V;
- P_curr = camera_curr.viewinv[3].xyz + V;
- P_prev = camera_prev.viewinv[3].xyz + V;
- }
- else {
- /* Static geometry. No translation in world space. */
- P_curr = get_world_space_from_depth(uv, depth);
- P_prev = P_curr;
- P_next = P_curr;
- }
-
- vec4 vel_camera, vel_view;
- velocity_camera(P_prev, P_curr, P_next, vel_camera, vel_view);
-
- if (in_texture_range(texel, depth_tx)) {
- imageStore(velocity_view_img, texel, vel_view);
-
- if (is_valid_output(velocity_camera_img)) {
- imageStore(velocity_camera_img, texel, vel_camera);
- }
- }
-}
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
new file mode 100644
index 00000000000..b689a7f53a2
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "eevee_defines.hh"
+#include "gpu_shader_create_info.hh"
+
+/* -------------------------------------------------------------------- */
+/** \name Setup
+ * \{ */
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_bokeh_lut)
+ .do_static_compilation(true)
+ .local_group_size(DOF_BOKEH_LUT_SIZE, DOF_BOKEH_LUT_SIZE)
+ .additional_info("eevee_shared", "draw_view")
+ .uniform_buf(6, "DepthOfFieldData", "dof_buf")
+ .image(0, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_gather_lut_img")
+ .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_scatter_lut_img")
+ .image(2, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_resolve_lut_img")
+ .compute_source("eevee_depth_of_field_bokeh_lut_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_setup)
+ .do_static_compilation(true)
+ .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE)
+ .additional_info("eevee_shared", "draw_view")
+ .uniform_buf(6, "DepthOfFieldData", "dof_buf")
+ .sampler(0, ImageType::FLOAT_2D, "color_tx")
+ .sampler(1, ImageType::DEPTH_2D, "depth_tx")
+ .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
+ .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_img")
+ .compute_source("eevee_depth_of_field_setup_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_stabilize)
+ .do_static_compilation(true)
+ .local_group_size(DOF_STABILIZE_GROUP_SIZE, DOF_STABILIZE_GROUP_SIZE)
+ .additional_info("eevee_shared", "draw_view", "eevee_velocity_camera")
+ .uniform_buf(6, "DepthOfFieldData", "dof_buf")
+ .sampler(0, ImageType::FLOAT_2D, "coc_tx")
+ .sampler(1, ImageType::FLOAT_2D, "color_tx")
+ .sampler(2, ImageType::FLOAT_2D, "velocity_tx")
+ .sampler(3, ImageType::FLOAT_2D, "in_history_tx")
+ .sampler(4, ImageType::DEPTH_2D, "depth_tx")
+ .push_constant(Type::BOOL, "use_history")
+ .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
+ .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_img")
+ .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_history_img")
+ .compute_source("eevee_depth_of_field_stabilize_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_downsample)
+ .do_static_compilation(true)
+ .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE)
+ .additional_info("eevee_shared", "draw_view")
+ .sampler(0, ImageType::FLOAT_2D, "color_tx")
+ .sampler(1, ImageType::FLOAT_2D, "coc_tx")
+ .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
+ .compute_source("eevee_depth_of_field_downsample_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_reduce)
+ .do_static_compilation(true)
+ .local_group_size(DOF_REDUCE_GROUP_SIZE, DOF_REDUCE_GROUP_SIZE)
+ .additional_info("eevee_shared", "draw_view")
+ .uniform_buf(6, "DepthOfFieldData", "dof_buf")
+ .sampler(0, ImageType::FLOAT_2D, "downsample_tx")
+ .storage_buf(0, Qualifier::WRITE, "ScatterRect", "scatter_fg_list_buf[]")
+ .storage_buf(1, Qualifier::WRITE, "ScatterRect", "scatter_bg_list_buf[]")
+ .storage_buf(2, Qualifier::READ_WRITE, "DrawCommand", "scatter_fg_indirect_buf")
+ .storage_buf(3, Qualifier::READ_WRITE, "DrawCommand", "scatter_bg_indirect_buf")
+ .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "inout_color_lod0_img")
+ .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod1_img")
+ .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod2_img")
+ .image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod3_img")
+ .image(4, GPU_R16F, Qualifier::READ, ImageType::FLOAT_2D, "in_coc_lod0_img")
+ .image(5, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod1_img")
+ .image(6, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod2_img")
+ .image(7, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod3_img")
+ .compute_source("eevee_depth_of_field_reduce_comp.glsl");
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Circle-Of-Confusion Tiles
+ * \{ */
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_flatten)
+ .do_static_compilation(true)
+ .local_group_size(DOF_TILES_FLATTEN_GROUP_SIZE, DOF_TILES_FLATTEN_GROUP_SIZE)
+ .additional_info("eevee_shared", "draw_view")
+ .sampler(0, ImageType::FLOAT_2D, "coc_tx")
+ .image(2, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_fg_img")
+ .image(3, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_bg_img")
+ .compute_source("eevee_depth_of_field_tiles_flatten_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate)
+ .additional_info("eevee_shared", "draw_view", "eevee_depth_of_field_tiles_common")
+ .local_group_size(DOF_TILES_DILATE_GROUP_SIZE, DOF_TILES_DILATE_GROUP_SIZE)
+ .image(2, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_fg_img")
+ .image(3, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_bg_img")
+ .push_constant(Type::INT, "ring_count")
+ .push_constant(Type::INT, "ring_width_multiplier")
+ .compute_source("eevee_depth_of_field_tiles_dilate_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate_minabs)
+ .do_static_compilation(true)
+ .define("DILATE_MODE_MIN_MAX", "false")
+ .additional_info("eevee_depth_of_field_tiles_dilate");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate_minmax)
+ .do_static_compilation(true)
+ .define("DILATE_MODE_MIN_MAX", "true")
+ .additional_info("eevee_depth_of_field_tiles_dilate");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_common)
+ .image(0, GPU_R11F_G11F_B10F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_fg_img")
+ .image(1, GPU_R11F_G11F_B10F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_bg_img");
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Variations
+ * \{ */
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_no_lut)
+ .define("DOF_BOKEH_TEXTURE", "false")
+ /**
+ * WORKAROUND(@fclem): This is to keep the code as is for now. The bokeh_lut_tx is referenced
+ * even if not used after optimization. But we don't want to include it in the create infos.
+ */
+ .define("bokeh_lut_tx", "color_tx");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_lut)
+ .define("DOF_BOKEH_TEXTURE", "true")
+ .sampler(5, ImageType::FLOAT_2D, "bokeh_lut_tx");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_background).define("DOF_FOREGROUND_PASS", "false");
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_foreground).define("DOF_FOREGROUND_PASS", "true");
+
+#define EEVEE_DOF_FINAL_VARIATION(name, ...) \
+ GPU_SHADER_CREATE_INFO(name).additional_info(__VA_ARGS__).do_static_compilation(true);
+
+#define EEVEE_DOF_LUT_VARIATIONS(prefix, ...) \
+ EEVEE_DOF_FINAL_VARIATION(prefix##_lut, "eevee_depth_of_field_lut", __VA_ARGS__) \
+ EEVEE_DOF_FINAL_VARIATION(prefix##_no_lut, "eevee_depth_of_field_no_lut", __VA_ARGS__)
+
+#define EEVEE_DOF_GROUND_VARIATIONS(name, ...) \
+ EEVEE_DOF_LUT_VARIATIONS(name##_background, "eevee_depth_of_field_background", __VA_ARGS__) \
+ EEVEE_DOF_LUT_VARIATIONS(name##_foreground, "eevee_depth_of_field_foreground", __VA_ARGS__)
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Gather
+ * \{ */
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_gather_common)
+ .additional_info("eevee_shared",
+ "draw_view",
+ "eevee_depth_of_field_tiles_common",
+ "eevee_sampling_data")
+ .uniform_buf(6, "DepthOfFieldData", "dof_buf")
+ .local_group_size(DOF_GATHER_GROUP_SIZE, DOF_GATHER_GROUP_SIZE)
+ .sampler(0, ImageType::FLOAT_2D, "color_tx")
+ .sampler(1, ImageType::FLOAT_2D, "color_bilinear_tx")
+ .sampler(2, ImageType::FLOAT_2D, "coc_tx")
+ .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
+ .image(3, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_weight_img");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_gather)
+ .image(4, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_occlusion_img")
+ .compute_source("eevee_depth_of_field_gather_comp.glsl")
+ .additional_info("eevee_depth_of_field_gather_common");
+
+EEVEE_DOF_GROUND_VARIATIONS(eevee_depth_of_field_gather, "eevee_depth_of_field_gather")
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_hole_fill)
+ .do_static_compilation(true)
+ .compute_source("eevee_depth_of_field_hole_fill_comp.glsl")
+ .additional_info("eevee_depth_of_field_gather_common", "eevee_depth_of_field_no_lut");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_filter)
+ .do_static_compilation(true)
+ .local_group_size(DOF_FILTER_GROUP_SIZE, DOF_FILTER_GROUP_SIZE)
+ .additional_info("eevee_shared")
+ .sampler(0, ImageType::FLOAT_2D, "color_tx")
+ .sampler(1, ImageType::FLOAT_2D, "weight_tx")
+ .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
+ .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_weight_img")
+ .compute_source("eevee_depth_of_field_filter_comp.glsl");
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Scatter
+ * \{ */
+
+GPU_SHADER_INTERFACE_INFO(eevee_depth_of_field_scatter_iface, "interp")
+ /** Colors, weights, and Circle of confusion radii for the 4 pixels to scatter. */
+ .flat(Type::VEC4, "color_and_coc1")
+ .flat(Type::VEC4, "color_and_coc2")
+ .flat(Type::VEC4, "color_and_coc3")
+ .flat(Type::VEC4, "color_and_coc4")
+ /** Sprite pixel position with origin at sprite center. In pixels. */
+ .no_perspective(Type::VEC2, "rect_uv1")
+ .no_perspective(Type::VEC2, "rect_uv2")
+ .no_perspective(Type::VEC2, "rect_uv3")
+ .no_perspective(Type::VEC2, "rect_uv4")
+ /** Scaling factor for the bokeh distance. */
+ .flat(Type::FLOAT, "distance_scale");
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_scatter)
+ .do_static_compilation(true)
+ .additional_info("eevee_shared", "draw_view")
+ .sampler(0, ImageType::FLOAT_2D, "occlusion_tx")
+ .sampler(1, ImageType::FLOAT_2D, "bokeh_lut_tx")
+ .storage_buf(0, Qualifier::READ, "ScatterRect", "scatter_list_buf[]")
+ .fragment_out(0, Type::VEC4, "out_color")
+ .push_constant(Type::BOOL, "use_bokeh_lut")
+ .vertex_out(eevee_depth_of_field_scatter_iface)
+ .vertex_source("eevee_depth_of_field_scatter_vert.glsl")
+ .fragment_source("eevee_depth_of_field_scatter_frag.glsl");
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Resolve
+ * \{ */
+
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_resolve)
+ .define("DOF_RESOLVE_PASS", "true")
+ .local_group_size(DOF_RESOLVE_GROUP_SIZE, DOF_RESOLVE_GROUP_SIZE)
+ .additional_info("eevee_shared",
+ "draw_view",
+ "eevee_depth_of_field_tiles_common",
+ "eevee_sampling_data")
+ .uniform_buf(6, "DepthOfFieldData", "dof_buf")
+ .sampler(0, ImageType::DEPTH_2D, "depth_tx")
+ .sampler(1, ImageType::FLOAT_2D, "color_tx")
+ .sampler(2, ImageType::FLOAT_2D, "color_bg_tx")
+ .sampler(3, ImageType::FLOAT_2D, "color_fg_tx")
+ .sampler(4, ImageType::FLOAT_2D, "color_hole_fill_tx")
+ .sampler(7, ImageType::FLOAT_2D, "weight_bg_tx")
+ .sampler(8, ImageType::FLOAT_2D, "weight_fg_tx")
+ .sampler(9, ImageType::FLOAT_2D, "weight_hole_fill_tx")
+ .sampler(10, ImageType::FLOAT_2D, "stable_color_tx")
+ .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
+ .compute_source("eevee_depth_of_field_resolve_comp.glsl");
+
+EEVEE_DOF_LUT_VARIATIONS(eevee_depth_of_field_resolve, "eevee_depth_of_field_resolve")
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_film_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_film_info.hh
new file mode 100644
index 00000000000..db82a3265d7
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_film_info.hh
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "eevee_defines.hh"
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_CREATE_INFO(eevee_film)
+ .uniform_buf(6, "FilmData", "film_buf")
+ .sampler(0, ImageType::DEPTH_2D, "depth_tx")
+ .sampler(1, ImageType::FLOAT_2D, "combined_tx")
+ .sampler(2, ImageType::FLOAT_2D, "normal_tx")
+ .sampler(3, ImageType::FLOAT_2D, "vector_tx")
+ .sampler(4, ImageType::FLOAT_2D_ARRAY, "light_tx")
+ .sampler(5, ImageType::FLOAT_2D, "diffuse_color_tx")
+ .sampler(6, ImageType::FLOAT_2D, "specular_color_tx")
+ .sampler(7, ImageType::FLOAT_2D, "volume_light_tx")
+ .sampler(8, ImageType::FLOAT_2D, "emission_tx")
+ .sampler(9, ImageType::FLOAT_2D, "environment_tx")
+ .sampler(10, ImageType::FLOAT_2D, "shadow_tx")
+ .sampler(11, ImageType::FLOAT_2D, "ambient_occlusion_tx")
+ .sampler(12, ImageType::FLOAT_2D_ARRAY, "aov_color_tx")
+ .sampler(13, ImageType::FLOAT_2D_ARRAY, "aov_value_tx")
+ /* Color History for TAA needs to be sampler to leverage bilinear sampling. */
+ .sampler(14, ImageType::FLOAT_2D, "in_combined_tx")
+ // .sampler(15, ImageType::FLOAT_2D, "cryptomatte_tx") /* TODO */
+ .image(0, GPU_R32F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "in_weight_img")
+ .image(1, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D_ARRAY, "out_weight_img")
+ /* Color History for TAA needs to be sampler to leverage bilinear sampling. */
+ //.image(2, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_combined_img")
+ .image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_combined_img")
+ .image(4, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "depth_img")
+ .image(5, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "color_accum_img")
+ .image(6, GPU_R16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "value_accum_img")
+ .additional_info("eevee_shared")
+ .additional_info("eevee_velocity_camera")
+ .additional_info("draw_view");
+
+GPU_SHADER_CREATE_INFO(eevee_film_frag)
+ .do_static_compilation(true)
+ .fragment_out(0, Type::VEC4, "out_color")
+ .fragment_source("eevee_film_frag.glsl")
+ .additional_info("draw_fullscreen", "eevee_film");
+
+GPU_SHADER_CREATE_INFO(eevee_film_comp)
+ .do_static_compilation(true)
+ .local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE)
+ .compute_source("eevee_film_comp.glsl")
+ .additional_info("eevee_film");
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh
new file mode 100644
index 00000000000..5e32631a8f8
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "eevee_defines.hh"
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_CREATE_INFO(eevee_hiz_data)
+ .sampler(15, ImageType::FLOAT_2D, "hiz_tx")
+ .uniform_buf(5, "HiZData", "hiz_buf");
+
+GPU_SHADER_CREATE_INFO(eevee_hiz_update)
+ .do_static_compilation(true)
+ .local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE)
+ .storage_buf(0, Qualifier::READ_WRITE, "uint", "finished_tile_counter")
+ .sampler(0, ImageType::DEPTH_2D, "depth_tx")
+ .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_0")
+ .image(1, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_1")
+ .image(2, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_2")
+ .image(3, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_3")
+ .image(4, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_4")
+ .image(5, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "out_mip_5")
+ .image(6, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_6")
+ .image(7, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_7")
+ .push_constant(Type::BOOL, "update_mip_0")
+ .compute_source("eevee_hiz_update_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_hiz_debug)
+ .do_static_compilation(true)
+ .fragment_out(0, Type::VEC4, "out_debug_color_add", DualBlend::SRC_0)
+ .fragment_out(0, Type::VEC4, "out_debug_color_mul", DualBlend::SRC_1)
+ .fragment_source("eevee_hiz_debug_frag.glsl")
+ .additional_info("eevee_shared", "eevee_hiz_data", "draw_fullscreen");
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh
new file mode 100644
index 00000000000..41602426a1d
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "eevee_defines.hh"
+#include "gpu_shader_create_info.hh"
+
+/* -------------------------------------------------------------------- */
+/** \name Shared
+ * \{ */
+
+GPU_SHADER_CREATE_INFO(eevee_light_data)
+ .storage_buf(LIGHT_CULL_BUF_SLOT, Qualifier::READ, "LightCullingData", "light_cull_buf")
+ .storage_buf(LIGHT_BUF_SLOT, Qualifier::READ, "LightData", "light_buf[]")
+ .storage_buf(LIGHT_ZBIN_BUF_SLOT, Qualifier::READ, "uint", "light_zbin_buf[]")
+ .storage_buf(LIGHT_TILE_BUF_SLOT, Qualifier::READ, "uint", "light_tile_buf[]");
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Culling
+ * \{ */
+
+GPU_SHADER_CREATE_INFO(eevee_light_culling_select)
+ .do_static_compilation(true)
+ .additional_info("eevee_shared", "draw_view")
+ .local_group_size(CULLING_SELECT_GROUP_SIZE)
+ .storage_buf(0, Qualifier::READ_WRITE, "LightCullingData", "light_cull_buf")
+ .storage_buf(1, Qualifier::READ, "LightData", "in_light_buf[]")
+ .storage_buf(2, Qualifier::WRITE, "LightData", "out_light_buf[]")
+ .storage_buf(3, Qualifier::WRITE, "float", "out_zdist_buf[]")
+ .storage_buf(4, Qualifier::WRITE, "uint", "out_key_buf[]")
+ .compute_source("eevee_light_culling_select_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_light_culling_sort)
+ .do_static_compilation(true)
+ .additional_info("eevee_shared", "draw_view")
+ .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf")
+ .storage_buf(1, Qualifier::READ, "LightData", "in_light_buf[]")
+ .storage_buf(2, Qualifier::WRITE, "LightData", "out_light_buf[]")
+ .storage_buf(3, Qualifier::READ, "float", "in_zdist_buf[]")
+ .storage_buf(4, Qualifier::READ, "uint", "in_key_buf[]")
+ .local_group_size(CULLING_SORT_GROUP_SIZE)
+ .compute_source("eevee_light_culling_sort_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_light_culling_zbin)
+ .do_static_compilation(true)
+ .additional_info("eevee_shared", "draw_view")
+ .local_group_size(CULLING_ZBIN_GROUP_SIZE)
+ .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf")
+ .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]")
+ .storage_buf(2, Qualifier::WRITE, "uint", "out_zbin_buf[]")
+ .compute_source("eevee_light_culling_zbin_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_light_culling_tile)
+ .do_static_compilation(true)
+ .additional_info("eevee_shared", "draw_view")
+ .local_group_size(CULLING_TILE_GROUP_SIZE)
+ .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf")
+ .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]")
+ .storage_buf(2, Qualifier::WRITE, "uint", "out_light_tile_buf[]")
+ .compute_source("eevee_light_culling_tile_comp.glsl");
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Debug
+ * \{ */
+
+GPU_SHADER_CREATE_INFO(eevee_light_culling_debug)
+ .do_static_compilation(true)
+ .fragment_out(0, Type::VEC4, "out_debug_color_add", DualBlend::SRC_0)
+ .fragment_out(0, Type::VEC4, "out_debug_color_mul", DualBlend::SRC_1)
+ .fragment_source("eevee_light_culling_debug_frag.glsl")
+ .additional_info(
+ "eevee_shared", "draw_view", "draw_fullscreen", "eevee_light_data", "eevee_hiz_data");
+
+/** \} */
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
index a944bea402e..9abdd1f8adf 100644
--- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include "eevee_defines.hh"
#include "gpu_shader_create_info.hh"
/* -------------------------------------------------------------------- */
@@ -12,8 +13,12 @@ GPU_SHADER_CREATE_INFO(eevee_shared)
.typedef_source("eevee_shader_shared.hh");
GPU_SHADER_CREATE_INFO(eevee_sampling_data)
+ .define("EEVEE_SAMPLING_DATA")
.additional_info("eevee_shared")
- .uniform_buf(14, "SamplingData", "sampling_buf");
+ .storage_buf(6, Qualifier::READ, "SamplingData", "sampling_buf");
+
+GPU_SHADER_CREATE_INFO(eevee_utility_texture)
+ .sampler(RBUFS_UTILITY_TEX_SLOT, ImageType::FLOAT_2D_ARRAY, "utility_tx");
/** \} */
@@ -27,7 +32,7 @@ GPU_SHADER_CREATE_INFO(eevee_geom_mesh)
.vertex_in(0, Type::VEC3, "pos")
.vertex_in(1, Type::VEC3, "nor")
.vertex_source("eevee_geom_mesh_vert.glsl")
- .additional_info("draw_mesh", "draw_resource_id_varying", "draw_resource_handle");
+ .additional_info("draw_modelmat_new", "draw_resource_id_varying", "draw_view");
GPU_SHADER_CREATE_INFO(eevee_geom_gpencil)
.additional_info("eevee_shared")
@@ -49,7 +54,7 @@ GPU_SHADER_CREATE_INFO(eevee_geom_world)
.define("MAT_GEOM_WORLD")
.builtins(BuiltinBits::VERTEX_ID)
.vertex_source("eevee_geom_world_vert.glsl")
- .additional_info("draw_modelmat", "draw_resource_id_varying", "draw_resource_handle");
+ .additional_info("draw_modelmat_new", "draw_resource_id_varying", "draw_view");
/** \} */
@@ -70,6 +75,22 @@ GPU_SHADER_INTERFACE_INFO(eevee_surf_iface, "interp")
#define image_out(slot, qualifier, format, name) \
image(slot, format, qualifier, ImageType::FLOAT_2D, name, Frequency::PASS)
+#define image_array_out(slot, qualifier, format, name) \
+ image(slot, format, qualifier, ImageType::FLOAT_2D_ARRAY, name, Frequency::PASS)
+
+GPU_SHADER_CREATE_INFO(eevee_aov_out)
+ .define("MAT_AOV_SUPPORT")
+ .image_array_out(RBUFS_AOV_COLOR_SLOT, Qualifier::WRITE, GPU_RGBA16F, "aov_color_img")
+ .image_array_out(RBUFS_AOV_VALUE_SLOT, Qualifier::WRITE, GPU_R16F, "aov_value_img")
+ .storage_buf(RBUFS_AOV_BUF_SLOT, Qualifier::READ, "AOVsInfoData", "aov_buf");
+
+GPU_SHADER_CREATE_INFO(eevee_render_pass_out)
+ .define("MAT_RENDER_PASS_SUPPORT")
+ .image_out(RBUFS_NORMAL_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_normal_img")
+ .image_array_out(RBUFS_LIGHT_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_light_img")
+ .image_out(RBUFS_DIFF_COLOR_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_diffuse_color_img")
+ .image_out(RBUFS_SPEC_COLOR_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_specular_color_img")
+ .image_out(RBUFS_EMISSION_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_emission_img");
GPU_SHADER_CREATE_INFO(eevee_surf_deferred)
.vertex_out(eevee_surf_iface)
@@ -85,44 +106,45 @@ GPU_SHADER_CREATE_INFO(eevee_surf_deferred)
// .image_out(3, Qualifier::WRITE, GPU_R11F_G11F_B10F, "gbuff_reflection_color")
// .image_out(4, Qualifier::WRITE, GPU_RGBA16F, "gbuff_reflection_normal")
// .image_out(5, Qualifier::WRITE, GPU_R11F_G11F_B10F, "gbuff_emission")
- /* Renderpasses. */
+ /* Render-passes. */
// .image_out(6, Qualifier::READ_WRITE, GPU_RGBA16F, "rpass_volume_light")
/* TODO: AOVs maybe? */
.fragment_source("eevee_surf_deferred_frag.glsl")
- // .additional_info("eevee_sampling_data", "eevee_utility_texture")
+ // .additional_info("eevee_aov_out", "eevee_sampling_data", "eevee_utility_texture")
;
-#undef image_out
-
GPU_SHADER_CREATE_INFO(eevee_surf_forward)
- .auto_resource_location(true)
.vertex_out(eevee_surf_iface)
+ /* Early fragment test is needed for render passes support for forward surfaces. */
+ /* NOTE: This removes the possibility of using gl_FragDepth. */
+ .early_fragment_test(true)
.fragment_out(0, Type::VEC4, "out_radiance", DualBlend::SRC_0)
.fragment_out(0, Type::VEC4, "out_transmittance", DualBlend::SRC_1)
.fragment_source("eevee_surf_forward_frag.glsl")
- // .additional_info("eevee_sampling_data",
- // "eevee_lightprobe_data",
- /* Optionally added depending on the material. */
- // "eevee_raytrace_data",
- // "eevee_transmittance_data",
- // "eevee_utility_texture",
- // "eevee_light_data",
- // "eevee_shadow_data"
- // )
- ;
+ .additional_info("eevee_light_data", "eevee_utility_texture", "eevee_sampling_data"
+ // "eevee_lightprobe_data",
+ // "eevee_shadow_data"
+ /* Optionally added depending on the material. */
+ // "eevee_raytrace_data",
+ // "eevee_transmittance_data",
+ // "eevee_aov_out",
+ // "eevee_render_pass_out",
+ );
GPU_SHADER_CREATE_INFO(eevee_surf_depth)
.vertex_out(eevee_surf_iface)
.fragment_source("eevee_surf_depth_frag.glsl")
- // .additional_info("eevee_sampling_data", "eevee_utility_texture")
- ;
+ .additional_info("eevee_sampling_data", "eevee_utility_texture");
GPU_SHADER_CREATE_INFO(eevee_surf_world)
.vertex_out(eevee_surf_iface)
+ .push_constant(Type::FLOAT, "world_opacity_fade")
.fragment_out(0, Type::VEC4, "out_background")
.fragment_source("eevee_surf_world_frag.glsl")
- // .additional_info("eevee_utility_texture")
- ;
+ .additional_info("eevee_aov_out", "eevee_render_pass_out", "eevee_utility_texture");
+
+#undef image_out
+#undef image_array_out
/** \} */
@@ -161,10 +183,7 @@ GPU_SHADER_CREATE_INFO(eevee_volume_deferred)
GPU_SHADER_CREATE_INFO(eevee_material_stub).define("EEVEE_MATERIAL_STUBS");
# define EEVEE_MAT_FINAL_VARIATION(name, ...) \
- GPU_SHADER_CREATE_INFO(name) \
- .additional_info(__VA_ARGS__) \
- .auto_resource_location(true) \
- .do_static_compilation(true);
+ GPU_SHADER_CREATE_INFO(name).additional_info(__VA_ARGS__).do_static_compilation(true);
# define EEVEE_MAT_GEOM_VARIATIONS(prefix, ...) \
EEVEE_MAT_FINAL_VARIATION(prefix##_world, "eevee_geom_world", __VA_ARGS__) \
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh
new file mode 100644
index 00000000000..ec302ec6770
--- /dev/null
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "eevee_defines.hh"
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten)
+ .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE)
+ .additional_info("eevee_shared", "draw_view", "eevee_velocity_camera")
+ .uniform_buf(6, "MotionBlurData", "motion_blur_buf")
+ .sampler(0, ImageType::DEPTH_2D, "depth_tx")
+ .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_img")
+ .compute_source("eevee_motion_blur_flatten_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten_viewport)
+ .do_static_compilation(true)
+ .define("FLATTEN_VIEWPORT")
+ .image(0, GPU_RG16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "velocity_img")
+ .additional_info("eevee_motion_blur_tiles_flatten");
+
+GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten_render)
+ .do_static_compilation(true)
+ .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "velocity_img")
+ .additional_info("eevee_motion_blur_tiles_flatten");
+
+GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_dilate)
+ .do_static_compilation(true)
+ .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE)
+ .additional_info("eevee_shared")
+ /* NOTE: See MotionBlurTileIndirection. */
+ .storage_buf(0, Qualifier::READ_WRITE, "uint", "tile_indirection_buf[]")
+ .image(1, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_img")
+ .compute_source("eevee_motion_blur_dilate_comp.glsl");
+
+GPU_SHADER_CREATE_INFO(eevee_motion_blur_gather)
+ .do_static_compilation(true)
+ .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE)
+ .additional_info("eevee_shared", "draw_view", "eevee_sampling_data")
+ .uniform_buf(6, "MotionBlurData", "motion_blur_buf")
+ .sampler(0, ImageType::DEPTH_2D, "depth_tx")
+ .sampler(1, ImageType::FLOAT_2D, "velocity_tx")
+ .sampler(2, ImageType::FLOAT_2D, "in_color_tx")
+ /* NOTE: See MotionBlurTileIndirection. */
+ .storage_buf(0, Qualifier::READ, "uint", "tile_indirection_buf[]")
+ .image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_img")
+ .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
+ .compute_source("eevee_motion_blur_gather_comp.glsl");
diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh
index a5f16363466..0a1c2721c61 100644
--- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh
@@ -1,4 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include "eevee_defines.hh"
#include "gpu_shader_create_info.hh"
/* -------------------------------------------------------------------- */
@@ -16,40 +18,22 @@ GPU_SHADER_INTERFACE_INFO(eevee_velocity_surface_iface, "motion")
GPU_SHADER_CREATE_INFO(eevee_velocity_camera)
.define("VELOCITY_CAMERA")
- .uniform_buf(1, "CameraData", "camera_prev")
- .uniform_buf(2, "CameraData", "camera_curr")
- .uniform_buf(3, "CameraData", "camera_next");
+ .uniform_buf(VELOCITY_CAMERA_PREV_BUF, "CameraData", "camera_prev")
+ .uniform_buf(VELOCITY_CAMERA_CURR_BUF, "CameraData", "camera_curr")
+ .uniform_buf(VELOCITY_CAMERA_NEXT_BUF, "CameraData", "camera_next");
GPU_SHADER_CREATE_INFO(eevee_velocity_geom)
.define("MAT_VELOCITY")
- .auto_resource_location(true)
- .storage_buf(4, Qualifier::READ, "mat4", "velocity_obj_prev_buf[]", Frequency::PASS)
- .storage_buf(5, Qualifier::READ, "mat4", "velocity_obj_next_buf[]", Frequency::PASS)
- .storage_buf(6, Qualifier::READ, "vec4", "velocity_geo_prev_buf[]", Frequency::PASS)
- .storage_buf(7, Qualifier::READ, "vec4", "velocity_geo_next_buf[]", Frequency::PASS)
- .storage_buf(
- 7, Qualifier::READ, "VelocityIndex", "velocity_indirection_buf[]", Frequency::PASS)
+ .storage_buf(VELOCITY_OBJ_PREV_BUF_SLOT, Qualifier::READ, "mat4", "velocity_obj_prev_buf[]")
+ .storage_buf(VELOCITY_OBJ_NEXT_BUF_SLOT, Qualifier::READ, "mat4", "velocity_obj_next_buf[]")
+ .storage_buf(VELOCITY_GEO_PREV_BUF_SLOT, Qualifier::READ, "vec4", "velocity_geo_prev_buf[]")
+ .storage_buf(VELOCITY_GEO_NEXT_BUF_SLOT, Qualifier::READ, "vec4", "velocity_geo_next_buf[]")
+ .storage_buf(VELOCITY_INDIRECTION_BUF_SLOT,
+ Qualifier::READ,
+ "VelocityIndex",
+ "velocity_indirection_buf[]")
.vertex_out(eevee_velocity_surface_iface)
- .fragment_out(0, Type::VEC4, "out_velocity_view")
+ .fragment_out(0, Type::VEC4, "out_velocity")
.additional_info("eevee_velocity_camera");
/** \} */
-
-/* -------------------------------------------------------------------- */
-/** \name Velocity Resolve
- *
- * Computes velocity for static objects.
- * Also converts motion to camera space (as opposed to view space) if needed.
- * \{ */
-
-GPU_SHADER_CREATE_INFO(eevee_velocity_resolve)
- .do_static_compilation(true)
- .local_group_size(8, 8)
- .sampler(0, ImageType::DEPTH_2D, "depth_tx")
- .image(0, GPU_RG16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "velocity_view_img")
- .image(1, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "velocity_camera_img")
- .additional_info("eevee_shared")
- .compute_source("eevee_velocity_resolve_comp.glsl")
- .additional_info("draw_view", "eevee_velocity_camera");
-
-/** \} */
diff --git a/source/blender/draw/engines/external/external_engine.c b/source/blender/draw/engines/external/external_engine.c
index b9c09e2bc4f..3f047d8de68 100644
--- a/source/blender/draw/engines/external/external_engine.c
+++ b/source/blender/draw/engines/external/external_engine.c
@@ -236,7 +236,11 @@ static void external_draw_scene_do_v3d(void *vedata)
RegionView3D *rv3d = draw_ctx->rv3d;
ARegion *region = draw_ctx->region;
- DRW_state_reset_ex(DRW_STATE_DEFAULT & ~DRW_STATE_DEPTH_LESS_EQUAL);
+ DRW_state_reset_ex(DRW_STATE_WRITE_COLOR);
+
+ /* The external engine can use the OpenGL rendering API directly, so make sure the state is
+ * already applied. */
+ GPU_apply_state();
/* Create render engine. */
if (!rv3d->render_engine) {
@@ -332,6 +336,12 @@ static void external_draw_scene_do_image(void *UNUSED(vedata))
BLI_assert(re != NULL);
BLI_assert(engine != NULL);
+ DRW_state_reset_ex(DRW_STATE_WRITE_COLOR);
+
+ /* The external engine can use the OpenGL rendering API directly, so make sure the state is
+ * already applied. */
+ GPU_apply_state();
+
const DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get();
/* Clear the depth buffer to the value used by the background overlay so that the overlay is not
diff --git a/source/blender/draw/engines/gpencil/gpencil_engine.h b/source/blender/draw/engines/gpencil/gpencil_engine.h
index 332c7f67c64..2f9d20b3902 100644
--- a/source/blender/draw/engines/gpencil/gpencil_engine.h
+++ b/source/blender/draw/engines/gpencil/gpencil_engine.h
@@ -19,6 +19,8 @@
extern "C" {
#endif
+#define GP_LIGHT
+
#include "gpencil_defines.h"
#include "gpencil_shader_shared.h"
diff --git a/source/blender/draw/engines/gpencil/gpencil_shader_shared.h b/source/blender/draw/engines/gpencil/gpencil_shader_shared.h
index 50ff7e7efc7..4c621e955b9 100644
--- a/source/blender/draw/engines/gpencil/gpencil_shader_shared.h
+++ b/source/blender/draw/engines/gpencil/gpencil_shader_shared.h
@@ -7,7 +7,9 @@
typedef struct gpMaterial gpMaterial;
typedef struct gpLight gpLight;
typedef enum gpMaterialFlag gpMaterialFlag;
+# ifdef GP_LIGHT
typedef enum gpLightType gpLightType;
+# endif
# endif
#endif
@@ -75,8 +77,9 @@ struct gpMaterial {
};
BLI_STATIC_ASSERT_ALIGN(gpMaterial, 16)
+#ifdef GP_LIGHT
struct gpLight {
-#ifndef GPU_SHADER
+# ifndef GPU_SHADER
float3 color;
gpLightType type;
float3 right;
@@ -87,7 +90,7 @@ struct gpLight {
float _pad0;
float3 position;
float _pad1;
-#else
+# else
/* Some drivers are completely messing the alignment or the fetches here.
* We are forced to pack these into vec4 otherwise we only get 0.0 as value. */
/* NOTE(@fclem): This was the case on MacOS OpenGL implementation.
@@ -97,17 +100,18 @@ struct gpLight {
float4 packed2;
float4 packed3;
float4 packed4;
-# define _color packed0.xyz
-# define _type packed0.w
-# define _right packed1.xyz
-# define _spot_size packed1.w
-# define _up packed2.xyz
-# define _spot_blend packed2.w
-# define _forward packed3.xyz
-# define _position packed4.xyz
-#endif
+# define _color packed0.xyz
+# define _type packed0.w
+# define _right packed1.xyz
+# define _spot_size packed1.w
+# define _up packed2.xyz
+# define _spot_blend packed2.w
+# define _forward packed3.xyz
+# define _position packed4.xyz
+# endif
};
BLI_STATIC_ASSERT_ALIGN(gpLight, 16)
+#endif
#ifndef GPU_SHADER
# undef gpMaterialFlag
diff --git a/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl b/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl
index 75bd3d30d68..6671c16aa0b 100644
--- a/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl
+++ b/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl
@@ -344,7 +344,7 @@ float stroke_thickness_modulate(float thickness)
}
else {
/* World space point size. */
- thickness *= thicknessWorldScale * ProjectionMatrix[1][1] * sizeViewport.y;
+ thickness *= thicknessWorldScale * drw_view.winmat[1][1] * sizeViewport.y;
}
return thickness;
}
diff --git a/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl b/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl
index e162c5bf45e..2fca8b69183 100644
--- a/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl
+++ b/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl
@@ -5,5 +5,5 @@ void main()
int v = gl_VertexID % 3;
float x = -1.0 + float((v & 1) << 2);
float y = -1.0 + float((v & 2) << 1);
- gl_Position = ViewProjectionMatrix * (model_matrix * vec4(x, y, 0.0, 1.0));
+ gl_Position = drw_view.persmat * (model_matrix * vec4(x, y, 0.0, 1.0));
}
diff --git a/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl b/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl
index af8aec85598..b0ee059cb9d 100644
--- a/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl
+++ b/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl
@@ -32,7 +32,7 @@ void main()
vec3 vert_N;
gpMaterial gp_mat = materials[ma1.x + gpMaterialOffset];
- gpMaterialFlag gp_flag = floatBitsToInt(gp_mat._flag);
+ gpMaterialFlag gp_flag = floatBitsToUint(gp_mat._flag);
gl_Position = gpencil_vertex(ma,
ma1,
@@ -125,7 +125,7 @@ void main()
gpencil_color_output(fill_col, fcol_decode, 1.0, gp_mat._fill_texture_mix);
gp_interp.mat_flag = gp_flag & GP_FILL_FLAGS;
- gp_interp.mat_flag |= uint(ma1.x) << GPENCIl_MATID_SHIFT;
+ gp_interp.mat_flag |= uint(ma1.x + gpMaterialOffset) << GPENCIl_MATID_SHIFT;
gp_interp.uv = mat2(gp_mat.fill_uv_rot_scale.xy, gp_mat.fill_uv_rot_scale.zw) * uv1.xy +
gp_mat._fill_uv_offset;
diff --git a/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh b/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh
index 3b4de704c00..1db98d13c4a 100644
--- a/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh
+++ b/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh
@@ -20,8 +20,8 @@ GPU_SHADER_INTERFACE_INFO(gpencil_geometry_iface, "gp_interp")
GPU_SHADER_CREATE_INFO(gpencil_geometry)
.do_static_compilation(true)
+ .define("GP_LIGHT")
.typedef_source("gpencil_defines.h")
- .typedef_source("gpencil_shader_shared.h")
.sampler(0, ImageType::FLOAT_2D, "gpFillTexture")
.sampler(1, ImageType::FLOAT_2D, "gpStrokeTexture")
.sampler(2, ImageType::DEPTH_2D, "gpSceneDepthTexture")
diff --git a/source/blender/draw/engines/overlay/overlay_antialiasing.c b/source/blender/draw/engines/overlay/overlay_antialiasing.c
index 27ee479cf36..780915b7fc4 100644
--- a/source/blender/draw/engines/overlay/overlay_antialiasing.c
+++ b/source/blender/draw/engines/overlay/overlay_antialiasing.c
@@ -52,7 +52,7 @@ void OVERLAY_antialiasing_init(OVERLAY_Data *vedata)
OVERLAY_PrivateData *pd = vedata->stl->pd;
DefaultTextureList *dtxl = DRW_viewport_texture_list_get();
- /* Small texture which will have very small impact on rendertime. */
+ /* Small texture which will have very small impact on render-time. */
if (txl->dummy_depth_tx == NULL) {
const float pixel[1] = {1.0f};
txl->dummy_depth_tx = DRW_texture_create_2d(1, 1, GPU_DEPTH_COMPONENT24, 0, pixel);
diff --git a/source/blender/draw/engines/overlay/overlay_armature.c b/source/blender/draw/engines/overlay/overlay_armature.c
index ea0c2f287a6..df5ee6a18c0 100644
--- a/source/blender/draw/engines/overlay/overlay_armature.c
+++ b/source/blender/draw/engines/overlay/overlay_armature.c
@@ -2102,7 +2102,7 @@ static void pchan_culling_calc_bsphere(const Object *ob,
{
float min[3], max[3];
INIT_MINMAX(min, max);
- BKE_pchan_minmax(ob, pchan, min, max);
+ BKE_pchan_minmax(ob, pchan, true, min, max);
mid_v3_v3v3(r_bsphere->center, min, max);
r_bsphere->radius = len_v3v3(min, r_bsphere->center);
}
@@ -2220,7 +2220,7 @@ static void draw_armature_edit(ArmatureDrawContext *ctx)
const bool show_text = DRW_state_show_text();
const Object *ob_orig = DEG_get_original_object(ob);
- /* FIXME(campbell): We should be able to use the CoW object,
+ /* FIXME(@campbellbarton): We should be able to use the CoW object,
* however the active bone isn't updated. Long term solution is an 'EditArmature' struct.
* for now we can draw from the original armature. See: T66773. */
// bArmature *arm = ob->data;
diff --git a/source/blender/draw/engines/overlay/overlay_edit_text.c b/source/blender/draw/engines/overlay/overlay_edit_text.c
index dfef5b3c241..bd8720042f1 100644
--- a/source/blender/draw/engines/overlay/overlay_edit_text.c
+++ b/source/blender/draw/engines/overlay/overlay_edit_text.c
@@ -7,6 +7,8 @@
#include "DRW_render.h"
+#include "UI_resources.h"
+
#include "BKE_vfont.h"
#include "DNA_curve_types.h"
@@ -38,17 +40,24 @@ void OVERLAY_edit_text_cache_init(OVERLAY_Data *vedata)
DRW_shgroup_uniform_vec4_copy(grp, "color", G_draw.block.color_wire);
}
{
+ /* Cursor (text caret). */
state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ALPHA;
- DRW_PASS_CREATE(psl->edit_text_overlay_ps, state | pd->clipping_state);
-
+ DRW_PASS_CREATE(psl->edit_text_cursor_ps, state | pd->clipping_state);
sh = OVERLAY_shader_uniform_color();
- pd->edit_text_overlay_grp = grp = DRW_shgroup_create(sh, psl->edit_text_overlay_ps);
+ pd->edit_text_cursor_grp = grp = DRW_shgroup_create(sh, psl->edit_text_cursor_ps);
+ DRW_shgroup_uniform_vec4(grp, "color", pd->edit_text.cursor_color, 1);
- DRW_shgroup_uniform_vec4(grp, "color", pd->edit_text.overlay_color, 1);
+ /* Selection boxes. */
+ state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ALPHA;
+ DRW_PASS_CREATE(psl->edit_text_selection_ps, state | pd->clipping_state);
+ sh = OVERLAY_shader_uniform_color();
+ pd->edit_text_selection_grp = grp = DRW_shgroup_create(sh, psl->edit_text_selection_ps);
+ DRW_shgroup_uniform_vec4(grp, "color", pd->edit_text.selection_color, 1);
- state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_MUL | DRW_STATE_DEPTH_GREATER_EQUAL |
+ /* Highlight text within selection boxes. */
+ state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ALPHA | DRW_STATE_DEPTH_GREATER_EQUAL |
pd->clipping_state;
- DRW_PASS_INSTANCE_CREATE(psl->edit_text_darken_ps, psl->edit_text_overlay_ps, state);
+ DRW_PASS_INSTANCE_CREATE(psl->edit_text_highlight_ps, psl->edit_text_selection_ps, state);
}
{
/* Create view which will render everything (hopefully) behind the text geometry. */
@@ -112,7 +121,7 @@ static void edit_text_cache_populate_select(OVERLAY_Data *vedata, Object *ob)
v2_quad_corners_to_mat4(box, final_mat);
mul_m4_m4m4(final_mat, ob->obmat, final_mat);
- DRW_shgroup_call_obmat(pd->edit_text_overlay_grp, geom, final_mat);
+ DRW_shgroup_call_obmat(pd->edit_text_selection_grp, geom, final_mat);
}
}
@@ -128,7 +137,7 @@ static void edit_text_cache_populate_cursor(OVERLAY_Data *vedata, Object *ob)
mul_m4_m4m4(mat, ob->obmat, mat);
struct GPUBatch *geom = DRW_cache_quad_get();
- DRW_shgroup_call_obmat(pd->edit_text_overlay_grp, geom, mat);
+ DRW_shgroup_call_obmat(pd->edit_text_cursor_grp, geom, mat);
}
static void edit_text_cache_populate_boxes(OVERLAY_Data *vedata, Object *ob)
@@ -193,11 +202,18 @@ void OVERLAY_edit_text_draw(OVERLAY_Data *vedata)
DRW_view_set_active(pd->view_edit_text);
- /* Alpha blended. */
- copy_v4_fl4(pd->edit_text.overlay_color, 0.8f, 0.8f, 0.8f, 0.5f);
- DRW_draw_pass(psl->edit_text_overlay_ps);
+ /* Selection Boxes. */
+ UI_GetThemeColor4fv(TH_WIDGET_TEXT_SELECTION, pd->edit_text.selection_color);
+ srgb_to_linearrgb_v4(pd->edit_text.selection_color, pd->edit_text.selection_color);
+ DRW_draw_pass(psl->edit_text_selection_ps);
+
+ /* Highlight text within selection boxes. */
+ UI_GetThemeColor4fv(TH_WIDGET_TEXT_HIGHLIGHT, pd->edit_text.selection_color);
+ srgb_to_linearrgb_v4(pd->edit_text.selection_color, pd->edit_text.selection_color);
+ DRW_draw_pass(psl->edit_text_highlight_ps);
- /* Multiply previous result where depth test fail. */
- copy_v4_fl4(pd->edit_text.overlay_color, 0.0f, 0.0f, 0.0f, 1.0f);
- DRW_draw_pass(psl->edit_text_darken_ps);
+ /* Cursor (text caret). */
+ UI_GetThemeColor4fv(TH_WIDGET_TEXT_CURSOR, pd->edit_text.cursor_color);
+ srgb_to_linearrgb_v4(pd->edit_text.cursor_color, pd->edit_text.cursor_color);
+ DRW_draw_pass(psl->edit_text_cursor_ps);
}
diff --git a/source/blender/draw/engines/overlay/overlay_edit_uv.c b/source/blender/draw/engines/overlay/overlay_edit_uv.c
index 4cfe9fcea4e..adbe5e7155e 100644
--- a/source/blender/draw/engines/overlay/overlay_edit_uv.c
+++ b/source/blender/draw/engines/overlay/overlay_edit_uv.c
@@ -160,7 +160,6 @@ void OVERLAY_edit_uv_init(OVERLAY_Data *vedata)
pd->edit_uv.draw_type = sima->dt_uvstretch;
BLI_listbase_clear(&pd->edit_uv.totals);
pd->edit_uv.total_area_ratio = 0.0f;
- pd->edit_uv.total_area_ratio_inv = 0.0f;
/* During engine initialization phase the `sima` isn't locked and
* we are able to retrieve the needed data.
@@ -280,8 +279,6 @@ void OVERLAY_edit_uv_cache_init(OVERLAY_Data *vedata)
DRW_shgroup_uniform_block(pd->edit_uv_stretching_grp, "globalsBlock", G_draw.block_ubo);
DRW_shgroup_uniform_float(
pd->edit_uv_stretching_grp, "totalAreaRatio", &pd->edit_uv.total_area_ratio, 1);
- DRW_shgroup_uniform_float(
- pd->edit_uv_stretching_grp, "totalAreaRatioInv", &pd->edit_uv.total_area_ratio_inv, 1);
}
}
@@ -510,7 +507,6 @@ static void edit_uv_stretching_update_ratios(OVERLAY_Data *vedata)
if (total_area > FLT_EPSILON && total_area_uv > FLT_EPSILON) {
pd->edit_uv.total_area_ratio = total_area / total_area_uv;
- pd->edit_uv.total_area_ratio_inv = total_area_uv / total_area;
}
}
BLI_freelistN(&pd->edit_uv.totals);
diff --git a/source/blender/draw/engines/overlay/overlay_engine.c b/source/blender/draw/engines/overlay/overlay_engine.c
index f8c28394b16..6e2da95e405 100644
--- a/source/blender/draw/engines/overlay/overlay_engine.c
+++ b/source/blender/draw/engines/overlay/overlay_engine.c
@@ -192,6 +192,8 @@ static void OVERLAY_cache_init(void *vedata)
OVERLAY_edit_curves_cache_init(vedata);
break;
case CTX_MODE_SCULPT_CURVES:
+ OVERLAY_sculpt_curves_cache_init(vedata);
+ break;
case CTX_MODE_OBJECT:
break;
default:
@@ -308,13 +310,16 @@ static void OVERLAY_cache_populate(void *vedata, Object *ob)
(pd->ctx_mode == CTX_MODE_PARTICLE);
const bool in_paint_mode = (ob == draw_ctx->obact) &&
(draw_ctx->object_mode & OB_MODE_ALL_PAINT);
+ const bool in_sculpt_curve_mode = (ob == draw_ctx->obact) &&
+ (draw_ctx->object_mode & OB_MODE_SCULPT_CURVES);
const bool in_sculpt_mode = (ob == draw_ctx->obact) && (ob->sculpt != NULL) &&
(ob->sculpt->mode_type == OB_MODE_SCULPT);
+ const bool in_curves_sculpt_mode = (ob == draw_ctx->obact) &&
+ (ob->mode == OB_MODE_SCULPT_CURVES);
const bool has_surface = ELEM(ob->type,
OB_MESH,
OB_CURVES_LEGACY,
OB_SURF,
- OB_MBALL,
OB_FONT,
OB_GPENCIL,
OB_CURVES,
@@ -329,8 +334,8 @@ static void OVERLAY_cache_populate(void *vedata, Object *ob)
const bool draw_bones = (pd->overlay.flag & V3D_OVERLAY_HIDE_BONES) == 0;
const bool draw_wires = draw_surface && has_surface &&
(pd->wireframe_mode || !pd->hide_overlays);
- const bool draw_outlines = !in_edit_mode && !in_paint_mode && renderable && has_surface &&
- !instance_parent_in_edit_mode &&
+ const bool draw_outlines = !in_edit_mode && !in_paint_mode && !in_sculpt_curve_mode &&
+ renderable && has_surface && !instance_parent_in_edit_mode &&
(pd->v3d_flag & V3D_SELECT_OUTLINE) &&
(ob->base_flag & BASE_SELECTED);
const bool draw_bone_selection = (ob->type == OB_MESH) && pd->armature.do_pose_fade_geom &&
@@ -428,6 +433,9 @@ static void OVERLAY_cache_populate(void *vedata, Object *ob)
if (in_sculpt_mode) {
OVERLAY_sculpt_cache_populate(vedata, ob);
}
+ else if (in_curves_sculpt_mode) {
+ OVERLAY_sculpt_curves_cache_populate(vedata, ob);
+ }
if (draw_motion_paths) {
OVERLAY_motion_path_cache_populate(vedata, ob);
@@ -591,6 +599,9 @@ static void OVERLAY_draw_scene(void *vedata)
case CTX_MODE_SCULPT:
OVERLAY_sculpt_draw(vedata);
break;
+ case CTX_MODE_SCULPT_CURVES:
+ OVERLAY_sculpt_curves_draw(vedata);
+ break;
case CTX_MODE_EDIT_MESH:
case CTX_MODE_POSE:
case CTX_MODE_PAINT_WEIGHT:
diff --git a/source/blender/draw/engines/overlay/overlay_extra.c b/source/blender/draw/engines/overlay/overlay_extra.c
index f875254a685..8211b2f0490 100644
--- a/source/blender/draw/engines/overlay/overlay_extra.c
+++ b/source/blender/draw/engines/overlay/overlay_extra.c
@@ -1315,9 +1315,14 @@ static void OVERLAY_relationship_lines(OVERLAY_ExtraCallBuffers *cb,
if ((curcon->ui_expand_flag & (1 << 0)) && BKE_constraint_targets_get(curcon, &targets)) {
bConstraintTarget *ct;
+ BKE_constraint_custom_object_space_init(cob, curcon);
+
for (ct = targets.first; ct; ct = ct->next) {
/* calculate target's matrix */
- if (cti->get_target_matrix) {
+ if (ct->flag & CONSTRAINT_TAR_CUSTOM_SPACE) {
+ copy_m4_m4(ct->matrix, cob->space_obj_world_matrix);
+ }
+ else if (cti->get_target_matrix) {
cti->get_target_matrix(depsgraph, curcon, cob, ct, DEG_get_ctime(depsgraph));
}
else {
@@ -1353,7 +1358,7 @@ static void OVERLAY_volume_extra(OVERLAY_ExtraCallBuffers *cb,
/* Don't show smoke before simulation starts, this could be made an option in the future. */
const bool draw_velocity = (fds->draw_velocity && fds->fluid &&
- CFRA >= fds->point_cache[0]->startframe);
+ scene->r.cfra >= fds->point_cache[0]->startframe);
/* Show gridlines only for slices with no interpolation. */
const bool show_gridlines = (fds->show_gridlines && fds->fluid &&
@@ -1484,7 +1489,7 @@ static void OVERLAY_object_center(OVERLAY_ExtraCallBuffers *cb,
{
const bool is_library = ID_REAL_USERS(&ob->id) > 1 || ID_IS_LINKED(ob);
- if (ob == OBACT(view_layer)) {
+ if (ob == BKE_view_layer_active_object_get(view_layer)) {
DRW_buffer_add_entry(cb->center_active, ob->obmat[3]);
}
else if (ob->base_flag & BASE_SELECTED) {
@@ -1546,8 +1551,9 @@ void OVERLAY_extra_cache_populate(OVERLAY_Data *vedata, Object *ob)
(md = BKE_modifiers_findby_type(ob, eModifierType_Fluid)) &&
(BKE_modifier_is_enabled(scene, md, eModifierMode_Realtime)) &&
(((FluidModifierData *)md)->domain != NULL) &&
- (CFRA >= (((FluidModifierData *)md)->domain->cache_frame_start)) &&
- (CFRA <= (((FluidModifierData *)md)->domain->cache_frame_end));
+ (scene->r.cfra >=
+ (((FluidModifierData *)md)->domain->cache_frame_start)) &&
+ (scene->r.cfra <= (((FluidModifierData *)md)->domain->cache_frame_end));
float *color;
int theme_id = DRW_object_wire_theme_get(ob, view_layer, &color);
diff --git a/source/blender/draw/engines/overlay/overlay_outline.c b/source/blender/draw/engines/overlay/overlay_outline.c
index eea9a1a1bef..f2e2acc98a9 100644
--- a/source/blender/draw/engines/overlay/overlay_outline.c
+++ b/source/blender/draw/engines/overlay/overlay_outline.c
@@ -133,6 +133,10 @@ void OVERLAY_outline_cache_init(OVERLAY_Data *vedata)
pd->outlines_gpencil_grp = grp = DRW_shgroup_create(sh_gpencil, psl->outlines_prepass_ps);
DRW_shgroup_uniform_bool_copy(grp, "isTransform", (G.moving & G_TRANSFORM_OBJ) != 0);
DRW_shgroup_uniform_float_copy(grp, "gpStrokeIndexOffset", 0.0);
+
+ GPUShader *sh_curves = OVERLAY_shader_outline_prepass_curves();
+ pd->outlines_curves_grp = grp = DRW_shgroup_create(sh_curves, psl->outlines_prepass_ps);
+ DRW_shgroup_uniform_bool_copy(grp, "isTransform", (G.moving & G_TRANSFORM_OBJ) != 0);
}
/* outlines_prepass_ps is still needed for selection of probes. */
@@ -267,6 +271,12 @@ static void OVERLAY_outline_volume(OVERLAY_PrivateData *pd, Object *ob)
DRW_shgroup_call(shgroup, geom, ob);
}
+static void OVERLAY_outline_curves(OVERLAY_PrivateData *pd, Object *ob)
+{
+ DRWShadingGroup *shgroup = pd->outlines_curves_grp;
+ DRW_shgroup_curves_create_sub(ob, shgroup, NULL);
+}
+
void OVERLAY_outline_cache_populate(OVERLAY_Data *vedata,
Object *ob,
OVERLAY_DupliData *dupli,
@@ -293,6 +303,11 @@ void OVERLAY_outline_cache_populate(OVERLAY_Data *vedata,
return;
}
+ if (ob->type == OB_CURVES) {
+ OVERLAY_outline_curves(pd, ob);
+ return;
+ }
+
if (ob->type == OB_POINTCLOUD && pd->wireframe_mode) {
/* Looks bad in this case. Could be relaxed if we draw a
* wireframe of some sort in the future. */
diff --git a/source/blender/draw/engines/overlay/overlay_private.h b/source/blender/draw/engines/overlay/overlay_private.h
index 23c20a186a0..0a783c44029 100644
--- a/source/blender/draw/engines/overlay/overlay_private.h
+++ b/source/blender/draw/engines/overlay/overlay_private.h
@@ -78,8 +78,9 @@ typedef struct OVERLAY_PassList {
DRWPass *edit_mesh_analysis_ps;
DRWPass *edit_mesh_normals_ps;
DRWPass *edit_particle_ps;
- DRWPass *edit_text_overlay_ps;
- DRWPass *edit_text_darken_ps;
+ DRWPass *edit_text_cursor_ps;
+ DRWPass *edit_text_selection_ps;
+ DRWPass *edit_text_highlight_ps;
DRWPass *edit_text_wire_ps[2];
DRWPass *edit_uv_edges_ps;
DRWPass *edit_uv_verts_ps;
@@ -116,6 +117,7 @@ typedef struct OVERLAY_PassList {
DRWPass *particle_ps;
DRWPass *pointcloud_ps;
DRWPass *sculpt_mask_ps;
+ DRWPass *sculpt_curves_selection_ps;
DRWPass *volume_ps;
DRWPass *wireframe_ps;
DRWPass *wireframe_xray_ps;
@@ -251,7 +253,8 @@ typedef struct OVERLAY_PrivateData {
DRWShadingGroup *edit_mesh_analysis_grp;
DRWShadingGroup *edit_particle_strand_grp;
DRWShadingGroup *edit_particle_point_grp;
- DRWShadingGroup *edit_text_overlay_grp;
+ DRWShadingGroup *edit_text_cursor_grp;
+ DRWShadingGroup *edit_text_selection_grp;
DRWShadingGroup *edit_text_wire_grp[2];
DRWShadingGroup *edit_uv_verts_grp;
DRWShadingGroup *edit_uv_edges_grp;
@@ -267,6 +270,7 @@ typedef struct OVERLAY_PrivateData {
DRWShadingGroup *motion_path_lines_grp;
DRWShadingGroup *motion_path_points_grp;
DRWShadingGroup *outlines_grp;
+ DRWShadingGroup *outlines_curves_grp;
DRWShadingGroup *outlines_ptcloud_grp;
DRWShadingGroup *outlines_gpencil_grp;
DRWShadingGroup *paint_depth_grp;
@@ -279,6 +283,7 @@ typedef struct OVERLAY_PrivateData {
DRWShadingGroup *particle_shapes_grp;
DRWShadingGroup *pointcloud_dots_grp;
DRWShadingGroup *sculpt_mask_grp;
+ DRWShadingGroup *sculpt_curves_selection_grp;
DRWShadingGroup *volume_selection_surface_grp;
DRWShadingGroup *wires_grp[2][2]; /* With and without coloring. */
DRWShadingGroup *wires_all_grp[2][2]; /* With and without coloring. */
@@ -335,7 +340,8 @@ typedef struct OVERLAY_PrivateData {
int handle_display;
} edit_curve;
struct {
- float overlay_color[4];
+ float cursor_color[4];
+ float selection_color[4];
} edit_text;
struct {
bool do_zbufclip;
@@ -381,7 +387,6 @@ typedef struct OVERLAY_PrivateData {
eSpaceImage_UVDT_Stretch draw_type;
ListBase totals;
float total_area_ratio;
- float total_area_ratio_inv;
/* stencil overlay */
struct Image *stencil_image;
@@ -669,6 +674,10 @@ void OVERLAY_sculpt_cache_init(OVERLAY_Data *vedata);
void OVERLAY_sculpt_cache_populate(OVERLAY_Data *vedata, Object *ob);
void OVERLAY_sculpt_draw(OVERLAY_Data *vedata);
+void OVERLAY_sculpt_curves_cache_init(OVERLAY_Data *vedata);
+void OVERLAY_sculpt_curves_cache_populate(OVERLAY_Data *vedata, Object *ob);
+void OVERLAY_sculpt_curves_draw(OVERLAY_Data *vedata);
+
void OVERLAY_wireframe_init(OVERLAY_Data *vedata);
void OVERLAY_wireframe_cache_init(OVERLAY_Data *vedata);
void OVERLAY_wireframe_cache_populate(OVERLAY_Data *vedata,
@@ -737,6 +746,7 @@ GPUShader *OVERLAY_shader_motion_path_line(void);
GPUShader *OVERLAY_shader_motion_path_vert(void);
GPUShader *OVERLAY_shader_uniform_color(void);
GPUShader *OVERLAY_shader_outline_prepass(bool use_wire);
+GPUShader *OVERLAY_shader_outline_prepass_curves(void);
GPUShader *OVERLAY_shader_outline_prepass_gpencil(void);
GPUShader *OVERLAY_shader_outline_prepass_pointcloud(void);
GPUShader *OVERLAY_shader_extra_grid(void);
@@ -750,6 +760,7 @@ GPUShader *OVERLAY_shader_paint_wire(void);
GPUShader *OVERLAY_shader_particle_dot(void);
GPUShader *OVERLAY_shader_particle_shape(void);
GPUShader *OVERLAY_shader_sculpt_mask(void);
+GPUShader *OVERLAY_shader_sculpt_curves_selection(void);
GPUShader *OVERLAY_shader_volume_velocity(bool use_needle, bool use_mac);
GPUShader *OVERLAY_shader_volume_gridlines(bool color_with_flags, bool color_range);
GPUShader *OVERLAY_shader_wireframe(bool custom_bias);
diff --git a/source/blender/draw/engines/overlay/overlay_sculpt_curves.cc b/source/blender/draw/engines/overlay/overlay_sculpt_curves.cc
new file mode 100644
index 00000000000..b8021124f27
--- /dev/null
+++ b/source/blender/draw/engines/overlay/overlay_sculpt_curves.cc
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2022 Blender Foundation. */
+
+/** \file
+ * \ingroup draw_engine
+ */
+
+#include "DRW_render.h"
+
+#include "draw_cache_impl.h"
+#include "overlay_private.h"
+
+#include "BKE_curves.hh"
+
+void OVERLAY_sculpt_curves_cache_init(OVERLAY_Data *vedata)
+{
+ OVERLAY_PassList *psl = vedata->psl;
+ OVERLAY_PrivateData *pd = vedata->stl->pd;
+
+ const DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL | DRW_STATE_BLEND_ALPHA;
+ DRW_PASS_CREATE(psl->sculpt_curves_selection_ps, state | pd->clipping_state);
+
+ GPUShader *sh = OVERLAY_shader_sculpt_curves_selection();
+ pd->sculpt_curves_selection_grp = DRW_shgroup_create(sh, psl->sculpt_curves_selection_ps);
+ DRWShadingGroup *grp = pd->sculpt_curves_selection_grp;
+
+ /* Reuse the same mask opacity from sculpt mode, since it wasn't worth it to add a different
+ * property yet. */
+ DRW_shgroup_uniform_float_copy(grp, "selection_opacity", pd->overlay.sculpt_mode_mask_opacity);
+}
+
+static bool everything_selected(const Curves &curves_id)
+{
+ if (!(curves_id.flag & CV_SCULPT_SELECTION_ENABLED)) {
+ /* When the selection is disabled, conceptually everything is selected. */
+ return true;
+ }
+ const blender::bke::CurvesGeometry &curves = blender::bke::CurvesGeometry::wrap(
+ curves_id.geometry);
+ blender::VArray<float> selection;
+ switch (curves_id.selection_domain) {
+ case ATTR_DOMAIN_POINT:
+ selection = curves.selection_point_float();
+ break;
+ case ATTR_DOMAIN_CURVE:
+ selection = curves.selection_curve_float();
+ break;
+ }
+ return selection.is_single() && selection.get_internal_single() == 1.0f;
+}
+
+void OVERLAY_sculpt_curves_cache_populate(OVERLAY_Data *vedata, Object *object)
+{
+ OVERLAY_PrivateData *pd = vedata->stl->pd;
+ Curves *curves = static_cast<Curves *>(object->data);
+
+ /* As an optimization, return early if everything is selected. */
+ if (everything_selected(*curves)) {
+ return;
+ }
+
+ /* Retrieve the location of the texture. */
+ const char *name = curves->selection_domain == ATTR_DOMAIN_POINT ? ".selection_point_float" :
+ ".selection_curve_float";
+
+ bool is_point_domain;
+ GPUTexture **texture = DRW_curves_texture_for_evaluated_attribute(
+ curves, name, &is_point_domain);
+ if (texture == nullptr) {
+ return;
+ }
+
+ /* Evaluate curves and their attributes if necessary. */
+ DRWShadingGroup *grp = DRW_shgroup_curves_create_sub(
+ object, pd->sculpt_curves_selection_grp, nullptr);
+ if (*texture == nullptr) {
+ return;
+ }
+
+ DRW_shgroup_uniform_bool_copy(grp, "is_point_domain", is_point_domain);
+ DRW_shgroup_uniform_texture(grp, "selection_tx", *texture);
+}
+
+void OVERLAY_sculpt_curves_draw(OVERLAY_Data *vedata)
+{
+ OVERLAY_PassList *psl = vedata->psl;
+ OVERLAY_PrivateData *pd = vedata->stl->pd;
+ OVERLAY_FramebufferList *fbl = vedata->fbl;
+
+ if (DRW_state_is_fbo()) {
+ GPU_framebuffer_bind(pd->painting.in_front ? fbl->overlay_in_front_fb :
+ fbl->overlay_default_fb);
+ }
+
+ DRW_draw_pass(psl->sculpt_curves_selection_ps);
+}
diff --git a/source/blender/draw/engines/overlay/overlay_shader.c b/source/blender/draw/engines/overlay/overlay_shader.c
index 48146fbddfb..2373363ab9d 100644
--- a/source/blender/draw/engines/overlay/overlay_shader.c
+++ b/source/blender/draw/engines/overlay/overlay_shader.c
@@ -76,6 +76,7 @@ typedef struct OVERLAY_Shaders {
GPUShader *motion_path_line;
GPUShader *motion_path_vert;
GPUShader *outline_prepass;
+ GPUShader *outline_prepass_curves;
GPUShader *outline_prepass_gpencil;
GPUShader *outline_prepass_pointcloud;
GPUShader *outline_prepass_wire;
@@ -90,6 +91,7 @@ typedef struct OVERLAY_Shaders {
GPUShader *particle_shape;
GPUShader *pointcloud_dot;
GPUShader *sculpt_mask;
+ GPUShader *sculpt_curves_selection;
GPUShader *uniform_color;
GPUShader *volume_velocity_needle_sh;
GPUShader *volume_velocity_mac_sh;
@@ -650,6 +652,18 @@ GPUShader *OVERLAY_shader_outline_prepass(bool use_wire)
return use_wire ? sh_data->outline_prepass_wire : sh_data->outline_prepass;
}
+GPUShader *OVERLAY_shader_outline_prepass_curves()
+{
+ const DRWContextState *draw_ctx = DRW_context_state_get();
+ OVERLAY_Shaders *sh_data = &e_data.sh_data[draw_ctx->sh_cfg];
+ if (!sh_data->outline_prepass_curves) {
+ sh_data->outline_prepass_curves = GPU_shader_create_from_info_name(
+ draw_ctx->sh_cfg ? "overlay_outline_prepass_curves_clipped" :
+ "overlay_outline_prepass_curves");
+ }
+ return sh_data->outline_prepass_curves;
+}
+
GPUShader *OVERLAY_shader_outline_prepass_gpencil(void)
{
const DRWContextState *draw_ctx = DRW_context_state_get();
@@ -792,6 +806,18 @@ GPUShader *OVERLAY_shader_sculpt_mask(void)
return sh_data->sculpt_mask;
}
+GPUShader *OVERLAY_shader_sculpt_curves_selection(void)
+{
+ const DRWContextState *draw_ctx = DRW_context_state_get();
+ OVERLAY_Shaders *sh_data = &e_data.sh_data[draw_ctx->sh_cfg];
+ if (!sh_data->sculpt_curves_selection) {
+ sh_data->sculpt_curves_selection = GPU_shader_create_from_info_name(
+ draw_ctx->sh_cfg == GPU_SHADER_CFG_CLIPPED ? "overlay_sculpt_curves_selection_clipped" :
+ "overlay_sculpt_curves_selection");
+ }
+ return sh_data->sculpt_curves_selection;
+}
+
struct GPUShader *OVERLAY_shader_uniform_color(void)
{
const DRWContextState *draw_ctx = DRW_context_state_get();
diff --git a/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh b/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh
index 58f96110887..9396a6d3f2f 100644
--- a/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh
+++ b/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh
@@ -293,7 +293,6 @@ GPU_SHADER_CREATE_INFO(overlay_edit_uv_stretching_area)
.do_static_compilation(true)
.vertex_in(1, Type::FLOAT, "ratio")
.push_constant(Type::FLOAT, "totalAreaRatio")
- .push_constant(Type::FLOAT, "totalAreaRatioInv")
.additional_info("overlay_edit_uv_stretching");
GPU_SHADER_CREATE_INFO(overlay_edit_uv_stretching_angle)
diff --git a/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh b/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh
index 6f6a9c1622d..288fb3b3cbd 100644
--- a/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh
+++ b/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh
@@ -29,6 +29,16 @@ GPU_SHADER_CREATE_INFO(overlay_outline_prepass_mesh_clipped)
GPU_SHADER_INTERFACE_INFO(overlay_outline_prepass_wire_iface, "vert").flat(Type::VEC3, "pos");
+GPU_SHADER_CREATE_INFO(overlay_outline_prepass_curves)
+ .do_static_compilation(true)
+ .vertex_source("overlay_outline_prepass_curves_vert.glsl")
+ .additional_info("draw_hair", "overlay_outline_prepass")
+ .additional_info("draw_object_infos");
+
+GPU_SHADER_CREATE_INFO(overlay_outline_prepass_curves_clipped)
+ .do_static_compilation(true)
+ .additional_info("overlay_outline_prepass_curves", "drw_clipped");
+
GPU_SHADER_CREATE_INFO(overlay_outline_prepass_wire)
.do_static_compilation(true)
.define("USE_GEOM")
diff --git a/source/blender/draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh b/source/blender/draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh
new file mode 100644
index 00000000000..46e3943b293
--- /dev/null
+++ b/source/blender/draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_INTERFACE_INFO(overlay_sculpt_curves_selection_iface, "")
+ .smooth(Type::FLOAT, "mask_weight");
+
+GPU_SHADER_CREATE_INFO(overlay_sculpt_curves_selection)
+ .do_static_compilation(true)
+ .push_constant(Type::BOOL, "is_point_domain")
+ .push_constant(Type::FLOAT, "selection_opacity")
+ .sampler(0, ImageType::FLOAT_BUFFER, "selection_tx")
+ .vertex_out(overlay_sculpt_curves_selection_iface)
+ .vertex_source("overlay_sculpt_curves_selection_vert.glsl")
+ .fragment_source("overlay_sculpt_curves_selection_frag.glsl")
+ .fragment_out(0, Type::VEC4, "out_color")
+ .additional_info("draw_hair", "draw_globals");
+
+GPU_SHADER_CREATE_INFO(overlay_sculpt_curves_selection_clipped)
+ .do_static_compilation(true)
+ .additional_info("overlay_sculpt_curves_selection", "drw_clipped");
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl
index f28a809fdab..606292bbe83 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl
@@ -96,7 +96,7 @@ void main()
float dist_raw = texelFetch(lineTex, center_texel, 0).b;
float dist = decode_line_dist(dist_raw);
- /* TODO: Opti: use textureGather. */
+ /* TODO: Optimization: use textureGather. */
vec4 neightbor_col0 = texelFetchOffset(colorTex, center_texel, 0, ivec2(1, 0));
vec4 neightbor_col1 = texelFetchOffset(colorTex, center_texel, 0, ivec2(-1, 0));
vec4 neightbor_col2 = texelFetchOffset(colorTex, center_texel, 0, ivec2(0, 1));
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl
index 0a8e279e9b0..ca5a6aff2ca 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl
@@ -17,8 +17,8 @@ vec2 compute_dir(vec2 v0, vec2 v1, vec2 v2)
mat3 compute_mat(vec4 sphere, vec3 bone_vec, out float z_ofs)
{
- bool is_persp = (ProjectionMatrix[3][3] == 0.0);
- vec3 cam_ray = (is_persp) ? sphere.xyz - ViewMatrixInverse[3].xyz : -ViewMatrixInverse[2].xyz;
+ bool is_persp = (drw_view.winmat[3][3] == 0.0);
+ vec3 cam_ray = (is_persp) ? sphere.xyz - drw_view.viewinv[3].xyz : -drw_view.viewinv[2].xyz;
/* Sphere center distance from the camera (persp) in world space. */
float cam_dist = length(cam_ray);
@@ -88,13 +88,13 @@ vec3 get_outline_point(vec2 pos,
void main()
{
- float dst_head = distance(headSphere.xyz, ViewMatrixInverse[3].xyz);
- float dst_tail = distance(tailSphere.xyz, ViewMatrixInverse[3].xyz);
- // float dst_head = -dot(headSphere.xyz, ViewMatrix[2].xyz);
- // float dst_tail = -dot(tailSphere.xyz, ViewMatrix[2].xyz);
+ float dst_head = distance(headSphere.xyz, drw_view.viewinv[3].xyz);
+ float dst_tail = distance(tailSphere.xyz, drw_view.viewinv[3].xyz);
+ // float dst_head = -dot(headSphere.xyz, drw_view.viewmat[2].xyz);
+ // float dst_tail = -dot(tailSphere.xyz, drw_view.viewmat[2].xyz);
vec4 sph_near, sph_far;
- if ((dst_head > dst_tail) && (ProjectionMatrix[3][3] == 0.0)) {
+ if ((dst_head > dst_tail) && (drw_view.winmat[3][3] == 0.0)) {
sph_near = tailSphere;
sph_far = headSphere;
}
@@ -130,7 +130,7 @@ void main()
gl_Position = p1;
/* compute position from 3 vertex because the change in direction
- * can happen very quicky and lead to very thin edges. */
+ * can happen very quickly and lead to very thin edges. */
vec2 ss0 = proj(p0);
vec2 ss1 = proj(p1);
vec2 ss2 = proj(p2);
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl
index 2dd86a57dfd..4d21ffd96b5 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl
@@ -30,7 +30,7 @@ void main()
sp = bone_mat * sp.xzy + headSphere.xyz;
nor = bone_mat * nor.xzy;
- normalView = mat3(ViewMatrix) * nor;
+ normalView = mat3(drw_view.viewmat) * nor;
finalStateColor = stateColor;
finalBoneColor = boneColor;
@@ -38,5 +38,5 @@ void main()
view_clipping_distances(sp);
vec4 pos_4d = vec4(sp, 1.0);
- gl_Position = ViewProjectionMatrix * pos_4d;
+ gl_Position = drw_view.persmat * pos_4d;
}
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl
index 47c5dada708..b485b0a7807 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl
@@ -5,7 +5,7 @@ void main(void)
{
finalColor = vec4(geom_in[0].vColSize.rgb, 1.0);
- bool is_persp = (ProjectionMatrix[3][3] == 0.0);
+ bool is_persp = (drw_view.winmat[3][3] == 0.0);
vec3 view_vec = (is_persp) ? normalize(geom_in[1].vPos) : vec3(0.0, 0.0, -1.0);
vec3 v10 = geom_in[0].vPos - geom_in[1].vPos;
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl
index 29319b3f7ac..91eb6265192 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl
@@ -14,10 +14,10 @@ void main()
mat4 model_mat = extract_matrix_packed_data(inst_obmat, state_color, bone_color);
vec4 world_pos = model_mat * vec4(pos, 1.0);
- vec4 view_pos = ViewMatrix * world_pos;
+ vec4 view_pos = drw_view.viewmat * world_pos;
geom_in.vPos = view_pos.xyz;
- geom_in.pPos = ProjectionMatrix * view_pos;
+ geom_in.pPos = drw_view.winmat * view_pos;
geom_in.inverted = int(dot(cross(model_mat[0].xyz, model_mat[1].xyz), model_mat[2].xyz) < 0.0);
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl
index cdbe8c3d7df..68f7e75673f 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl
@@ -25,7 +25,7 @@ void main()
finalColor.a = 1.0;
vec4 world_pos = model_mat * vec4(pos, 1.0);
- gl_Position = ViewProjectionMatrix * world_pos;
+ gl_Position = drw_view.persmat * world_pos;
view_clipping_distances(world_pos.xyz);
}
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl
index 31369e0c3df..4d79fab718f 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl
@@ -13,10 +13,10 @@ void main()
vec4 bone_color, state_color;
mat4 model_mat = extract_matrix_packed_data(inst_obmat, state_color, bone_color);
- mat4 model_view_matrix = ViewMatrix * model_mat;
+ mat4 model_view_matrix = drw_view.viewmat * model_mat;
mat4 sphereMatrix = inverse(model_view_matrix);
- bool is_persp = (ProjectionMatrix[3][3] == 0.0);
+ bool is_persp = (drw_view.winmat[3][3] == 0.0);
/* This is the local space camera ray (not normalize).
* In perspective mode it's also the viewspace position
@@ -58,8 +58,8 @@ void main()
vec3 cam_pos0 = x_axis * pos.x + y_axis * pos.y + z_axis * z_ofs;
vec4 V = model_view_matrix * vec4(cam_pos0, 1.0);
- gl_Position = ProjectionMatrix * V;
- vec4 center = ProjectionMatrix * vec4(model_view_matrix[3].xyz, 1.0);
+ gl_Position = drw_view.winmat * V;
+ vec4 center = drw_view.winmat * vec4(model_view_matrix[3].xyz, 1.0);
/* Offset away from the center to avoid overlap with solid shape. */
vec2 ofs_dir = normalize(proj(gl_Position) - proj(center));
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl
index e60b6e94492..150701b78df 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl
@@ -5,7 +5,7 @@ void main()
{
const float sphere_radius = 0.05;
- bool is_perp = (ProjectionMatrix[3][3] == 0.0);
+ bool is_perp = (drw_view.winmat[3][3] == 0.0);
vec3 ray_ori_view = (is_perp) ? vec3(0.0) : viewPosition.xyz;
vec3 ray_dir_view = (is_perp) ? viewPosition : vec3(0.0, 0.0, -1.0);
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl
index abbaad8cd10..3d2dfc018bb 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl
@@ -10,10 +10,10 @@ void main()
vec4 bone_color, state_color;
mat4 model_mat = extract_matrix_packed_data(inst_obmat, state_color, bone_color);
- mat4 model_view_matrix = ViewMatrix * model_mat;
+ mat4 model_view_matrix = drw_view.viewmat * model_mat;
sphereMatrix = inverse(model_view_matrix);
- bool is_persp = (ProjectionMatrix[3][3] == 0.0);
+ bool is_persp = (drw_view.winmat[3][3] == 0.0);
/* This is the local space camera ray (not normalize).
* In perspective mode it's also the viewspace position
@@ -65,7 +65,7 @@ void main()
vec4 pos_4d = vec4(cam_pos, 1.0);
vec4 V = model_view_matrix * pos_4d;
- gl_Position = ProjectionMatrix * V;
+ gl_Position = drw_view.winmat * V;
viewPosition = V.xyz;
finalStateColor = state_color.xyz;
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl
index b5edcd2858b..e7917a46312 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl
@@ -31,12 +31,12 @@ void main()
vec4 boneStart_4d = vec4(boneStart, 1.0);
vec4 boneEnd_4d = vec4(boneEnd, 1.0);
- vec4 v0 = ViewMatrix * boneStart_4d;
- vec4 v1 = ViewMatrix * boneEnd_4d;
+ vec4 v0 = drw_view.viewmat * boneStart_4d;
+ vec4 v1 = drw_view.viewmat * boneEnd_4d;
/* Clip the bone to the camera origin plane (not the clip plane)
* to avoid glitches if one end is behind the camera origin (in persp). */
- float clip_dist = (ProjectionMatrix[3][3] == 0.0) ?
+ float clip_dist = (drw_view.winmat[3][3] == 0.0) ?
-1e-7 :
1e20; /* hardcoded, -1e-8 is giving gliches. */
vec3 bvec = v1.xyz - v0.xyz;
@@ -48,8 +48,8 @@ void main()
v1.xyz = clip_pt;
}
- vec4 p0 = ProjectionMatrix * v0;
- vec4 p1 = ProjectionMatrix * v1;
+ vec4 p0 = drw_view.winmat * v0;
+ vec4 p1 = drw_view.winmat * v1;
float h = (is_head) ? p0.w : p1.w;
@@ -58,7 +58,7 @@ void main()
/* 2D screen aligned pos at the point */
vec2 vpos = pos.x * x_screen_vec + pos.y * y_screen_vec;
- vpos *= (ProjectionMatrix[3][3] == 0.0) ? h : 1.0;
+ vpos *= (drw_view.winmat[3][3] == 0.0) ? h : 1.0;
vpos *= (do_wire) ? 1.0 : 0.5;
if (finalInnerColor.a > 0.0) {
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl
index 6ff8d0665d1..dc5c43f417e 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl
@@ -45,7 +45,7 @@ void main()
if (gl_VertexID == 0) {
if (isConstantScreenSizeNormals) {
- bool is_persp = (ProjectionMatrix[3][3] == 0.0);
+ bool is_persp = (drw_view.winmat[3][3] == 0.0);
if (is_persp) {
float dist_fac = length(cameraPos - world_pos);
float cos_fac = dot(cameraForward, cameraVec(world_pos));
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl
index f1fbdac7847..76a944c6987 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl
@@ -9,7 +9,7 @@ void main()
vec3 up = normalize(imat * screenVecs[1].xyz);
vec3 screen_pos = (right * pos.x + up * pos.z) * size;
vec4 pos_4d = ModelMatrix * vec4(local_pos + screen_pos, 1.0);
- gl_Position = ViewProjectionMatrix * pos_4d;
+ gl_Position = drw_view.persmat * pos_4d;
/* Manual stipple: one segment out of 2 is transparent. */
finalColor = ((gl_VertexID & 1) == 0) ? colorSkinRoot : vec4(0.0);
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl
index 374fb50af75..a50bc5e6e68 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl
@@ -74,7 +74,7 @@ void main()
finalColor = EDIT_MESH_facedot_color(norAndFlag.w);
/* Bias Facedot Z position in clipspace. */
- gl_Position.z -= (ProjectionMatrix[3][3] == 0.0) ? 0.00035 : 1e-6;
+ gl_Position.z -= (drw_view.winmat[3][3] == 0.0) ? 0.00035 : 1e-6;
gl_PointSize = sizeFaceDot;
bool occluded = test_occlusion();
@@ -87,7 +87,7 @@ void main()
/* Facing based color blend */
vec3 vpos = point_world_to_view(world_pos);
vec3 view_normal = normalize(normal_object_to_view(vnor) + 1e-4);
- vec3 view_vec = (ProjectionMatrix[3][3] == 0.0) ? normalize(vpos) : vec3(0.0, 0.0, 1.0);
+ vec3 view_vec = (drw_view.winmat[3][3] == 0.0) ? normalize(vpos) : vec3(0.0, 0.0, 1.0);
float facing = dot(view_vec, view_normal);
facing = 1.0 - abs(facing) * 0.2;
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl
index bb086e8d9f5..9a3036d5940 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl
@@ -55,9 +55,9 @@ float angle_normalized_v2v2(vec2 v1, vec2 v2)
return (q) ? a : M_PI - a;
}
-float area_ratio_to_stretch(float ratio, float tot_ratio, float inv_tot_ratio)
+float area_ratio_to_stretch(float ratio, float tot_ratio)
{
- ratio *= (ratio > 0.0f) ? tot_ratio : -inv_tot_ratio;
+ ratio *= tot_ratio;
return (ratio > 1.0f) ? (1.0f / ratio) : ratio;
}
@@ -74,7 +74,7 @@ void main()
stretch = stretch;
stretch = 1.0 - stretch * stretch;
#else
- float stretch = 1.0 - area_ratio_to_stretch(ratio, totalAreaRatio, totalAreaRatioInv);
+ float stretch = 1.0 - area_ratio_to_stretch(ratio, totalAreaRatio);
#endif
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl
index b2578970c9b..acaf04219c0 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl
@@ -198,8 +198,8 @@ void main()
vec3 edge = obmat[3].xyz - world_pos;
vec3 n0 = normalize(cross(edge, p0 - world_pos));
vec3 n1 = normalize(cross(edge, world_pos - p1));
- bool persp = (ProjectionMatrix[3][3] == 0.0);
- vec3 V = (persp) ? normalize(ViewMatrixInverse[3].xyz - world_pos) : ViewMatrixInverse[2].xyz;
+ bool persp = (drw_view.winmat[3][3] == 0.0);
+ vec3 V = (persp) ? normalize(drw_view.viewinv[3].xyz - world_pos) : drw_view.viewinv[2].xyz;
/* Discard non-silhouette edges. */
bool facing0 = dot(n0, V) > 0.0;
bool facing1 = dot(n1, V) > 0.0;
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl
index 25f4984f119..54a4231590e 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl
@@ -53,7 +53,7 @@ void main()
P += cameraPos * plane_axes;
float dist, fade;
- bool is_persp = ProjectionMatrix[3][3] == 0.0;
+ bool is_persp = drw_view.winmat[3][3] == 0.0;
if (is_persp) {
vec3 V = cameraPos - P;
dist = length(V);
@@ -83,7 +83,7 @@ void main()
dist = 1.0; /* Avoid branch after. */
if (flag_test(grid_flag, PLANE_XY)) {
- float angle = 1.0 - abs(ViewMatrixInverse[2].z);
+ float angle = 1.0 - abs(drw_view.viewinv[2].z);
dist = 1.0 + angle * 2.0;
angle *= angle;
fade *= 1.0 - angle * angle;
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl
index b81f1a24358..b43b1eb4a52 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl
@@ -39,5 +39,5 @@ void main()
local_pos.z = clamp(local_pos.z, -1.0, 0.0);
}
- gl_Position = ViewProjectionMatrix * vec4(real_pos, 1.0);
+ gl_Position = drw_view.persmat * vec4(real_pos, 1.0);
}
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl
index 29346a44863..25e13e7c212 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl
@@ -15,7 +15,7 @@ void main(void)
vec2 edge_dir = compute_dir(interp_in[0].ss_pos, interp_in[1].ss_pos) *
drw_view.viewport_size_inverse;
- bool is_persp = (ProjectionMatrix[3][3] == 0.0);
+ bool is_persp = (drw_view.winmat[3][3] == 0.0);
float line_size = float(lineThickness) * sizePixel;
view_clipping_distances_set(gl_in[0]);
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl
index bc74a436f5e..e6281f75b8f 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl
@@ -18,7 +18,7 @@ vec2 proj(vec4 pos)
void main()
{
- gl_Position = ViewProjectionMatrix * vec4(pos, 1.0);
+ gl_Position = drw_view.persmat * vec4(pos, 1.0);
interp.ss_pos = proj(gl_Position);
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl
index 5027525b9b3..70892954cd8 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl
@@ -9,7 +9,7 @@
void main()
{
- gl_Position = ViewProjectionMatrix * vec4(pos, 1.0);
+ gl_Position = drw_view.persmat * vec4(pos, 1.0);
gl_PointSize = float(pointSize + 2);
int frame = gl_VertexID + cacheStart;
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl
new file mode 100644
index 00000000000..f9ec475d21f
--- /dev/null
+++ b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl
@@ -0,0 +1,81 @@
+
+#pragma BLENDER_REQUIRE(common_view_clipping_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+#pragma BLENDER_REQUIRE(common_hair_lib.glsl)
+
+uint outline_colorid_get(void)
+{
+ int flag = int(abs(ObjectInfo.w));
+ bool is_active = (flag & DRW_BASE_ACTIVE) != 0;
+
+ if (isTransform) {
+ return 0u; /* colorTransform */
+ }
+ else if (is_active) {
+ return 3u; /* colorActive */
+ }
+ else {
+ return 1u; /* colorSelect */
+ }
+
+ return 0u;
+}
+
+/* Replace top 2 bits (of the 16bit output) by outlineId.
+ * This leaves 16K different IDs to create outlines between objects.
+ vec3 world_pos = point_object_to_world(pos);
+ * SHIFT = (32 - (16 - 2)) */
+#define SHIFT 18u
+
+void main()
+{
+ bool is_persp = (drw_view.winmat[3][3] == 0.0);
+ float time, thickness;
+ vec3 center_wpos, tan, binor;
+
+ hair_get_center_pos_tan_binor_time(is_persp,
+ ModelMatrixInverse,
+ drw_view.viewinv[3].xyz,
+ drw_view.viewinv[2].xyz,
+ center_wpos,
+ tan,
+ binor,
+ time,
+ thickness);
+ vec3 world_pos;
+ if (hairThicknessRes > 1) {
+ /* Calculate the thickness, thicktime, worldpos taken into account the outline. */
+ float outline_width = point_world_to_ndc(center_wpos).w * 1.25 *
+ drw_view.viewport_size_inverse.y * drw_view.wininv[1][1];
+ thickness += outline_width;
+ float thick_time = float(gl_VertexID % hairThicknessRes) / float(hairThicknessRes - 1);
+ thick_time = thickness * (thick_time * 2.0 - 1.0);
+ /* Take object scale into account.
+ * NOTE: This only works fine with uniform scaling. */
+ float scale = 1.0 / length(mat3(ModelMatrixInverse) * binor);
+ world_pos = center_wpos + binor * thick_time * scale;
+ }
+ else {
+ world_pos = center_wpos;
+ }
+
+ gl_Position = point_world_to_ndc(world_pos);
+
+#ifdef USE_GEOM
+ vert.pos = point_world_to_view(world_pos);
+#endif
+
+ /* Small bias to always be on top of the geom. */
+ gl_Position.z -= 1e-3;
+
+ /* ID 0 is nothing (background) */
+ interp.ob_id = uint(resource_handle + 1);
+
+ /* Should be 2 bits only [0..3]. */
+ uint outline_id = outline_colorid_get();
+
+ /* Combine for 16bit uint target. */
+ interp.ob_id = (outline_id << 14u) | ((interp.ob_id << SHIFT) >> SHIFT);
+
+ view_clipping_distances(world_pos);
+}
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl
index 8a196620af9..5e0074e9f0b 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl
@@ -11,7 +11,7 @@ void vert_from_gl_in(int v)
void main()
{
- bool is_persp = (ProjectionMatrix[3][3] == 0.0);
+ bool is_persp = (drw_view.winmat[3][3] == 0.0);
vec3 view_vec = (is_persp) ? normalize(vert[1].pos) : vec3(0.0, 0.0, -1.0);
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl
index b6d5cd96c12..92be9ec3bcb 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl
@@ -22,14 +22,14 @@ void main()
if (!gpStrokeOrder3d) {
/* Stroke order 2D. Project to gpDepthPlane. */
- bool is_persp = ProjectionMatrix[3][3] == 0.0;
+ bool is_persp = drw_view.winmat[3][3] == 0.0;
vec2 uvs = vec2(gl_FragCoord.xy) * drw_view.viewport_size_inverse;
vec3 pos_ndc = vec3(uvs, gl_FragCoord.z) * 2.0 - 1.0;
- vec4 pos_world = ViewProjectionMatrixInverse * vec4(pos_ndc, 1.0);
+ vec4 pos_world = drw_view.persinv * vec4(pos_ndc, 1.0);
vec3 pos = pos_world.xyz / pos_world.w;
vec3 ray_ori = pos;
- vec3 ray_dir = (is_persp) ? (ViewMatrixInverse[3].xyz - pos) : ViewMatrixInverse[2].xyz;
+ vec3 ray_dir = (is_persp) ? (drw_view.viewinv[3].xyz - pos) : drw_view.viewinv[2].xyz;
vec3 isect = ray_plane_intersection(ray_ori, ray_dir, gpDepthPlane);
vec4 ndc = point_world_to_ndc(isect);
gl_FragDepth = (ndc.z / ndc.w) * 0.5 + 0.5;
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl
index fb981a8167a..c48e7cce550 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl
@@ -23,7 +23,7 @@ void main()
#ifdef USE_DOTS
gl_Position = point_world_to_ndc(world_pos);
/* World sized points. */
- gl_PointSize = sizePixel * draw_size * ProjectionMatrix[1][1] * sizeViewport.y / gl_Position.w;
+ gl_PointSize = sizePixel * draw_size * drw_view.winmat[1][1] * sizeViewport.y / gl_Position.w;
#else
if ((vclass & VCLASS_SCREENALIGNED) != 0) {
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl
new file mode 100644
index 00000000000..7af6bdb9fdb
--- /dev/null
+++ b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl
@@ -0,0 +1,5 @@
+
+void main()
+{
+ out_color = vec4(vec3(0.0), 1.0 - mask_weight);
+}
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl
new file mode 100644
index 00000000000..7be3c8e6dfb
--- /dev/null
+++ b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl
@@ -0,0 +1,34 @@
+#pragma BLENDER_REQUIRE(common_hair_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_clipping_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)
+
+float retrieve_selection()
+{
+ if (is_point_domain) {
+ return texelFetch(selection_tx, hair_get_base_id()).r;
+ }
+ return texelFetch(selection_tx, hair_get_strand_id()).r;
+}
+
+void main()
+{
+ bool is_persp = (ProjectionMatrix[3][3] == 0.0);
+ float time, thick_time, thickness;
+ vec3 world_pos, tan, binor;
+ hair_get_pos_tan_binor_time(is_persp,
+ ModelMatrixInverse,
+ ViewMatrixInverse[3].xyz,
+ ViewMatrixInverse[2].xyz,
+ world_pos,
+ tan,
+ binor,
+ time,
+ thickness,
+ thick_time);
+
+ gl_Position = point_world_to_ndc(world_pos);
+
+ mask_weight = 1.0 - (selection_opacity - retrieve_selection() * selection_opacity);
+
+ view_clipping_distances(world_pos);
+}
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl
index 0a498471b46..e1a4a3602e3 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl
@@ -1,4 +1,4 @@
void main()
{
fragColor = color;
-} \ No newline at end of file
+}
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl
index 41bd7791dd7..d189ab1b72c 100644
--- a/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl
+++ b/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl
@@ -72,7 +72,7 @@ void wire_object_color_get(out vec3 rim_col, out vec3 wire_col)
void main()
{
bool no_attr = all(equal(nor, vec3(0)));
- vec3 wnor = no_attr ? ViewMatrixInverse[2].xyz : normalize(normal_object_to_world(nor));
+ vec3 wnor = no_attr ? drw_view.viewinv[2].xyz : normalize(normal_object_to_world(nor));
vec3 wpos = point_object_to_world(pos);
if (isHair) {
@@ -81,8 +81,8 @@ void main()
wnor = -normalize(mat3(obmat) * nor);
}
- bool is_persp = (ProjectionMatrix[3][3] == 0.0);
- vec3 V = (is_persp) ? normalize(ViewMatrixInverse[3].xyz - wpos) : ViewMatrixInverse[2].xyz;
+ bool is_persp = (drw_view.winmat[3][3] == 0.0);
+ vec3 V = (is_persp) ? normalize(drw_view.viewinv[3].xyz - wpos) : drw_view.viewinv[2].xyz;
float facing = dot(wnor, V);
diff --git a/source/blender/draw/engines/select/select_engine.c b/source/blender/draw/engines/select/select_engine.c
index 88ae5ac707e..026a1f52ac1 100644
--- a/source/blender/draw/engines/select/select_engine.c
+++ b/source/blender/draw/engines/select/select_engine.c
@@ -201,7 +201,7 @@ static void select_cache_populate(void *vedata, Object *ob)
if (!e_data.context.is_dirty && sel_data && sel_data->is_drawn) {
/* The object indices have already been drawn. Fill depth pass.
- * Opti: Most of the time this depth pass is not used. */
+ * Optimization: Most of the time this depth pass is not used. */
struct Mesh *me = ob->data;
if (e_data.context.select_mode & SCE_SELECT_FACE) {
struct GPUBatch *geom_faces = DRW_mesh_batch_cache_get_triangles_with_select_id(me);
diff --git a/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl b/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl
index 880f17b0c9d..e7ca868a4ff 100644
--- a/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl
+++ b/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl
@@ -31,9 +31,9 @@ void cavity_compute(vec2 screenco,
/* find the offset in screen space by multiplying a point
* in camera space at the depth of the point by the projection matrix. */
vec2 offset;
- float homcoord = ProjectionMatrix[2][3] * position.z + ProjectionMatrix[3][3];
- offset.x = ProjectionMatrix[0][0] * world_data.cavity_distance / homcoord;
- offset.y = ProjectionMatrix[1][1] * world_data.cavity_distance / homcoord;
+ float homcoord = drw_view.winmat[2][3] * position.z + drw_view.winmat[3][3];
+ offset.x = drw_view.winmat[0][0] * world_data.cavity_distance / homcoord;
+ offset.y = drw_view.winmat[1][1] * world_data.cavity_distance / homcoord;
/* convert from -1.0...1.0 range to 0.0..1.0 for easy use with texture coordinates */
offset *= 0.5;
diff --git a/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl
index d8f8a1cc03f..11d7c85d43a 100644
--- a/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl
+++ b/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl
@@ -16,7 +16,7 @@
(dof_aperturesize * (dof_distance / zdepth - 1.0) * dof_invsensorsize)
#define linear_depth(z) \
- ((ProjectionMatrix[3][3] == 0.0) ? \
+ ((drw_view.winmat[3][3] == 0.0) ? \
(nearFar.x * nearFar.y) / (z * (nearFar.x - nearFar.y) + nearFar.y) : \
(z * 2.0 - 1.0) * nearFar.y)
diff --git a/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl b/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl
index cfc94ef7c9a..04fef8d8b32 100644
--- a/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl
+++ b/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl
@@ -43,13 +43,13 @@ void workbench_hair_random_material(float rand,
void main()
{
- bool is_persp = (ProjectionMatrix[3][3] == 0.0);
+ bool is_persp = (drw_view.winmat[3][3] == 0.0);
float time, thick_time, thickness;
vec3 world_pos, tan, binor;
hair_get_pos_tan_binor_time(is_persp,
ModelMatrixInverse,
- ViewMatrixInverse[3].xyz,
- ViewMatrixInverse[2].xyz,
+ drw_view.viewinv[3].xyz,
+ drw_view.viewinv[2].xyz,
world_pos,
tan,
binor,
diff --git a/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl
index d8f1b83d747..213279b1913 100644
--- a/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl
+++ b/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl
@@ -24,7 +24,7 @@ float linear_zdepth(float depth, vec4 viewvecs[2], mat4 proj_mat)
*/
float calculate_transparent_weight(void)
{
- float z = linear_zdepth(gl_FragCoord.z, ViewVecs, ProjectionMatrix);
+ float z = linear_zdepth(gl_FragCoord.z, drw_view.viewvecs, drw_view.winmat);
#if 0
/* Eq 10 : Good for surfaces with varying opacity (like particles) */
float a = min(1.0, alpha * 10.0) + 0.01;
diff --git a/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl
index 49e26cd3e0c..afba3a0d784 100644
--- a/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl
+++ b/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl
@@ -237,7 +237,7 @@ void main()
fragColor = vec4(Lscat, Tr);
#else
vec2 screen_uv = gl_FragCoord.xy / vec2(textureSize(depthBuffer, 0).xy);
- bool is_persp = ProjectionMatrix[3][3] == 0.0;
+ bool is_persp = drw_view.winmat[3][3] == 0.0;
vec3 volume_center = ModelMatrix[3].xyz;
diff --git a/source/blender/draw/engines/workbench/workbench_engine.c b/source/blender/draw/engines/workbench/workbench_engine.c
index 9eb35c25bf4..a0459a967f3 100644
--- a/source/blender/draw/engines/workbench/workbench_engine.c
+++ b/source/blender/draw/engines/workbench/workbench_engine.c
@@ -409,7 +409,7 @@ void workbench_cache_populate(void *ved, Object *ob)
return;
}
- if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL, OB_POINTCLOUD)) {
+ if (ELEM(ob->type, OB_MESH, OB_POINTCLOUD)) {
bool use_sculpt_pbvh, use_texpaint_mode, draw_shadow, has_transp_mat = false;
eV3DShadingColorType color_type = workbench_color_type_get(
wpd, ob, &use_sculpt_pbvh, &use_texpaint_mode, &draw_shadow);
diff --git a/source/blender/draw/engines/workbench/workbench_render.c b/source/blender/draw/engines/workbench/workbench_render.c
index e5dcf6c5624..931f6a2dc92 100644
--- a/source/blender/draw/engines/workbench/workbench_render.c
+++ b/source/blender/draw/engines/workbench/workbench_render.c
@@ -17,6 +17,7 @@
#include "ED_view3d.h"
+#include "GPU_context.h"
#include "GPU_shader.h"
#include "DEG_depsgraph.h"
@@ -188,6 +189,10 @@ void workbench_render(void *ved, RenderEngine *engine, RenderLayer *render_layer
workbench_draw_finish(data);
+ /* Perform render step between samples to allow
+ * flushing of freed GPUBackend resources. */
+ GPU_render_step();
+
/* Write render output. */
const char *viewname = RE_GetActiveRenderView(engine->re);
RenderPass *rp = RE_pass_find_by_name(render_layer, RE_PASSNAME_COMBINED, viewname);