diff options
Diffstat (limited to 'source/blender/draw')
299 files changed, 22316 insertions, 3931 deletions
diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt index 9cb3743dd02..e6b532ed25a 100644 --- a/source/blender/draw/CMakeLists.txt +++ b/source/blender/draw/CMakeLists.txt @@ -23,11 +23,11 @@ set(INC ../nodes ../render ../render/intern + ../compositor/realtime_compositor ../windowmanager ../../../intern/atomic ../../../intern/clog - ../../../intern/glew-mx ../../../intern/guardedalloc ../../../intern/opensubdiv @@ -67,28 +67,27 @@ set(SRC intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc intern/mesh_extractors/extract_mesh_vbo_tan.cc intern/mesh_extractors/extract_mesh_vbo_uv.cc - intern/mesh_extractors/extract_mesh_vbo_vcol.cc intern/mesh_extractors/extract_mesh_vbo_weights.cc intern/draw_attributes.cc intern/draw_cache_impl_curve.cc intern/draw_cache_impl_curves.cc - intern/draw_cache_impl_displist.c intern/draw_cache_impl_gpencil.c intern/draw_cache_impl_lattice.c intern/draw_cache_impl_mesh.cc - intern/draw_cache_impl_metaball.c intern/draw_cache_impl_particles.c - intern/draw_cache_impl_pointcloud.c + intern/draw_cache_impl_pointcloud.cc intern/draw_cache_impl_subdivision.cc intern/draw_cache_impl_volume.c intern/draw_color_management.cc + intern/draw_command.cc intern/draw_common.c intern/draw_curves.cc - intern/draw_debug.c + intern/draw_debug.cc intern/draw_fluid.c intern/draw_hair.cc intern/draw_instance_data.c intern/draw_manager.c + intern/draw_manager.cc intern/draw_manager_data.c intern/draw_manager_exec.c intern/draw_manager_profiling.c @@ -104,6 +103,7 @@ set(SRC intern/smaa_textures.c engines/basic/basic_engine.c engines/basic/basic_shader.c + engines/compositor/compositor_engine.cc engines/image/image_engine.cc engines/image/image_shader.cc engines/eevee/eevee_bloom.c @@ -135,10 +135,18 @@ set(SRC engines/eevee/eevee_temporal_sampling.c engines/eevee/eevee_volumes.c engines/eevee_next/eevee_camera.cc + engines/eevee_next/eevee_cryptomatte.cc + engines/eevee_next/eevee_depth_of_field.cc 
engines/eevee_next/eevee_engine.cc + engines/eevee_next/eevee_film.cc + engines/eevee_next/eevee_hizbuffer.cc engines/eevee_next/eevee_instance.cc + engines/eevee_next/eevee_light.cc engines/eevee_next/eevee_material.cc + engines/eevee_next/eevee_motion_blur.cc engines/eevee_next/eevee_pipeline.cc + engines/eevee_next/eevee_renderbuffers.cc + engines/eevee_next/eevee_sampling.cc engines/eevee_next/eevee_shader.cc engines/eevee_next/eevee_sync.cc engines/eevee_next/eevee_velocity.cc @@ -191,6 +199,7 @@ set(SRC engines/overlay/overlay_paint.c engines/overlay/overlay_particle.c engines/overlay/overlay_sculpt.c + engines/overlay/overlay_sculpt_curves.cc engines/overlay/overlay_shader.c engines/overlay/overlay_volume.c engines/overlay/overlay_wireframe.c @@ -205,31 +214,58 @@ set(SRC intern/draw_cache_impl.h intern/draw_cache_inline.h intern/draw_color_management.h + intern/draw_command.hh intern/draw_common.h intern/draw_common_shader_shared.h intern/draw_curves_private.h intern/draw_debug.h + intern/draw_debug.hh intern/draw_hair_private.h + intern/draw_handle.hh intern/draw_instance_data.h intern/draw_manager.h + intern/draw_manager.hh intern/draw_manager_profiling.h intern/draw_manager_testing.h intern/draw_manager_text.h + intern/draw_pass.hh + intern/draw_resource.cc + intern/draw_resource.hh intern/draw_shader.h intern/draw_shader_shared.h + intern/draw_state.h intern/draw_subdivision.h intern/draw_texture_pool.h + intern/draw_view.cc intern/draw_view.h + intern/draw_view.hh intern/draw_view_data.h intern/mesh_extractors/extract_mesh.hh intern/smaa_textures.h engines/basic/basic_engine.h engines/basic/basic_private.h + engines/compositor/compositor_engine.h engines/eevee/eevee_engine.h engines/eevee/eevee_lightcache.h engines/eevee/eevee_lut.h engines/eevee/eevee_private.h + engines/eevee_next/eevee_camera.hh + engines/eevee_next/eevee_depth_of_field.hh engines/eevee_next/eevee_engine.h + engines/eevee_next/eevee_film.hh + engines/eevee_next/eevee_hizbuffer.hh + 
engines/eevee_next/eevee_instance.hh + engines/eevee_next/eevee_light.hh + engines/eevee_next/eevee_material.hh + engines/eevee_next/eevee_motion_blur.hh + engines/eevee_next/eevee_pipeline.hh + engines/eevee_next/eevee_renderbuffers.hh + engines/eevee_next/eevee_sampling.hh + engines/eevee_next/eevee_shader.hh + engines/eevee_next/eevee_sync.hh + engines/eevee_next/eevee_velocity.hh + engines/eevee_next/eevee_view.hh + engines/eevee_next/eevee_world.hh engines/external/external_engine.h engines/image/image_batches.hh engines/image/image_buffer_cache.hh @@ -256,6 +292,7 @@ set(SRC set(LIB bf_blenkernel bf_blenlib + bf_realtime_compositor bf_windowmanager ) @@ -336,6 +373,7 @@ set(GLSL_SRC engines/eevee/shaders/raytrace_lib.glsl engines/eevee/shaders/renderpass_lib.glsl engines/eevee/shaders/renderpass_postprocess_frag.glsl + engines/eevee/shaders/cryptomatte_lib.glsl engines/eevee/shaders/cryptomatte_frag.glsl engines/eevee/shaders/cryptomatte_vert.glsl engines/eevee/shaders/ltc_lib.glsl @@ -350,6 +388,7 @@ set(GLSL_SRC engines/eevee/shaders/volumetric_frag.glsl engines/eevee/shaders/volumetric_geom.glsl engines/eevee/shaders/volumetric_vert.glsl + engines/eevee/shaders/volumetric_resolve_comp.glsl engines/eevee/shaders/volumetric_resolve_frag.glsl engines/eevee/shaders/volumetric_scatter_frag.glsl engines/eevee/shaders/volumetric_integration_frag.glsl @@ -357,18 +396,54 @@ set(GLSL_SRC engines/eevee_next/shaders/eevee_attributes_lib.glsl engines/eevee_next/shaders/eevee_camera_lib.glsl + engines/eevee_next/shaders/eevee_colorspace_lib.glsl + engines/eevee_next/shaders/eevee_cryptomatte_lib.glsl + engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl + engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl + 
engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl + engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl + engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl + engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl + engines/eevee_next/shaders/eevee_film_comp.glsl + engines/eevee_next/shaders/eevee_film_cryptomatte_post_comp.glsl + engines/eevee_next/shaders/eevee_film_frag.glsl + engines/eevee_next/shaders/eevee_film_lib.glsl engines/eevee_next/shaders/eevee_geom_curves_vert.glsl engines/eevee_next/shaders/eevee_geom_gpencil_vert.glsl engines/eevee_next/shaders/eevee_geom_mesh_vert.glsl engines/eevee_next/shaders/eevee_geom_world_vert.glsl + engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl + engines/eevee_next/shaders/eevee_hiz_update_comp.glsl + engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl + engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl + engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl + engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl + engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl + engines/eevee_next/shaders/eevee_light_eval_lib.glsl + engines/eevee_next/shaders/eevee_light_iter_lib.glsl + engines/eevee_next/shaders/eevee_light_lib.glsl + engines/eevee_next/shaders/eevee_ltc_lib.glsl + engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl + engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl + engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl + 
engines/eevee_next/shaders/eevee_motion_blur_lib.glsl engines/eevee_next/shaders/eevee_nodetree_lib.glsl + engines/eevee_next/shaders/eevee_sampling_lib.glsl engines/eevee_next/shaders/eevee_surf_deferred_frag.glsl engines/eevee_next/shaders/eevee_surf_depth_frag.glsl engines/eevee_next/shaders/eevee_surf_forward_frag.glsl engines/eevee_next/shaders/eevee_surf_lib.glsl engines/eevee_next/shaders/eevee_surf_world_frag.glsl engines/eevee_next/shaders/eevee_velocity_lib.glsl - engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl engines/eevee_next/eevee_defines.hh engines/eevee_next/eevee_shader_shared.hh @@ -403,22 +478,25 @@ set(GLSL_SRC engines/workbench/workbench_shader_shared.h + intern/shaders/common_aabb_lib.glsl intern/shaders/common_attribute_lib.glsl intern/shaders/common_colormanagement_lib.glsl + intern/shaders/common_debug_draw_lib.glsl + intern/shaders/common_debug_print_lib.glsl + intern/shaders/common_debug_shape_lib.glsl + intern/shaders/common_fullscreen_vert.glsl + intern/shaders/common_fxaa_lib.glsl intern/shaders/common_globals_lib.glsl intern/shaders/common_gpencil_lib.glsl - intern/shaders/common_pointcloud_lib.glsl intern/shaders/common_hair_lib.glsl - intern/shaders/common_hair_refine_vert.glsl intern/shaders/common_hair_refine_comp.glsl - intern/shaders/common_math_lib.glsl + intern/shaders/common_hair_refine_vert.glsl + intern/shaders/common_intersect_lib.glsl intern/shaders/common_math_geom_lib.glsl - intern/shaders/common_view_clipping_lib.glsl - intern/shaders/common_view_lib.glsl - intern/shaders/common_fxaa_lib.glsl + intern/shaders/common_math_lib.glsl + intern/shaders/common_pointcloud_lib.glsl + intern/shaders/common_shape_lib.glsl intern/shaders/common_smaa_lib.glsl - intern/shaders/common_fullscreen_vert.glsl - intern/shaders/common_subdiv_custom_data_interp_comp.glsl intern/shaders/common_subdiv_ibo_lines_comp.glsl intern/shaders/common_subdiv_ibo_tris_comp.glsl @@ -431,8 +509,20 @@ set(GLSL_SRC 
intern/shaders/common_subdiv_vbo_edituv_strech_area_comp.glsl intern/shaders/common_subdiv_vbo_lnor_comp.glsl intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl - + intern/shaders/common_view_clipping_lib.glsl + intern/shaders/common_view_lib.glsl + intern/shaders/draw_command_generate_comp.glsl + intern/shaders/draw_debug_draw_display_frag.glsl + intern/shaders/draw_debug_draw_display_vert.glsl + intern/shaders/draw_debug_info.hh + intern/shaders/draw_debug_print_display_frag.glsl + intern/shaders/draw_debug_print_display_vert.glsl + intern/shaders/draw_resource_finalize_comp.glsl + intern/shaders/draw_visibility_comp.glsl + + intern/draw_command_shared.hh intern/draw_common_shader_shared.h + intern/draw_defines.h intern/draw_shader_shared.h engines/gpencil/shaders/gpencil_frag.glsl @@ -454,6 +544,7 @@ set(GLSL_SRC engines/basic/shaders/basic_conservative_depth_geom.glsl engines/basic/shaders/basic_depth_vert.glsl + engines/basic/shaders/basic_depth_curves_vert.glsl engines/basic/shaders/basic_depth_pointcloud_vert.glsl engines/basic/shaders/basic_depth_frag.glsl @@ -531,6 +622,7 @@ set(GLSL_SRC engines/overlay/shaders/overlay_motion_path_line_vert.glsl engines/overlay/shaders/overlay_motion_path_point_vert.glsl engines/overlay/shaders/overlay_outline_detect_frag.glsl + engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl engines/overlay/shaders/overlay_outline_prepass_frag.glsl engines/overlay/shaders/overlay_outline_prepass_geom.glsl engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl @@ -550,6 +642,8 @@ set(GLSL_SRC engines/overlay/shaders/overlay_particle_vert.glsl engines/overlay/shaders/overlay_point_varying_color_frag.glsl engines/overlay/shaders/overlay_point_varying_color_varying_outline_aa_frag.glsl + engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl + engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl engines/overlay/shaders/overlay_sculpt_mask_frag.glsl 
engines/overlay/shaders/overlay_sculpt_mask_vert.glsl engines/overlay/shaders/overlay_uniform_color_frag.glsl @@ -633,6 +727,7 @@ add_dependencies(bf_draw bf_dna) if(WITH_GTESTS) if(WITH_OPENGL_DRAW_TESTS) set(TEST_SRC + tests/draw_pass_test.cc tests/draw_testing.cc tests/shaders_test.cc diff --git a/source/blender/draw/engines/basic/basic_engine.c b/source/blender/draw/engines/basic/basic_engine.c index 04a3c27959d..975d9e299bf 100644 --- a/source/blender/draw/engines/basic/basic_engine.c +++ b/source/blender/draw/engines/basic/basic_engine.c @@ -53,6 +53,7 @@ typedef struct BASIC_PrivateData { DRWShadingGroup *depth_shgrp[2]; DRWShadingGroup *depth_shgrp_cull[2]; DRWShadingGroup *depth_hair_shgrp[2]; + DRWShadingGroup *depth_curves_shgrp[2]; DRWShadingGroup *depth_pointcloud_shgrp[2]; bool use_material_slot_selection; } BASIC_PrivateData; /* Transient data */ @@ -99,6 +100,9 @@ static void basic_cache_init(void *vedata) stl->g_data->depth_hair_shgrp[i] = grp = DRW_shgroup_create( BASIC_shaders_depth_sh_get(draw_ctx->sh_cfg), psl->depth_pass[i]); + stl->g_data->depth_curves_shgrp[i] = grp = DRW_shgroup_create( + BASIC_shaders_curves_depth_sh_get(draw_ctx->sh_cfg), psl->depth_pass[i]); + sh = DRW_state_is_select() ? BASIC_shaders_depth_conservative_sh_get(draw_ctx->sh_cfg) : BASIC_shaders_depth_sh_get(draw_ctx->sh_cfg); state |= DRW_STATE_CULL_BACK; @@ -156,8 +160,12 @@ static void basic_cache_populate(void *vedata, Object *ob) basic_cache_populate_particles(vedata, ob); } - /* Make flat object selectable in ortho view if wireframe is enabled. */ const bool do_in_front = (ob->dtx & OB_DRAW_IN_FRONT) != 0; + if (ob->type == OB_CURVES) { + DRW_shgroup_curves_create_sub(ob, stl->g_data->depth_curves_shgrp[do_in_front], NULL); + } + + /* Make flat object selectable in ortho view if wireframe is enabled. 
*/ if ((draw_ctx->v3d->overlay.flag & V3D_OVERLAY_WIREFRAMES) || (draw_ctx->v3d->shading.type == OB_WIRE) || (ob->dtx & OB_DRAWWIRE) || (ob->dt == OB_WIRE)) { int flat_axis = 0; diff --git a/source/blender/draw/engines/basic/basic_private.h b/source/blender/draw/engines/basic/basic_private.h index 22b458baca2..197831b9ee8 100644 --- a/source/blender/draw/engines/basic/basic_private.h +++ b/source/blender/draw/engines/basic/basic_private.h @@ -11,6 +11,7 @@ extern "C" { GPUShader *BASIC_shaders_depth_sh_get(eGPUShaderConfig config); GPUShader *BASIC_shaders_pointcloud_depth_sh_get(eGPUShaderConfig config); +GPUShader *BASIC_shaders_curves_depth_sh_get(eGPUShaderConfig config); GPUShader *BASIC_shaders_depth_conservative_sh_get(eGPUShaderConfig config); GPUShader *BASIC_shaders_pointcloud_depth_conservative_sh_get(eGPUShaderConfig config); void BASIC_shaders_free(void); diff --git a/source/blender/draw/engines/basic/basic_shader.c b/source/blender/draw/engines/basic/basic_shader.c index 3d40c627fff..5b7636ca9fd 100644 --- a/source/blender/draw/engines/basic/basic_shader.c +++ b/source/blender/draw/engines/basic/basic_shader.c @@ -24,6 +24,7 @@ typedef struct BASIC_Shaders { /* Depth Pre Pass */ struct GPUShader *depth; struct GPUShader *pointcloud_depth; + struct GPUShader *curves_depth; struct GPUShader *depth_conservative; struct GPUShader *pointcloud_depth_conservative; } BASIC_Shaders; @@ -53,6 +54,16 @@ GPUShader *BASIC_shaders_pointcloud_depth_sh_get(eGPUShaderConfig config) return sh_data->pointcloud_depth; } +GPUShader *BASIC_shaders_curves_depth_sh_get(eGPUShaderConfig config) +{ + BASIC_Shaders *sh_data = &e_data.sh_data[config]; + if (sh_data->curves_depth == NULL) { + sh_data->curves_depth = GPU_shader_create_from_info_name( + config == GPU_SHADER_CFG_CLIPPED ? 
"basic_depth_curves_clipped" : "basic_depth_curves"); + } + return sh_data->curves_depth; +} + GPUShader *BASIC_shaders_depth_conservative_sh_get(eGPUShaderConfig config) { BASIC_Shaders *sh_data = &e_data.sh_data[config]; diff --git a/source/blender/draw/engines/basic/shaders/basic_depth_curves_vert.glsl b/source/blender/draw/engines/basic/shaders/basic_depth_curves_vert.glsl new file mode 100644 index 00000000000..b0da9754fc6 --- /dev/null +++ b/source/blender/draw/engines/basic/shaders/basic_depth_curves_vert.glsl @@ -0,0 +1,27 @@ + +#pragma BLENDER_REQUIRE(common_hair_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_clipping_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) + +void main() +{ + GPU_INTEL_VERTEX_SHADER_WORKAROUND + + bool is_persp = (ProjectionMatrix[3][3] == 0.0); + float time, thick_time, thickness; + vec3 world_pos, tan, binor; + hair_get_pos_tan_binor_time(is_persp, + ModelMatrixInverse, + ViewMatrixInverse[3].xyz, + ViewMatrixInverse[2].xyz, + world_pos, + tan, + binor, + time, + thickness, + thick_time); + + gl_Position = point_world_to_ndc(world_pos); + + view_clipping_distances(world_pos); +} diff --git a/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh b/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh index bae50eb48fa..561cef0e442 100644 --- a/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh +++ b/source/blender/draw/engines/basic/shaders/infos/basic_depth_info.hh @@ -27,6 +27,9 @@ GPU_SHADER_CREATE_INFO(basic_pointcloud) .vertex_source("basic_depth_pointcloud_vert.glsl") .additional_info("draw_pointcloud"); +GPU_SHADER_CREATE_INFO(basic_curves) + .vertex_source("basic_depth_curves_vert.glsl") + .additional_info("draw_hair"); /** \} */ /* -------------------------------------------------------------------- */ @@ -46,7 +49,8 @@ GPU_SHADER_CREATE_INFO(basic_pointcloud) #define BASIC_OBTYPE_VARIATIONS(prefix, ...) 
\ BASIC_CONSERVATIVE_VARIATIONS(prefix##_mesh, "basic_mesh", __VA_ARGS__) \ - BASIC_CONSERVATIVE_VARIATIONS(prefix##_pointcloud, "basic_pointcloud", __VA_ARGS__) + BASIC_CONSERVATIVE_VARIATIONS(prefix##_pointcloud, "basic_pointcloud", __VA_ARGS__) \ + BASIC_CLIPPING_VARIATIONS(prefix##_curves, "basic_curves", __VA_ARGS__) /** \} */ diff --git a/source/blender/draw/engines/compositor/compositor_engine.cc b/source/blender/draw/engines/compositor/compositor_engine.cc new file mode 100644 index 00000000000..f36a59a4ce6 --- /dev/null +++ b/source/blender/draw/engines/compositor/compositor_engine.cc @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_listbase.h" +#include "BLI_math_vec_types.hh" +#include "BLI_string_ref.hh" +#include "BLI_utildefines.h" + +#include "BLT_translation.h" + +#include "DNA_ID_enums.h" +#include "DNA_scene_types.h" + +#include "DEG_depsgraph_query.h" + +#include "DRW_render.h" + +#include "IMB_colormanagement.h" + +#include "COM_context.hh" +#include "COM_evaluator.hh" +#include "COM_texture_pool.hh" + +#include "GPU_texture.h" + +namespace blender::draw::compositor { + +class TexturePool : public realtime_compositor::TexturePool { + public: + GPUTexture *allocate_texture(int2 size, eGPUTextureFormat format) override + { + DrawEngineType *owner = (DrawEngineType *)this; + return DRW_texture_pool_query_2d(size.x, size.y, format, owner); + } +}; + +class Context : public realtime_compositor::Context { + private: + /* A pointer to the info message of the compositor engine. This is a char array of size + * GPU_INFO_SIZE. The message is cleared prior to updating or evaluating the compositor. 
*/ + char *info_message_; + + public: + Context(realtime_compositor::TexturePool &texture_pool, char *info_message) + : realtime_compositor::Context(texture_pool), info_message_(info_message) + { + } + + const Scene *get_scene() const override + { + return DRW_context_state_get()->scene; + } + + int2 get_output_size() override + { + return int2(float2(DRW_viewport_size_get())); + } + + GPUTexture *get_output_texture() override + { + return DRW_viewport_texture_list_get()->color; + } + + GPUTexture *get_input_texture(int UNUSED(view_layer), eScenePassType UNUSED(pass_type)) override + { + return get_output_texture(); + } + + StringRef get_view_name() override + { + const SceneRenderView *view = static_cast<SceneRenderView *>( + BLI_findlink(&get_scene()->r.views, DRW_context_state_get()->v3d->multiview_eye)); + return view->name; + } + + void set_info_message(StringRef message) const override + { + message.copy(info_message_, GPU_INFO_SIZE); + } +}; + +class Engine { + private: + TexturePool texture_pool_; + Context context_; + realtime_compositor::Evaluator evaluator_; + /* Stores the viewport size at the time the last compositor evaluation happened. See the + * update_viewport_size method for more information. */ + int2 last_viewport_size_; + + public: + Engine(char *info_message) + : context_(texture_pool_, info_message), + evaluator_(context_, node_tree()), + last_viewport_size_(context_.get_output_size()) + { + } + + /* Update the viewport size and evaluate the compositor. */ + void draw() + { + update_viewport_size(); + evaluator_.evaluate(); + } + + /* If the size of the viewport changed from the last time the compositor was evaluated, update + * the viewport size and reset the evaluator. That's because the evaluator compiles the node tree + * in a manner that is specifically optimized for the size of the viewport. This should be called + * before evaluating the compositor. 
*/ + void update_viewport_size() + { + if (last_viewport_size_ == context_.get_output_size()) { + return; + } + + last_viewport_size_ = context_.get_output_size(); + + evaluator_.reset(); + } + + /* If the compositor node tree changed, reset the evaluator. */ + void update(const Depsgraph *depsgraph) + { + if (DEG_id_type_updated(depsgraph, ID_NT)) { + evaluator_.reset(); + } + } + + /* Get a reference to the compositor node tree. */ + static bNodeTree &node_tree() + { + return *DRW_context_state_get()->scene->nodetree; + } +}; + +} // namespace blender::draw::compositor + +using namespace blender::draw::compositor; + +struct COMPOSITOR_Data { + DrawEngineType *engine_type; + DRWViewportEmptyList *fbl; + DRWViewportEmptyList *txl; + DRWViewportEmptyList *psl; + DRWViewportEmptyList *stl; + Engine *instance_data; + char info[GPU_INFO_SIZE]; +}; + +static void compositor_engine_init(void *data) +{ + COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data); + + if (!compositor_data->instance_data) { + compositor_data->instance_data = new Engine(compositor_data->info); + } +} + +static void compositor_engine_free(void *instance_data) +{ + Engine *engine = static_cast<Engine *>(instance_data); + delete engine; +} + +static void compositor_engine_draw(void *data) +{ + const COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data); + compositor_data->instance_data->draw(); +} + +static void compositor_engine_update(void *data) +{ + COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data); + + /* Clear any info message that was set in a previous update. 
*/ + compositor_data->info[0] = '\0'; + + if (compositor_data->instance_data) { + compositor_data->instance_data->update(DRW_context_state_get()->depsgraph); + } +} + +extern "C" { + +static const DrawEngineDataSize compositor_data_size = DRW_VIEWPORT_DATA_SIZE(COMPOSITOR_Data); + +DrawEngineType draw_engine_compositor_type = { + nullptr, /* next */ + nullptr, /* prev */ + N_("Compositor"), /* idname */ + &compositor_data_size, /* vedata_size */ + &compositor_engine_init, /* engine_init */ + nullptr, /* engine_free */ + &compositor_engine_free, /* instance_free */ + nullptr, /* cache_init */ + nullptr, /* cache_populate */ + nullptr, /* cache_finish */ + &compositor_engine_draw, /* draw_scene */ + &compositor_engine_update, /* view_update */ + nullptr, /* id_update */ + nullptr, /* render_to_image */ + nullptr, /* store_metadata */ +}; +} diff --git a/source/blender/draw/engines/compositor/compositor_engine.h b/source/blender/draw/engines/compositor/compositor_engine.h new file mode 100644 index 00000000000..5de0de8a0b3 --- /dev/null +++ b/source/blender/draw/engines/compositor/compositor_engine.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +extern DrawEngineType draw_engine_compositor_type; + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/draw/engines/eevee/eevee_bloom.c b/source/blender/draw/engines/eevee/eevee_bloom.c index d12ce7213f9..4528027a9ea 100644 --- a/source/blender/draw/engines/eevee/eevee_bloom.c +++ b/source/blender/draw/engines/eevee/eevee_bloom.c @@ -125,7 +125,8 @@ static DRWShadingGroup *eevee_create_bloom_pass(const char *name, struct GPUShader *sh, DRWPass **pass, bool upsample, - bool resolve) + bool resolve, + bool resolve_add_base) { struct GPUBatch *quad = DRW_cache_fullscreen_quad_get(); @@ -141,7 +142,7 @@ static DRWShadingGroup *eevee_create_bloom_pass(const char *name, } if (resolve) { DRW_shgroup_uniform_vec3(grp, "bloomColor", 
effects->bloom_color, 1); - DRW_shgroup_uniform_bool_copy(grp, "bloomAddBase", true); + DRW_shgroup_uniform_bool_copy(grp, "bloomAddBase", resolve_add_base); } return grp; @@ -193,18 +194,21 @@ void EEVEE_bloom_cache_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *ved EEVEE_shaders_bloom_downsample_get(use_antiflicker), &psl->bloom_downsample_first, false, + false, false); eevee_create_bloom_pass("Bloom Downsample", effects, EEVEE_shaders_bloom_downsample_get(false), &psl->bloom_downsample, false, + false, false); eevee_create_bloom_pass("Bloom Upsample", effects, EEVEE_shaders_bloom_upsample_get(use_highres), &psl->bloom_upsample, true, + false, false); grp = eevee_create_bloom_pass("Bloom Blit", @@ -212,6 +216,7 @@ void EEVEE_bloom_cache_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *ved EEVEE_shaders_bloom_blit_get(use_antiflicker), &psl->bloom_blit, false, + false, false); DRW_shgroup_uniform_vec4(grp, "curveThreshold", effects->bloom_curve_threshold, 1); DRW_shgroup_uniform_float(grp, "clampIntensity", &effects->bloom_clamp, 1); @@ -221,6 +226,7 @@ void EEVEE_bloom_cache_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *ved EEVEE_shaders_bloom_resolve_get(use_highres), &psl->bloom_resolve, true, + true, true); } } @@ -304,13 +310,13 @@ void EEVEE_bloom_output_init(EEVEE_ViewLayerData *UNUSED(sldata), {GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(txl->bloom_accum)}); /* Create Pass and shgroup. 
*/ - DRWShadingGroup *grp = eevee_create_bloom_pass("Bloom Accumulate", - effects, - EEVEE_shaders_bloom_resolve_get(use_highres), - &psl->bloom_accum_ps, - true, - true); - DRW_shgroup_uniform_bool_copy(grp, "bloomAddBase", false); + eevee_create_bloom_pass("Bloom Accumulate", + effects, + EEVEE_shaders_bloom_resolve_get(use_highres), + &psl->bloom_accum_ps, + true, + true, + false); } void EEVEE_bloom_output_accumulate(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *vedata) diff --git a/source/blender/draw/engines/eevee/eevee_cryptomatte.c b/source/blender/draw/engines/eevee/eevee_cryptomatte.c index 33063e14c03..d805a039e8f 100644 --- a/source/blender/draw/engines/eevee/eevee_cryptomatte.c +++ b/source/blender/draw/engines/eevee/eevee_cryptomatte.c @@ -25,7 +25,6 @@ * they take into account to create the render passes. When accurate mode is off the number of * levels is used as the number of cryptomatte samples to take. When accuracy mode is on the number * of render samples is used. 
- * */ #include "DRW_engine.h" @@ -94,7 +93,7 @@ BLI_INLINE int eevee_cryptomatte_pixel_stride(const ViewLayer *view_layer) /** \} */ /* -------------------------------------------------------------------- */ -/** \name Init Renderpasses +/** \name Init Render-Passes * \{ */ void EEVEE_cryptomatte_renderpasses_init(EEVEE_Data *vedata) @@ -249,7 +248,9 @@ void EEVEE_cryptomatte_object_curves_cache_populate(EEVEE_Data *vedata, { BLI_assert(ob->type == OB_CURVES); Material *material = BKE_object_material_get_eval(ob, CURVES_MATERIAL_NR); - eevee_cryptomatte_curves_cache_populate(vedata, sldata, ob, NULL, NULL, material); + DRWShadingGroup *grp = eevee_cryptomatte_shading_group_create( + vedata, sldata, ob, material, true); + DRW_shgroup_curves_create_sub(ob, grp, NULL); } void EEVEE_cryptomatte_particle_hair_cache_populate(EEVEE_Data *vedata, @@ -420,27 +421,31 @@ void EEVEE_cryptomatte_output_accumulate(EEVEE_ViewLayerData *UNUSED(sldata), EE void EEVEE_cryptomatte_update_passes(RenderEngine *engine, Scene *scene, ViewLayer *view_layer) { + /* NOTE: Name channels lowercase rgba so that compression rules check in OpenEXR DWA code uses + * lossless compression. Reportedly this naming is the only one which works good from the + * interoperability point of view. Using XYZW naming is not portable. 
*/ + char cryptomatte_pass_name[MAX_NAME]; const short num_passes = eevee_cryptomatte_passes_per_layer(view_layer); if ((view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_OBJECT) != 0) { for (short pass = 0; pass < num_passes; pass++) { BLI_snprintf_rlen(cryptomatte_pass_name, MAX_NAME, "CryptoObject%02d", pass); RE_engine_register_pass( - engine, scene, view_layer, cryptomatte_pass_name, 4, "RGBA", SOCK_RGBA); + engine, scene, view_layer, cryptomatte_pass_name, 4, "rgba", SOCK_RGBA); } } if ((view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_MATERIAL) != 0) { for (short pass = 0; pass < num_passes; pass++) { BLI_snprintf_rlen(cryptomatte_pass_name, MAX_NAME, "CryptoMaterial%02d", pass); RE_engine_register_pass( - engine, scene, view_layer, cryptomatte_pass_name, 4, "RGBA", SOCK_RGBA); + engine, scene, view_layer, cryptomatte_pass_name, 4, "rgba", SOCK_RGBA); } } if ((view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_ASSET) != 0) { for (short pass = 0; pass < num_passes; pass++) { BLI_snprintf_rlen(cryptomatte_pass_name, MAX_NAME, "CryptoAsset%02d", pass); RE_engine_register_pass( - engine, scene, view_layer, cryptomatte_pass_name, 4, "RGBA", SOCK_RGBA); + engine, scene, view_layer, cryptomatte_pass_name, 4, "rgba", SOCK_RGBA); } } } diff --git a/source/blender/draw/engines/eevee/eevee_engine.c b/source/blender/draw/engines/eevee/eevee_engine.c index 227757bad23..5ae4b730cfa 100644 --- a/source/blender/draw/engines/eevee/eevee_engine.c +++ b/source/blender/draw/engines/eevee/eevee_engine.c @@ -109,7 +109,7 @@ void EEVEE_cache_populate(void *vedata, Object *ob) } if (DRW_object_is_renderable(ob) && (ob_visibility & OB_VISIBLE_SELF)) { - if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL)) { + if (ob->type == OB_MESH) { EEVEE_materials_cache_populate(vedata, sldata, ob, &cast_shadow); } else if (ob->type == OB_CURVES) { @@ -312,12 +312,12 @@ static void eevee_draw_scene(void *vedata) /* Volumetrics Resolve Opaque */ EEVEE_volumes_resolve(sldata, vedata); - /* 
Renderpasses */ + /* Render-passes. */ EEVEE_renderpasses_output_accumulate(sldata, vedata, false); /* Transparent */ - /* TODO(fclem): should be its own Frame-buffer. - * This is needed because dualsource blending only works with 1 color buffer. */ + /* TODO(@fclem): should be its own Frame-buffer. + * This is needed because dual-source blending only works with 1 color buffer. */ GPU_framebuffer_texture_attach(fbl->main_color_fb, dtxl->depth, 0, 0); GPU_framebuffer_bind(fbl->main_color_fb); DRW_draw_pass(psl->transparent_pass); @@ -366,7 +366,7 @@ static void eevee_draw_scene(void *vedata) static void eevee_view_update(void *vedata) { EEVEE_StorageList *stl = ((EEVEE_Data *)vedata)->stl; - if (stl->g_data) { + if (stl && stl->g_data) { stl->g_data->view_updated = true; } } @@ -451,8 +451,8 @@ static void eevee_render_to_image(void *vedata, } EEVEE_PrivateData *g_data = ved->stl->g_data; - int initial_frame = CFRA; - float initial_subframe = SUBFRA; + int initial_frame = scene->r.cfra; + float initial_subframe = scene->r.subframe; float shuttertime = (do_motion_blur) ? scene->eevee.motion_blur_shutter : 0.0f; int time_steps_tot = (do_motion_blur) ? max_ii(1, scene->eevee.motion_blur_steps) : 1; g_data->render_timesteps = time_steps_tot; @@ -588,7 +588,7 @@ static void eevee_render_to_image(void *vedata, /* Restore original viewport size. */ DRW_render_viewport_size_set((int[2]){g_data->size_orig[0], g_data->size_orig[1]}); - if (CFRA != initial_frame || SUBFRA != initial_subframe) { + if (scene->r.cfra != initial_frame || scene->r.subframe != initial_subframe) { /* Restore original frame number. This is because the render pipeline expects it. 
*/ RE_engine_frame_set(engine, initial_frame, initial_subframe); } diff --git a/source/blender/draw/engines/eevee/eevee_lightcache.c b/source/blender/draw/engines/eevee/eevee_lightcache.c index 7f722ff1764..614ea0b0892 100644 --- a/source/blender/draw/engines/eevee/eevee_lightcache.c +++ b/source/blender/draw/engines/eevee/eevee_lightcache.c @@ -849,7 +849,7 @@ static void eevee_lightbake_delete_resources(EEVEE_LightBake *lbake) DRW_opengl_context_enable(); } - /* XXX Free the resources contained in the viewlayer data + /* XXX: Free the resources contained in the view-layer data * to be able to free the context before deleting the depsgraph. */ if (lbake->sldata) { EEVEE_view_layer_data_free(lbake->sldata); diff --git a/source/blender/draw/engines/eevee/eevee_materials.c b/source/blender/draw/engines/eevee/eevee_materials.c index efd27c19654..94f29d64628 100644 --- a/source/blender/draw/engines/eevee/eevee_materials.c +++ b/source/blender/draw/engines/eevee/eevee_materials.c @@ -806,7 +806,7 @@ void EEVEE_materials_cache_populate(EEVEE_Data *vedata, !DRW_state_is_image_render(); /* First get materials for this mesh. */ - if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL)) { + if (ELEM(ob->type, OB_MESH, OB_SURF)) { const int materials_len = DRW_cache_object_material_count_get(ob); EeveeMaterialCache *matcache = BLI_array_alloca(matcache, materials_len); diff --git a/source/blender/draw/engines/eevee/eevee_private.h b/source/blender/draw/engines/eevee/eevee_private.h index ad218d80cdf..573c29b78a1 100644 --- a/source/blender/draw/engines/eevee/eevee_private.h +++ b/source/blender/draw/engines/eevee/eevee_private.h @@ -1015,7 +1015,7 @@ typedef struct EEVEE_PrivateData { struct GHash *material_hash; float background_alpha; /* TODO: find a better place for this. */ bool disable_ligthprobes; - /* Chosen lightcache: can come from Lookdev or the viewlayer. */ + /** Chosen light-cache: can come from Lookdev or the view-layer. 
*/ struct LightCache *light_cache; /* For planar probes */ float planar_texel_size[2]; @@ -1050,7 +1050,7 @@ typedef struct EEVEE_PrivateData { float studiolight_glossy_clamp; float studiolight_filter_quality; - /* Renderpasses */ + /* Render-passes */ /* Bitmask containing the active render_passes */ eViewLayerEEVEEPassType render_passes; uint aov_hash; @@ -1261,6 +1261,7 @@ struct GPUShader *EEVEE_shaders_volumes_scatter_sh_get(void); struct GPUShader *EEVEE_shaders_volumes_scatter_with_lights_sh_get(void); struct GPUShader *EEVEE_shaders_volumes_integration_sh_get(void); struct GPUShader *EEVEE_shaders_volumes_resolve_sh_get(bool accum); +struct GPUShader *EEVEE_shaders_volumes_resolve_comp_sh_get(bool float_target); struct GPUShader *EEVEE_shaders_volumes_accum_sh_get(void); struct GPUShader *EEVEE_shaders_ggx_lut_sh_get(void); struct GPUShader *EEVEE_shaders_ggx_refraction_lut_sh_get(void); diff --git a/source/blender/draw/engines/eevee/eevee_render.c b/source/blender/draw/engines/eevee/eevee_render.c index bef19c589c2..c3b909f5fb9 100644 --- a/source/blender/draw/engines/eevee/eevee_render.c +++ b/source/blender/draw/engines/eevee/eevee_render.c @@ -24,6 +24,7 @@ #include "DEG_depsgraph_query.h" #include "GPU_capabilities.h" +#include "GPU_context.h" #include "GPU_framebuffer.h" #include "GPU_state.h" @@ -223,7 +224,7 @@ void EEVEE_render_cache(void *vedata, } if (ob_visibility & OB_VISIBLE_SELF) { - if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL)) { + if (ob->type == OB_MESH) { EEVEE_materials_cache_populate(vedata, sldata, ob, &cast_shadow); if (do_cryptomatte) { EEVEE_cryptomatte_cache_populate(data, sldata, ob); @@ -646,6 +647,10 @@ void EEVEE_render_draw(EEVEE_Data *vedata, RenderEngine *engine, RenderLayer *rl /* XXX Seems to fix TDR issue with NVidia drivers on linux. */ GPU_finish(); + /* Perform render step between samples to allow + * flushing of freed GPUBackend resources. 
*/ + GPU_render_step(); + RE_engine_update_progress(engine, (float)(render_samples++) / (float)tot_sample); } } diff --git a/source/blender/draw/engines/eevee/eevee_sampling.c b/source/blender/draw/engines/eevee/eevee_sampling.c index a1a3e98f34f..34d3cd74b36 100644 --- a/source/blender/draw/engines/eevee/eevee_sampling.c +++ b/source/blender/draw/engines/eevee/eevee_sampling.c @@ -74,7 +74,8 @@ void EEVEE_sample_ellipse(int sample_ofs, BLI_halton_2d(ht_primes, ht_offset, sample_ofs, ht_point); - /* Decorelate AA and shadow samples. (see T68594) */ + /* Decorrelate AA and shadow samples. (see T68594) */ + ht_point[0] = fmod(ht_point[0] * 1151.0, 1.0); ht_point[1] = fmod(ht_point[1] * 1069.0, 1.0); @@ -97,7 +98,7 @@ void EEVEE_random_rotation_m4(int sample_ofs, float scale, float r_mat[4][4]) BLI_halton_3d(ht_primes, ht_offset, sample_ofs, ht_point); - /* Decorelate AA and shadow samples. (see T68594) */ + /* Decorrelate AA and shadow samples. (see T68594) */ ht_point[0] = fmod(ht_point[0] * 1151.0, 1.0); ht_point[1] = fmod(ht_point[1] * 1069.0, 1.0); ht_point[2] = fmod(ht_point[2] * 1151.0, 1.0); diff --git a/source/blender/draw/engines/eevee/eevee_screen_raytrace.c b/source/blender/draw/engines/eevee/eevee_screen_raytrace.c index 5af794c9158..0d0e551f3dc 100644 --- a/source/blender/draw/engines/eevee/eevee_screen_raytrace.c +++ b/source/blender/draw/engines/eevee/eevee_screen_raytrace.c @@ -198,7 +198,7 @@ void EEVEE_reflection_compute(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *v if (((effects->enabled_effects & EFFECT_SSR) != 0) && stl->g_data->valid_double_buffer) { DRW_stats_group_start("SSR"); - /* Raytrace. */ + /* Ray-trace. 
*/ GPU_framebuffer_bind(fbl->screen_tracing_fb); DRW_draw_pass(psl->ssr_raytrace); diff --git a/source/blender/draw/engines/eevee/eevee_shaders.c b/source/blender/draw/engines/eevee/eevee_shaders.c index 5709621fc05..a7290b3894e 100644 --- a/source/blender/draw/engines/eevee/eevee_shaders.c +++ b/source/blender/draw/engines/eevee/eevee_shaders.c @@ -133,6 +133,7 @@ static struct { struct GPUShader *scatter_with_lights_sh; struct GPUShader *volumetric_integration_sh; struct GPUShader *volumetric_resolve_sh[2]; + struct GPUShader *volumetric_resolve_comp_sh[2]; struct GPUShader *volumetric_accum_sh; /* Shader strings */ @@ -181,6 +182,7 @@ extern char datatoc_closure_type_lib_glsl[]; extern char datatoc_closure_eval_volume_lib_glsl[]; extern char datatoc_common_uniforms_lib_glsl[]; extern char datatoc_common_utiltex_lib_glsl[]; +extern char datatoc_cryptomatte_lib_glsl[]; extern char datatoc_cryptomatte_frag_glsl[]; extern char datatoc_cryptomatte_vert_glsl[]; extern char datatoc_cubemap_lib_glsl[]; @@ -260,6 +262,7 @@ extern char datatoc_volumetric_frag_glsl[]; extern char datatoc_volumetric_geom_glsl[]; extern char datatoc_volumetric_integration_frag_glsl[]; extern char datatoc_volumetric_lib_glsl[]; +extern char datatoc_volumetric_resolve_comp_glsl[]; extern char datatoc_volumetric_resolve_frag_glsl[]; extern char datatoc_volumetric_scatter_frag_glsl[]; extern char datatoc_volumetric_vert_glsl[]; @@ -304,6 +307,7 @@ static void eevee_shader_library_ensure(void) DRW_SHADER_LIB_ADD(e_data.lib, closure_eval_refraction_lib); DRW_SHADER_LIB_ADD(e_data.lib, closure_eval_surface_lib); DRW_SHADER_LIB_ADD(e_data.lib, closure_eval_volume_lib); + DRW_SHADER_LIB_ADD(e_data.lib, cryptomatte_lib); DRW_SHADER_LIB_ADD(e_data.lib, surface_vert); e_data.surface_lit_frag = DRW_shader_library_create_shader_string(e_data.lib, @@ -901,6 +905,20 @@ struct GPUShader *EEVEE_shaders_volumes_resolve_sh_get(bool accum) return e_data.volumetric_resolve_sh[index]; } +struct GPUShader 
*EEVEE_shaders_volumes_resolve_comp_sh_get(bool float_target) +{ + const int index = (float_target ? 1 : 0); + if (e_data.volumetric_resolve_comp_sh[index] == NULL) { + e_data.volumetric_resolve_comp_sh[index] = DRW_shader_create_compute_with_shaderlib( + datatoc_volumetric_resolve_comp_glsl, + e_data.lib, + float_target ? "#define TARGET_IMG_FLOAT\n" SHADER_DEFINES : SHADER_DEFINES, + __func__); + } + + return e_data.volumetric_resolve_comp_sh[index]; +} + struct GPUShader *EEVEE_shaders_volumes_accum_sh_get() { if (e_data.volumetric_accum_sh == NULL) { @@ -1190,8 +1208,8 @@ Material *EEVEE_material_default_diffuse_get(void) if (!e_data.diffuse_mat) { Material *ma = BKE_id_new_nomain(ID_MA, "EEVEEE default diffuse"); - bNodeTree *ntree = ntreeAddTree(NULL, "Shader Nodetree", ntreeType_Shader->idname); - ma->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + NULL, &ma->id, "Shader Nodetree", ntreeType_Shader->idname); ma->use_nodes = true; bNode *bsdf = nodeAddStaticNode(NULL, ntree, SH_NODE_BSDF_DIFFUSE); @@ -1217,8 +1235,8 @@ Material *EEVEE_material_default_glossy_get(void) if (!e_data.glossy_mat) { Material *ma = BKE_id_new_nomain(ID_MA, "EEVEEE default metal"); - bNodeTree *ntree = ntreeAddTree(NULL, "Shader Nodetree", ntreeType_Shader->idname); - ma->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + NULL, &ma->id, "Shader Nodetree", ntreeType_Shader->idname); ma->use_nodes = true; bNode *bsdf = nodeAddStaticNode(NULL, ntree, SH_NODE_BSDF_GLOSSY); @@ -1246,8 +1264,8 @@ Material *EEVEE_material_default_error_get(void) if (!e_data.error_mat) { Material *ma = BKE_id_new_nomain(ID_MA, "EEVEEE default error"); - bNodeTree *ntree = ntreeAddTree(NULL, "Shader Nodetree", ntreeType_Shader->idname); - ma->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + NULL, &ma->id, "Shader Nodetree", ntreeType_Shader->idname); ma->use_nodes = true; /* Use emission and output material to be compatible with both World and Material. 
*/ diff --git a/source/blender/draw/engines/eevee/eevee_shadows_cascade.c b/source/blender/draw/engines/eevee/eevee_shadows_cascade.c index 536242f67d8..a3ab4cdb830 100644 --- a/source/blender/draw/engines/eevee/eevee_shadows_cascade.c +++ b/source/blender/draw/engines/eevee/eevee_shadows_cascade.c @@ -357,7 +357,7 @@ static void eevee_shadow_cascade_setup(EEVEE_LightsInfo *linfo, mul_m4_m4m4(csm_data->shadowmat[c], texcomat, viewprojmat); #ifdef DEBUG_CSM - DRW_debug_m4_as_bbox(viewprojmat, dbg_col, true); + DRW_debug_m4_as_bbox(viewprojmat, true, dbg_col); #endif } diff --git a/source/blender/draw/engines/eevee/eevee_volumes.c b/source/blender/draw/engines/eevee/eevee_volumes.c index b8bef61f8b1..b2e5a0abe94 100644 --- a/source/blender/draw/engines/eevee/eevee_volumes.c +++ b/source/blender/draw/engines/eevee/eevee_volumes.c @@ -30,6 +30,7 @@ #include "DEG_depsgraph_query.h" #include "GPU_capabilities.h" +#include "GPU_context.h" #include "GPU_material.h" #include "GPU_texture.h" #include "eevee_private.h" @@ -82,6 +83,13 @@ void EEVEE_volumes_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata) tex_size[1] = (int)ceilf(fmaxf(1.0f, viewport_size[1] / (float)tile_size)); tex_size[2] = max_ii(scene_eval->eevee.volumetric_samples, 1); + /* Clamp 3D texture size based on device maximum. */ + int maxSize = GPU_max_texture_3d_size(); + BLI_assert(tex_size[0] <= maxSize); + tex_size[0] = tex_size[0] > maxSize ? maxSize : tex_size[0]; + tex_size[1] = tex_size[1] > maxSize ? maxSize : tex_size[1]; + tex_size[2] = tex_size[2] > maxSize ? 
maxSize : tex_size[2]; + common_data->vol_coord_scale[0] = viewport_size[0] / (float)(tile_size * tex_size[0]); common_data->vol_coord_scale[1] = viewport_size[1] / (float)(tile_size * tex_size[1]); common_data->vol_coord_scale[2] = 1.0f / viewport_size[0]; @@ -306,9 +314,14 @@ void EEVEE_volumes_cache_object_add(EEVEE_ViewLayerData *sldata, return; } + GPUShader *sh = GPU_material_get_shader(mat); + if (sh == NULL) { + return; + } + /* TODO(fclem): Reuse main shading group to avoid shading binding cost just like for surface * shaders. */ - DRWShadingGroup *grp = DRW_shgroup_material_create(mat, vedata->psl->volumetric_objects_ps); + DRWShadingGroup *grp = DRW_shgroup_create(sh, vedata->psl->volumetric_objects_ps); grp = DRW_shgroup_volume_create_sub(scene, ob, grp, mat); @@ -316,6 +329,8 @@ void EEVEE_volumes_cache_object_add(EEVEE_ViewLayerData *sldata, return; } + DRW_shgroup_add_material_resources(grp, mat); + /* TODO(fclem): remove those "unnecessary" UBOs */ DRW_shgroup_uniform_block(grp, "planar_block", sldata->planar_ubo); DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo); @@ -381,18 +396,37 @@ void EEVEE_volumes_cache_finish(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata) grp, NULL, USE_VOLUME_OPTI ? 
1 : common_data->vol_tex_size[2]); DRW_PASS_CREATE(psl->volumetric_resolve_ps, DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM); - grp = DRW_shgroup_create(EEVEE_shaders_volumes_resolve_sh_get(false), - psl->volumetric_resolve_ps); - DRW_shgroup_uniform_texture_ref(grp, "inScattering", &txl->volume_scatter); - DRW_shgroup_uniform_texture_ref(grp, "inTransmittance", &txl->volume_transmit); - DRW_shgroup_uniform_texture_ref(grp, "inSceneDepth", &e_data.depth_src); - DRW_shgroup_uniform_block(grp, "light_block", sldata->light_ubo); - DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo); - DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo); - DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined); - DRW_shgroup_uniform_block(grp, "shadow_block", sldata->shadow_ubo); + if (GPU_compute_shader_support() && GPU_shader_image_load_store_support()) { + const bool use_float_target = DRW_state_is_image_render(); + grp = DRW_shgroup_create(EEVEE_shaders_volumes_resolve_comp_sh_get(use_float_target), + psl->volumetric_resolve_ps); + DRW_shgroup_uniform_texture_ref(grp, "inScattering", &txl->volume_scatter); + DRW_shgroup_uniform_texture_ref(grp, "inTransmittance", &txl->volume_transmit); + DRW_shgroup_uniform_texture_ref(grp, "inSceneDepth", &e_data.depth_src); + DRW_shgroup_uniform_block(grp, "light_block", sldata->light_ubo); + DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo); + DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo); + DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined); + DRW_shgroup_uniform_block(grp, "shadow_block", sldata->shadow_ubo); + DRW_shgroup_uniform_image_ref(grp, "target_img", &txl->color); - DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + const float *size = DRW_viewport_size_get(); + DRW_shgroup_call_compute(grp, size[0], size[1], 1); + } + else { + grp = DRW_shgroup_create(EEVEE_shaders_volumes_resolve_sh_get(false), + 
psl->volumetric_resolve_ps); + DRW_shgroup_uniform_texture_ref(grp, "inScattering", &txl->volume_scatter); + DRW_shgroup_uniform_texture_ref(grp, "inTransmittance", &txl->volume_transmit); + DRW_shgroup_uniform_texture_ref(grp, "inSceneDepth", &e_data.depth_src); + DRW_shgroup_uniform_block(grp, "light_block", sldata->light_ubo); + DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo); + DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo); + DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined); + DRW_shgroup_uniform_block(grp, "shadow_block", sldata->shadow_ubo); + + DRW_shgroup_call_procedural_triangles(grp, NULL, 1); + } } } @@ -531,11 +565,16 @@ void EEVEE_volumes_resolve(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *veda } /* Apply for opaque geometry. */ - GPU_framebuffer_bind(fbl->main_color_fb); - DRW_draw_pass(psl->volumetric_resolve_ps); + if (GPU_compute_shader_support() && GPU_shader_image_load_store_support()) { + DRW_draw_pass(psl->volumetric_resolve_ps); + } + else { + GPU_framebuffer_bind(fbl->main_color_fb); + DRW_draw_pass(psl->volumetric_resolve_ps); - /* Restore. */ - GPU_framebuffer_bind(fbl->main_fb); + /* Restore. */ + GPU_framebuffer_bind(fbl->main_fb); + } } } diff --git a/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl b/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl index 0f5290a7c07..ffca97b6b8f 100644 --- a/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl @@ -181,6 +181,8 @@ Closure closure_eval(ClosureDiffuse diffuse, ClosureReflection reflection) /* Glue with the old system. */ CLOSURE_VARS_DECLARE_2(Diffuse, Glossy); + /* WORKAROUND: This is to avoid regression in 3.2 and avoid messing with EEVEE-Next. */ + in_common.occlusion = (diffuse.sss_radius.g == -1.0) ? 
diffuse.sss_radius.r : 1.0; in_Diffuse_0.N = diffuse.N; in_Diffuse_0.albedo = diffuse.color; in_Glossy_1.N = reflection.N; @@ -207,6 +209,8 @@ Closure closure_eval(ClosureDiffuse diffuse, /* Glue with the old system. */ CLOSURE_VARS_DECLARE_3(Diffuse, Glossy, Glossy); + /* WORKAROUND: This is to avoid regression in 3.2 and avoid messing with EEVEE-Next. */ + in_common.occlusion = (diffuse.sss_radius.g == -1.0) ? diffuse.sss_radius.r : 1.0; in_Diffuse_0.N = diffuse.N; in_Diffuse_0.albedo = diffuse.color; in_Glossy_1.N = reflection.N; diff --git a/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl b/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl index 4070ede116b..eeccb393a5c 100644 --- a/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/closure_type_lib.glsl @@ -6,8 +6,8 @@ #ifndef VOLUMETRICS -uniform int outputSsrId; /*Default = 1;*/ -uniform int outputSssId; /*Default = 1;*/ +uniform int outputSsrId; /* Default = 1; */ +uniform int outputSssId; /* Default = 1; */ #endif diff --git a/source/blender/draw/engines/eevee/shaders/cryptomatte_lib.glsl b/source/blender/draw/engines/eevee/shaders/cryptomatte_lib.glsl new file mode 100644 index 00000000000..0f8810ff7ac --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/cryptomatte_lib.glsl @@ -0,0 +1,19 @@ +/* NOTE: this lib is included in the cryptomatte vertex shader to work around the issue that eevee + * cannot use create infos for its static shaders. Keep in sync with draw_shader_shared.h */ +#ifdef HAIR_SHADER +/* Define the maximum number of attribute we allow in a curves UBO. + * This should be kept in sync with `GPU_ATTR_MAX` */ +# define DRW_ATTRIBUTE_PER_CURVES_MAX 15 + +struct CurvesInfos { + /* Per attribute scope, follows loading order. + * NOTE: uint as bool in GLSL is 4 bytes. + * NOTE: GLSL pad arrays of scalar to 16 bytes (std140). 
*/ + uvec4 is_point_attribute[DRW_ATTRIBUTE_PER_CURVES_MAX]; +}; +layout(std140) uniform drw_curves +{ + CurvesInfos _drw_curves; +}; +# define drw_curves (_drw_curves) +#endif diff --git a/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl b/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl index f8dbc4772e9..14fbc98469a 100644 --- a/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl +++ b/source/blender/draw/engines/eevee/shaders/cryptomatte_vert.glsl @@ -3,4 +3,5 @@ #pragma BLENDER_REQUIRE(common_view_lib.glsl) #pragma BLENDER_REQUIRE(common_math_lib.glsl) #pragma BLENDER_REQUIRE(common_attribute_lib.glsl) +#pragma BLENDER_REQUIRE(cryptomatte_lib.glsl) #pragma BLENDER_REQUIRE(surface_vert.glsl) diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl index 688ae4915e1..7dec30a96b1 100644 --- a/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_resolve_frag.glsl @@ -124,7 +124,7 @@ void dof_slight_focus_gather(float radius, out vec4 out_color, out float out_wei dof_gather_accumulate_resolve(total_sample_count, bg_accum, bg_col, bg_weight, unused_occlusion); dof_gather_accumulate_resolve(total_sample_count, fg_accum, fg_col, fg_weight, unused_occlusion); - /* Fix weighting issues on perfectly focus > slight focus transitionning areas. */ + /* Fix weighting issues on perfectly focus > slight focus transitioning areas. 
*/ if (abs(center_data.coc) < 0.5) { bg_col = center_data.color; bg_weight = 1.0; diff --git a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl index 06dcbeaed66..7230758a93f 100644 --- a/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/effect_dof_scatter_frag.glsl @@ -67,7 +67,7 @@ void main(void) /* Occlude the sprite with geometry from the same field * using a VSM like chebychev test (slide 85). */ float mean = occlusion_data.x; - float variance = occlusion_data.x; + float variance = occlusion_data.y; shapes *= variance * safe_rcp(variance + sqr(max(cocs * correction_fac - mean, 0.0))); } diff --git a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl index 9ecc50d9df5..c7f6687d2e2 100644 --- a/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/lightprobe_filter_diffuse_frag.glsl @@ -100,7 +100,7 @@ void main() coef = 0.315392 * (3.0 * cubevec.y * cubevec.y - 1.0) * 1.0 / 4.0; } else if (comp == 7) { - coef = 1.092548 * cubevec.x * cubevec.y * 1.0 / 4.0; + coef = -1.092548 * cubevec.x * cubevec.y * 1.0 / 4.0; } else { /* (comp == 8) */ coef = 0.546274 * (cubevec.x * cubevec.x - cubevec.z * cubevec.z) * 1.0 / 4.0; diff --git a/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl b/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl index 15c68dc5829..87e944a2ac0 100644 --- a/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/prepass_frag.glsl @@ -91,3 +91,17 @@ void main() } #endif } + +/* Passthrough. 
*/ +float attr_load_temperature_post(float attr) +{ + return attr; +} +vec4 attr_load_color_post(vec4 attr) +{ + return attr; +} +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl b/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl index 2926f8c5a89..062a40f35c2 100644 --- a/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl +++ b/source/blender/draw/engines/eevee/shaders/shadow_vert.glsl @@ -73,7 +73,7 @@ int g_curves_attr_id = 0; int curves_attribute_element_id() { int id = hairStrandID; - if (drw_curves.is_point_attribute[g_curves_attr_id] != 0) { + if (drw_curves.is_point_attribute[g_curves_attr_id][0] != 0) { id = hair_get_base_id(); } @@ -152,3 +152,7 @@ vec4 attr_load_color_post(vec4 attr) { return attr; } +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee/shaders/surface_frag.glsl b/source/blender/draw/engines/eevee/shaders/surface_frag.glsl index ace6c7d788d..88755705a53 100644 --- a/source/blender/draw/engines/eevee/shaders/surface_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/surface_frag.glsl @@ -152,7 +152,8 @@ void main() /* Only supported attrib for world/background shaders. */ vec3 attr_load_orco(vec4 orco) { - return g_data.P; + /* Retain precision better than g_data.P (see T99128). */ + return -normal_view_to_world(viewCameraVec(viewPosition)); } /* Unsupported. 
*/ vec4 attr_load_tangent(vec4 tangent) @@ -181,3 +182,7 @@ vec4 attr_load_color_post(vec4 attr) { return attr; } +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee/shaders/surface_lib.glsl b/source/blender/draw/engines/eevee/shaders/surface_lib.glsl index 8e1bafe8d92..69762027643 100644 --- a/source/blender/draw/engines/eevee/shaders/surface_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/surface_lib.glsl @@ -97,11 +97,12 @@ GlobalData init_globals(void) GlobalData surf; # if defined(WORLD_BACKGROUND) || defined(PROBE_CAPTURE) - surf.P = -cameraVec(worldPosition); - surf.N = surf.Ng = -surf.P; + surf.P = transform_direction(ViewMatrixInverse, -viewCameraVec(viewPosition)); + surf.N = surf.Ng = surf.Ni = -surf.P; surf.ray_length = 0.0; # else surf.P = worldPosition; + surf.Ni = worldNormal; surf.N = safe_normalize(worldNormal); surf.Ng = safe_normalize(cross(dFdx(surf.P), dFdy(surf.P))); surf.ray_length = distance(surf.P, cameraPos); @@ -109,6 +110,7 @@ GlobalData init_globals(void) surf.barycentric_coords = vec2(0.0); surf.barycentric_dists = vec3(0.0); surf.N = (FrontFacing) ? surf.N : -surf.N; + surf.Ni = (FrontFacing) ? surf.Ni : -surf.Ni; # ifdef HAIR_SHADER vec3 V = cameraVec(surf.P); /* Shade as a cylinder. */ @@ -123,7 +125,7 @@ GlobalData init_globals(void) cos_theta = hairThickTime / hairThickness; } float sin_theta = sqrt(max(0.0, 1.0 - cos_theta * cos_theta)); - surf.N = safe_normalize(worldNormal * sin_theta + B * cos_theta); + surf.N = surf.Ni = safe_normalize(worldNormal * sin_theta + B * cos_theta); surf.curve_T = -hairTangent; /* Costly, but follows cycles per pixel tangent space (not following curve shape). 
*/ surf.curve_B = cross(V, surf.curve_T); diff --git a/source/blender/draw/engines/eevee/shaders/surface_vert.glsl b/source/blender/draw/engines/eevee/shaders/surface_vert.glsl index a8e95e13b12..54aad7891dc 100644 --- a/source/blender/draw/engines/eevee/shaders/surface_vert.glsl +++ b/source/blender/draw/engines/eevee/shaders/surface_vert.glsl @@ -80,7 +80,7 @@ int g_curves_attr_id = 0; int curves_attribute_element_id() { int id = hairStrandID; - if (drw_curves.is_point_attribute[g_curves_attr_id] != 0) { + if (drw_curves.is_point_attribute[g_curves_attr_id][0] != 0) { id = hair_get_base_id(); } @@ -165,3 +165,7 @@ vec4 attr_load_color_post(vec4 attr) { return attr; } +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl b/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl index 88ade8451a4..9ed21fc0bf5 100644 --- a/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/volumetric_frag.glsl @@ -86,6 +86,8 @@ void main() discard; return; } +#else /* WORLD_SHADER */ + volumeOrco = worldPosition; #endif #ifdef CLEAR @@ -176,3 +178,7 @@ vec4 attr_load_color_post(vec4 attr) #endif return attr; } +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee/shaders/volumetric_resolve_comp.glsl b/source/blender/draw/engines/eevee/shaders/volumetric_resolve_comp.glsl new file mode 100644 index 00000000000..2b0139ff923 --- /dev/null +++ b/source/blender/draw/engines/eevee/shaders/volumetric_resolve_comp.glsl @@ -0,0 +1,38 @@ + +#pragma BLENDER_REQUIRE(volumetric_lib.glsl) + +/* Based on Frosbite Unified Volumetric. + * https://www.ea.com/frostbite/news/physically-based-unified-volumetric-rendering-in-frostbite */ + +/* Step 4 : Apply final integration on top of the scene color. 
*/ + +uniform sampler2D inSceneDepth; + +layout(local_size_x = 1, local_size_y = 1) in; + +#ifdef TARGET_IMG_FLOAT +layout(binding = 0, rgba32f) uniform image2D target_img; +#else +layout(binding = 0, rgba16f) uniform image2D target_img; +#endif + +void main() +{ + ivec2 co = ivec2(gl_GlobalInvocationID.xy); + vec2 uvs = co / vec2(textureSize(inSceneDepth, 0)); + float scene_depth = texture(inSceneDepth, uvs).r; + + vec3 transmittance, scattering; + volumetric_resolve(uvs, scene_depth, transmittance, scattering); + + /* Approximate volume alpha by using a monochromatic transmittance + * and adding it to the scene alpha. */ + float alpha = dot(transmittance, vec3(1.0 / 3.0)); + + vec4 color0 = vec4(scattering, 1.0 - alpha); + vec4 color1 = vec4(transmittance, alpha); + + vec4 color_in = imageLoad(target_img, co); + vec4 color_out = color0 + color1 * color_in; + imageStore(target_img, co, color_out); +} diff --git a/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl b/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl index b3b9c7af19c..2d51fbd9edc 100644 --- a/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl +++ b/source/blender/draw/engines/eevee/shaders/volumetric_vert.glsl @@ -87,3 +87,8 @@ vec4 attr_load_color_post(vec4 attr) { return attr; } + +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ + return attr; +} diff --git a/source/blender/draw/engines/eevee_next/eevee_camera.cc b/source/blender/draw/engines/eevee_next/eevee_camera.cc index e6d2e2db764..b9040f0f3ab 100644 --- a/source/blender/draw/engines/eevee_next/eevee_camera.cc +++ b/source/blender/draw/engines/eevee_next/eevee_camera.cc @@ -29,10 +29,8 @@ namespace blender::eevee { void Camera::init() { const Object *camera_eval = inst_.camera_eval_object; - synced_ = false; - data_.swap(); - CameraData &data = data_.current(); + CameraData &data = data_; if (camera_eval) { const ::Camera *cam = reinterpret_cast<const ::Camera *>(camera_eval->data); @@ -77,9 
+75,8 @@ void Camera::init() void Camera::sync() { const Object *camera_eval = inst_.camera_eval_object; - CameraData &data = data_.current(); - data.filter_size = inst_.scene->r.gauss; + CameraData &data = data_; if (inst_.drw_view) { DRW_view_viewmat_get(inst_.drw_view, data.viewmat.ptr(), false); @@ -127,6 +124,10 @@ void Camera::sync() data.equirect_scale *= data.uv_scale; data.equirect_scale_inv = 1.0f / data.equirect_scale; +#else + data.fisheye_fov = data.fisheye_lens = -1.0f; + data.equirect_bias = float2(0.0f); + data.equirect_scale = float2(0.0f); #endif } else if (inst_.drw_view) { @@ -137,14 +138,8 @@ void Camera::sync() data.equirect_scale = float2(0.0f); } - data_.current().push_update(); - - synced_ = true; - - /* Detect changes in parameters. */ - if (data_.current() != data_.previous()) { - // inst_.sampling.reset(); - } + data_.initialized = true; + data_.push_update(); } /** \} */ diff --git a/source/blender/draw/engines/eevee_next/eevee_camera.hh b/source/blender/draw/engines/eevee_next/eevee_camera.hh index dfec738b1f3..49f9b14e11b 100644 --- a/source/blender/draw/engines/eevee_next/eevee_camera.hh +++ b/source/blender/draw/engines/eevee_next/eevee_camera.hh @@ -61,8 +61,7 @@ inline bool operator==(const CameraData &a, const CameraData &b) return compare_m4m4(a.persmat.ptr(), b.persmat.ptr(), FLT_MIN) && (a.uv_scale == b.uv_scale) && (a.uv_bias == b.uv_bias) && (a.equirect_scale == b.equirect_scale) && (a.equirect_bias == b.equirect_bias) && (a.fisheye_fov == b.fisheye_fov) && - (a.fisheye_lens == b.fisheye_lens) && (a.filter_size == b.filter_size) && - (a.type == b.type); + (a.fisheye_lens == b.fisheye_lens) && (a.type == b.type); } inline bool operator!=(const CameraData &a, const CameraData &b) @@ -83,10 +82,7 @@ class Camera { private: Instance &inst_; - /** Double buffered to detect changes and have history for re-projection. */ - SwapChain<CameraDataBuf, 2> data_; - /** Detects wrong usage. 
*/ - bool synced_ = false; + CameraDataBuf data_; public: Camera(Instance &inst) : inst_(inst){}; @@ -100,28 +96,32 @@ class Camera { **/ const CameraData &data_get() const { - BLI_assert(synced_); - return data_.current(); + BLI_assert(data_.initialized); + return data_; } const GPUUniformBuf *ubo_get() const { - return data_.current(); + return data_; } bool is_panoramic() const { - return eevee::is_panoramic(data_.current().type); + return eevee::is_panoramic(data_.type); } bool is_orthographic() const { - return data_.current().type == CAMERA_ORTHO; + return data_.type == CAMERA_ORTHO; + } + bool is_perspective() const + { + return data_.type == CAMERA_PERSP; } const float3 &position() const { - return *reinterpret_cast<const float3 *>(data_.current().viewinv[3]); + return *reinterpret_cast<const float3 *>(data_.viewinv[3]); } const float3 &forward() const { - return *reinterpret_cast<const float3 *>(data_.current().viewinv[2]); + return *reinterpret_cast<const float3 *>(data_.viewinv[2]); } }; diff --git a/source/blender/draw/engines/eevee_next/eevee_cryptomatte.cc b/source/blender/draw/engines/eevee_next/eevee_cryptomatte.cc new file mode 100644 index 00000000000..10be121f533 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_cryptomatte.cc @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
 */ +#include "BKE_cryptomatte.hh" + +#include "GPU_material.h" + +#include "eevee_cryptomatte.hh" +#include "eevee_instance.hh" +#include "eevee_renderbuffers.hh" + +namespace blender::eevee { + +void Cryptomatte::begin_sync() +{ + const eViewLayerEEVEEPassType enabled_passes = static_cast<eViewLayerEEVEEPassType>( + inst_.film.enabled_passes_get() & + (EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT | EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET | + EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL)); + + session_.reset(); + object_layer_ = nullptr; + asset_layer_ = nullptr; + material_layer_ = nullptr; + + if (enabled_passes && !inst_.is_viewport()) { + session_.reset(BKE_cryptomatte_init_from_view_layer(inst_.view_layer)); + + for (const std::string &layer_name : + bke::cryptomatte::BKE_cryptomatte_layer_names_get(*session_)) { + StringRef layer_name_ref = layer_name; + bke::cryptomatte::CryptomatteLayer *layer = bke::cryptomatte::BKE_cryptomatte_layer_get( + *session_, layer_name); + if (layer_name_ref.endswith(RE_PASSNAME_CRYPTOMATTE_OBJECT)) { + object_layer_ = layer; + } + else if (layer_name_ref.endswith(RE_PASSNAME_CRYPTOMATTE_ASSET)) { + asset_layer_ = layer; + } + else if (layer_name_ref.endswith(RE_PASSNAME_CRYPTOMATTE_MATERIAL)) { + material_layer_ = layer; + } + } + } + + if (!(enabled_passes & + (EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT | EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET))) { + cryptomatte_object_buf.resize(16); + } +} + +void Cryptomatte::sync_object(Object *ob, ResourceHandle res_handle) +{ + const eViewLayerEEVEEPassType enabled_passes = inst_.film.enabled_passes_get(); + if (!(enabled_passes & + (EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT | EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET))) { + return; + } + + uint32_t resource_id = res_handle.resource_index(); + float2 object_hashes(0.0f, 0.0f); + + if (enabled_passes & EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT) { + object_hashes[0] = register_id(EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT, ob->id); + } + + if (enabled_passes & 
EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET) { + Object *asset = ob; + while (asset->parent) { + asset = asset->parent; + } + object_hashes[1] = register_id(EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET, asset->id); + } + cryptomatte_object_buf.get_or_resize(resource_id) = object_hashes; +} + +void Cryptomatte::sync_material(const ::Material *material) +{ + /* Material crypto hashes are generated during shader codegen stage. We only need to register + * them to store inside the metadata. */ + if (material_layer_ && material) { + material_layer_->add_ID(material->id); + } +} + +void Cryptomatte::end_sync() +{ + cryptomatte_object_buf.push_update(); + + object_layer_ = nullptr; + asset_layer_ = nullptr; + material_layer_ = nullptr; +} + +float Cryptomatte::register_id(const eViewLayerEEVEEPassType layer, const ID &id) const +{ + BLI_assert(ELEM(layer, + EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT, + EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET, + EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL)); + + uint32_t cryptomatte_hash = 0; + if (session_) { + if (layer == EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT) { + BLI_assert(object_layer_); + cryptomatte_hash = object_layer_->add_ID(id); + } + else if (layer == EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET) { + BLI_assert(asset_layer_); + cryptomatte_hash = asset_layer_->add_ID(id); + } + else if (layer == EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL) { + BLI_assert(material_layer_); + cryptomatte_hash = material_layer_->add_ID(id); + } + } + else { + const char *name = &id.name[2]; + const int name_len = BLI_strnlen(name, MAX_NAME - 2); + cryptomatte_hash = BKE_cryptomatte_hash(name, name_len); + } + + return BKE_cryptomatte_hash_to_float(cryptomatte_hash); +} + +void Cryptomatte::store_metadata(RenderResult *render_result) +{ + if (session_) { + BKE_cryptomatte_store_metadata(&*session_, render_result, inst_.view_layer); + } +} + +} // namespace blender::eevee
\ No newline at end of file diff --git a/source/blender/draw/engines/eevee_next/eevee_cryptomatte.hh b/source/blender/draw/engines/eevee_next/eevee_cryptomatte.hh new file mode 100644 index 00000000000..86ab3d97b4b --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_cryptomatte.hh @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup eevee + * + * Cryptomatte. + * + * During rasterization, cryptomatte hashes are stored into a single array texture. + * The film pass then resamples this texture using pixel filter weighting. + * Each cryptomatte layer can hold N samples. These are stored in sequential layers + * of the array texture. The samples are sorted and merged only for final rendering. + */ + +#pragma once + +#include "eevee_shader_shared.hh" + +#include "BKE_cryptomatte.hh" + +extern "C" { +struct Material; +struct CryptomatteSession; +} + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Cryptomatte + * \{ */ + +class Cryptomatte { + private: + class Instance &inst_; + + bke::cryptomatte::CryptomatteSessionPtr session_; + + /* Cached pointer to the cryptomatte layer instances. */ + bke::cryptomatte::CryptomatteLayer *object_layer_ = nullptr; + bke::cryptomatte::CryptomatteLayer *asset_layer_ = nullptr; + bke::cryptomatte::CryptomatteLayer *material_layer_ = nullptr; + + /** Contains per object hashes (object and asset hash). Indexed by resource ID. 
*/ + CryptomatteObjectBuf cryptomatte_object_buf; + + public: + Cryptomatte(Instance &inst) : inst_(inst){}; + + void begin_sync(); + void sync_object(Object *ob, ResourceHandle res_handle); + void sync_material(const ::Material *material); + void end_sync(); + + template<typename T> void bind_resources(draw::detail::PassBase<T> *pass) + { + pass->bind_ssbo(CRYPTOMATTE_BUF_SLOT, &cryptomatte_object_buf); + } + + /* Register ID to use inside cryptomatte layer and returns associated hash as float. */ + float register_id(const eViewLayerEEVEEPassType layer, const ID &id) const; + void store_metadata(RenderResult *render_result); +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh index f75ebd2bd13..248dfae6df9 100644 --- a/source/blender/draw/engines/eevee_next/eevee_defines.hh +++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh @@ -11,12 +11,18 @@ #pragma once -/** - * Number of items in a culling batch. Needs to be Power of 2. Must be <= to 65536. - * Current limiting factor is the sorting phase which is single pass and only sort within a - * thread-group which maximum size is 1024. - */ -#define CULLING_BATCH_SIZE 1024 +/* Hierarchical Z down-sampling. */ +#define HIZ_MIP_COUNT 8 +/* NOTE: The shader is written to update 5 mipmaps using LDS. */ +#define HIZ_GROUP_SIZE 32 + +/* Avoid too much overhead caused by resizing the light buffers too many time. */ +#define LIGHT_CHUNK 256 + +#define CULLING_SELECT_GROUP_SIZE 256 +#define CULLING_SORT_GROUP_SIZE 256 +#define CULLING_ZBIN_GROUP_SIZE 1024 +#define CULLING_TILE_GROUP_SIZE 1024 /** * IMPORTANT: Some data packing are tweaked for these values. @@ -34,12 +40,68 @@ #define SHADOW_MAX_PAGE 4096 #define SHADOW_PAGE_PER_ROW 64 -#define HIZ_MIP_COUNT 6u -/* Group size is 2x smaller because we simply copy the level 0. */ -#define HIZ_GROUP_SIZE 1u << (HIZ_MIP_COUNT - 2u) - +/* Ray-tracing. 
*/ #define RAYTRACE_GROUP_SIZE 16 #define RAYTRACE_MAX_TILES (16384 / RAYTRACE_GROUP_SIZE) * (16384 / RAYTRACE_GROUP_SIZE) /* Minimum visibility size. */ #define LIGHTPROBE_FILTER_VIS_GROUP_SIZE 16 + +/* Film. */ +#define FILM_GROUP_SIZE 16 + +/* Motion Blur. */ +#define MOTION_BLUR_GROUP_SIZE 32 +#define MOTION_BLUR_DILATE_GROUP_SIZE 512 + +/* Depth Of Field. */ +#define DOF_TILES_SIZE 8 +#define DOF_TILES_FLATTEN_GROUP_SIZE DOF_TILES_SIZE +#define DOF_TILES_DILATE_GROUP_SIZE 8 +#define DOF_BOKEH_LUT_SIZE 32 +#define DOF_MAX_SLIGHT_FOCUS_RADIUS 5 +#define DOF_SLIGHT_FOCUS_SAMPLE_MAX 16 +#define DOF_MIP_COUNT 4 +#define DOF_REDUCE_GROUP_SIZE (1 << (DOF_MIP_COUNT - 1)) +#define DOF_DEFAULT_GROUP_SIZE 32 +#define DOF_STABILIZE_GROUP_SIZE 16 +#define DOF_FILTER_GROUP_SIZE 8 +#define DOF_GATHER_GROUP_SIZE DOF_TILES_SIZE +#define DOF_RESOLVE_GROUP_SIZE (DOF_TILES_SIZE * 2) + +/* Resource bindings. */ + +/* Texture. */ +#define RBUFS_UTILITY_TEX_SLOT 14 + +/* Images. */ +#define RBUFS_NORMAL_SLOT 0 +#define RBUFS_LIGHT_SLOT 1 +#define RBUFS_DIFF_COLOR_SLOT 2 +#define RBUFS_SPEC_COLOR_SLOT 3 +#define RBUFS_EMISSION_SLOT 4 +#define RBUFS_AOV_COLOR_SLOT 5 +#define RBUFS_AOV_VALUE_SLOT 6 +#define RBUFS_CRYPTOMATTE_SLOT 7 + +/* Uniform Buffers. */ +/* Only during prepass. */ +#define VELOCITY_CAMERA_PREV_BUF 3 +#define VELOCITY_CAMERA_CURR_BUF 4 +#define VELOCITY_CAMERA_NEXT_BUF 5 + +/* Storage Buffers. */ +#define LIGHT_CULL_BUF_SLOT 0 +#define LIGHT_BUF_SLOT 1 +#define LIGHT_ZBIN_BUF_SLOT 2 +#define LIGHT_TILE_BUF_SLOT 3 +#define RBUFS_AOV_BUF_SLOT 5 +#define SAMPLING_BUF_SLOT 6 +#define CRYPTOMATTE_BUF_SLOT 7 + +/* Only during pre-pass. 
*/ +#define VELOCITY_OBJ_PREV_BUF_SLOT 0 +#define VELOCITY_OBJ_NEXT_BUF_SLOT 1 +#define VELOCITY_GEO_PREV_BUF_SLOT 2 +#define VELOCITY_GEO_NEXT_BUF_SLOT 3 +#define VELOCITY_INDIRECTION_BUF_SLOT 4 diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc new file mode 100644 index 00000000000..bc0891ceb92 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc @@ -0,0 +1,761 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Depth of field post process effect. + * + * There are 2 methods to achieve this effect. + * - The first uses projection matrix offsetting and sample accumulation to give + * reference quality depth of field. But this needs many samples to hide the + * under-sampling. + * - The second one is a post-processing based one. It follows the + * implementation described in the presentation + * "Life of a Bokeh - Siggraph 2018" from Guillaume Abadie. + * There are some difference with our actual implementation that prioritize quality. + */ + +#include "DRW_render.h" + +#include "BKE_camera.h" +#include "DNA_camera_types.h" + +#include "GPU_platform.h" +#include "GPU_texture.h" +#include "GPU_uniform_buffer.h" + +#include "eevee_camera.hh" +#include "eevee_instance.hh" +#include "eevee_sampling.hh" +#include "eevee_shader.hh" +#include "eevee_shader_shared.hh" + +#include "eevee_depth_of_field.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name Depth of field + * \{ */ + +void DepthOfField::init() +{ + const SceneEEVEE &sce_eevee = inst_.scene->eevee; + const Object *camera_object_eval = inst_.camera_eval_object; + const ::Camera *camera = (camera_object_eval) ? 
+ reinterpret_cast<const ::Camera *>(camera_object_eval->data) : + nullptr; + if (camera == nullptr) { + /* Set to invalid value for update detection */ + data_.scatter_color_threshold = -1.0f; + return; + } + /* Reminder: These are parameters not interpolated by motion blur. */ + int update = 0; + int sce_flag = sce_eevee.flag; + update += assign_if_different(do_jitter_, (sce_flag & SCE_EEVEE_DOF_JITTER) != 0); + update += assign_if_different(user_overblur_, sce_eevee.bokeh_overblur / 100.0f); + update += assign_if_different(fx_max_coc_, sce_eevee.bokeh_max_size); + update += assign_if_different(data_.scatter_color_threshold, sce_eevee.bokeh_threshold); + update += assign_if_different(data_.scatter_neighbor_max_color, sce_eevee.bokeh_neighbor_max); + update += assign_if_different(data_.bokeh_blades, float(camera->dof.aperture_blades)); + if (update > 0) { + inst_.sampling.reset(); + } +} + +void DepthOfField::sync() +{ + const Camera &camera = inst_.camera; + const Object *camera_object_eval = inst_.camera_eval_object; + const ::Camera *camera_data = (camera_object_eval) ? 
+ reinterpret_cast<const ::Camera *>(camera_object_eval->data) : + nullptr; + + int update = 0; + + if (camera_data == nullptr || (camera_data->dof.flag & CAM_DOF_ENABLED) == 0) { + update += assign_if_different(jitter_radius_, 0.0f); + update += assign_if_different(fx_radius_, 0.0f); + if (update > 0) { + inst_.sampling.reset(); + } + return; + } + + float2 anisotropic_scale = {clamp_f(1.0f / camera_data->dof.aperture_ratio, 1e-5f, 1.0f), + clamp_f(camera_data->dof.aperture_ratio, 1e-5f, 1.0f)}; + update += assign_if_different(data_.bokeh_anisotropic_scale, anisotropic_scale); + update += assign_if_different(data_.bokeh_rotation, camera_data->dof.aperture_rotation); + update += assign_if_different(focus_distance_, + BKE_camera_object_dof_distance(camera_object_eval)); + data_.bokeh_anisotropic_scale_inv = 1.0f / data_.bokeh_anisotropic_scale; + + float fstop = max_ff(camera_data->dof.aperture_fstop, 1e-5f); + + if (update) { + inst_.sampling.reset(); + } + + float aperture = 1.0f / (2.0f * fstop); + if (camera.is_perspective()) { + aperture *= camera_data->lens * 1e-3f; + } + + if (camera.is_orthographic()) { + /* FIXME: Why is this needed? Some kind of implicit unit conversion? */ + aperture *= 0.04f; + /* Really strange behavior from Cycles but replicating. */ + focus_distance_ += camera.data_get().clip_near; + } + + if (camera.is_panoramic()) { + /* FIXME: Eyeballed. */ + aperture *= 0.185f; + } + + if (camera_data->dof.aperture_ratio < 1.0) { + /* If ratio is scaling the bokeh outwards, we scale the aperture so that + * the gather kernel size will encompass the maximum axis. */ + aperture /= max_ff(camera_data->dof.aperture_ratio, 1e-5f); + } + + float jitter_radius, fx_radius; + + /* Balance blur radius between fx dof and jitter dof. */ + if (do_jitter_ && (inst_.sampling.dof_ring_count_get() > 0) && !camera.is_panoramic() && + !inst_.is_viewport()) { + /* Compute a minimal overblur radius to fill the gaps between the samples. 
+ * This is just the simplified form of dividing the area of the bokeh by + * the number of samples. */ + float minimal_overblur = 1.0f / sqrtf(inst_.sampling.dof_sample_count_get()); + + fx_radius = (minimal_overblur + user_overblur_) * aperture; + /* Avoid dilating the shape. Over-blur only soften. */ + jitter_radius = max_ff(0.0f, aperture - fx_radius); + } + else { + jitter_radius = 0.0f; + fx_radius = aperture; + } + + /* Disable post fx if result wouldn't be noticeable. */ + if (fx_max_coc_ <= 0.5f) { + fx_radius = 0.0f; + } + + update += assign_if_different(jitter_radius_, jitter_radius); + update += assign_if_different(fx_radius_, fx_radius); + if (update > 0) { + inst_.sampling.reset(); + } + + if (fx_radius_ == 0.0f) { + return; + } + + /* TODO(fclem): Once we render into multiple view, we will need to use the maximum resolution. */ + int2 max_render_res = inst_.film.render_extent_get(); + int2 half_res = math::divide_ceil(max_render_res, int2(2)); + int2 reduce_size = math::ceil_to_multiple(half_res, int2(DOF_REDUCE_GROUP_SIZE)); + + data_.gather_uv_fac = 1.0f / float2(reduce_size); + + /* Now that we know the maximum render resolution of every view, using depth of field, allocate + * the reduced buffers. Color needs to be signed format here. See note in shader for + * explanation. Do not use texture pool because of needs mipmaps. */ + reduced_color_tx_.ensure_2d(GPU_RGBA16F, reduce_size, nullptr, DOF_MIP_COUNT); + reduced_coc_tx_.ensure_2d(GPU_R16F, reduce_size, nullptr, DOF_MIP_COUNT); + reduced_color_tx_.ensure_mip_views(); + reduced_coc_tx_.ensure_mip_views(); + + /* Resize the scatter list to contain enough entry to cover half the screen with sprites (which + * is unlikely due to local contrast test). 
*/ + data_.scatter_max_rect = (reduced_color_tx_.pixel_count() / 4) / 2; + scatter_fg_list_buf_.resize(data_.scatter_max_rect); + scatter_bg_list_buf_.resize(data_.scatter_max_rect); + + bokeh_lut_pass_sync(); + setup_pass_sync(); + stabilize_pass_sync(); + downsample_pass_sync(); + reduce_pass_sync(); + tiles_flatten_pass_sync(); + tiles_dilate_pass_sync(); + gather_pass_sync(); + filter_pass_sync(); + scatter_pass_sync(); + hole_fill_pass_sync(); + resolve_pass_sync(); +} + +void DepthOfField::jitter_apply(float4x4 &winmat, float4x4 &viewmat) +{ + if (jitter_radius_ == 0.0f) { + return; + } + + float radius, theta; + inst_.sampling.dof_disk_sample_get(&radius, &theta); + + if (data_.bokeh_blades >= 3.0f) { + theta = circle_to_polygon_angle(data_.bokeh_blades, theta); + radius *= circle_to_polygon_radius(data_.bokeh_blades, theta); + } + radius *= jitter_radius_; + theta += data_.bokeh_rotation; + + /* Sample in View Space. */ + float2 sample = float2(radius * cosf(theta), radius * sinf(theta)); + sample *= data_.bokeh_anisotropic_scale; + /* Convert to NDC Space. */ + float3 jitter = float3(UNPACK2(sample), -focus_distance_); + float3 center = float3(0.0f, 0.0f, -focus_distance_); + mul_project_m4_v3(winmat.ptr(), jitter); + mul_project_m4_v3(winmat.ptr(), center); + + const bool is_ortho = (winmat[2][3] != -1.0f); + if (is_ortho) { + sample *= focus_distance_; + } + /* Translate origin. */ + sub_v2_v2(viewmat[3], sample); + /* Skew winmat Z axis. */ + add_v2_v2(winmat[2], center - jitter); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Passes setup. + * \{ */ + +void DepthOfField::bokeh_lut_pass_sync() +{ + const bool has_anisotropy = data_.bokeh_anisotropic_scale != float2(1.0f); + if (!has_anisotropy && (data_.bokeh_blades == 0.0)) { + /* No need for LUTs in these cases. */ + use_bokeh_lut_ = false; + return; + } + use_bokeh_lut_ = true; + + /* Precompute bokeh texture. 
*/ + bokeh_lut_ps_.init(); + bokeh_lut_ps_.shader_set(inst_.shaders.static_shader_get(DOF_BOKEH_LUT)); + bokeh_lut_ps_.bind_ubo("dof_buf", data_); + bokeh_lut_ps_.bind_image("out_gather_lut_img", &bokeh_gather_lut_tx_); + bokeh_lut_ps_.bind_image("out_scatter_lut_img", &bokeh_scatter_lut_tx_); + bokeh_lut_ps_.bind_image("out_resolve_lut_img", &bokeh_resolve_lut_tx_); + bokeh_lut_ps_.dispatch(int3(1, 1, 1)); +} + +void DepthOfField::setup_pass_sync() +{ + RenderBuffers &render_buffers = inst_.render_buffers; + + setup_ps_.init(); + setup_ps_.shader_set(inst_.shaders.static_shader_get(DOF_SETUP)); + setup_ps_.bind_texture("color_tx", &input_color_tx_, no_filter); + setup_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter); + setup_ps_.bind_ubo("dof_buf", data_); + setup_ps_.bind_image("out_color_img", &setup_color_tx_); + setup_ps_.bind_image("out_coc_img", &setup_coc_tx_); + setup_ps_.dispatch(&dispatch_setup_size_); + setup_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); +} + +void DepthOfField::stabilize_pass_sync() +{ + RenderBuffers &render_buffers = inst_.render_buffers; + VelocityModule &velocity = inst_.velocity; + + stabilize_ps_.init(); + stabilize_ps_.shader_set(inst_.shaders.static_shader_get(DOF_STABILIZE)); + stabilize_ps_.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS])); + stabilize_ps_.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT])); + /* This is only for temporal stability. The next step is not needed. 
*/ + stabilize_ps_.bind_ubo("camera_next", &(*velocity.camera_steps[STEP_PREVIOUS])); + stabilize_ps_.bind_texture("coc_tx", &setup_coc_tx_, no_filter); + stabilize_ps_.bind_texture("color_tx", &setup_color_tx_, no_filter); + stabilize_ps_.bind_texture("velocity_tx", &render_buffers.vector_tx, no_filter); + stabilize_ps_.bind_texture("in_history_tx", &stabilize_input_, with_filter); + stabilize_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter); + stabilize_ps_.bind_ubo("dof_buf", data_); + stabilize_ps_.push_constant("use_history", &stabilize_valid_history_, 1); + stabilize_ps_.bind_image("out_coc_img", reduced_coc_tx_.mip_view(0)); + stabilize_ps_.bind_image("out_color_img", reduced_color_tx_.mip_view(0)); + stabilize_ps_.bind_image("out_history_img", &stabilize_output_tx_); + stabilize_ps_.dispatch(&dispatch_stabilize_size_); + stabilize_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS); +} + +void DepthOfField::downsample_pass_sync() +{ + downsample_ps_.init(); + downsample_ps_.shader_set(inst_.shaders.static_shader_get(DOF_DOWNSAMPLE)); + downsample_ps_.bind_texture("color_tx", reduced_color_tx_.mip_view(0), no_filter); + downsample_ps_.bind_texture("coc_tx", reduced_coc_tx_.mip_view(0), no_filter); + downsample_ps_.bind_image("out_color_img", &downsample_tx_); + downsample_ps_.dispatch(&dispatch_downsample_size_); + downsample_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); +} + +void DepthOfField::reduce_pass_sync() +{ + reduce_ps_.init(); + reduce_ps_.shader_set(inst_.shaders.static_shader_get(DOF_REDUCE)); + reduce_ps_.bind_ubo("dof_buf", data_); + reduce_ps_.bind_texture("downsample_tx", &downsample_tx_, no_filter); + reduce_ps_.bind_ssbo("scatter_fg_list_buf", scatter_fg_list_buf_); + reduce_ps_.bind_ssbo("scatter_bg_list_buf", scatter_bg_list_buf_); + reduce_ps_.bind_ssbo("scatter_fg_indirect_buf", scatter_fg_indirect_buf_); + reduce_ps_.bind_ssbo("scatter_bg_indirect_buf", scatter_bg_indirect_buf_); + 
reduce_ps_.bind_image("inout_color_lod0_img", reduced_color_tx_.mip_view(0)); + reduce_ps_.bind_image("out_color_lod1_img", reduced_color_tx_.mip_view(1)); + reduce_ps_.bind_image("out_color_lod2_img", reduced_color_tx_.mip_view(2)); + reduce_ps_.bind_image("out_color_lod3_img", reduced_color_tx_.mip_view(3)); + reduce_ps_.bind_image("in_coc_lod0_img", reduced_coc_tx_.mip_view(0)); + reduce_ps_.bind_image("out_coc_lod1_img", reduced_coc_tx_.mip_view(1)); + reduce_ps_.bind_image("out_coc_lod2_img", reduced_coc_tx_.mip_view(2)); + reduce_ps_.bind_image("out_coc_lod3_img", reduced_coc_tx_.mip_view(3)); + reduce_ps_.dispatch(&dispatch_reduce_size_); + /* NOTE: Command buffer barrier is done automatically by the GPU backend. */ + reduce_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_STORAGE); +} + +void DepthOfField::tiles_flatten_pass_sync() +{ + tiles_flatten_ps_.init(); + tiles_flatten_ps_.shader_set(inst_.shaders.static_shader_get(DOF_TILES_FLATTEN)); + /* NOTE(fclem): We should use the reduced_coc_tx_ as it is stable, but we need the slight focus + * flag from the setup pass. A better way would be to do the brute-force in focus gather without + * this. */ + tiles_flatten_ps_.bind_texture("coc_tx", &setup_coc_tx_, no_filter); + tiles_flatten_ps_.bind_image("out_tiles_fg_img", &tiles_fg_tx_.current()); + tiles_flatten_ps_.bind_image("out_tiles_bg_img", &tiles_bg_tx_.current()); + tiles_flatten_ps_.dispatch(&dispatch_tiles_flatten_size_); + tiles_flatten_ps_.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); +} + +void DepthOfField::tiles_dilate_pass_sync() +{ + for (int pass = 0; pass < 2; pass++) { + PassSimple &drw_pass = (pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_; + eShaderType sh_type = (pass == 0) ? 
DOF_TILES_DILATE_MINMAX : DOF_TILES_DILATE_MINABS; + drw_pass.init(); + drw_pass.shader_set(inst_.shaders.static_shader_get(sh_type)); + drw_pass.bind_image("in_tiles_fg_img", &tiles_fg_tx_.previous()); + drw_pass.bind_image("in_tiles_bg_img", &tiles_bg_tx_.previous()); + drw_pass.bind_image("out_tiles_fg_img", &tiles_fg_tx_.current()); + drw_pass.bind_image("out_tiles_bg_img", &tiles_bg_tx_.current()); + drw_pass.push_constant("ring_count", &tiles_dilate_ring_count_, 1); + drw_pass.push_constant("ring_width_multiplier", &tiles_dilate_ring_width_mul_, 1); + drw_pass.dispatch(&dispatch_tiles_dilate_size_); + drw_pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); + } +} + +void DepthOfField::gather_pass_sync() +{ + for (int pass = 0; pass < 2; pass++) { + PassSimple &drw_pass = (pass == 0) ? gather_fg_ps_ : gather_bg_ps_; + SwapChain<TextureFromPool, 2> &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_; + SwapChain<TextureFromPool, 2> &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_; + eShaderType sh_type = (pass == 0) ? + (use_bokeh_lut_ ? DOF_GATHER_FOREGROUND_LUT : + DOF_GATHER_FOREGROUND) : + (use_bokeh_lut_ ? 
DOF_GATHER_BACKGROUND_LUT : DOF_GATHER_BACKGROUND); + drw_pass.init(); + inst_.sampling.bind_resources(&drw_pass); + drw_pass.shader_set(inst_.shaders.static_shader_get(sh_type)); + drw_pass.bind_ubo("dof_buf", data_); + drw_pass.bind_texture("color_bilinear_tx", reduced_color_tx_, gather_bilinear); + drw_pass.bind_texture("color_tx", reduced_color_tx_, gather_nearest); + drw_pass.bind_texture("coc_tx", reduced_coc_tx_, gather_nearest); + drw_pass.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current()); + drw_pass.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current()); + drw_pass.bind_image("out_color_img", &color_chain.current()); + drw_pass.bind_image("out_weight_img", &weight_chain.current()); + drw_pass.bind_image("out_occlusion_img", &occlusion_tx_); + drw_pass.bind_texture("bokeh_lut_tx", &bokeh_gather_lut_tx_); + drw_pass.dispatch(&dispatch_gather_size_); + drw_pass.barrier(GPU_BARRIER_TEXTURE_FETCH); + } +} + +void DepthOfField::filter_pass_sync() +{ + for (int pass = 0; pass < 2; pass++) { + PassSimple &drw_pass = (pass == 0) ? filter_fg_ps_ : filter_bg_ps_; + SwapChain<TextureFromPool, 2> &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_; + SwapChain<TextureFromPool, 2> &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_; + drw_pass.init(); + drw_pass.shader_set(inst_.shaders.static_shader_get(DOF_FILTER)); + drw_pass.bind_texture("color_tx", &color_chain.previous()); + drw_pass.bind_texture("weight_tx", &weight_chain.previous()); + drw_pass.bind_image("out_color_img", &color_chain.current()); + drw_pass.bind_image("out_weight_img", &weight_chain.current()); + drw_pass.dispatch(&dispatch_filter_size_); + drw_pass.barrier(GPU_BARRIER_TEXTURE_FETCH); + } +} + +void DepthOfField::scatter_pass_sync() +{ + for (int pass = 0; pass < 2; pass++) { + PassSimple &drw_pass = (pass == 0) ? 
scatter_fg_ps_ : scatter_bg_ps_; + drw_pass.init(); + drw_pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD_FULL); + drw_pass.shader_set(inst_.shaders.static_shader_get(DOF_SCATTER)); + drw_pass.push_constant("use_bokeh_lut", use_bokeh_lut_); + drw_pass.bind_texture("bokeh_lut_tx", &bokeh_scatter_lut_tx_); + drw_pass.bind_texture("occlusion_tx", &occlusion_tx_); + if (pass == 0) { + drw_pass.bind_ssbo("scatter_list_buf", scatter_fg_list_buf_); + drw_pass.draw_procedural_indirect(GPU_PRIM_TRI_STRIP, scatter_fg_indirect_buf_); + /* Avoid background gather pass writing to the occlusion_tx mid pass. */ + drw_pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); + } + else { + drw_pass.bind_ssbo("scatter_list_buf", scatter_bg_list_buf_); + drw_pass.draw_procedural_indirect(GPU_PRIM_TRI_STRIP, scatter_bg_indirect_buf_); + } + } +} + +void DepthOfField::hole_fill_pass_sync() +{ + hole_fill_ps_.init(); + inst_.sampling.bind_resources(&hole_fill_ps_); + hole_fill_ps_.shader_set(inst_.shaders.static_shader_get(DOF_GATHER_HOLE_FILL)); + hole_fill_ps_.bind_ubo("dof_buf", data_); + hole_fill_ps_.bind_texture("color_bilinear_tx", reduced_color_tx_, gather_bilinear); + hole_fill_ps_.bind_texture("color_tx", reduced_color_tx_, gather_nearest); + hole_fill_ps_.bind_texture("coc_tx", reduced_coc_tx_, gather_nearest); + hole_fill_ps_.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current()); + hole_fill_ps_.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current()); + hole_fill_ps_.bind_image("out_color_img", &hole_fill_color_tx_); + hole_fill_ps_.bind_image("out_weight_img", &hole_fill_weight_tx_); + hole_fill_ps_.dispatch(&dispatch_gather_size_); + hole_fill_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); +} + +void DepthOfField::resolve_pass_sync() +{ + eGPUSamplerState with_filter = GPU_SAMPLER_FILTER; + RenderBuffers &render_buffers = inst_.render_buffers; + eShaderType sh_type = use_bokeh_lut_ ? 
DOF_RESOLVE_LUT : DOF_RESOLVE; + + resolve_ps_.init(); + inst_.sampling.bind_resources(&resolve_ps_); + resolve_ps_.shader_set(inst_.shaders.static_shader_get(sh_type)); + resolve_ps_.bind_ubo("dof_buf", data_); + resolve_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter); + resolve_ps_.bind_texture("color_tx", &input_color_tx_, no_filter); + resolve_ps_.bind_texture("stable_color_tx", &resolve_stable_color_tx_, no_filter); + resolve_ps_.bind_texture("color_bg_tx", &color_bg_tx_.current(), with_filter); + resolve_ps_.bind_texture("color_fg_tx", &color_fg_tx_.current(), with_filter); + resolve_ps_.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current()); + resolve_ps_.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current()); + resolve_ps_.bind_texture("weight_bg_tx", &weight_bg_tx_.current()); + resolve_ps_.bind_texture("weight_fg_tx", &weight_fg_tx_.current()); + resolve_ps_.bind_texture("color_hole_fill_tx", &hole_fill_color_tx_); + resolve_ps_.bind_texture("weight_hole_fill_tx", &hole_fill_weight_tx_); + resolve_ps_.bind_texture("bokeh_lut_tx", &bokeh_resolve_lut_tx_); + resolve_ps_.bind_image("out_color_img", &output_color_tx_); + resolve_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); + resolve_ps_.dispatch(&dispatch_resolve_size_); + resolve_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Post-FX Rendering. + * \{ */ + +/* Similar to Film::update_sample_table() but with constant filter radius and constant sample + * count. */ +void DepthOfField::update_sample_table() +{ + float2 subpixel_offset = inst_.film.pixel_jitter_get(); + /* Since the film jitter is in full-screen res, divide by 2 to get the jitter in half res. */ + subpixel_offset *= 0.5; + + /* Same offsets as in dof_spatial_filtering(). 
*/ + const std::array<int2, 4> plus_offsets = {int2(-1, 0), int2(0, -1), int2(1, 0), int2(0, 1)}; + + const float radius = 1.5f; + int i = 0; + for (int2 offset : plus_offsets) { + float2 pixel_ofs = float2(offset) - subpixel_offset; + data_.filter_samples_weight[i++] = film_filter_weight(radius, math::length_squared(pixel_ofs)); + } + data_.filter_center_weight = film_filter_weight(radius, math::length_squared(subpixel_offset)); +} + +void DepthOfField::render(View &view, + GPUTexture **input_tx, + GPUTexture **output_tx, + DepthOfFieldBuffer &dof_buffer) +{ + if (fx_radius_ == 0.0f) { + return; + } + + input_color_tx_ = *input_tx; + output_color_tx_ = *output_tx; + extent_ = {GPU_texture_width(input_color_tx_), GPU_texture_height(input_color_tx_)}; + + { + const CameraData &cam_data = inst_.camera.data_get(); + data_.camera_type = cam_data.type; + /* OPTI(fclem) Could be optimized. */ + float3 jitter = float3(fx_radius_, 0.0f, -focus_distance_); + float3 center = float3(0.0f, 0.0f, -focus_distance_); + mul_project_m4_v3(cam_data.winmat.ptr(), jitter); + mul_project_m4_v3(cam_data.winmat.ptr(), center); + /* Simplify CoC calculation to a simple MADD. */ + if (inst_.camera.is_orthographic()) { + data_.coc_mul = (center[0] - jitter[0]) * 0.5f * extent_[0]; + data_.coc_bias = focus_distance_ * data_.coc_mul; + } + else { + data_.coc_bias = -(center[0] - jitter[0]) * 0.5f * extent_[0]; + data_.coc_mul = focus_distance_ * data_.coc_bias; + } + + float min_fg_coc = coc_radius_from_camera_depth(data_, -cam_data.clip_near); + float max_bg_coc = coc_radius_from_camera_depth(data_, -cam_data.clip_far); + if (data_.camera_type != CAMERA_ORTHO) { + /* Background is at infinity so maximum CoC is the limit of coc_radius_from_camera_depth + * at -inf. We only do this for perspective camera since orthographic coc limit is inf. */ + max_bg_coc = data_.coc_bias; + } + /* Clamp with user defined max. 
*/ + data_.coc_abs_max = min_ff(max_ff(fabsf(min_fg_coc), fabsf(max_bg_coc)), fx_max_coc_); + /* TODO(fclem): Make this dependent of the quality of the gather pass. */ + data_.scatter_coc_threshold = 4.0f; + + update_sample_table(); + + data_.push_update(); + } + + int2 half_res = math::divide_ceil(extent_, int2(2)); + int2 quarter_res = math::divide_ceil(extent_, int2(4)); + int2 tile_res = math::divide_ceil(half_res, int2(DOF_TILES_SIZE)); + + dispatch_setup_size_ = int3(math::divide_ceil(half_res, int2(DOF_DEFAULT_GROUP_SIZE)), 1); + dispatch_stabilize_size_ = int3(math::divide_ceil(half_res, int2(DOF_STABILIZE_GROUP_SIZE)), 1); + dispatch_downsample_size_ = int3(math::divide_ceil(quarter_res, int2(DOF_DEFAULT_GROUP_SIZE)), + 1); + dispatch_reduce_size_ = int3(math::divide_ceil(half_res, int2(DOF_REDUCE_GROUP_SIZE)), 1); + dispatch_tiles_flatten_size_ = int3(math::divide_ceil(half_res, int2(DOF_TILES_SIZE)), 1); + dispatch_tiles_dilate_size_ = int3( + math::divide_ceil(tile_res, int2(DOF_TILES_DILATE_GROUP_SIZE)), 1); + dispatch_gather_size_ = int3(math::divide_ceil(half_res, int2(DOF_GATHER_GROUP_SIZE)), 1); + dispatch_filter_size_ = int3(math::divide_ceil(half_res, int2(DOF_FILTER_GROUP_SIZE)), 1); + dispatch_resolve_size_ = int3(math::divide_ceil(extent_, int2(DOF_RESOLVE_GROUP_SIZE)), 1); + + if (GPU_type_matches_ex(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) { + /* On Mesa, there is a sync bug which can make a portion of the main pass (usually one shader) + * leave blocks of un-initialized memory. Doing a flush seems to alleviate the issue. 
*/ + GPU_flush(); + } + + DRW_stats_group_start("Depth of Field"); + + Manager &drw = *inst_.manager; + + { + DRW_stats_group_start("Setup"); + { + bokeh_gather_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_RG16F); + bokeh_scatter_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_R16F); + bokeh_resolve_lut_tx_.acquire(int2(DOF_MAX_SLIGHT_FOCUS_RADIUS * 2 + 1), GPU_R16F); + + if (use_bokeh_lut_) { + drw.submit(bokeh_lut_ps_, view); + } + } + { + setup_color_tx_.acquire(half_res, GPU_RGBA16F); + setup_coc_tx_.acquire(half_res, GPU_R16F); + + drw.submit(setup_ps_, view); + } + { + stabilize_output_tx_.acquire(half_res, GPU_RGBA16F); + stabilize_valid_history_ = !dof_buffer.stabilize_history_tx_.ensure_2d(GPU_RGBA16F, + half_res); + + if (stabilize_valid_history_ == false) { + /* Avoid uninitialized memory that can contain NaNs. */ + dof_buffer.stabilize_history_tx_.clear(float4(0.0f)); + } + + stabilize_input_ = dof_buffer.stabilize_history_tx_; + /* Outputs to reduced_*_tx_ mip 0. */ + drw.submit(stabilize_ps_, view); + + /* WATCH(fclem): Swap Texture an TextureFromPool internal GPUTexture in order to reuse + * the one that we just consumed. */ + TextureFromPool::swap(stabilize_output_tx_, dof_buffer.stabilize_history_tx_); + + /* Used by stabilize pass. */ + stabilize_output_tx_.release(); + setup_color_tx_.release(); + } + { + DRW_stats_group_start("Tile Prepare"); + + /* WARNING: If format changes, make sure dof_tile_* GLSL constants are properly encoded. */ + tiles_fg_tx_.previous().acquire(tile_res, GPU_R11F_G11F_B10F); + tiles_bg_tx_.previous().acquire(tile_res, GPU_R11F_G11F_B10F); + tiles_fg_tx_.current().acquire(tile_res, GPU_R11F_G11F_B10F); + tiles_bg_tx_.current().acquire(tile_res, GPU_R11F_G11F_B10F); + + drw.submit(tiles_flatten_ps_, view); + + /* Used by tile_flatten and stabilize_ps pass. */ + setup_coc_tx_.release(); + + /* Error introduced by gather center jittering. 
*/ + const float error_multiplier = 1.0f + 1.0f / (DOF_GATHER_RING_COUNT + 0.5f); + int dilation_end_radius = ceilf((fx_max_coc_ * error_multiplier) / (DOF_TILES_SIZE * 2)); + + /* Run dilation twice. One for minmax and one for minabs. */ + for (int pass = 0; pass < 2; pass++) { + /* This algorithm produce the exact dilation radius by dividing it in multiple passes. */ + int dilation_radius = 0; + while (dilation_radius < dilation_end_radius) { + int remainder = dilation_end_radius - dilation_radius; + /* Do not step over any unvisited tile. */ + int max_multiplier = dilation_radius + 1; + + int ring_count = min_ii(DOF_DILATE_RING_COUNT, ceilf(remainder / (float)max_multiplier)); + int multiplier = min_ii(max_multiplier, floorf(remainder / (float)ring_count)); + + dilation_radius += ring_count * multiplier; + + tiles_dilate_ring_count_ = ring_count; + tiles_dilate_ring_width_mul_ = multiplier; + + tiles_fg_tx_.swap(); + tiles_bg_tx_.swap(); + + drw.submit((pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_, view); + } + } + + tiles_fg_tx_.previous().release(); + tiles_bg_tx_.previous().release(); + + DRW_stats_group_end(); + } + + downsample_tx_.acquire(quarter_res, GPU_RGBA16F); + + drw.submit(downsample_ps_, view); + + scatter_fg_indirect_buf_.clear_to_zero(); + scatter_bg_indirect_buf_.clear_to_zero(); + + drw.submit(reduce_ps_, view); + + /* Used by reduce pass. */ + downsample_tx_.release(); + + DRW_stats_group_end(); + } + + for (int is_background = 0; is_background < 2; is_background++) { + DRW_stats_group_start(is_background ? "Background Convolution" : "Foreground Convolution"); + + SwapChain<TextureFromPool, 2> &color_tx = is_background ? color_bg_tx_ : color_fg_tx_; + SwapChain<TextureFromPool, 2> &weight_tx = is_background ? weight_bg_tx_ : weight_fg_tx_; + Framebuffer &scatter_fb = is_background ? scatter_bg_fb_ : scatter_fg_fb_; + PassSimple &gather_ps = is_background ? 
gather_bg_ps_ : gather_fg_ps_; + PassSimple &filter_ps = is_background ? filter_bg_ps_ : filter_fg_ps_; + PassSimple &scatter_ps = is_background ? scatter_bg_ps_ : scatter_fg_ps_; + + color_tx.current().acquire(half_res, GPU_RGBA16F); + weight_tx.current().acquire(half_res, GPU_R16F); + occlusion_tx_.acquire(half_res, GPU_RG16F); + + drw.submit(gather_ps, view); + + { + /* Filtering pass. */ + color_tx.swap(); + weight_tx.swap(); + + color_tx.current().acquire(half_res, GPU_RGBA16F); + weight_tx.current().acquire(half_res, GPU_R16F); + + drw.submit(filter_ps, view); + + color_tx.previous().release(); + weight_tx.previous().release(); + } + + GPU_memory_barrier(GPU_BARRIER_FRAMEBUFFER); + + scatter_fb.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(color_tx.current())); + + GPU_framebuffer_bind(scatter_fb); + drw.submit(scatter_ps, view); + + /* Used by scatter pass. */ + occlusion_tx_.release(); + + DRW_stats_group_end(); + } + { + DRW_stats_group_start("Hole Fill"); + + bokeh_gather_lut_tx_.release(); + bokeh_scatter_lut_tx_.release(); + + hole_fill_color_tx_.acquire(half_res, GPU_RGBA16F); + hole_fill_weight_tx_.acquire(half_res, GPU_R16F); + + drw.submit(hole_fill_ps_, view); + + /* NOTE: We do not filter the hole-fill pass as effect is likely to not be noticeable. */ + + DRW_stats_group_end(); + } + { + DRW_stats_group_start("Resolve"); + + resolve_stable_color_tx_ = dof_buffer.stabilize_history_tx_; + + drw.submit(resolve_ps_, view); + + color_bg_tx_.current().release(); + color_fg_tx_.current().release(); + weight_bg_tx_.current().release(); + weight_fg_tx_.current().release(); + tiles_fg_tx_.current().release(); + tiles_bg_tx_.current().release(); + hole_fill_color_tx_.release(); + hole_fill_weight_tx_.release(); + bokeh_resolve_lut_tx_.release(); + + DRW_stats_group_end(); + } + + DRW_stats_group_end(); + + /* Swap buffers so that next effect has the right input. 
*/ + SWAP(GPUTexture *, *input_tx, *output_tx); +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh new file mode 100644 index 00000000000..bac0e394d66 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Depth of field post process effect. + * + * There are 2 methods to achieve this effect. + * - The first uses projection matrix offsetting and sample accumulation to give + * reference quality depth of field. But this needs many samples to hide the + * under-sampling. + * - The second one is a post-processing based one. It follows the + * implementation described in the presentation + * "Life of a Bokeh - Siggraph 2018" from Guillaume Abadie. + * There are some difference with our actual implementation that prioritize quality. + */ + +#pragma once + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Depth of field + * \{ */ + +struct DepthOfFieldBuffer { + /** + * Per view history texture for stabilize pass. + * Swapped with stabilize_output_tx_ in order to reuse the previous history during DoF + * processing. + * Note this should be private as its inner working only concerns the Depth Of Field + * implementation. The view itself should not touch it. + */ + Texture stabilize_history_tx_ = {"dof_taa"}; +}; + +class DepthOfField { + private: + class Instance &inst_; + + /** Samplers */ + static constexpr eGPUSamplerState gather_bilinear = GPU_SAMPLER_MIPMAP | GPU_SAMPLER_FILTER; + static constexpr eGPUSamplerState gather_nearest = GPU_SAMPLER_MIPMAP; + + /** Input/Output texture references. 
*/ + GPUTexture *input_color_tx_ = nullptr; + GPUTexture *output_color_tx_ = nullptr; + + /** Bokeh LUT precompute pass. */ + TextureFromPool bokeh_gather_lut_tx_ = {"dof_bokeh_gather_lut"}; + TextureFromPool bokeh_resolve_lut_tx_ = {"dof_bokeh_resolve_lut"}; + TextureFromPool bokeh_scatter_lut_tx_ = {"dof_bokeh_scatter_lut"}; + PassSimple bokeh_lut_ps_ = {"BokehLut"}; + + /** Outputs half-resolution color and Circle Of Confusion. */ + TextureFromPool setup_coc_tx_ = {"dof_setup_coc"}; + TextureFromPool setup_color_tx_ = {"dof_setup_color"}; + int3 dispatch_setup_size_ = int3(-1); + PassSimple setup_ps_ = {"Setup"}; + + /** Allocated because we need mip chain. Which isn't supported by TextureFromPool. */ + Texture reduced_coc_tx_ = {"dof_reduced_coc"}; + Texture reduced_color_tx_ = {"dof_reduced_color"}; + + /** Stabilization (flicker attenuation) of Color and CoC output of the setup pass. */ + TextureFromPool stabilize_output_tx_ = {"dof_taa"}; + GPUTexture *stabilize_input_ = nullptr; + bool1 stabilize_valid_history_ = false; + int3 dispatch_stabilize_size_ = int3(-1); + PassSimple stabilize_ps_ = {"Stabilize"}; + + /** 1/4th res color buffer used to speedup the local contrast test in the first reduce pass. */ + TextureFromPool downsample_tx_ = {"dof_downsample"}; + int3 dispatch_downsample_size_ = int3(-1); + PassSimple downsample_ps_ = {"Downsample"}; + + /** Create mip-mapped color & COC textures for gather passes as well as scatter rect list. */ + DepthOfFieldScatterListBuf scatter_fg_list_buf_; + DepthOfFieldScatterListBuf scatter_bg_list_buf_; + DrawIndirectBuf scatter_fg_indirect_buf_; + DrawIndirectBuf scatter_bg_indirect_buf_; + int3 dispatch_reduce_size_ = int3(-1); + PassSimple reduce_ps_ = {"Reduce"}; + + /** Outputs min & max COC in each 8x8 half res pixel tiles (so 1/16th of full resolution). 
*/ + SwapChain<TextureFromPool, 2> tiles_fg_tx_; + SwapChain<TextureFromPool, 2> tiles_bg_tx_; + int3 dispatch_tiles_flatten_size_ = int3(-1); + PassSimple tiles_flatten_ps_ = {"TilesFlatten"}; + + /** Dilates the min & max CoCs to cover maximum COC values. */ + int tiles_dilate_ring_count_ = -1; + int tiles_dilate_ring_width_mul_ = -1; + int3 dispatch_tiles_dilate_size_ = int3(-1); + PassSimple tiles_dilate_minmax_ps_ = {"TilesDilateMinmax"}; + PassSimple tiles_dilate_minabs_ps_ = {"TilesDilateMinabs"}; + + /** Gather convolution for low intensity pixels and low contrast areas. */ + SwapChain<TextureFromPool, 2> color_bg_tx_; + SwapChain<TextureFromPool, 2> color_fg_tx_; + SwapChain<TextureFromPool, 2> weight_bg_tx_; + SwapChain<TextureFromPool, 2> weight_fg_tx_; + TextureFromPool occlusion_tx_ = {"dof_occlusion"}; + int3 dispatch_gather_size_ = int3(-1); + PassSimple gather_fg_ps_ = {"GatherFg"}; + PassSimple gather_bg_ps_ = {"GatherBg"}; + + /** Hole-fill convolution: Gather pass meant to fill areas of foreground dis-occlusion. */ + TextureFromPool hole_fill_color_tx_ = {"dof_color_hole_fill"}; + TextureFromPool hole_fill_weight_tx_ = {"dof_weight_hole_fill"}; + PassSimple hole_fill_ps_ = {"HoleFill"}; + + /** Small Filter pass to reduce noise out of gather passes. */ + int3 dispatch_filter_size_ = int3(-1); + PassSimple filter_fg_ps_ = {"FilterFg"}; + PassSimple filter_bg_ps_ = {"FilterBg"}; + + /** Scatter convolution: A quad is emitted for every 4 bright enough half pixels. */ + Framebuffer scatter_fg_fb_ = {"dof_scatter_fg"}; + Framebuffer scatter_bg_fb_ = {"dof_scatter_bg"}; + PassSimple scatter_fg_ps_ = {"ScatterFg"}; + PassSimple scatter_bg_ps_ = {"ScatterBg"}; + + /** Recombine the results and also perform a slight out of focus gather. */ + GPUTexture *resolve_stable_color_tx_ = nullptr; + int3 dispatch_resolve_size_ = int3(-1); + PassSimple resolve_ps_ = {"Resolve"}; + + DepthOfFieldDataBuf data_; + + /** Scene settings that are immutable. 
*/ + float user_overblur_; + float fx_max_coc_; + /** Use jittered depth of field where we randomize camera location. */ + bool do_jitter_; + /** Enable bokeh lookup texture. */ + bool use_bokeh_lut_; + + /** Circle of Confusion radius for FX DoF passes. Is in view X direction in [0..1] range. */ + float fx_radius_; + /** Circle of Confusion radius for jittered DoF. Is in view X direction in [0..1] range. */ + float jitter_radius_; + /** Focus distance in view space. */ + float focus_distance_; + /** Extent of the input buffer. */ + int2 extent_; + + public: + DepthOfField(Instance &inst) : inst_(inst){}; + ~DepthOfField(){}; + + void init(); + + void sync(); + + /** + * Apply Depth Of Field jittering to the view and projection matrices.. + */ + void jitter_apply(float4x4 &winmat, float4x4 &viewmat); + + /** + * Will swap input and output texture if rendering happens. The actual output of this function + * is in input_tx. + */ + void render(View &view, + GPUTexture **input_tx, + GPUTexture **output_tx, + DepthOfFieldBuffer &dof_buffer); + + bool postfx_enabled() const + { + return fx_radius_ > 0.0f; + } + + private: + void bokeh_lut_pass_sync(); + void setup_pass_sync(); + void stabilize_pass_sync(); + void downsample_pass_sync(); + void reduce_pass_sync(); + void tiles_flatten_pass_sync(); + void tiles_dilate_pass_sync(); + void gather_pass_sync(); + void filter_pass_sync(); + void scatter_pass_sync(); + void hole_fill_pass_sync(); + void resolve_pass_sync(); + + void update_sample_table(); +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_engine.cc b/source/blender/draw/engines/eevee_next/eevee_engine.cc index be0adfad568..5ef198838c9 100644 --- a/source/blender/draw/engines/eevee_next/eevee_engine.cc +++ b/source/blender/draw/engines/eevee_next/eevee_engine.cc @@ -12,6 +12,8 @@ #include "DRW_render.h" +#include "RE_pipeline.h" + #include "eevee_engine.h" /* Own include. 
*/ #include "eevee_instance.hh" @@ -97,6 +99,8 @@ static void eevee_draw_scene(void *vedata) DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get(); ved->instance->draw_viewport(dfbl); STRNCPY(ved->info, ved->instance->info.c_str()); + /* Reset view for other following engines. */ + DRW_view_set_active(nullptr); } static void eevee_cache_init(void *vedata) @@ -136,7 +140,7 @@ static void eevee_instance_free(void *instance) delete reinterpret_cast<eevee::Instance *>(instance); } -static void eevee_render_to_image(void *UNUSED(vedata), +static void eevee_render_to_image(void *vedata, struct RenderEngine *engine, struct RenderLayer *layer, const struct rcti *UNUSED(rect)) @@ -144,7 +148,39 @@ static void eevee_render_to_image(void *UNUSED(vedata), if (!GPU_shader_storage_buffer_objects_support()) { return; } - UNUSED_VARS(engine, layer); + + eevee::Instance *instance = new eevee::Instance(); + + Render *render = engine->re; + Depsgraph *depsgraph = DRW_context_state_get()->depsgraph; + Object *camera_original_ob = RE_GetCamera(engine->re); + const char *viewname = RE_GetActiveRenderView(engine->re); + int size[2] = {engine->resolution_x, engine->resolution_y}; + + rctf view_rect; + rcti rect; + RE_GetViewPlane(render, &view_rect, &rect); + + instance->init(size, &rect, engine, depsgraph, nullptr, camera_original_ob, layer); + instance->render_frame(layer, viewname); + + EEVEE_Data *ved = static_cast<EEVEE_Data *>(vedata); + if (ved->instance) { + delete ved->instance; + } + ved->instance = instance; +} + +static void eevee_store_metadata(void *vedata, struct RenderResult *render_result) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + EEVEE_Data *ved = static_cast<EEVEE_Data *>(vedata); + eevee::Instance *instance = ved->instance; + instance->store_metadata(render_result); + delete instance; + ved->instance = nullptr; } static void eevee_render_update_passes(RenderEngine *engine, Scene *scene, ViewLayer *view_layer) @@ -152,7 +188,7 
@@ static void eevee_render_update_passes(RenderEngine *engine, Scene *scene, ViewL if (!GPU_shader_storage_buffer_objects_support()) { return; } - UNUSED_VARS(engine, scene, view_layer); + eevee::Instance::update_passes(engine, scene, view_layer); } static const DrawEngineDataSize eevee_data_size = DRW_VIEWPORT_DATA_SIZE(EEVEE_Data); @@ -174,7 +210,7 @@ DrawEngineType draw_engine_eevee_next_type = { nullptr, nullptr, &eevee_render_to_image, - nullptr, + &eevee_store_metadata, }; RenderEngineType DRW_engine_viewport_eevee_next_type = { diff --git a/source/blender/draw/engines/eevee_next/eevee_film.cc b/source/blender/draw/engines/eevee_next/eevee_film.cc new file mode 100644 index 00000000000..b89746d99e2 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_film.cc @@ -0,0 +1,727 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * A film is a fullscreen buffer (usually at output extent) + * that will be able to accumulate sample in any distorted camera_type + * using a pixel filter. + * + * Input needs to be jittered so that the filter converges to the right result. + */ + +#include "BLI_hash.h" +#include "BLI_rect.h" + +#include "GPU_framebuffer.h" +#include "GPU_texture.h" + +#include "DRW_render.h" +#include "RE_pipeline.h" + +#include "eevee_film.hh" +#include "eevee_instance.hh" + +namespace blender::eevee { + +ENUM_OPERATORS(eViewLayerEEVEEPassType, 1 << EEVEE_RENDER_PASS_MAX_BIT) + +/* -------------------------------------------------------------------- */ +/** \name Arbitrary Output Variables + * \{ */ + +void Film::init_aovs() +{ + Vector<ViewLayerAOV *> aovs; + + aovs_info.display_id = -1; + aovs_info.display_is_value = false; + aovs_info.value_len = aovs_info.color_len = 0; + + if (inst_.is_viewport()) { + /* Viewport case. */ + if (inst_.v3d->shading.render_pass == EEVEE_RENDER_PASS_AOV) { + /* AOV display, request only a single AOV. 
*/ + ViewLayerAOV *aov = (ViewLayerAOV *)BLI_findstring( + &inst_.view_layer->aovs, inst_.v3d->shading.aov_name, offsetof(ViewLayerAOV, name)); + + if (aov == nullptr) { + /* AOV not found in view layer. */ + return; + } + + aovs.append(aov); + aovs_info.display_id = 0; + aovs_info.display_is_value = (aov->type == AOV_TYPE_VALUE); + } + else { + /* TODO(fclem): The realtime compositor could ask for several AOVs. */ + } + } + else { + /* Render case. */ + LISTBASE_FOREACH (ViewLayerAOV *, aov, &inst_.view_layer->aovs) { + aovs.append(aov); + } + } + + if (aovs.size() > AOV_MAX) { + inst_.info = "Error: Too many AOVs"; + return; + } + + for (ViewLayerAOV *aov : aovs) { + bool is_value = (aov->type == AOV_TYPE_VALUE); + uint &index = is_value ? aovs_info.value_len : aovs_info.color_len; + uint &hash = is_value ? aovs_info.hash_value[index] : aovs_info.hash_color[index]; + hash = BLI_hash_string(aov->name); + index++; + } +} + +float *Film::read_aov(ViewLayerAOV *aov) +{ + bool is_value = (aov->type == AOV_TYPE_VALUE); + Texture &accum_tx = is_value ? value_accum_tx_ : color_accum_tx_; + + Span<uint> aovs_hash(is_value ? aovs_info.hash_value : aovs_info.hash_color, + is_value ? aovs_info.value_len : aovs_info.color_len); + /* Find AOV index. */ + uint hash = BLI_hash_string(aov->name); + int aov_index = -1; + int i = 0; + for (uint candidate_hash : aovs_hash) { + if (candidate_hash == hash) { + aov_index = i; + break; + } + i++; + } + + accum_tx.ensure_layer_views(); + + int index = aov_index + (is_value ? data_.aov_value_id : data_.aov_color_id); + GPUTexture *pass_tx = accum_tx.layer_view(index); + + return (float *)GPU_texture_read(pass_tx, GPU_DATA_FLOAT, 0); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Mist Pass + * \{ */ + +void Film::sync_mist() +{ + const CameraData &cam = inst_.camera.data_get(); + const ::World *world = inst_.scene->world; + float mist_start = world ? 
world->miststa : cam.clip_near; + float mist_distance = world ? world->mistdist : fabsf(cam.clip_far - cam.clip_near); + int mist_type = world ? world->mistype : (int)WO_MIST_LINEAR; + + switch (mist_type) { + case WO_MIST_QUADRATIC: + data_.mist_exponent = 2.0f; + break; + case WO_MIST_LINEAR: + data_.mist_exponent = 1.0f; + break; + case WO_MIST_INVERSE_QUADRATIC: + data_.mist_exponent = 0.5f; + break; + } + + data_.mist_scale = 1.0 / mist_distance; + data_.mist_bias = -mist_start / mist_distance; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name FilmData + * \{ */ + +inline bool operator==(const FilmData &a, const FilmData &b) +{ + return (a.extent == b.extent) && (a.offset == b.offset) && + (a.filter_radius == b.filter_radius) && (a.scaling_factor == b.scaling_factor) && + (a.background_opacity == b.background_opacity); +} + +inline bool operator!=(const FilmData &a, const FilmData &b) +{ + return !(a == b); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Film + * \{ */ + +static eViewLayerEEVEEPassType enabled_passes(const ViewLayer *view_layer) +{ + eViewLayerEEVEEPassType result = eViewLayerEEVEEPassType(view_layer->eevee.render_passes); + +#define ENABLE_FROM_LEGACY(name_legacy, name_eevee) \ + SET_FLAG_FROM_TEST(result, \ + (view_layer->passflag & SCE_PASS_##name_legacy) != 0, \ + EEVEE_RENDER_PASS_##name_eevee); + + ENABLE_FROM_LEGACY(COMBINED, COMBINED) + ENABLE_FROM_LEGACY(Z, Z) + ENABLE_FROM_LEGACY(MIST, MIST) + ENABLE_FROM_LEGACY(NORMAL, NORMAL) + ENABLE_FROM_LEGACY(SHADOW, SHADOW) + ENABLE_FROM_LEGACY(AO, AO) + ENABLE_FROM_LEGACY(EMIT, EMIT) + ENABLE_FROM_LEGACY(ENVIRONMENT, ENVIRONMENT) + ENABLE_FROM_LEGACY(DIFFUSE_COLOR, DIFFUSE_COLOR) + ENABLE_FROM_LEGACY(GLOSSY_COLOR, SPECULAR_COLOR) + ENABLE_FROM_LEGACY(DIFFUSE_DIRECT, DIFFUSE_LIGHT) + ENABLE_FROM_LEGACY(GLOSSY_DIRECT, SPECULAR_LIGHT) + ENABLE_FROM_LEGACY(ENVIRONMENT, ENVIRONMENT) + 
ENABLE_FROM_LEGACY(VECTOR, VECTOR) + +#undef ENABLE_FROM_LEGACY + + SET_FLAG_FROM_TEST(result, + view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_OBJECT, + EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT); + SET_FLAG_FROM_TEST(result, + view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_ASSET, + EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET); + SET_FLAG_FROM_TEST(result, + view_layer->cryptomatte_flag & VIEW_LAYER_CRYPTOMATTE_MATERIAL, + EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL); + + return result; +} + +void Film::init(const int2 &extent, const rcti *output_rect) +{ + Sampling &sampling = inst_.sampling; + Scene &scene = *inst_.scene; + SceneEEVEE &scene_eevee = scene.eevee; + + init_aovs(); + + { + /* Enable passes that need to be rendered. */ + eViewLayerEEVEEPassType render_passes = eViewLayerEEVEEPassType(0); + + if (inst_.is_viewport()) { + /* Viewport Case. */ + render_passes = eViewLayerEEVEEPassType(inst_.v3d->shading.render_pass); + + if (inst_.overlays_enabled() || inst_.gpencil_engine_enabled) { + /* Overlays and Grease Pencil needs the depth for correct compositing. + * Using the render pass ensure we store the center depth. */ + render_passes |= EEVEE_RENDER_PASS_Z; + } + } + else { + /* Render Case. */ + render_passes = enabled_passes(inst_.view_layer); + } + + /* Filter obsolete passes. */ + render_passes &= ~(EEVEE_RENDER_PASS_UNUSED_8 | EEVEE_RENDER_PASS_BLOOM); + + if (scene_eevee.flag & SCE_EEVEE_MOTION_BLUR_ENABLED) { + /* Disable motion vector pass if motion blur is enabled. */ + render_passes &= ~EEVEE_RENDER_PASS_VECTOR; + } + + /* TODO(@fclem): Can't we rely on depsgraph update notification? 
*/ + if (assign_if_different(enabled_passes_, render_passes)) { + sampling.reset(); + } + } + { + rcti fallback_rect; + if (BLI_rcti_is_empty(output_rect)) { + BLI_rcti_init(&fallback_rect, 0, extent[0], 0, extent[1]); + output_rect = &fallback_rect; + } + + FilmData data = data_; + data.extent = int2(BLI_rcti_size_x(output_rect), BLI_rcti_size_y(output_rect)); + data.offset = int2(output_rect->xmin, output_rect->ymin); + data.extent_inv = 1.0f / float2(data.extent); + /* Disable filtering if sample count is 1. */ + data.filter_radius = (sampling.sample_count() == 1) ? 0.0f : + clamp_f(scene.r.gauss, 0.0f, 100.0f); + /* TODO(fclem): parameter hidden in experimental. + * We need to figure out LOD bias first in order to preserve texture crispiness. */ + data.scaling_factor = 1; + data.cryptomatte_samples_len = inst_.view_layer->cryptomatte_levels; + + data.background_opacity = (scene.r.alphamode == R_ALPHAPREMUL) ? 0.0f : 1.0f; + if (inst_.is_viewport() && false /* TODO(fclem): StudioLight */) { + data.background_opacity = inst_.v3d->shading.studiolight_background; + } + + FilmData &data_prev_ = data_; + if (assign_if_different(data_prev_, data)) { + sampling.reset(); + } + + const eViewLayerEEVEEPassType data_passes = EEVEE_RENDER_PASS_Z | EEVEE_RENDER_PASS_NORMAL | + EEVEE_RENDER_PASS_VECTOR; + const eViewLayerEEVEEPassType color_passes_1 = EEVEE_RENDER_PASS_DIFFUSE_LIGHT | + EEVEE_RENDER_PASS_SPECULAR_LIGHT | + EEVEE_RENDER_PASS_VOLUME_LIGHT | + EEVEE_RENDER_PASS_EMIT; + const eViewLayerEEVEEPassType color_passes_2 = EEVEE_RENDER_PASS_DIFFUSE_COLOR | + EEVEE_RENDER_PASS_SPECULAR_COLOR | + EEVEE_RENDER_PASS_ENVIRONMENT | + EEVEE_RENDER_PASS_MIST | + EEVEE_RENDER_PASS_SHADOW | EEVEE_RENDER_PASS_AO; + + data_.exposure_scale = pow2f(scene.view_settings.exposure); + data_.has_data = (enabled_passes_ & data_passes) != 0; + data_.any_render_pass_1 = (enabled_passes_ & color_passes_1) != 0; + data_.any_render_pass_2 = (enabled_passes_ & color_passes_2) != 0; + } + { + /* 
Set pass offsets. */ + + data_.display_id = aovs_info.display_id; + data_.display_storage_type = aovs_info.display_is_value ? PASS_STORAGE_VALUE : + PASS_STORAGE_COLOR; + + /* Combined is in a separate buffer. */ + data_.combined_id = (enabled_passes_ & EEVEE_RENDER_PASS_COMBINED) ? 0 : -1; + /* Depth is in a separate buffer. */ + data_.depth_id = (enabled_passes_ & EEVEE_RENDER_PASS_Z) ? 0 : -1; + + data_.color_len = 0; + data_.value_len = 0; + + auto pass_index_get = [&](eViewLayerEEVEEPassType pass_type) { + ePassStorageType storage_type = pass_storage_type(pass_type); + int index = (enabled_passes_ & pass_type) ? + (storage_type == PASS_STORAGE_VALUE ? data_.value_len : data_.color_len)++ : + -1; + if (inst_.is_viewport() && inst_.v3d->shading.render_pass == pass_type) { + data_.display_id = index; + data_.display_storage_type = storage_type; + } + return index; + }; + + data_.mist_id = pass_index_get(EEVEE_RENDER_PASS_MIST); + data_.normal_id = pass_index_get(EEVEE_RENDER_PASS_NORMAL); + data_.vector_id = pass_index_get(EEVEE_RENDER_PASS_VECTOR); + data_.diffuse_light_id = pass_index_get(EEVEE_RENDER_PASS_DIFFUSE_LIGHT); + data_.diffuse_color_id = pass_index_get(EEVEE_RENDER_PASS_DIFFUSE_COLOR); + data_.specular_light_id = pass_index_get(EEVEE_RENDER_PASS_SPECULAR_LIGHT); + data_.specular_color_id = pass_index_get(EEVEE_RENDER_PASS_SPECULAR_COLOR); + data_.volume_light_id = pass_index_get(EEVEE_RENDER_PASS_VOLUME_LIGHT); + data_.emission_id = pass_index_get(EEVEE_RENDER_PASS_EMIT); + data_.environment_id = pass_index_get(EEVEE_RENDER_PASS_ENVIRONMENT); + data_.shadow_id = pass_index_get(EEVEE_RENDER_PASS_SHADOW); + data_.ambient_occlusion_id = pass_index_get(EEVEE_RENDER_PASS_AO); + + data_.aov_color_id = data_.color_len; + data_.aov_value_id = data_.value_len; + + data_.aov_color_len = aovs_info.color_len; + data_.aov_value_len = aovs_info.value_len; + + data_.color_len += data_.aov_color_len; + data_.value_len += data_.aov_value_len; + + int cryptomatte_id = 
0; + auto cryptomatte_index_get = [&](eViewLayerEEVEEPassType pass_type) { + int index = -1; + if (enabled_passes_ & pass_type) { + index = cryptomatte_id; + cryptomatte_id += data_.cryptomatte_samples_len / 2; + + if (inst_.is_viewport() && inst_.v3d->shading.render_pass == pass_type) { + data_.display_id = index; + data_.display_storage_type = PASS_STORAGE_CRYPTOMATTE; + } + } + return index; + }; + data_.cryptomatte_object_id = cryptomatte_index_get(EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT); + data_.cryptomatte_asset_id = cryptomatte_index_get(EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET); + data_.cryptomatte_material_id = cryptomatte_index_get(EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL); + } + { + /* TODO(@fclem): Over-scans. */ + + data_.render_extent = math::divide_ceil(extent, int2(data_.scaling_factor)); + int2 weight_extent = inst_.camera.is_panoramic() ? data_.extent : int2(data_.scaling_factor); + + eGPUTextureFormat color_format = GPU_RGBA16F; + eGPUTextureFormat float_format = GPU_R16F; + eGPUTextureFormat weight_format = GPU_R32F; + eGPUTextureFormat depth_format = GPU_R32F; + eGPUTextureFormat cryptomatte_format = GPU_RGBA32F; + + int reset = 0; + reset += depth_tx_.ensure_2d(depth_format, data_.extent); + reset += combined_tx_.current().ensure_2d(color_format, data_.extent); + reset += combined_tx_.next().ensure_2d(color_format, data_.extent); + /* Two layers, one for nearest sample weight and one for weight accumulation. */ + reset += weight_tx_.current().ensure_2d_array(weight_format, weight_extent, 2); + reset += weight_tx_.next().ensure_2d_array(weight_format, weight_extent, 2); + reset += color_accum_tx_.ensure_2d_array(color_format, + (data_.color_len > 0) ? data_.extent : int2(1), + (data_.color_len > 0) ? data_.color_len : 1); + reset += value_accum_tx_.ensure_2d_array(float_format, + (data_.value_len > 0) ? data_.extent : int2(1), + (data_.value_len > 0) ? data_.value_len : 1); + /* Divided by two as two cryptomatte samples fit in pixel (RG, BA). 
*/ + int cryptomatte_array_len = cryptomatte_layer_len_get() * data_.cryptomatte_samples_len / 2; + reset += cryptomatte_tx_.ensure_2d_array(cryptomatte_format, + (cryptomatte_array_len > 0) ? data_.extent : int2(1), + (cryptomatte_array_len > 0) ? cryptomatte_array_len : + 1); + + if (reset > 0) { + sampling.reset(); + data_.use_history = 0; + data_.use_reprojection = 0; + + /* Avoid NaN in uninitialized texture memory making history blending dangerous. */ + color_accum_tx_.clear(float4(0.0f)); + value_accum_tx_.clear(float4(0.0f)); + combined_tx_.current().clear(float4(0.0f)); + weight_tx_.current().clear(float4(0.0f)); + depth_tx_.clear(float4(0.0f)); + cryptomatte_tx_.clear(float4(0.0f)); + } + } + + force_disable_reprojection_ = (scene_eevee.flag & SCE_EEVEE_TAA_REPROJECTION) == 0; +} + +void Film::sync() +{ + /* We use a fragment shader for viewport because we need to output the depth. */ + bool use_compute = (inst_.is_viewport() == false); + + eShaderType shader = use_compute ? FILM_COMP : FILM_FRAG; + + /* TODO(fclem): Shader variation for panoramic & scaled resolution. */ + + RenderBuffers &rbuffers = inst_.render_buffers; + VelocityModule &velocity = inst_.velocity; + + eGPUSamplerState filter = GPU_SAMPLER_FILTER; + + /* For viewport, only previous motion is supported. + * Still bind previous step to avoid undefined behavior. */ + eVelocityStep step_next = inst_.is_viewport() ? 
STEP_PREVIOUS : STEP_NEXT; + + accumulate_ps_.init(); + accumulate_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_ALWAYS); + accumulate_ps_.shader_set(inst_.shaders.static_shader_get(shader)); + accumulate_ps_.bind_ubo("film_buf", &data_); + accumulate_ps_.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS])); + accumulate_ps_.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT])); + accumulate_ps_.bind_ubo("camera_next", &(*velocity.camera_steps[step_next])); + accumulate_ps_.bind_texture("depth_tx", &rbuffers.depth_tx); + accumulate_ps_.bind_texture("combined_tx", &combined_final_tx_); + accumulate_ps_.bind_texture("normal_tx", &rbuffers.normal_tx); + accumulate_ps_.bind_texture("vector_tx", &rbuffers.vector_tx); + accumulate_ps_.bind_texture("light_tx", &rbuffers.light_tx); + accumulate_ps_.bind_texture("diffuse_color_tx", &rbuffers.diffuse_color_tx); + accumulate_ps_.bind_texture("specular_color_tx", &rbuffers.specular_color_tx); + accumulate_ps_.bind_texture("volume_light_tx", &rbuffers.volume_light_tx); + accumulate_ps_.bind_texture("emission_tx", &rbuffers.emission_tx); + accumulate_ps_.bind_texture("environment_tx", &rbuffers.environment_tx); + accumulate_ps_.bind_texture("shadow_tx", &rbuffers.shadow_tx); + accumulate_ps_.bind_texture("ambient_occlusion_tx", &rbuffers.ambient_occlusion_tx); + accumulate_ps_.bind_texture("aov_color_tx", &rbuffers.aov_color_tx); + accumulate_ps_.bind_texture("aov_value_tx", &rbuffers.aov_value_tx); + accumulate_ps_.bind_texture("cryptomatte_tx", &rbuffers.cryptomatte_tx); + /* NOTE(@fclem): 16 is the max number of sampled texture in many implementations. + * If we need more, we need to pack more of the similar passes in the same textures as arrays or + * use image binding instead. 
*/ + accumulate_ps_.bind_image("in_weight_img", &weight_tx_.current()); + accumulate_ps_.bind_image("out_weight_img", &weight_tx_.next()); + accumulate_ps_.bind_texture("in_combined_tx", &combined_tx_.current(), filter); + accumulate_ps_.bind_image("out_combined_img", &combined_tx_.next()); + accumulate_ps_.bind_image("depth_img", &depth_tx_); + accumulate_ps_.bind_image("color_accum_img", &color_accum_tx_); + accumulate_ps_.bind_image("value_accum_img", &value_accum_tx_); + accumulate_ps_.bind_image("cryptomatte_img", &cryptomatte_tx_); + /* Sync with rendering passes. */ + accumulate_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS); + if (use_compute) { + accumulate_ps_.dispatch(int3(math::divide_ceil(data_.extent, int2(FILM_GROUP_SIZE)), 1)); + } + else { + accumulate_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3); + } + + const int cryptomatte_layer_count = cryptomatte_layer_len_get(); + const bool is_cryptomatte_pass_enabled = cryptomatte_layer_count > 0; + const bool do_cryptomatte_sorting = inst_.is_viewport() == false; + cryptomatte_post_ps_.init(); + if (is_cryptomatte_pass_enabled && do_cryptomatte_sorting) { + cryptomatte_post_ps_.state_set(DRW_STATE_NO_DRAW); + cryptomatte_post_ps_.shader_set(inst_.shaders.static_shader_get(FILM_CRYPTOMATTE_POST)); + cryptomatte_post_ps_.bind_image("cryptomatte_img", &cryptomatte_tx_); + cryptomatte_post_ps_.bind_image("weight_img", &weight_tx_.current()); + cryptomatte_post_ps_.push_constant("cryptomatte_layer_len", cryptomatte_layer_count); + cryptomatte_post_ps_.push_constant("cryptomatte_samples_per_layer", + inst_.view_layer->cryptomatte_levels); + int2 dispatch_size = math::divide_ceil(int2(cryptomatte_tx_.size()), int2(FILM_GROUP_SIZE)); + cryptomatte_post_ps_.dispatch(int3(UNPACK2(dispatch_size), 1)); + } +} + +void Film::end_sync() +{ + data_.use_reprojection = inst_.sampling.interactive_mode(); + + /* Just bypass the reprojection and reset the accumulation. 
*/ + if (force_disable_reprojection_ && inst_.sampling.is_reset()) { + data_.use_reprojection = false; + data_.use_history = false; + } + + aovs_info.push_update(); + + sync_mist(); +} + +float2 Film::pixel_jitter_get() const +{ + float2 jitter = inst_.sampling.rng_2d_get(SAMPLING_FILTER_U); + + if (!use_box_filter && data_.filter_radius < M_SQRT1_2 && !inst_.camera.is_panoramic()) { + /* For filter size less than a pixel, change sampling strategy and use a uniform disk + * distribution covering the filter shape. This avoids putting samples in areas without any + * weights. */ + /* TODO(fclem): Importance sampling could be a better option here. */ + jitter = Sampling::sample_disk(jitter) * data_.filter_radius; + } + else { + /* Jitter the size of a whole pixel. [-0.5..0.5] */ + jitter -= 0.5f; + } + /* TODO(fclem): Mixed-resolution rendering: We need to offset to each of the target pixel covered + * by a render pixel, ideally, by choosing one randomly using another sampling dimension, or by + * repeating the same sample RNG sequence for each pixel offset. */ + return jitter; +} + +eViewLayerEEVEEPassType Film::enabled_passes_get() const +{ + if (inst_.is_viewport() && data_.use_reprojection) { + /* Enable motion vector rendering but not the accumulation buffer. */ + return enabled_passes_ | EEVEE_RENDER_PASS_VECTOR; + } + return enabled_passes_; +} + +int Film::cryptomatte_layer_len_get() const +{ + int result = 0; + result += data_.cryptomatte_object_id == -1 ? 0 : 1; + result += data_.cryptomatte_asset_id == -1 ? 0 : 1; + result += data_.cryptomatte_material_id == -1 ? 
0 : 1; + return result; +} + +int Film::cryptomatte_layer_max_get() const +{ + if (data_.cryptomatte_material_id != -1) { + return 3; + } + if (data_.cryptomatte_asset_id != -1) { + return 2; + } + if (data_.cryptomatte_object_id != -1) { + return 1; + } + return 0; +} + +void Film::update_sample_table() +{ + data_.subpixel_offset = pixel_jitter_get(); + + int filter_radius_ceil = ceilf(data_.filter_radius); + float filter_radius_sqr = square_f(data_.filter_radius); + + data_.samples_len = 0; + if (use_box_filter || data_.filter_radius < 0.01f) { + /* Disable gather filtering. */ + data_.samples[0].texel = int2(0, 0); + data_.samples[0].weight = 1.0f; + data_.samples_weight_total = 1.0f; + data_.samples_len = 1; + } + /* NOTE: Threshold determined by hand until we don't hit the assert below. */ + else if (data_.filter_radius < 2.20f) { + /* Small filter Size. */ + int closest_index = 0; + float closest_distance = FLT_MAX; + data_.samples_weight_total = 0.0f; + /* TODO(fclem): For optimization, could try Z-tile ordering. */ + for (int y = -filter_radius_ceil; y <= filter_radius_ceil; y++) { + for (int x = -filter_radius_ceil; x <= filter_radius_ceil; x++) { + float2 pixel_offset = float2(x, y) - data_.subpixel_offset; + float distance_sqr = math::length_squared(pixel_offset); + if (distance_sqr < filter_radius_sqr) { + if (data_.samples_len >= FILM_PRECOMP_SAMPLE_MAX) { + BLI_assert_msg(0, "Precomputed sample table is too small."); + break; + } + FilmSample &sample = data_.samples[data_.samples_len]; + sample.texel = int2(x, y); + sample.weight = film_filter_weight(data_.filter_radius, distance_sqr); + data_.samples_weight_total += sample.weight; + + if (distance_sqr < closest_distance) { + closest_distance = distance_sqr; + closest_index = data_.samples_len; + } + data_.samples_len++; + } + } + } + /* Put the closest one in first position. 
*/ + if (closest_index != 0) { + SWAP(FilmSample, data_.samples[closest_index], data_.samples[0]); + } + } + else { + /* Large Filter Size. */ + MutableSpan<FilmSample> sample_table(data_.samples, FILM_PRECOMP_SAMPLE_MAX); + /* To avoid hitting driver TDR and slowing rendering too much we use random sampling. */ + /* TODO(fclem): This case needs more work. We could distribute the samples better to avoid + * loading the same pixel twice. */ + data_.samples_len = sample_table.size(); + data_.samples_weight_total = 0.0f; + + int i = 0; + for (FilmSample &sample : sample_table) { + /* TODO(fclem): Own RNG. */ + float2 random_2d = inst_.sampling.rng_2d_get(SAMPLING_SSS_U); + /* This randomization makes sure we converge to the right result but also makes nearest + * neighbor filtering not converging rapidly. */ + random_2d.x = (random_2d.x + i) / float(FILM_PRECOMP_SAMPLE_MAX); + + float2 pixel_offset = math::floor(Sampling::sample_spiral(random_2d) * data_.filter_radius); + sample.texel = int2(pixel_offset); + + float distance_sqr = math::length_squared(pixel_offset - data_.subpixel_offset); + sample.weight = film_filter_weight(data_.filter_radius, distance_sqr); + data_.samples_weight_total += sample.weight; + i++; + } + } +} + +void Film::accumulate(const DRWView *view, GPUTexture *combined_final_tx) +{ + if (inst_.is_viewport()) { + DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get(); + DefaultTextureList *dtxl = DRW_viewport_texture_list_get(); + GPU_framebuffer_bind(dfbl->default_fb); + /* Clear when using render borders. 
*/ + if (data_.extent != int2(GPU_texture_width(dtxl->color), GPU_texture_height(dtxl->color))) { + float4 clear_color = {0.0f, 0.0f, 0.0f, 0.0f}; + GPU_framebuffer_clear_color(dfbl->default_fb, clear_color); + } + GPU_framebuffer_viewport_set(dfbl->default_fb, UNPACK2(data_.offset), UNPACK2(data_.extent)); + } + + update_sample_table(); + + combined_final_tx_ = combined_final_tx; + + data_.display_only = false; + data_.push_update(); + + draw::View drw_view("MainView", view); + + DRW_manager_get()->submit(accumulate_ps_, drw_view); + + combined_tx_.swap(); + weight_tx_.swap(); + + /* Use history after first sample. */ + if (data_.use_history == 0) { + data_.use_history = 1; + } +} + +void Film::display() +{ + BLI_assert(inst_.is_viewport()); + + /* Acquire dummy render buffers for correct binding. They will not be used. */ + inst_.render_buffers.acquire(int2(1)); + + DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get(); + GPU_framebuffer_bind(dfbl->default_fb); + GPU_framebuffer_viewport_set(dfbl->default_fb, UNPACK2(data_.offset), UNPACK2(data_.extent)); + + combined_final_tx_ = inst_.render_buffers.combined_tx; + + data_.display_only = true; + data_.push_update(); + + draw::View drw_view("MainView", DRW_view_default_get()); + + DRW_manager_get()->submit(accumulate_ps_, drw_view); + + inst_.render_buffers.release(); + + /* IMPORTANT: Do not swap! No accumulation has happened. */ +} + +void Film::cryptomatte_sort() +{ + DRW_manager_get()->submit(cryptomatte_post_ps_); +} + +float *Film::read_pass(eViewLayerEEVEEPassType pass_type, int layer_offset) +{ + ePassStorageType storage_type = pass_storage_type(pass_type); + const bool is_value = storage_type == PASS_STORAGE_VALUE; + const bool is_cryptomatte = storage_type == PASS_STORAGE_CRYPTOMATTE; + + Texture &accum_tx = (pass_type == EEVEE_RENDER_PASS_COMBINED) ? + combined_tx_.current() : + (pass_type == EEVEE_RENDER_PASS_Z) ? + depth_tx_ : + (is_cryptomatte ? cryptomatte_tx_ : + (is_value ? 
value_accum_tx_ : color_accum_tx_)); + + accum_tx.ensure_layer_views(); + + int index = pass_id_get(pass_type); + GPUTexture *pass_tx = accum_tx.layer_view(index + layer_offset); + + GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE); + + float *result = (float *)GPU_texture_read(pass_tx, GPU_DATA_FLOAT, 0); + + if (pass_is_float3(pass_type)) { + /* Convert result in place as we cannot do this conversion on GPU. */ + for (auto px : IndexRange(accum_tx.width() * accum_tx.height())) { + *(reinterpret_cast<float3 *>(result) + px) = *(reinterpret_cast<float3 *>(result + px * 4)); + } + } + + return result; +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_film.hh b/source/blender/draw/engines/eevee_next/eevee_film.hh new file mode 100644 index 00000000000..5478c20aff2 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_film.hh @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * The film class handles accumulation of samples with any distorted camera_type + * using a pixel filter. Inputs needs to be jittered so that the filter converges to the right + * result. + * + * In viewport, we switch between 2 accumulation mode depending on the scene state. + * - For static scene, we use a classic weighted accumulation. + * - For dynamic scene (if an update is detected), we use a more temporally stable accumulation + * following the Temporal Anti-Aliasing method (a.k.a. Temporal Super-Sampling). This does + * history reprojection and rectification to avoid most of the flickering. + */ + +#pragma once + +#include "DRW_render.h" + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Film + * \{ */ + +class Film { + public: + /** Stores indirection table of AOVs based on their name hash and their type. 
*/ + AOVsInfoDataBuf aovs_info; + /** For debugging purpose but could be a user option in the future. */ + static constexpr bool use_box_filter = false; + + private: + Instance &inst_; + + /** Incoming combined buffer with post FX applied (motion blur + depth of field). */ + GPUTexture *combined_final_tx_ = nullptr; + + /** + * Main accumulation textures containing every render-pass except depth, cryptomatte and + * combined. + */ + Texture color_accum_tx_; + Texture value_accum_tx_; + /** Depth accumulation texture. Separated because using a different format. */ + Texture depth_tx_; + /** Cryptomatte texture. Separated because it requires full floats. */ + Texture cryptomatte_tx_; + /** Combined "Color" buffer. Double buffered to allow re-projection. */ + SwapChain<Texture, 2> combined_tx_; + /** Weight buffers. Double buffered to allow updating it during accumulation. */ + SwapChain<Texture, 2> weight_tx_; + /** User setting to disable reprojection. Useful for debugging or have a more precise render. */ + bool force_disable_reprojection_ = false; + + PassSimple accumulate_ps_ = {"Film.Accumulate"}; + PassSimple cryptomatte_post_ps_ = {"Film.Cryptomatte.Post"}; + + FilmDataBuf data_; + + eViewLayerEEVEEPassType enabled_passes_ = eViewLayerEEVEEPassType(0); + + public: + Film(Instance &inst) : inst_(inst){}; + ~Film(){}; + + void init(const int2 &full_extent, const rcti *output_rect); + + void sync(); + void end_sync(); + + /** Accumulate the newly rendered sample contained in #RenderBuffers and blit to display. */ + void accumulate(const DRWView *view, GPUTexture *combined_final_tx); + + /** Sort and normalize cryptomatte samples. */ + void cryptomatte_sort(); + + /** Blit to display. No rendered sample needed. */ + void display(); + + float *read_pass(eViewLayerEEVEEPassType pass_type, int layer_offset); + float *read_aov(ViewLayerAOV *aov); + + /** Returns shading views internal resolution. 
*/ + int2 render_extent_get() const + { + return data_.render_extent; + } + + float2 pixel_jitter_get() const; + + float background_opacity_get() const + { + return data_.background_opacity; + } + + eViewLayerEEVEEPassType enabled_passes_get() const; + int cryptomatte_layer_max_get() const; + int cryptomatte_layer_len_get() const; + + static ePassStorageType pass_storage_type(eViewLayerEEVEEPassType pass_type) + { + switch (pass_type) { + case EEVEE_RENDER_PASS_Z: + case EEVEE_RENDER_PASS_MIST: + case EEVEE_RENDER_PASS_SHADOW: + case EEVEE_RENDER_PASS_AO: + return PASS_STORAGE_VALUE; + case EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT: + case EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET: + case EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL: + return PASS_STORAGE_CRYPTOMATTE; + default: + return PASS_STORAGE_COLOR; + } + } + + static bool pass_is_float3(eViewLayerEEVEEPassType pass_type) + { + switch (pass_type) { + case EEVEE_RENDER_PASS_NORMAL: + case EEVEE_RENDER_PASS_DIFFUSE_LIGHT: + case EEVEE_RENDER_PASS_DIFFUSE_COLOR: + case EEVEE_RENDER_PASS_SPECULAR_LIGHT: + case EEVEE_RENDER_PASS_SPECULAR_COLOR: + case EEVEE_RENDER_PASS_VOLUME_LIGHT: + case EEVEE_RENDER_PASS_EMIT: + case EEVEE_RENDER_PASS_ENVIRONMENT: + return true; + default: + return false; + } + } + + /* Returns layer offset in the accumulation texture. -1 if the pass is not enabled. 
*/ + int pass_id_get(eViewLayerEEVEEPassType pass_type) const + { + switch (pass_type) { + case EEVEE_RENDER_PASS_COMBINED: + return data_.combined_id; + case EEVEE_RENDER_PASS_Z: + return data_.depth_id; + case EEVEE_RENDER_PASS_MIST: + return data_.mist_id; + case EEVEE_RENDER_PASS_NORMAL: + return data_.normal_id; + case EEVEE_RENDER_PASS_DIFFUSE_LIGHT: + return data_.diffuse_light_id; + case EEVEE_RENDER_PASS_DIFFUSE_COLOR: + return data_.diffuse_color_id; + case EEVEE_RENDER_PASS_SPECULAR_LIGHT: + return data_.specular_light_id; + case EEVEE_RENDER_PASS_SPECULAR_COLOR: + return data_.specular_color_id; + case EEVEE_RENDER_PASS_VOLUME_LIGHT: + return data_.volume_light_id; + case EEVEE_RENDER_PASS_EMIT: + return data_.emission_id; + case EEVEE_RENDER_PASS_ENVIRONMENT: + return data_.environment_id; + case EEVEE_RENDER_PASS_SHADOW: + return data_.shadow_id; + case EEVEE_RENDER_PASS_AO: + return data_.ambient_occlusion_id; + case EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT: + return data_.cryptomatte_object_id; + case EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET: + return data_.cryptomatte_asset_id; + case EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL: + return data_.cryptomatte_material_id; + case EEVEE_RENDER_PASS_VECTOR: + return data_.vector_id; + default: + return -1; + } + } + + static const Vector<std::string> pass_to_render_pass_names(eViewLayerEEVEEPassType pass_type, + const ViewLayer *view_layer) + { + Vector<std::string> result; + + auto build_cryptomatte_passes = [&](const char *pass_name) { + const int num_cryptomatte_passes = (view_layer->cryptomatte_levels + 1) / 2; + for (int pass = 0; pass < num_cryptomatte_passes; pass++) { + std::stringstream ss; + ss.fill('0'); + ss << pass_name; + ss.width(2); + ss << pass; + result.append(ss.str()); + } + }; + + switch (pass_type) { + case EEVEE_RENDER_PASS_COMBINED: + result.append(RE_PASSNAME_COMBINED); + break; + case EEVEE_RENDER_PASS_Z: + result.append(RE_PASSNAME_Z); + break; + case EEVEE_RENDER_PASS_MIST: + 
result.append(RE_PASSNAME_MIST); + break; + case EEVEE_RENDER_PASS_NORMAL: + result.append(RE_PASSNAME_NORMAL); + break; + case EEVEE_RENDER_PASS_DIFFUSE_LIGHT: + result.append(RE_PASSNAME_DIFFUSE_DIRECT); + break; + case EEVEE_RENDER_PASS_DIFFUSE_COLOR: + result.append(RE_PASSNAME_DIFFUSE_COLOR); + break; + case EEVEE_RENDER_PASS_SPECULAR_LIGHT: + result.append(RE_PASSNAME_GLOSSY_DIRECT); + break; + case EEVEE_RENDER_PASS_SPECULAR_COLOR: + result.append(RE_PASSNAME_GLOSSY_COLOR); + break; + case EEVEE_RENDER_PASS_VOLUME_LIGHT: + result.append(RE_PASSNAME_VOLUME_LIGHT); + break; + case EEVEE_RENDER_PASS_EMIT: + result.append(RE_PASSNAME_EMIT); + break; + case EEVEE_RENDER_PASS_ENVIRONMENT: + result.append(RE_PASSNAME_ENVIRONMENT); + break; + case EEVEE_RENDER_PASS_SHADOW: + result.append(RE_PASSNAME_SHADOW); + break; + case EEVEE_RENDER_PASS_AO: + result.append(RE_PASSNAME_AO); + break; + case EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT: + build_cryptomatte_passes(RE_PASSNAME_CRYPTOMATTE_OBJECT); + break; + case EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET: + build_cryptomatte_passes(RE_PASSNAME_CRYPTOMATTE_ASSET); + break; + case EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL: + build_cryptomatte_passes(RE_PASSNAME_CRYPTOMATTE_MATERIAL); + break; + case EEVEE_RENDER_PASS_VECTOR: + result.append(RE_PASSNAME_VECTOR); + break; + default: + BLI_assert(0); + break; + } + return result; + } + + private: + void init_aovs(); + void sync_mist(); + + /** + * Precompute sample weights if they are uniform across the whole film extent. + */ + void update_sample_table(); +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc new file mode 100644 index 00000000000..cf9049da514 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.cc @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
+ */ + +#include "BKE_global.h" + +#include "eevee_instance.hh" + +#include "eevee_hizbuffer.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name Hierarchical-Z buffer + * + * \{ */ + +void HiZBuffer::sync() +{ + RenderBuffers &render_buffers = inst_.render_buffers; + + int2 render_extent = inst_.film.render_extent_get(); + /* Padding to avoid complexity during down-sampling and screen tracing. */ + int2 hiz_extent = math::ceil_to_multiple(render_extent, int2(1u << (HIZ_MIP_COUNT - 1))); + int2 dispatch_size = math::divide_ceil(hiz_extent, int2(HIZ_GROUP_SIZE)); + + hiz_tx_.ensure_2d(GPU_R32F, hiz_extent, nullptr, HIZ_MIP_COUNT); + hiz_tx_.ensure_mip_views(); + GPU_texture_mipmap_mode(hiz_tx_, true, false); + + data_.uv_scale = float2(render_extent) / float2(hiz_extent); + data_.push_update(); + + { + hiz_update_ps_.init(); + hiz_update_ps_.shader_set(inst_.shaders.static_shader_get(HIZ_UPDATE)); + hiz_update_ps_.bind_ssbo("finished_tile_counter", atomic_tile_counter_); + hiz_update_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, with_filter); + hiz_update_ps_.bind_image("out_mip_0", hiz_tx_.mip_view(0)); + hiz_update_ps_.bind_image("out_mip_1", hiz_tx_.mip_view(1)); + hiz_update_ps_.bind_image("out_mip_2", hiz_tx_.mip_view(2)); + hiz_update_ps_.bind_image("out_mip_3", hiz_tx_.mip_view(3)); + hiz_update_ps_.bind_image("out_mip_4", hiz_tx_.mip_view(4)); + hiz_update_ps_.bind_image("out_mip_5", hiz_tx_.mip_view(5)); + hiz_update_ps_.bind_image("out_mip_6", hiz_tx_.mip_view(6)); + hiz_update_ps_.bind_image("out_mip_7", hiz_tx_.mip_view(7)); + /* TODO(@fclem): There might be occasions where we might not want to + * copy mip 0 for performance reasons if there is no need for it. 
*/ + hiz_update_ps_.push_constant("update_mip_0", true); + hiz_update_ps_.dispatch(int3(dispatch_size, 1)); + hiz_update_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH); + } + + if (inst_.debug_mode == eDebugMode::DEBUG_HIZ_VALIDATION) { + debug_draw_ps_.init(); + debug_draw_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM); + debug_draw_ps_.shader_set(inst_.shaders.static_shader_get(HIZ_DEBUG)); + this->bind_resources(&debug_draw_ps_); + debug_draw_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3); + } +} + +void HiZBuffer::update() +{ + if (!is_dirty_) { + return; + } + + /* Bind another framebuffer in order to avoid triggering the feedback loop check. + * This is safe because we only use compute shaders in this section of the code. + * Ideally the check should be smarter. */ + GPUFrameBuffer *fb = GPU_framebuffer_active_get(); + if (G.debug & G_DEBUG_GPU) { + GPU_framebuffer_restore(); + } + + inst_.manager->submit(hiz_update_ps_); + + if (G.debug & G_DEBUG_GPU) { + GPU_framebuffer_bind(fb); + } +} + +void HiZBuffer::debug_draw(View &view, GPUFrameBuffer *view_fb) +{ + if (inst_.debug_mode == eDebugMode::DEBUG_HIZ_VALIDATION) { + inst_.info = + "Debug Mode: HiZ Validation\n" + " - Red: pixel in front of HiZ tile value.\n" + " - Blue: No error."; + inst_.hiz_buffer.update(); + GPU_framebuffer_bind(view_fb); + inst_.manager->submit(debug_draw_ps_, view); + } +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh new file mode 100644 index 00000000000..8b8e4de55b1 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_hizbuffer.hh @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * The Hierarchical-Z buffer is texture containing a copy of the depth buffer with mipmaps. + * Each mip contains the maximum depth of each 4 pixels on the upper level. 
+ * The size of the texture is padded to avoid messing with the mipmap pixels alignments. + */ + +#pragma once + +#include "DRW_render.h" + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Hierarchical-Z buffer + * \{ */ + +class HiZBuffer { + private: + Instance &inst_; + + /** The texture containing the hiz mip chain. */ + Texture hiz_tx_ = {"hiz_tx_"}; + /** + * Atomic counter counting the number of tile that have finished down-sampling. + * The last one will process the last few mip level. + */ + draw::StorageBuffer<uint4, true> atomic_tile_counter_ = {"atomic_tile_counter"}; + /** Single pass recursive downsample. */ + PassSimple hiz_update_ps_ = {"HizUpdate"}; + /** Debug pass. */ + PassSimple debug_draw_ps_ = {"HizUpdate.Debug"}; + /** Dirty flag to check if the update is necessary. */ + bool is_dirty_ = true; + + HiZDataBuf data_; + + public: + HiZBuffer(Instance &inst) : inst_(inst) + { + atomic_tile_counter_.clear_to_zero(); + }; + + void sync(); + + /** + * Tag the buffer for update if needed. + */ + void set_dirty() + { + is_dirty_ = true; + } + + /** + * Update the content of the HiZ buffer with the depth render target. + * Noop if the buffer has not been tagged as dirty. + * Should be called before each passes that needs to read the hiz buffer. + */ + void update(); + + void debug_draw(View &view, GPUFrameBuffer *view_fb); + + void bind_resources(DRWShadingGroup *grp) + { + DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", &hiz_tx_); + DRW_shgroup_uniform_block_ref(grp, "hiz_buf", &data_); + } + + /* TODO(fclem): Hardcoded bind slots. 
*/ + template<typename T> void bind_resources(draw::detail::PassBase<T> *pass) + { + pass->bind_texture("hiz_tx", &hiz_tx_); + pass->bind_ubo("hiz_buf", &data_); + } +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc index 606630bcdef..8005b27c30e 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.cc +++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc @@ -17,6 +17,7 @@ #include "DNA_ID.h" #include "DNA_lightprobe_types.h" #include "DNA_modifier_types.h" +#include "RE_pipeline.h" #include "eevee_instance.hh" @@ -43,7 +44,7 @@ void Instance::init(const int2 &output_res, const View3D *v3d_, const RegionView3D *rv3d_) { - UNUSED_VARS(light_probe_, output_rect); + UNUSED_VARS(light_probe_); render = render_; depsgraph = depsgraph_; camera_orig_object = camera_object_; @@ -51,12 +52,23 @@ void Instance::init(const int2 &output_res, drw_view = drw_view_; v3d = v3d_; rv3d = rv3d_; + manager = DRW_manager_get(); + + if (assign_if_different(debug_mode, (eDebugMode)G.debug_value)) { + sampling.reset(); + } info = ""; update_eval_members(); - main_view.init(output_res); + sampling.init(scene); + camera.init(); + film.init(output_res, output_rect); + velocity.init(); + depth_of_field.init(); + motion_blur.init(); + main_view.init(); } void Instance::set_time(float time) @@ -88,16 +100,24 @@ void Instance::update_eval_members() void Instance::begin_sync() { materials.begin_sync(); - velocity.begin_sync(); + velocity.begin_sync(); /* NOTE: Also syncs camera. 
*/ + lights.begin_sync(); + cryptomatte.begin_sync(); + gpencil_engine_enabled = false; + + depth_of_field.sync(); + motion_blur.sync(); + hiz_buffer.sync(); pipelines.sync(); main_view.sync(); world.sync(); + film.sync(); } void Instance::object_sync(Object *ob) { - const bool is_renderable_type = ELEM(ob->type, OB_CURVES, OB_GPENCIL, OB_MESH); + const bool is_renderable_type = ELEM(ob->type, OB_CURVES, OB_GPENCIL, OB_MESH, OB_LAMP); const int ob_visibility = DRW_object_visibility_in_active_context(ob); const bool partsys_is_visible = (ob_visibility & OB_VISIBLE_PARTICLES) != 0 && (ob->type == OB_MESH); @@ -108,12 +128,16 @@ void Instance::object_sync(Object *ob) return; } + /* TODO cleanup. */ + ObjectRef ob_ref = DRW_object_ref_get(ob); + ResourceHandle res_handle = manager->resource_handle(ob_ref); + ObjectHandle &ob_handle = sync.sync_object(ob); if (partsys_is_visible && ob != DRW_context_state_get()->object_edit) { LISTBASE_FOREACH (ModifierData *, md, &ob->modifiers) { if (md->type == eModifierType_ParticleSystem) { - sync.sync_curves(ob, ob_handle, md); + sync.sync_curves(ob, ob_handle, res_handle, md); } } } @@ -121,22 +145,18 @@ void Instance::object_sync(Object *ob) if (object_is_visible) { switch (ob->type) { case OB_LAMP: + lights.sync_light(ob, ob_handle); break; case OB_MESH: - case OB_CURVES_LEGACY: - case OB_SURF: - case OB_FONT: - case OB_MBALL: { - sync.sync_mesh(ob, ob_handle); + sync.sync_mesh(ob, ob_handle, res_handle, ob_ref); break; - } case OB_VOLUME: break; case OB_CURVES: - sync.sync_curves(ob, ob_handle); + sync.sync_curves(ob, ob_handle, res_handle); break; case OB_GPENCIL: - sync.sync_gpencil(ob, ob_handle); + sync.sync_gpencil(ob, ob_handle, res_handle); break; default: break; @@ -146,13 +166,38 @@ void Instance::object_sync(Object *ob) ob_handle.reset_recalc_flag(); } +/* Wrapper to use with DRW_render_object_iter. 
*/ +void Instance::object_sync_render(void *instance_, + Object *ob, + RenderEngine *engine, + Depsgraph *depsgraph) +{ + UNUSED_VARS(engine, depsgraph); + Instance &inst = *reinterpret_cast<Instance *>(instance_); + inst.object_sync(ob); +} + void Instance::end_sync() { velocity.end_sync(); + lights.end_sync(); + sampling.end_sync(); + film.end_sync(); + cryptomatte.end_sync(); } void Instance::render_sync() { + DRW_cache_restart(); + + begin_sync(); + DRW_render_object_iter(this, render, depsgraph, object_sync_render); + end_sync(); + + DRW_render_instance_buffer_finish(); + /* Also we weed to have a correct FBO bound for #DRW_hair_update */ + // GPU_framebuffer_bind(); + // DRW_hair_update(); } /** \} */ @@ -167,7 +212,65 @@ void Instance::render_sync() **/ void Instance::render_sample() { + if (sampling.finished_viewport()) { + film.display(); + return; + } + + /* Motion blur may need to do re-sync after a certain number of sample. */ + if (!is_viewport() && sampling.do_render_sync()) { + render_sync(); + } + + sampling.step(); + main_view.render(); + + motion_blur.step(); +} + +void Instance::render_read_result(RenderLayer *render_layer, const char *view_name) +{ + eViewLayerEEVEEPassType pass_bits = film.enabled_passes_get(); + for (auto i : IndexRange(EEVEE_RENDER_PASS_MAX_BIT)) { + eViewLayerEEVEEPassType pass_type = eViewLayerEEVEEPassType(pass_bits & (1 << i)); + if (pass_type == 0) { + continue; + } + + Vector<std::string> pass_names = Film::pass_to_render_pass_names(pass_type, view_layer); + for (int64_t pass_offset : IndexRange(pass_names.size())) { + RenderPass *rp = RE_pass_find_by_name( + render_layer, pass_names[pass_offset].c_str(), view_name); + if (!rp) { + continue; + } + float *result = film.read_pass(pass_type, pass_offset); + + if (result) { + BLI_mutex_lock(&render->update_render_passes_mutex); + /* WORKAROUND: We use texture read to avoid using a framebuffer to get the render result. 
+ * However, on some implementation, we need a buffer with a few extra bytes for the read to + * happen correctly (see GLTexture::read()). So we need a custom memory allocation. */ + /* Avoid memcpy(), replace the pointer directly. */ + MEM_SAFE_FREE(rp->rect); + rp->rect = result; + BLI_mutex_unlock(&render->update_render_passes_mutex); + } + } + } + + /* The vector pass is initialized to weird values. Set it to neutral value if not rendered. */ + if ((pass_bits & EEVEE_RENDER_PASS_VECTOR) == 0) { + for (std::string vector_pass_name : + Film::pass_to_render_pass_names(EEVEE_RENDER_PASS_VECTOR, view_layer)) { + RenderPass *vector_rp = RE_pass_find_by_name( + render_layer, vector_pass_name.c_str(), view_name); + if (vector_rp) { + memset(vector_rp->rect, 0, sizeof(float) * 4 * vector_rp->rectx * vector_rp->recty); + } + } + } } /** \} */ @@ -178,7 +281,28 @@ void Instance::render_sample() void Instance::render_frame(RenderLayer *render_layer, const char *view_name) { - UNUSED_VARS(render_layer, view_name); + while (!sampling.finished()) { + this->render_sample(); + + /* TODO(fclem) print progression. */ +#if 0 + /* TODO(fclem): Does not currently work. But would be better to just display to 2D view like + * cycles does. */ + if (G.background == false && first_read) { + /* Allow to preview the first sample. */ + /* TODO(fclem): Might want to not do this during animation render to avoid too much stall. */ + this->render_read_result(render_layer, view_name); + first_read = false; + DRW_render_context_disable(render->re); + /* Allow the 2D viewport to grab the ticket mutex to display the render. 
*/ + DRW_render_context_enable(render->re); + } +#endif + } + + this->film.cryptomatte_sort(); + + this->render_read_result(render_layer, view_name); } void Instance::draw_viewport(DefaultFramebufferList *dfbl) @@ -187,6 +311,13 @@ void Instance::draw_viewport(DefaultFramebufferList *dfbl) render_sample(); velocity.step_swap(); + /* Do not request redraw during viewport animation to lock the framerate to the animation + * playback rate. This is in order to preserve motion blur aspect and also to avoid TAA reset + * that can show flickering. */ + if (!sampling.finished_viewport() && !DRW_state_is_playback()) { + DRW_viewport_request_redraw(); + } + if (materials.queued_shaders_count > 0) { std::stringstream ss; ss << "Compiling Shaders " << materials.queued_shaders_count; @@ -194,6 +325,76 @@ void Instance::draw_viewport(DefaultFramebufferList *dfbl) } } +void Instance::store_metadata(RenderResult *render_result) +{ + cryptomatte.store_metadata(render_result); +} + +void Instance::update_passes(RenderEngine *engine, Scene *scene, ViewLayer *view_layer) +{ + RE_engine_register_pass(engine, scene, view_layer, RE_PASSNAME_COMBINED, 4, "RGBA", SOCK_RGBA); + +#define CHECK_PASS_LEGACY(name, type, channels, chanid) \ + if (view_layer->passflag & (SCE_PASS_##name)) { \ + RE_engine_register_pass( \ + engine, scene, view_layer, RE_PASSNAME_##name, channels, chanid, type); \ + } \ + ((void)0) +#define CHECK_PASS_EEVEE(name, type, channels, chanid) \ + if (view_layer->eevee.render_passes & (EEVEE_RENDER_PASS_##name)) { \ + RE_engine_register_pass( \ + engine, scene, view_layer, RE_PASSNAME_##name, channels, chanid, type); \ + } \ + ((void)0) + + CHECK_PASS_LEGACY(Z, SOCK_FLOAT, 1, "Z"); + CHECK_PASS_LEGACY(MIST, SOCK_FLOAT, 1, "Z"); + CHECK_PASS_LEGACY(NORMAL, SOCK_VECTOR, 3, "XYZ"); + CHECK_PASS_LEGACY(DIFFUSE_DIRECT, SOCK_RGBA, 3, "RGB"); + CHECK_PASS_LEGACY(DIFFUSE_COLOR, SOCK_RGBA, 3, "RGB"); + CHECK_PASS_LEGACY(GLOSSY_DIRECT, SOCK_RGBA, 3, "RGB"); + 
CHECK_PASS_LEGACY(GLOSSY_COLOR, SOCK_RGBA, 3, "RGB"); + CHECK_PASS_EEVEE(VOLUME_LIGHT, SOCK_RGBA, 3, "RGB"); + CHECK_PASS_LEGACY(EMIT, SOCK_RGBA, 3, "RGB"); + CHECK_PASS_LEGACY(ENVIRONMENT, SOCK_RGBA, 3, "RGB"); + /* TODO: CHECK_PASS_LEGACY(SHADOW, SOCK_RGBA, 3, "RGB"); + * CHECK_PASS_LEGACY(AO, SOCK_RGBA, 3, "RGB"); + * When available they should be converted from Value textures to RGB. */ + + LISTBASE_FOREACH (ViewLayerAOV *, aov, &view_layer->aovs) { + if ((aov->flag & AOV_CONFLICT) != 0) { + continue; + } + switch (aov->type) { + case AOV_TYPE_COLOR: + RE_engine_register_pass(engine, scene, view_layer, aov->name, 4, "RGBA", SOCK_RGBA); + break; + case AOV_TYPE_VALUE: + RE_engine_register_pass(engine, scene, view_layer, aov->name, 1, "X", SOCK_FLOAT); + break; + default: + break; + } + } + + /* NOTE: Name channels lowercase `rgba` so that compression rules check in OpenEXR DWA code uses + * lossless compression. Reportedly this naming is the only one which works good from the + * interoperability point of view. Using `xyzw` naming is not portable. 
*/ + auto register_cryptomatte_passes = [&](eViewLayerCryptomatteFlags cryptomatte_layer, + eViewLayerEEVEEPassType eevee_pass) { + if (view_layer->cryptomatte_flag & cryptomatte_layer) { + for (std::string pass_name : Film::pass_to_render_pass_names(eevee_pass, view_layer)) { + RE_engine_register_pass( + engine, scene, view_layer, pass_name.c_str(), 4, "rgba", SOCK_RGBA); + } + } + }; + register_cryptomatte_passes(VIEW_LAYER_CRYPTOMATTE_OBJECT, EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT); + register_cryptomatte_passes(VIEW_LAYER_CRYPTOMATTE_ASSET, EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET); + register_cryptomatte_passes(VIEW_LAYER_CRYPTOMATTE_MATERIAL, + EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL); +} + /** \} */ } // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.hh b/source/blender/draw/engines/eevee_next/eevee_instance.hh index 84be59fc5f0..c8eecbd812d 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.hh +++ b/source/blender/draw/engines/eevee_next/eevee_instance.hh @@ -16,8 +16,16 @@ #include "DRW_render.h" #include "eevee_camera.hh" +#include "eevee_cryptomatte.hh" +#include "eevee_depth_of_field.hh" +#include "eevee_film.hh" +#include "eevee_hizbuffer.hh" +#include "eevee_light.hh" #include "eevee_material.hh" +#include "eevee_motion_blur.hh" #include "eevee_pipeline.hh" +#include "eevee_renderbuffers.hh" +#include "eevee_sampling.hh" #include "eevee_shader.hh" #include "eevee_sync.hh" #include "eevee_view.hh" @@ -31,19 +39,29 @@ namespace blender::eevee { */ class Instance { friend VelocityModule; + friend MotionBlurModule; public: ShaderModule &shaders; SyncModule sync; MaterialModule materials; PipelineModule pipelines; + LightModule lights; VelocityModule velocity; + MotionBlurModule motion_blur; + DepthOfField depth_of_field; + Cryptomatte cryptomatte; + HiZBuffer hiz_buffer; + Sampling sampling; Camera camera; + Film film; + RenderBuffers render_buffers; MainView main_view; World world; /** Input data. 
*/ Depsgraph *depsgraph; + Manager *manager; /** Evaluated IDs. */ Scene *scene; ViewLayer *view_layer; @@ -57,8 +75,13 @@ class Instance { const View3D *v3d; const RegionView3D *rv3d; - /* Info string displayed at the top of the render / viewport. */ + /** True if the grease pencil engine might be running. */ + bool gpencil_engine_enabled; + + /** Info string displayed at the top of the render / viewport. */ std::string info = ""; + /** Debug mode from debug value. */ + eDebugMode debug_mode = eDebugMode::DEBUG_NONE; public: Instance() @@ -66,8 +89,16 @@ class Instance { sync(*this), materials(*this), pipelines(*this), + lights(*this), velocity(*this), + motion_blur(*this), + depth_of_field(*this), + cryptomatte(*this), + hiz_buffer(*this), + sampling(*this), camera(*this), + film(*this), + render_buffers(*this), main_view(*this), world(*this){}; ~Instance(){}; @@ -89,15 +120,23 @@ class Instance { void render_sync(); void render_frame(RenderLayer *render_layer, const char *view_name); + void store_metadata(RenderResult *render_result); void draw_viewport(DefaultFramebufferList *dfbl); - bool is_viewport(void) + static void update_passes(RenderEngine *engine, Scene *scene, ViewLayer *view_layer); + + bool is_viewport() const + { + return render == nullptr; + } + + bool overlays_enabled() const { - return !DRW_state_is_scene_render(); + return v3d && ((v3d->flag2 & V3D_HIDE_OVERLAYS) == 0); } - bool use_scene_lights(void) const + bool use_scene_lights() const { return (!v3d) || ((v3d->shading.type == OB_MATERIAL) && @@ -107,7 +146,7 @@ class Instance { } /* Light the scene using the selected HDRI in the viewport shading pop-over. 
*/ - bool use_studio_light(void) const + bool use_studio_light() const { return (v3d) && (((v3d->shading.type == OB_MATERIAL) && ((v3d->shading.flag & V3D_SHADING_SCENE_WORLD) == 0)) || @@ -116,7 +155,12 @@ class Instance { } private: + static void object_sync_render(void *instance_, + Object *ob, + RenderEngine *engine, + Depsgraph *depsgraph); void render_sample(); + void render_read_result(RenderLayer *render_layer, const char *view_name); void mesh_sync(Object *ob, ObjectHandle &ob_handle); diff --git a/source/blender/draw/engines/eevee_next/eevee_light.cc b/source/blender/draw/engines/eevee_next/eevee_light.cc new file mode 100644 index 00000000000..b60246fa3ab --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_light.cc @@ -0,0 +1,488 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * The light module manages light data buffers and light culling system. + */ + +#include "draw_debug.hh" + +#include "eevee_instance.hh" + +#include "eevee_light.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name LightData + * \{ */ + +static eLightType to_light_type(short blender_light_type, short blender_area_type) +{ + switch (blender_light_type) { + default: + case LA_LOCAL: + return LIGHT_POINT; + case LA_SUN: + return LIGHT_SUN; + case LA_SPOT: + return LIGHT_SPOT; + case LA_AREA: + return ELEM(blender_area_type, LA_AREA_DISK, LA_AREA_ELLIPSE) ? 
LIGHT_ELLIPSE : LIGHT_RECT; + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Light Object + * \{ */ + +void Light::sync(/* ShadowModule &shadows , */ const Object *ob, float threshold) +{ + const ::Light *la = (const ::Light *)ob->data; + float scale[3]; + + float max_power = max_fff(la->r, la->g, la->b) * fabsf(la->energy / 100.0f); + float surface_max_power = max_ff(la->diff_fac, la->spec_fac) * max_power; + float volume_max_power = la->volume_fac * max_power; + + float influence_radius_surface = attenuation_radius_get(la, threshold, surface_max_power); + float influence_radius_volume = attenuation_radius_get(la, threshold, volume_max_power); + + this->influence_radius_max = max_ff(influence_radius_surface, influence_radius_volume); + this->influence_radius_invsqr_surface = 1.0f / square_f(max_ff(influence_radius_surface, 1e-8f)); + this->influence_radius_invsqr_volume = 1.0f / square_f(max_ff(influence_radius_volume, 1e-8f)); + + this->color = float3(&la->r) * la->energy; + normalize_m4_m4_ex(this->object_mat.ptr(), ob->obmat, scale); + /* Make sure we have consistent handedness (in case of negatively scaled Z axis). 
*/ + float3 cross = math::cross(float3(this->_right), float3(this->_up)); + if (math::dot(cross, float3(this->_back)) < 0.0f) { + negate_v3(this->_up); + } + + shape_parameters_set(la, scale); + + float shape_power = shape_power_get(la); + float point_power = point_power_get(la); + this->diffuse_power = la->diff_fac * shape_power; + this->transmit_power = la->diff_fac * point_power; + this->specular_power = la->spec_fac * shape_power; + this->volume_power = la->volume_fac * point_power; + + eLightType new_type = to_light_type(la->type, la->area_shape); + if (this->type != new_type) { + /* shadow_discard_safe(shadows); */ + this->type = new_type; + } + +#if 0 + if (la->mode & LA_SHADOW) { + if (la->type == LA_SUN) { + if (this->shadow_id == LIGHT_NO_SHADOW) { + this->shadow_id = shadows.directionals.alloc(); + } + + ShadowDirectional &shadow = shadows.directionals[this->shadow_id]; + shadow.sync(this->object_mat, la->bias * 0.05f, 1.0f); + } + else { + float cone_aperture = DEG2RAD(360.0); + if (la->type == LA_SPOT) { + cone_aperture = min_ff(DEG2RAD(179.9), la->spotsize); + } + else if (la->type == LA_AREA) { + cone_aperture = DEG2RAD(179.9); + } + + if (this->shadow_id == LIGHT_NO_SHADOW) { + this->shadow_id = shadows.punctuals.alloc(); + } + + ShadowPunctual &shadow = shadows.punctuals[this->shadow_id]; + shadow.sync(this->type, + this->object_mat, + cone_aperture, + la->clipsta, + this->influence_radius_max, + la->bias * 0.05f); + } + } + else { + shadow_discard_safe(shadows); + } +#endif + + this->initialized = true; +} + +#if 0 +void Light::shadow_discard_safe(ShadowModule &shadows) +{ + if (shadow_id != LIGHT_NO_SHADOW) { + if (this->type != LIGHT_SUN) { + shadows.punctuals.free(shadow_id); + } + else { + shadows.directionals.free(shadow_id); + } + shadow_id = LIGHT_NO_SHADOW; + } +} +#endif + +/* Returns attenuation radius inverted & squared for easy bound checking inside the shader. 
*/ +float Light::attenuation_radius_get(const ::Light *la, float light_threshold, float light_power) +{ + if (la->type == LA_SUN) { + return (light_power > 1e-5f) ? 1e16f : 0.0f; + } + + if (la->mode & LA_CUSTOM_ATTENUATION) { + return la->att_dist; + } + /* Compute the distance (using the inverse square law) + * at which the light power reaches the light_threshold. */ + /* TODO take area light scale into account. */ + return sqrtf(light_power / light_threshold); +} + +void Light::shape_parameters_set(const ::Light *la, const float scale[3]) +{ + if (la->type == LA_AREA) { + float area_size_y = (ELEM(la->area_shape, LA_AREA_RECT, LA_AREA_ELLIPSE)) ? la->area_sizey : + la->area_size; + _area_size_x = max_ff(0.003f, la->area_size * scale[0] * 0.5f); + _area_size_y = max_ff(0.003f, area_size_y * scale[1] * 0.5f); + /* For volume point lighting. */ + radius_squared = max_ff(0.001f, hypotf(_area_size_x, _area_size_y) * 0.5f); + radius_squared = square_f(radius_squared); + } + else { + if (la->type == LA_SPOT) { + /* Spot size & blend */ + spot_size_inv[0] = scale[2] / scale[0]; + spot_size_inv[1] = scale[2] / scale[1]; + float spot_size = cosf(la->spotsize * 0.5f); + float spot_blend = (1.0f - spot_size) * la->spotblend; + _spot_mul = 1.0f / max_ff(1e-8f, spot_blend); + _spot_bias = -spot_size * _spot_mul; + spot_tan = tanf(min_ff(la->spotsize * 0.5f, M_PI_2 - 0.0001f)); + } + + if (la->type == LA_SUN) { + _area_size_x = tanf(min_ff(la->sun_angle, DEG2RADF(179.9f)) / 2.0f); + } + else { + _area_size_x = la->area_size; + } + _area_size_y = _area_size_x = max_ff(0.001f, _area_size_x); + radius_squared = square_f(_area_size_x); + } +} + +float Light::shape_power_get(const ::Light *la) +{ + /* Make illumination power constant */ + switch (la->type) { + case LA_AREA: { + float area = _area_size_x * _area_size_y; + float power = 1.0f / (area * 4.0f * float(M_PI)); + /* FIXME : Empirical, Fit cycles power */ + power *= 0.8f; + if (ELEM(la->area_shape, LA_AREA_DISK, 
LA_AREA_ELLIPSE)) { + /* Scale power to account for the lower area of the ellipse compared to the surrounding + * rectangle. */ + power *= 4.0f / M_PI; + } + return power; + } + case LA_SPOT: + case LA_LOCAL: { + return 1.0f / (4.0f * square_f(_radius) * float(M_PI * M_PI)); + } + default: + case LA_SUN: { + float power = 1.0f / (square_f(_radius) * float(M_PI)); + /* Make illumination power closer to cycles for bigger radii. Cycles uses a cos^3 term that + * we cannot reproduce so we account for that by scaling the light power. This function is + * the result of a rough manual fitting. */ + /* Simplification of: power *= 1 + r²/2 */ + power += 1.0f / (2.0f * M_PI); + + return power; + } + } +} + +float Light::point_power_get(const ::Light *la) +{ + /* Volume light is evaluated as point lights. Remove the shape power. */ + switch (la->type) { + case LA_AREA: { + /* Match cycles. Empirical fit... must correspond to some constant. */ + float power = 0.0792f * M_PI; + + /* This corrects for area light most representative point trick. The fit was found by + * reducing the average error compared to cycles. */ + float area = _area_size_x * _area_size_y; + float tmp = M_PI_2 / (M_PI_2 + sqrtf(area)); + /* Lerp between 1.0 and the limit (1 / pi). */ + power *= tmp + (1.0f - tmp) * M_1_PI; + + return power; + } + case LA_SPOT: + case LA_LOCAL: { + /* Match cycles. Empirical fit... must correspond to some constant. */ + return 0.0792f; + } + default: + case LA_SUN: { + return 1.0f; + } + } +} + +void Light::debug_draw() +{ +#ifdef DEBUG + drw_debug_sphere(_position, influence_radius_max, float4(0.8f, 0.3f, 0.0f, 1.0f)); +#endif +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name LightModule + * \{ */ + +void LightModule::begin_sync() +{ + use_scene_lights_ = inst_.use_scene_lights(); + + /* In begin_sync so it can be animated. 
*/ + if (assign_if_different(light_threshold_, max_ff(1e-16f, inst_.scene->eevee.light_threshold))) { + inst_.sampling.reset(); + } + + sun_lights_len_ = 0; + local_lights_len_ = 0; +} + +void LightModule::sync_light(const Object *ob, ObjectHandle &handle) +{ + if (use_scene_lights_ == false) { + return; + } + Light &light = light_map_.lookup_or_add_default(handle.object_key); + light.used = true; + if (handle.recalc != 0 || !light.initialized) { + light.sync(/* inst_.shadows, */ ob, light_threshold_); + } + sun_lights_len_ += int(light.type == LIGHT_SUN); + local_lights_len_ += int(light.type != LIGHT_SUN); +} + +void LightModule::end_sync() +{ + // ShadowModule &shadows = inst_.shadows; + + /* NOTE: We resize this buffer before removing deleted lights. */ + int lights_allocated = ceil_to_multiple_u(max_ii(light_map_.size(), 1), LIGHT_CHUNK); + light_buf_.resize(lights_allocated); + + /* Track light deletion. */ + Vector<ObjectKey, 0> deleted_keys; + /* Indices inside GPU data array. */ + int sun_lights_idx = 0; + int local_lights_idx = sun_lights_len_; + + /* Fill GPU data with scene data. */ + for (auto item : light_map_.items()) { + Light &light = item.value; + + if (!light.used) { + /* Deleted light. */ + deleted_keys.append(item.key); + // light.shadow_discard_safe(shadows); + continue; + } + + int dst_idx = (light.type == LIGHT_SUN) ? sun_lights_idx++ : local_lights_idx++; + /* Put all light data into global data SSBO. */ + light_buf_[dst_idx] = light; + +#if 0 + if (light.shadow_id != LIGHT_NO_SHADOW) { + if (light.type == LIGHT_SUN) { + light_buf_[dst_idx].shadow_data = shadows.directionals[light.shadow_id]; + } + else { + light_buf_[dst_idx].shadow_data = shadows.punctuals[light.shadow_id]; + } + } +#endif + /* Untag for next sync. */ + light.used = false; + } + /* This scene data buffer is then immutable after this point. 
*/ + light_buf_.push_update(); + + for (auto &key : deleted_keys) { + light_map_.remove(key); + } + + /* Update sampling on deletion or un-hiding (use_scene_lights). */ + if (assign_if_different(light_map_size_, light_map_.size())) { + inst_.sampling.reset(); + } + + /* If exceeding the limit, just trim off the excess to avoid glitchy rendering. */ + if (sun_lights_len_ + local_lights_len_ > CULLING_MAX_ITEM) { + sun_lights_len_ = min_ii(sun_lights_len_, CULLING_MAX_ITEM); + local_lights_len_ = min_ii(local_lights_len_, CULLING_MAX_ITEM - sun_lights_len_); + inst_.info = "Error: Too many lights in the scene."; + } + lights_len_ = sun_lights_len_ + local_lights_len_; + + /* Resize to the actual number of lights after pruning. */ + lights_allocated = ceil_to_multiple_u(max_ii(lights_len_, 1), LIGHT_CHUNK); + culling_key_buf_.resize(lights_allocated); + culling_zdist_buf_.resize(lights_allocated); + culling_light_buf_.resize(lights_allocated); + + { + /* Compute tile size and total word count. */ + uint word_per_tile = divide_ceil_u(max_ii(lights_len_, 1), 32); + int2 render_extent = inst_.film.render_extent_get(); + int2 tiles_extent; + /* Default to 32 as this is likely to be the maximum + * tile size used by hardware or compute shading. */ + uint tile_size = 16; + do { + tile_size *= 2; + tiles_extent = math::divide_ceil(render_extent, int2(tile_size)); + uint tile_count = tiles_extent.x * tiles_extent.y; + if (tile_count > max_tile_count_threshold) { + continue; + } + total_word_count_ = tile_count * word_per_tile; + + } while (total_word_count_ > max_word_count_threshold); + /* Keep aligned with storage buffer requirements. 
*/ + total_word_count_ = ceil_to_multiple_u(total_word_count_, 32); + + culling_data_buf_.tile_word_len = word_per_tile; + culling_data_buf_.tile_size = tile_size; + culling_data_buf_.tile_x_len = tiles_extent.x; + culling_data_buf_.tile_y_len = tiles_extent.y; + culling_data_buf_.items_count = lights_len_; + culling_data_buf_.local_lights_len = local_lights_len_; + culling_data_buf_.sun_lights_len = sun_lights_len_; + } + culling_tile_buf_.resize(total_word_count_); + + culling_pass_sync(); + debug_pass_sync(); +} + +void LightModule::culling_pass_sync() +{ + uint safe_lights_len = max_ii(lights_len_, 1); + uint culling_select_dispatch_size = divide_ceil_u(safe_lights_len, CULLING_SELECT_GROUP_SIZE); + uint culling_sort_dispatch_size = divide_ceil_u(safe_lights_len, CULLING_SORT_GROUP_SIZE); + uint culling_tile_dispatch_size = divide_ceil_u(total_word_count_, CULLING_TILE_GROUP_SIZE); + + /* NOTE: We reference the buffers that may be resized or updated later. */ + + culling_ps_.init(); + { + auto &sub = culling_ps_.sub("Select"); + sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_SELECT)); + sub.bind_ssbo("light_cull_buf", &culling_data_buf_); + sub.bind_ssbo("in_light_buf", light_buf_); + sub.bind_ssbo("out_light_buf", culling_light_buf_); + sub.bind_ssbo("out_zdist_buf", culling_zdist_buf_); + sub.bind_ssbo("out_key_buf", culling_key_buf_); + sub.dispatch(int3(culling_select_dispatch_size, 1, 1)); + sub.barrier(GPU_BARRIER_SHADER_STORAGE); + } + { + auto &sub = culling_ps_.sub("Sort"); + sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_SORT)); + sub.bind_ssbo("light_cull_buf", &culling_data_buf_); + sub.bind_ssbo("in_light_buf", light_buf_); + sub.bind_ssbo("out_light_buf", culling_light_buf_); + sub.bind_ssbo("in_zdist_buf", culling_zdist_buf_); + sub.bind_ssbo("in_key_buf", culling_key_buf_); + sub.dispatch(int3(culling_sort_dispatch_size, 1, 1)); + sub.barrier(GPU_BARRIER_SHADER_STORAGE); + } + { + auto &sub = culling_ps_.sub("Zbin"); 
+ sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_ZBIN)); + sub.bind_ssbo("light_cull_buf", &culling_data_buf_); + sub.bind_ssbo("light_buf", culling_light_buf_); + sub.bind_ssbo("out_zbin_buf", culling_zbin_buf_); + sub.dispatch(int3(1, 1, 1)); + sub.barrier(GPU_BARRIER_SHADER_STORAGE); + } + { + auto &sub = culling_ps_.sub("Tiles"); + sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_TILE)); + sub.bind_ssbo("light_cull_buf", &culling_data_buf_); + sub.bind_ssbo("light_buf", culling_light_buf_); + sub.bind_ssbo("out_light_tile_buf", culling_tile_buf_); + sub.dispatch(int3(culling_tile_dispatch_size, 1, 1)); + sub.barrier(GPU_BARRIER_SHADER_STORAGE); + } +} + +void LightModule::debug_pass_sync() +{ + if (inst_.debug_mode == eDebugMode::DEBUG_LIGHT_CULLING) { + debug_draw_ps_.init(); + debug_draw_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM); + debug_draw_ps_.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_DEBUG)); + inst_.hiz_buffer.bind_resources(&debug_draw_ps_); + debug_draw_ps_.bind_ssbo("light_buf", &culling_light_buf_); + debug_draw_ps_.bind_ssbo("light_cull_buf", &culling_data_buf_); + debug_draw_ps_.bind_ssbo("light_zbin_buf", &culling_zbin_buf_); + debug_draw_ps_.bind_ssbo("light_tile_buf", &culling_tile_buf_); + debug_draw_ps_.bind_texture("depth_tx", &inst_.render_buffers.depth_tx); + debug_draw_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3); + } +} + +void LightModule::set_view(View &view, const int2 extent) +{ + float far_z = view.far_clip(); + float near_z = view.near_clip(); + + culling_data_buf_.zbin_scale = -CULLING_ZBIN_COUNT / fabsf(far_z - near_z); + culling_data_buf_.zbin_bias = -near_z * culling_data_buf_.zbin_scale; + culling_data_buf_.tile_to_uv_fac = (culling_data_buf_.tile_size / float2(extent)); + culling_data_buf_.visible_count = 0; + culling_data_buf_.push_update(); + + inst_.manager->submit(culling_ps_, view); +} + +void LightModule::debug_draw(View &view, GPUFrameBuffer *view_fb) +{ + if 
(inst_.debug_mode == eDebugMode::DEBUG_LIGHT_CULLING) { + inst_.info = "Debug Mode: Light Culling Validation"; + inst_.hiz_buffer.update(); + GPU_framebuffer_bind(view_fb); + inst_.manager->submit(debug_draw_ps_, view); + } +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_light.hh b/source/blender/draw/engines/eevee_next/eevee_light.hh new file mode 100644 index 00000000000..9bacc180ea8 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_light.hh @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * The light module manages light data buffers and light culling system. + * + * The culling follows the principles of Tiled Culling + Z binning from: + * "Improved Culling for Tiled and Clustered Rendering" + * by Michal Drobot + * http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf + * + * The culling is separated in 4 compute phases: + * - View Culling (select pass): Create a z distance and a index buffer of visible lights. + * - Light sorting: Outputs visible lights sorted by Z distance. + * - Z binning: Compute the Z bins min/max light indices. + * - Tile intersection: Fine grained 2D culling of each lights outputting a bitmap per tile. 
+ */ + +#pragma once + +#include "BLI_bitmap.h" +#include "BLI_vector.hh" +#include "DNA_light_types.h" + +#include "eevee_camera.hh" +#include "eevee_sampling.hh" +#include "eevee_shader.hh" +#include "eevee_shader_shared.hh" +#include "eevee_sync.hh" + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Light Object + * \{ */ + +struct Light : public LightData { + public: + bool initialized = false; + bool used = false; + + public: + Light() + { + shadow_id = LIGHT_NO_SHADOW; + } + + void sync(/* ShadowModule &shadows, */ const Object *ob, float threshold); + + // void shadow_discard_safe(ShadowModule &shadows); + + void debug_draw(); + + private: + float attenuation_radius_get(const ::Light *la, float light_threshold, float light_power); + void shape_parameters_set(const ::Light *la, const float scale[3]); + float shape_power_get(const ::Light *la); + float point_power_get(const ::Light *la); +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name LightModule + * \{ */ + +/** + * The light module manages light data buffers and light culling system. + */ +class LightModule { + // friend ShadowModule; + + private: + /* Keep tile count reasonable for memory usage and 2D culling performance. */ + static constexpr uint max_memory_threshold = 32 * 1024 * 1024; /* 32 MiB */ + static constexpr uint max_word_count_threshold = max_memory_threshold / sizeof(uint); + static constexpr uint max_tile_count_threshold = 8192; + + Instance &inst_; + + /** Map of light objects data. Converted to flat array each frame. */ + Map<ObjectKey, Light> light_map_; + /** Flat array sent to GPU, populated from light_map_. Source buffer for light culling. */ + LightDataBuf light_buf_ = {"Lights_no_cull"}; + /** Recorded size of light_map_ (after pruning) to detect deletion. 
*/ + int64_t light_map_size_ = 0; + /** Luminous intensity to consider the light boundary at. Used for culling. */ + float light_threshold_ = 0.01f; + /** If false, will prevent all scene light from being synced. */ + bool use_scene_lights_ = false; + /** Number of sun lights synced during the last sync. Used as offset. */ + int sun_lights_len_ = 0; + int local_lights_len_ = 0; + /** Sun plus local lights count for convenience. */ + int lights_len_ = 0; + + /** + * Light Culling + */ + + /** LightData buffer used for rendering. Filled by the culling pass. */ + LightDataBuf culling_light_buf_ = {"Lights_culled"}; + /** Culling infos. */ + LightCullingDataBuf culling_data_buf_ = {"LightCull_data"}; + /** Z-distance matching the key for each visible lights. Used for sorting. */ + LightCullingZdistBuf culling_zdist_buf_ = {"LightCull_zdist"}; + /** Key buffer containing only visible lights indices. Used for sorting. */ + LightCullingKeyBuf culling_key_buf_ = {"LightCull_key"}; + /** Zbins containing min and max light index for each Z bin. */ + LightCullingZbinBuf culling_zbin_buf_ = {"LightCull_zbin"}; + /** Bitmap of lights touching each tiles. */ + LightCullingTileBuf culling_tile_buf_ = {"LightCull_tile"}; + /** Culling compute passes. */ + PassSimple culling_ps_ = {"LightCulling"}; + /** Total number of words the tile buffer needs to contain for the render resolution. */ + uint total_word_count_ = 0; + + /** Debug Culling visualization. */ + PassSimple debug_draw_ps_ = {"LightCulling.Debug"}; + + public: + LightModule(Instance &inst) : inst_(inst){}; + ~LightModule(){}; + + void begin_sync(); + void sync_light(const Object *ob, ObjectHandle &handle); + void end_sync(); + + /** + * Update acceleration structure for the given view. 
+ */ + void set_view(View &view, const int2 extent); + + void debug_draw(View &view, GPUFrameBuffer *view_fb); + + void bind_resources(DRWShadingGroup *grp) + { + DRW_shgroup_storage_block_ref(grp, "light_buf", &culling_light_buf_); + DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_); + DRW_shgroup_storage_block_ref(grp, "light_zbin_buf", &culling_zbin_buf_); + DRW_shgroup_storage_block_ref(grp, "light_tile_buf", &culling_tile_buf_); +#if 0 + DRW_shgroup_uniform_texture(grp, "shadow_atlas_tx", inst_.shadows.atlas_tx_get()); + DRW_shgroup_uniform_texture(grp, "shadow_tilemaps_tx", inst_.shadows.tilemap_tx_get()); +#endif + } + + template<typename T> void bind_resources(draw::detail::PassBase<T> *pass) + { + /* Storage Buf. */ + pass->bind_ssbo(LIGHT_CULL_BUF_SLOT, &culling_data_buf_); + pass->bind_ssbo(LIGHT_BUF_SLOT, &culling_light_buf_); + pass->bind_ssbo(LIGHT_ZBIN_BUF_SLOT, &culling_zbin_buf_); + pass->bind_ssbo(LIGHT_TILE_BUF_SLOT, &culling_tile_buf_); + } + + private: + void culling_pass_sync(); + void debug_pass_sync(); +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_material.cc b/source/blender/draw/engines/eevee_next/eevee_material.cc index 1676c89d679..a92f96e8c70 100644 --- a/source/blender/draw/engines/eevee_next/eevee_material.cc +++ b/source/blender/draw/engines/eevee_next/eevee_material.cc @@ -72,10 +72,9 @@ bNodeTree *DefaultSurfaceNodeTree::nodetree_get(::Material *ma) MaterialModule::MaterialModule(Instance &inst) : inst_(inst) { { - bNodeTree *ntree = ntreeAddTree(nullptr, "Shader Nodetree", ntreeType_Shader->idname); - diffuse_mat = (::Material *)BKE_id_new_nomain(ID_MA, "EEVEE default diffuse"); - diffuse_mat->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + nullptr, &diffuse_mat->id, "Shader Nodetree", ntreeType_Shader->idname); diffuse_mat->use_nodes = true; /* To use the forward pipeline. 
*/ diffuse_mat->blend_method = MA_BM_BLEND; @@ -95,10 +94,9 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst) nodeSetActive(ntree, output); } { - bNodeTree *ntree = ntreeAddTree(nullptr, "Shader Nodetree", ntreeType_Shader->idname); - glossy_mat = (::Material *)BKE_id_new_nomain(ID_MA, "EEVEE default metal"); - glossy_mat->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + nullptr, &glossy_mat->id, "Shader Nodetree", ntreeType_Shader->idname); glossy_mat->use_nodes = true; /* To use the forward pipeline. */ glossy_mat->blend_method = MA_BM_BLEND; @@ -120,10 +118,9 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst) nodeSetActive(ntree, output); } { - bNodeTree *ntree = ntreeAddTree(nullptr, "Shader Nodetree", ntreeType_Shader->idname); - error_mat_ = (::Material *)BKE_id_new_nomain(ID_MA, "EEVEE default error"); - error_mat_->nodetree = ntree; + bNodeTree *ntree = ntreeAddTreeEmbedded( + nullptr, &error_mat_->id, "Shader Nodetree", ntreeType_Shader->idname); error_mat_->use_nodes = true; /* Use emission and output material to be compatible with both World and Material. 
*/ @@ -145,9 +142,6 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst) MaterialModule::~MaterialModule() { - for (Material *mat : material_map_.values()) { - delete mat; - } BKE_id_free(nullptr, glossy_mat); BKE_id_free(nullptr, diffuse_mat); BKE_id_free(nullptr, error_mat_); @@ -157,13 +151,12 @@ void MaterialModule::begin_sync() { queued_shaders_count = 0; - for (Material *mat : material_map_.values()) { - mat->init = false; - } + material_map_.clear(); shader_map_.clear(); } -MaterialPass MaterialModule::material_pass_get(::Material *blender_mat, +MaterialPass MaterialModule::material_pass_get(Object *ob, + ::Material *blender_mat, eMaterialPipeline pipeline_type, eMaterialGeometry geometry_type) { @@ -195,7 +188,7 @@ MaterialPass MaterialModule::material_pass_get(::Material *blender_mat, BLI_assert(GPU_material_status(matpass.gpumat) == GPU_MAT_SUCCESS); if (GPU_material_recalc_flag_get(matpass.gpumat)) { - // inst_.sampling.reset(); + inst_.sampling.reset(); } if ((pipeline_type == MAT_PIPE_DEFERRED) && @@ -203,35 +196,34 @@ MaterialPass MaterialModule::material_pass_get(::Material *blender_mat, pipeline_type = MAT_PIPE_FORWARD; } - if ((pipeline_type == MAT_PIPE_FORWARD) && + if (ELEM(pipeline_type, + MAT_PIPE_FORWARD, + MAT_PIPE_FORWARD_PREPASS, + MAT_PIPE_FORWARD_PREPASS_VELOCITY) && GPU_material_flag_get(matpass.gpumat, GPU_MATFLAG_TRANSPARENT)) { - /* Transparent needs to use one shgroup per object to support reordering. */ - matpass.shgrp = inst_.pipelines.material_add(blender_mat, matpass.gpumat, pipeline_type); + /* Transparent pass is generated later. */ + matpass.sub_pass = nullptr; } else { ShaderKey shader_key(matpass.gpumat, geometry_type, pipeline_type); - auto add_cb = [&]() -> DRWShadingGroup * { - /* First time encountering this shader. Create a shading group. 
*/ - return inst_.pipelines.material_add(blender_mat, matpass.gpumat, pipeline_type); - }; - DRWShadingGroup *grp = shader_map_.lookup_or_add_cb(shader_key, add_cb); - - if (grp != nullptr) { - /* Shading group for this shader already exists. Create a sub one for this material. */ - /* IMPORTANT: We always create a subgroup so that all subgroups are inserted after the - * first "empty" shgroup. This avoids messing the order of subgroups when there is more - * nested subgroup (i.e: hair drawing). */ - /* TODO(@fclem): Remove material resource binding from the first group creation. */ - matpass.shgrp = DRW_shgroup_create_sub(grp); - DRW_shgroup_add_material_resources(matpass.shgrp, matpass.gpumat); + PassMain::Sub *shader_sub = shader_map_.lookup_or_add_cb(shader_key, [&]() { + /* First time encountering this shader. Create a sub that will contain materials using it. */ + return inst_.pipelines.material_add(ob, blender_mat, matpass.gpumat, pipeline_type); + }); + + if (shader_sub != nullptr) { + /* Create a sub for this material as `shader_sub` is for sharing shader between materials. */ + matpass.sub_pass = &shader_sub->sub(GPU_material_get_name(matpass.gpumat)); + matpass.sub_pass->material_set(*inst_.manager, matpass.gpumat); } } return matpass; } -Material &MaterialModule::material_sync(::Material *blender_mat, +Material &MaterialModule::material_sync(Object *ob, + ::Material *blender_mat, eMaterialGeometry geometry_type, bool has_motion) { @@ -249,27 +241,32 @@ Material &MaterialModule::material_sync(::Material *blender_mat, MaterialKey material_key(blender_mat, geometry_type, surface_pipe); - /* TODO: allocate in blocks to avoid memory fragmentation. */ - auto add_cb = [&]() { return new Material(); }; - Material &mat = *material_map_.lookup_or_add_cb(material_key, add_cb); - - /* Forward pipeline needs to use one shgroup per object. 
*/ - if (mat.init == false || (surface_pipe == MAT_PIPE_FORWARD)) { - mat.init = true; + Material &mat = material_map_.lookup_or_add_cb(material_key, [&]() { + Material mat; /* Order is important for transparent. */ - mat.prepass = material_pass_get(blender_mat, prepass_pipe, geometry_type); - mat.shading = material_pass_get(blender_mat, surface_pipe, geometry_type); + mat.prepass = material_pass_get(ob, blender_mat, prepass_pipe, geometry_type); + mat.shading = material_pass_get(ob, blender_mat, surface_pipe, geometry_type); if (blender_mat->blend_shadow == MA_BS_NONE) { mat.shadow = MaterialPass(); } else { - mat.shadow = material_pass_get(blender_mat, MAT_PIPE_SHADOW, geometry_type); + mat.shadow = material_pass_get(ob, blender_mat, MAT_PIPE_SHADOW, geometry_type); } - mat.is_alpha_blend_transparent = (blender_mat->blend_method == MA_BM_BLEND) && - GPU_material_flag_get(mat.prepass.gpumat, + GPU_material_flag_get(mat.shading.gpumat, GPU_MATFLAG_TRANSPARENT); + return mat; + }); + + if (mat.is_alpha_blend_transparent) { + /* Transparent needs to use one sub pass per object to support reordering. + * NOTE: Pre-pass needs to be created first in order to be sorted first. 
*/ + mat.prepass.sub_pass = inst_.pipelines.forward.prepass_transparent_add( + ob, blender_mat, mat.shading.gpumat); + mat.shading.sub_pass = inst_.pipelines.forward.material_transparent_add( + ob, blender_mat, mat.shading.gpumat); } + return mat; } @@ -297,7 +294,7 @@ MaterialArray &MaterialModule::material_array_get(Object *ob, bool has_motion) for (auto i : IndexRange(materials_len)) { ::Material *blender_mat = material_from_slot(ob, i); - Material &mat = material_sync(blender_mat, to_material_geometry(ob), has_motion); + Material &mat = material_sync(ob, blender_mat, to_material_geometry(ob), has_motion); material_array_.materials.append(&mat); material_array_.gpu_materials.append(mat.shading.gpumat); } @@ -310,7 +307,7 @@ Material &MaterialModule::material_get(Object *ob, eMaterialGeometry geometry_type) { ::Material *blender_mat = material_from_slot(ob, mat_nr); - Material &mat = material_sync(blender_mat, geometry_type, has_motion); + Material &mat = material_sync(ob, blender_mat, geometry_type, has_motion); return mat; } diff --git a/source/blender/draw/engines/eevee_next/eevee_material.hh b/source/blender/draw/engines/eevee_next/eevee_material.hh index 23165a741b9..ad0c293926b 100644 --- a/source/blender/draw/engines/eevee_next/eevee_material.hh +++ b/source/blender/draw/engines/eevee_next/eevee_material.hh @@ -203,12 +203,11 @@ class DefaultSurfaceNodeTree { * \{ */ struct MaterialPass { - GPUMaterial *gpumat = nullptr; - DRWShadingGroup *shgrp = nullptr; + GPUMaterial *gpumat; + PassMain::Sub *sub_pass; }; struct Material { - bool init = false; bool is_alpha_blend_transparent; MaterialPass shadow, shading, prepass; }; @@ -228,8 +227,8 @@ class MaterialModule { private: Instance &inst_; - Map<MaterialKey, Material *> material_map_; - Map<ShaderKey, DRWShadingGroup *> shader_map_; + Map<MaterialKey, Material> material_map_; + Map<ShaderKey, PassMain::Sub *> shader_map_; MaterialArray material_array_; @@ -254,13 +253,15 @@ class MaterialModule { Material 
&material_get(Object *ob, bool has_motion, int mat_nr, eMaterialGeometry geometry_type); private: - Material &material_sync(::Material *blender_mat, + Material &material_sync(Object *ob, + ::Material *blender_mat, eMaterialGeometry geometry_type, bool has_motion); /** Return correct material or empty default material if slot is empty. */ ::Material *material_from_slot(Object *ob, int slot); - MaterialPass material_pass_get(::Material *blender_mat, + MaterialPass material_pass_get(Object *ob, + ::Material *blender_mat, eMaterialPipeline pipeline_type, eMaterialGeometry geometry_type); }; diff --git a/source/blender/draw/engines/eevee_next/eevee_motion_blur.cc b/source/blender/draw/engines/eevee_next/eevee_motion_blur.cc new file mode 100644 index 00000000000..f68abafa3d4 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_motion_blur.cc @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + */ + +// #include "BLI_map.hh" +#include "DEG_depsgraph_query.h" + +#include "eevee_instance.hh" +#include "eevee_motion_blur.hh" +// #include "eevee_sampling.hh" +// #include "eevee_shader_shared.hh" +// #include "eevee_velocity.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name MotionBlurModule + * + * \{ */ + +void MotionBlurModule::init() +{ + const Scene *scene = inst_.scene; + + enabled_ = (scene->eevee.flag & SCE_EEVEE_MOTION_BLUR_ENABLED) != 0; + + if (!enabled_) { + motion_blur_fx_enabled_ = false; + return; + } + + /* Take into account the steps needed for fx motion blur. 
*/ + int steps_count = max_ii(1, scene->eevee.motion_blur_steps) * 2 + 1; + + time_steps_.resize(steps_count); + + initial_frame_ = scene->r.cfra; + initial_subframe_ = scene->r.subframe; + frame_time_ = initial_frame_ + initial_subframe_; + shutter_position_ = scene->eevee.motion_blur_position; + shutter_time_ = scene->eevee.motion_blur_shutter; + + data_.depth_scale = scene->eevee.motion_blur_depth_scale; + motion_blur_fx_enabled_ = true; /* TODO(fclem): UI option. */ + + /* Viewport stops here. We only do Post-FX motion blur. */ + if (inst_.is_viewport()) { + enabled_ = false; + return; + } + + /* Without this there is the possibility of the curve table not being allocated. */ + BKE_curvemapping_changed((struct CurveMapping *)&scene->r.mblur_shutter_curve, false); + + Vector<float> cdf(CM_TABLE); + Sampling::cdf_from_curvemapping(scene->r.mblur_shutter_curve, cdf); + Sampling::cdf_invert(cdf, time_steps_); + + for (float &time : time_steps_) { + time = this->shutter_time_to_scene_time(time); + } + + step_id_ = 1; + + if (motion_blur_fx_enabled_) { + /* A bit weird but we have to sync the first 2 steps here because the step() + * function is only called after rendering a sample. */ + inst_.velocity.step_sync(STEP_PREVIOUS, time_steps_[0]); + inst_.velocity.step_sync(STEP_NEXT, time_steps_[2]); + } + inst_.set_time(time_steps_[1]); +} + +/* Runs after rendering a sample. */ +void MotionBlurModule::step() +{ + if (!enabled_) { + return; + } + + if (inst_.sampling.finished()) { + /* Restore original frame number. This is because the render pipeline expects it. */ + RE_engine_frame_set(inst_.render, initial_frame_, initial_subframe_); + } + else if (inst_.sampling.do_render_sync()) { + /* Time to change motion step. 
*/ + BLI_assert(time_steps_.size() > step_id_ + 2); + step_id_ += 2; + + if (motion_blur_fx_enabled_) { + inst_.velocity.step_swap(); + inst_.velocity.step_sync(eVelocityStep::STEP_NEXT, time_steps_[step_id_ + 1]); + } + inst_.set_time(time_steps_[step_id_]); + } +} + +float MotionBlurModule::shutter_time_to_scene_time(float time) +{ + switch (shutter_position_) { + case SCE_EEVEE_MB_START: + /* No offset. */ + break; + case SCE_EEVEE_MB_CENTER: + time -= 0.5f; + break; + case SCE_EEVEE_MB_END: + time -= 1.0; + break; + default: + BLI_assert(!"Invalid motion blur position enum!"); + break; + } + time *= shutter_time_; + time += frame_time_; + return time; +} + +void MotionBlurModule::sync() +{ + /* Disable motion blur in viewport when changing camera projection type. + * Avoids really high velocities. */ + if (inst_.velocity.camera_changed_projection()) { + motion_blur_fx_enabled_ = false; + } + + if (!motion_blur_fx_enabled_) { + return; + } + + eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT; + RenderBuffers &render_buffers = inst_.render_buffers; + + motion_blur_ps_.init(); + inst_.velocity.bind_resources(&motion_blur_ps_); + inst_.sampling.bind_resources(&motion_blur_ps_); + { + /* Create max velocity tiles. */ + PassSimple::Sub &sub = motion_blur_ps_.sub("TilesFlatten"); + eShaderType shader = (inst_.is_viewport()) ? MOTION_BLUR_TILE_FLATTEN_VIEWPORT : + MOTION_BLUR_TILE_FLATTEN_RENDER; + sub.shader_set(inst_.shaders.static_shader_get(shader)); + sub.bind_ubo("motion_blur_buf", data_); + sub.bind_texture("depth_tx", &render_buffers.depth_tx); + sub.bind_image("velocity_img", &render_buffers.vector_tx); + sub.bind_image("out_tiles_img", &tiles_tx_); + sub.dispatch(&dispatch_flatten_size_); + sub.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS | GPU_BARRIER_TEXTURE_FETCH); + } + { + /* Expand max velocity tiles by spreading them in their neighborhood. 
*/ + PassSimple::Sub &sub = motion_blur_ps_.sub("TilesDilate"); + sub.shader_set(inst_.shaders.static_shader_get(MOTION_BLUR_TILE_DILATE)); + sub.bind_ssbo("tile_indirection_buf", tile_indirection_buf_); + sub.bind_image("in_tiles_img", &tiles_tx_); + sub.dispatch(&dispatch_dilate_size_); + sub.barrier(GPU_BARRIER_SHADER_STORAGE); + } + { + /* Do the motion blur gather algorithm. */ + PassSimple::Sub &sub = motion_blur_ps_.sub("ConvolveGather"); + sub.shader_set(inst_.shaders.static_shader_get(MOTION_BLUR_GATHER)); + sub.bind_ubo("motion_blur_buf", data_); + sub.bind_ssbo("tile_indirection_buf", tile_indirection_buf_); + sub.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter); + sub.bind_texture("velocity_tx", &render_buffers.vector_tx, no_filter); + sub.bind_texture("in_color_tx", &input_color_tx_, no_filter); + sub.bind_image("in_tiles_img", &tiles_tx_); + sub.bind_image("out_color_img", &output_color_tx_); + + sub.dispatch(&dispatch_gather_size_); + sub.barrier(GPU_BARRIER_TEXTURE_FETCH); + } +} + +void MotionBlurModule::render(View &view, GPUTexture **input_tx, GPUTexture **output_tx) +{ + if (!motion_blur_fx_enabled_) { + return; + } + + const Texture &depth_tx = inst_.render_buffers.depth_tx; + + int2 extent = {depth_tx.width(), depth_tx.height()}; + int2 tiles_extent = math::divide_ceil(extent, int2(MOTION_BLUR_TILE_SIZE)); + + if (inst_.is_viewport()) { + float frame_delta = fabsf(inst_.velocity.step_time_delta_get(STEP_PREVIOUS, STEP_CURRENT)); + /* Avoid highly disturbing blurs, during navigation with high shutter time. */ + if (frame_delta > 0.0f && !DRW_state_is_navigating()) { + /* Rescale motion blur intensity to be shutter time relative and avoid long streak when we + * have frame skipping. Always try to stick to what the render frame would look like. */ + data_.motion_scale = float2(shutter_time_ / frame_delta); + } + else { + /* There is no time change. Motion only comes from viewport navigation and object transform. 
+ * Apply motion blur as smoothing and only blur towards last frame. */ + data_.motion_scale = float2(1.0f, 0.0f); + + if (was_navigating_ != DRW_state_is_navigating()) { + /* Special case for navigation events that only last for one frame (for instance mouse + * scroll for zooming). For this case we have to wait for the next frame before enabling + * the navigation motion blur. */ + was_navigating_ = DRW_state_is_navigating(); + return; + } + } + was_navigating_ = DRW_state_is_navigating(); + + /* Change texture swizzling to avoid complexity in gather pass shader. */ + GPU_texture_swizzle_set(inst_.render_buffers.vector_tx, "rgrg"); + } + else { + data_.motion_scale = float2(1.0f); + } + /* Second motion vector is stored inverted. */ + data_.motion_scale.y = -data_.motion_scale.y; + data_.target_size_inv = 1.0f / float2(extent); + data_.push_update(); + + input_color_tx_ = *input_tx; + output_color_tx_ = *output_tx; + + dispatch_flatten_size_ = int3(tiles_extent, 1); + dispatch_dilate_size_ = int3(math::divide_ceil(tiles_extent, int2(MOTION_BLUR_GROUP_SIZE)), 1); + dispatch_gather_size_ = int3(math::divide_ceil(extent, int2(MOTION_BLUR_GROUP_SIZE)), 1); + + DRW_stats_group_start("Motion Blur"); + + tiles_tx_.acquire(tiles_extent, GPU_RGBA16F); + + GPU_storagebuf_clear_to_zero(tile_indirection_buf_); + + inst_.manager->submit(motion_blur_ps_, view); + + tiles_tx_.release(); + + DRW_stats_group_end(); + + if (inst_.is_viewport()) { + /* Reset swizzle since this texture might be reused in other places. */ + GPU_texture_swizzle_set(inst_.render_buffers.vector_tx, "rgba"); + } + + /* Swap buffers so that next effect has the right input. 
*/ + *input_tx = output_color_tx_; + *output_tx = input_color_tx_; +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_motion_blur.hh b/source/blender/draw/engines/eevee_next/eevee_motion_blur.hh new file mode 100644 index 00000000000..056c2e323d5 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_motion_blur.hh @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Motion blur is done by accumulating scene samples over shutter time. + * Since the number of step is discrete, quite low, and not per pixel randomized, + * we couple this with a post processing motion blur. + * + * The post-fx motion blur is done in two directions, from the previous step and to the next. + * + * For a scene with 3 motion steps, a flat shutter curve and shutter time of 2 frame + * centered on frame we have: + * + * |--------------------|--------------------| + * -1 0 1 Frames + * + * |-------------|-------------|-------------| + * 1 2 3 Motion steps + * + * |------|------|------|------|------|------| + * 0 1 2 4 5 6 7 Time Steps + * + * |-------------| One motion step blurs this range. + * -1 | +1 Objects and geometry steps are recorded here. + * 0 Scene is rendered here. + * + * Since motion step N and N+1 share one time step we reuse it to avoid an extra scene evaluation. + * + * Note that we have to evaluate -1 and +1 time steps before rendering so eval order is -1, +1, 0. + * This is because all GPUBatches from the DRWCache are being free when changing a frame. + * + * For viewport, we only have the current and previous step data to work with. So we center the + * blur on the current frame and extrapolate the motion. 
+ * + * The Post-FX motion blur is based on: + * "A Fast and Stable Feature-Aware Motion Blur Filter" + * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai + */ + +#pragma once + +#include "BLI_map.hh" +#include "DEG_depsgraph_query.h" + +#include "eevee_sampling.hh" +#include "eevee_shader_shared.hh" +#include "eevee_velocity.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name MotionBlur + * + * \{ */ + +/** + * Manages time-steps evaluations and accumulation Motion blur. + * Also handles Post process motion blur. + */ +class MotionBlurModule { + private: + Instance &inst_; + + /** + * Array containing all steps (in scene time) we need to evaluate (not render). + * Only odd steps are rendered. The even ones are evaluated for fx motion blur. + */ + Vector<float> time_steps_; + + /** Copy of input frame and sub-frame to restore after render. */ + int initial_frame_; + float initial_subframe_; + /** Time of the frame we are rendering. */ + float frame_time_; + /** Enum controlling when the shutter opens. See SceneEEVEE.motion_blur_position. */ + int shutter_position_; + /** Time in scene frame the shutter is open. Controls the amount of blur. */ + float shutter_time_; + + /** True if motion blur is enabled as a module. */ + bool enabled_ = false; + /** True if motion blur post-fx is enabled. */ + float motion_blur_fx_enabled_ = false; + /** True if last viewport redraw state was already in navigation state. */ + bool was_navigating_ = false; + + int step_id_ = 0; + + /** Velocity tiles used to guide and speedup the gather pass. */ + TextureFromPool tiles_tx_; + + GPUTexture *input_color_tx_ = nullptr; + GPUTexture *output_color_tx_ = nullptr; + + PassSimple motion_blur_ps_ = {"MotionBlur"}; + + MotionBlurTileIndirectionBuf tile_indirection_buf_; + MotionBlurDataBuf data_; + /** Dispatch size for full-screen passes. 
*/ + int3 dispatch_flatten_size_ = int3(0); + int3 dispatch_dilate_size_ = int3(0); + int3 dispatch_gather_size_ = int3(0); + + public: + MotionBlurModule(Instance &inst) : inst_(inst){}; + ~MotionBlurModule(){}; + + void init(); + + void step(); + + void sync(); + + bool postfx_enabled() const + { + return motion_blur_fx_enabled_; + } + + void render(View &view, GPUTexture **input_tx, GPUTexture **output_tx); + + private: + float shutter_time_to_scene_time(float time); +}; + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc index 33853eba06c..33978518ffc 100644 --- a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc +++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc @@ -24,21 +24,36 @@ namespace blender::eevee { void WorldPipeline::sync(GPUMaterial *gpumat) { - DRWState state = DRW_STATE_WRITE_COLOR; - world_ps_ = DRW_pass_create("World", state); - - /* Push a matrix at the same location as the camera. */ - float4x4 camera_mat = float4x4::identity(); - // copy_v3_v3(camera_mat[3], inst_.camera.data_get().viewinv[3]); - - DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, world_ps_); - DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx); - DRW_shgroup_call_obmat(grp, DRW_cache_fullscreen_quad_get(), camera_mat.ptr()); + Manager &manager = *inst_.manager; + RenderBuffers &rbufs = inst_.render_buffers; + + ResourceHandle handle = manager.resource_handle(float4x4::identity().ptr()); + + world_ps_.init(); + world_ps_.state_set(DRW_STATE_WRITE_COLOR); + world_ps_.material_set(manager, gpumat); + world_ps_.push_constant("world_opacity_fade", inst_.film.background_opacity_get()); + world_ps_.bind_texture("utility_tx", inst_.pipelines.utility_tx); + /* AOVs. 
*/ + world_ps_.bind_image("aov_color_img", &rbufs.aov_color_tx); + world_ps_.bind_image("aov_value_img", &rbufs.aov_value_tx); + world_ps_.bind_ssbo("aov_buf", &inst_.film.aovs_info); + /* RenderPasses. Cleared by background (even if bad practice). */ + world_ps_.bind_image("rp_normal_img", &rbufs.normal_tx); + world_ps_.bind_image("rp_light_img", &rbufs.light_tx); + world_ps_.bind_image("rp_diffuse_color_img", &rbufs.diffuse_color_tx); + world_ps_.bind_image("rp_specular_color_img", &rbufs.specular_color_tx); + world_ps_.bind_image("rp_emission_img", &rbufs.emission_tx); + world_ps_.bind_image("rp_cryptomatte_img", &rbufs.cryptomatte_tx); + + world_ps_.draw(DRW_cache_fullscreen_quad_get(), handle); + /* To allow opaque pass rendering over it. */ + world_ps_.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); } -void WorldPipeline::render() +void WorldPipeline::render(View &view) { - DRW_draw_pass(world_ps_); + inst_.manager->submit(world_ps_, view); } /** \} */ @@ -51,182 +66,167 @@ void WorldPipeline::render() void ForwardPipeline::sync() { + camera_forward_ = inst_.camera.forward(); + + DRWState state_depth_only = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS; + DRWState state_depth_color = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS | + DRW_STATE_WRITE_COLOR; { - DRWState state = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS; - prepass_ps_ = DRW_pass_create("Forward.Opaque.Prepass", state); - prepass_velocity_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Velocity", - state | DRW_STATE_WRITE_COLOR); + prepass_ps_.init(); - state |= DRW_STATE_CULL_BACK; - prepass_culled_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Culled", state); - prepass_culled_velocity_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Velocity", - state | DRW_STATE_WRITE_COLOR); + { + /* Common resources. 
*/ - DRW_pass_link(prepass_ps_, prepass_velocity_ps_); - DRW_pass_link(prepass_velocity_ps_, prepass_culled_ps_); - DRW_pass_link(prepass_culled_ps_, prepass_culled_velocity_ps_); - } - { - DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL; - opaque_ps_ = DRW_pass_create("Forward.Opaque", state); + /* Textures. */ + prepass_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx); - state |= DRW_STATE_CULL_BACK; - opaque_culled_ps_ = DRW_pass_create("Forward.Opaque.Culled", state); + inst_.velocity.bind_resources(&prepass_ps_); + inst_.sampling.bind_resources(&prepass_ps_); + } + + prepass_double_sided_static_ps_ = &prepass_ps_.sub("DoubleSided.Static"); + prepass_double_sided_static_ps_->state_set(state_depth_only); + + prepass_single_sided_static_ps_ = &prepass_ps_.sub("SingleSided.Static"); + prepass_single_sided_static_ps_->state_set(state_depth_only | DRW_STATE_CULL_BACK); - DRW_pass_link(opaque_ps_, opaque_culled_ps_); + prepass_double_sided_moving_ps_ = &prepass_ps_.sub("DoubleSided.Moving"); + prepass_double_sided_moving_ps_->state_set(state_depth_color); + + prepass_single_sided_moving_ps_ = &prepass_ps_.sub("SingleSided.Moving"); + prepass_single_sided_moving_ps_->state_set(state_depth_color | DRW_STATE_CULL_BACK); } { - DRWState state = DRW_STATE_DEPTH_LESS_EQUAL; - transparent_ps_ = DRW_pass_create("Forward.Transparent", state); + opaque_ps_.init(); + + { + /* Common resources. */ + + /* RenderPasses. */ + opaque_ps_.bind_image(RBUFS_NORMAL_SLOT, &inst_.render_buffers.normal_tx); + opaque_ps_.bind_image(RBUFS_LIGHT_SLOT, &inst_.render_buffers.light_tx); + opaque_ps_.bind_image(RBUFS_DIFF_COLOR_SLOT, &inst_.render_buffers.diffuse_color_tx); + opaque_ps_.bind_image(RBUFS_SPEC_COLOR_SLOT, &inst_.render_buffers.specular_color_tx); + opaque_ps_.bind_image(RBUFS_EMISSION_SLOT, &inst_.render_buffers.emission_tx); + /* AOVs. 
*/ + opaque_ps_.bind_image(RBUFS_AOV_COLOR_SLOT, &inst_.render_buffers.aov_color_tx); + opaque_ps_.bind_image(RBUFS_AOV_VALUE_SLOT, &inst_.render_buffers.aov_value_tx); + /* Cryptomatte. */ + opaque_ps_.bind_image(RBUFS_CRYPTOMATTE_SLOT, &inst_.render_buffers.cryptomatte_tx); + /* Storage Buf. */ + opaque_ps_.bind_ssbo(RBUFS_AOV_BUF_SLOT, &inst_.film.aovs_info); + /* Textures. */ + opaque_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx); + + inst_.lights.bind_resources(&opaque_ps_); + inst_.sampling.bind_resources(&opaque_ps_); + inst_.cryptomatte.bind_resources(&opaque_ps_); + } + + opaque_single_sided_ps_ = &opaque_ps_.sub("SingleSided"); + opaque_single_sided_ps_->state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL | + DRW_STATE_CULL_BACK); + + opaque_double_sided_ps_ = &opaque_ps_.sub("DoubleSided"); + opaque_double_sided_ps_->state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL); } -} + { + transparent_ps_.init(); + /* Workaround limitation of PassSortable. Use dummy pass that will be sorted first in all + * circumstances. */ + PassMain::Sub &sub = transparent_ps_.sub("ResourceBind", -FLT_MAX); -DRWShadingGroup *ForwardPipeline::material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat) -{ - DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? 
opaque_culled_ps_ : opaque_ps_; - // LightModule &lights = inst_.lights; - // LightProbeModule &lightprobes = inst_.lightprobes; - // RaytracingModule &raytracing = inst_.raytracing; - // eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT; - DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass); - // lights.shgroup_resources(grp); - // DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get()); - // DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get()); - // DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get()); - // DRW_shgroup_uniform_block(grp, "probes_buf", lightprobes.info_ubo_get()); - // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get()); - // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get()); - DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx); - /* TODO(fclem): Make this only needed if material uses it ... somehow. */ - // if (true) { - // DRW_shgroup_uniform_texture_ref( - // grp, "sss_transmittance_tx", inst_.subsurface.transmittance_ref_get()); - // } - // if (raytracing.enabled()) { - // DRW_shgroup_uniform_block(grp, "rt_diffuse_buf", raytracing.diffuse_data); - // DRW_shgroup_uniform_block(grp, "rt_reflection_buf", raytracing.reflection_data); - // DRW_shgroup_uniform_block(grp, "rt_refraction_buf", raytracing.refraction_data); - // DRW_shgroup_uniform_texture_ref_ex(grp, "radiance_tx", &input_screen_radiance_tx_, - // no_interp); - // } - // if (raytracing.enabled()) { - // DRW_shgroup_uniform_block(grp, "hiz_buf", inst_.hiz.ubo_get()); - // DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", inst_.hiz_front.texture_ref_get()); - // } - return grp; -} + /* Common resources. */ -DRWShadingGroup *ForwardPipeline::prepass_opaque_add(::Material *blender_mat, - GPUMaterial *gpumat, - bool has_motion) -{ - DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? - (has_motion ? 
prepass_culled_velocity_ps_ : prepass_culled_ps_) : - (has_motion ? prepass_velocity_ps_ : prepass_ps_); - DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass); - if (has_motion) { - inst_.velocity.bind_resources(grp); + /* Textures. */ + sub.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx); + + inst_.lights.bind_resources(&sub); + inst_.sampling.bind_resources(&sub); } - return grp; } -DRWShadingGroup *ForwardPipeline::material_transparent_add(::Material *blender_mat, - GPUMaterial *gpumat) +PassMain::Sub *ForwardPipeline::prepass_opaque_add(::Material *blender_mat, + GPUMaterial *gpumat, + bool has_motion) { - // LightModule &lights = inst_.lights; - // LightProbeModule &lightprobes = inst_.lightprobes; - // RaytracingModule &raytracing = inst_.raytracing; - // eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT; - DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_); - // lights.shgroup_resources(grp); - // DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get()); - // DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get()); - // DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get()); - // DRW_shgroup_uniform_block(grp, "probes_buf", lightprobes.info_ubo_get()); - // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get()); - // DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get()); - // DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx); - /* TODO(fclem): Make this only needed if material uses it ... somehow. 
*/ - // if (true) { - // DRW_shgroup_uniform_texture_ref( - // grp, "sss_transmittance_tx", inst_.subsurface.transmittance_ref_get()); - // } - // if (raytracing.enabled()) { - // DRW_shgroup_uniform_block(grp, "rt_diffuse_buf", raytracing.diffuse_data); - // DRW_shgroup_uniform_block(grp, "rt_reflection_buf", raytracing.reflection_data); - // DRW_shgroup_uniform_block(grp, "rt_refraction_buf", raytracing.refraction_data); - // DRW_shgroup_uniform_texture_ref_ex( - // grp, "rt_radiance_tx", &input_screen_radiance_tx_, no_interp); - // } - // if (raytracing.enabled()) { - // DRW_shgroup_uniform_block(grp, "hiz_buf", inst_.hiz.ubo_get()); - // DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", inst_.hiz_front.texture_ref_get()); - // } + PassMain::Sub *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? + (has_motion ? prepass_single_sided_moving_ps_ : + prepass_single_sided_static_ps_) : + (has_motion ? prepass_double_sided_moving_ps_ : + prepass_double_sided_static_ps_); + return &pass->sub(GPU_material_get_name(gpumat)); +} - DRWState state_disable = DRW_STATE_WRITE_DEPTH; - DRWState state_enable = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM; - if (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) { - state_enable |= DRW_STATE_CULL_BACK; - } - DRW_shgroup_state_disable(grp, state_disable); - DRW_shgroup_state_enable(grp, state_enable); - return grp; +PassMain::Sub *ForwardPipeline::material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat) +{ + PassMain::Sub *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? 
opaque_single_sided_ps_ : + opaque_double_sided_ps_; + return &pass->sub(GPU_material_get_name(gpumat)); } -DRWShadingGroup *ForwardPipeline::prepass_transparent_add(::Material *blender_mat, - GPUMaterial *gpumat) +PassMain::Sub *ForwardPipeline::prepass_transparent_add(const Object *ob, + ::Material *blender_mat, + GPUMaterial *gpumat) { if ((blender_mat->blend_flag & MA_BL_HIDE_BACKFACE) == 0) { return nullptr; } + DRWState state = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS_EQUAL; + if ((blender_mat->blend_flag & MA_BL_CULL_BACKFACE)) { + state |= DRW_STATE_CULL_BACK; + } + float sorting_value = math::dot(float3(ob->obmat[3]), camera_forward_); + PassMain::Sub *pass = &transparent_ps_.sub(GPU_material_get_name(gpumat), sorting_value); + pass->state_set(state); + pass->material_set(*inst_.manager, gpumat); + return pass; +} - DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_); - - DRWState state_disable = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM; - DRWState state_enable = DRW_STATE_WRITE_DEPTH; - if (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) { - state_enable |= DRW_STATE_CULL_BACK; +PassMain::Sub *ForwardPipeline::material_transparent_add(const Object *ob, + ::Material *blender_mat, + GPUMaterial *gpumat) +{ + DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_LESS_EQUAL; + if ((blender_mat->blend_flag & MA_BL_CULL_BACKFACE)) { + state |= DRW_STATE_CULL_BACK; } - DRW_shgroup_state_disable(grp, state_disable); - DRW_shgroup_state_enable(grp, state_enable); - return grp; + float sorting_value = math::dot(float3(ob->obmat[3]), camera_forward_); + PassMain::Sub *pass = &transparent_ps_.sub(GPU_material_get_name(gpumat), sorting_value); + pass->state_set(state); + pass->material_set(*inst_.manager, gpumat); + return pass; } -void ForwardPipeline::render(const DRWView *view, +void ForwardPipeline::render(View &view, Framebuffer &prepass_fb, Framebuffer &combined_fb, - GPUTexture *depth_tx, GPUTexture 
*UNUSED(combined_tx)) { - UNUSED_VARS(view, depth_tx, prepass_fb, combined_fb); - // HiZBuffer &hiz = inst_.hiz_front; + UNUSED_VARS(view); - DRW_stats_group_start("ForwardOpaque"); + DRW_stats_group_start("Forward.Opaque"); GPU_framebuffer_bind(prepass_fb); - DRW_draw_pass(prepass_ps_); + inst_.manager->submit(prepass_ps_, view); - // hiz.set_dirty(); + // if (!DRW_pass_is_empty(prepass_ps_)) { + inst_.hiz_buffer.set_dirty(); + // } // if (inst_.raytracing.enabled()) { // rt_buffer.radiance_copy(combined_tx); - // hiz.update(depth_tx); + // inst_.hiz_buffer.update(); // } // inst_.shadows.set_view(view, depth_tx); GPU_framebuffer_bind(combined_fb); - DRW_draw_pass(opaque_ps_); + inst_.manager->submit(opaque_ps_, view); DRW_stats_group_end(); - DRW_stats_group_start("ForwardTransparent"); - /* TODO(fclem) This is suboptimal. We could sort during sync. */ - /* FIXME(fclem) This wont work for panoramic, where we need - * to sort by distance to camera, not by z. */ - DRW_pass_sort_shgroup_z(transparent_ps_); - DRW_draw_pass(transparent_ps_); - DRW_stats_group_end(); + inst_.manager->submit(transparent_ps_, view); // if (inst_.raytracing.enabled()) { // gbuffer.ray_radiance_tx.release(); diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.hh b/source/blender/draw/engines/eevee_next/eevee_pipeline.hh index 3bdc718767b..0614a963dec 100644 --- a/source/blender/draw/engines/eevee_next/eevee_pipeline.hh +++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.hh @@ -13,6 +13,7 @@ #pragma once #include "DRW_render.h" +#include "draw_shader_shared.h" /* TODO(fclem): Move it to GPU/DRAW. 
*/ #include "../eevee/eevee_lut.h" @@ -31,13 +32,13 @@ class WorldPipeline { private: Instance &inst_; - DRWPass *world_ps_ = nullptr; + PassSimple world_ps_ = {"World.Background"}; public: WorldPipeline(Instance &inst) : inst_(inst){}; void sync(GPUMaterial *gpumat); - void render(); + void render(View &view); }; /** \} */ @@ -52,13 +53,18 @@ class ForwardPipeline { private: Instance &inst_; - DRWPass *prepass_ps_ = nullptr; - DRWPass *prepass_velocity_ps_ = nullptr; - DRWPass *prepass_culled_ps_ = nullptr; - DRWPass *prepass_culled_velocity_ps_ = nullptr; - DRWPass *opaque_ps_ = nullptr; - DRWPass *opaque_culled_ps_ = nullptr; - DRWPass *transparent_ps_ = nullptr; + PassMain prepass_ps_ = {"Prepass"}; + PassMain::Sub *prepass_single_sided_static_ps_ = nullptr; + PassMain::Sub *prepass_single_sided_moving_ps_ = nullptr; + PassMain::Sub *prepass_double_sided_static_ps_ = nullptr; + PassMain::Sub *prepass_double_sided_moving_ps_ = nullptr; + + PassMain opaque_ps_ = {"Shading"}; + PassMain::Sub *opaque_single_sided_ps_ = nullptr; + PassMain::Sub *opaque_double_sided_ps_ = nullptr; + + PassSortable transparent_ps_ = {"Forward.Transparent"}; + float3 camera_forward_; // GPUTexture *input_screen_radiance_tx_ = nullptr; @@ -67,31 +73,19 @@ class ForwardPipeline { void sync(); - DRWShadingGroup *material_add(::Material *blender_mat, GPUMaterial *gpumat) - { - return (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) ? - material_transparent_add(blender_mat, gpumat) : - material_opaque_add(blender_mat, gpumat); - } + PassMain::Sub *prepass_opaque_add(::Material *blender_mat, GPUMaterial *gpumat, bool has_motion); + PassMain::Sub *material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat); - DRWShadingGroup *prepass_add(::Material *blender_mat, GPUMaterial *gpumat, bool has_motion) - { - return (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) ? 
- prepass_transparent_add(blender_mat, gpumat) : - prepass_opaque_add(blender_mat, gpumat, has_motion); - } - - DRWShadingGroup *material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat); - DRWShadingGroup *prepass_opaque_add(::Material *blender_mat, - GPUMaterial *gpumat, - bool has_motion); - DRWShadingGroup *material_transparent_add(::Material *blender_mat, GPUMaterial *gpumat); - DRWShadingGroup *prepass_transparent_add(::Material *blender_mat, GPUMaterial *gpumat); + PassMain::Sub *prepass_transparent_add(const Object *ob, + ::Material *blender_mat, + GPUMaterial *gpumat); + PassMain::Sub *material_transparent_add(const Object *ob, + ::Material *blender_mat, + GPUMaterial *gpumat); - void render(const DRWView *view, + void render(View &view, Framebuffer &prepass_fb, Framebuffer &combined_fb, - GPUTexture *depth_tx, GPUTexture *combined_tx); }; @@ -193,26 +187,36 @@ class PipelineModule { // velocity.sync(); } - DRWShadingGroup *material_add(::Material *blender_mat, - GPUMaterial *gpumat, - eMaterialPipeline pipeline_type) + PassMain::Sub *material_add(Object *ob, + ::Material *blender_mat, + GPUMaterial *gpumat, + eMaterialPipeline pipeline_type) { switch (pipeline_type) { case MAT_PIPE_DEFERRED_PREPASS: // return deferred.prepass_add(blender_mat, gpumat, false); - break; + case MAT_PIPE_FORWARD_PREPASS: + if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) { + return forward.prepass_transparent_add(ob, blender_mat, gpumat); + } + return forward.prepass_opaque_add(blender_mat, gpumat, false); + case MAT_PIPE_DEFERRED_PREPASS_VELOCITY: // return deferred.prepass_add(blender_mat, gpumat, true); - break; - case MAT_PIPE_FORWARD_PREPASS: - return forward.prepass_add(blender_mat, gpumat, false); case MAT_PIPE_FORWARD_PREPASS_VELOCITY: - return forward.prepass_add(blender_mat, gpumat, true); + if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) { + return forward.prepass_transparent_add(ob, blender_mat, gpumat); + } + return 
forward.prepass_opaque_add(blender_mat, gpumat, true); + case MAT_PIPE_DEFERRED: // return deferred.material_add(blender_mat, gpumat); - break; case MAT_PIPE_FORWARD: - return forward.material_add(blender_mat, gpumat); + if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) { + return forward.material_transparent_add(ob, blender_mat, gpumat); + } + return forward.material_opaque_add(blender_mat, gpumat); + case MAT_PIPE_VOLUME: /* TODO(fclem) volume pass. */ return nullptr; diff --git a/source/blender/draw/engines/eevee_next/eevee_renderbuffers.cc b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.cc new file mode 100644 index 00000000000..8e36e1d071c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.cc @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * A film is a fullscreen buffer (usually at output extent) + * that will be able to accumulate sample in any distorted camera_type + * using a pixel filter. + * + * Input needs to be jittered so that the filter converges to the right result. + */ + +#include "BLI_rect.h" + +#include "GPU_framebuffer.h" +#include "GPU_texture.h" + +#include "DRW_render.h" + +#include "eevee_film.hh" +#include "eevee_instance.hh" + +namespace blender::eevee { + +void RenderBuffers::acquire(int2 extent) +{ + const eViewLayerEEVEEPassType enabled_passes = inst_.film.enabled_passes_get(); + + auto pass_extent = [&](eViewLayerEEVEEPassType pass_bit) -> int2 { + /* Use dummy texture for disabled passes. Allows correct bindings. */ + return (enabled_passes & pass_bit) ? extent : int2(1); + }; + + eGPUTextureFormat color_format = GPU_RGBA16F; + eGPUTextureFormat float_format = GPU_R16F; + + /* Depth and combined are always needed. 
*/ + depth_tx.acquire(extent, GPU_DEPTH24_STENCIL8); + combined_tx.acquire(extent, color_format); + + bool do_vector_render_pass = (enabled_passes & EEVEE_RENDER_PASS_VECTOR) || + (inst_.motion_blur.postfx_enabled() && !inst_.is_viewport()); + uint32_t max_light_color_layer = max_ii(enabled_passes & EEVEE_RENDER_PASS_DIFFUSE_LIGHT ? + (int)RENDER_PASS_LAYER_DIFFUSE_LIGHT : + -1, + enabled_passes & EEVEE_RENDER_PASS_SPECULAR_LIGHT ? + (int)RENDER_PASS_LAYER_SPECULAR_LIGHT : + -1) + + 1; + /* Only RG16F when only doing only reprojection or motion blur. */ + eGPUTextureFormat vector_format = do_vector_render_pass ? GPU_RGBA16F : GPU_RG16F; + /* TODO(fclem): Make vector pass allocation optional if no TAA or motion blur is needed. */ + vector_tx.acquire(extent, vector_format); + + normal_tx.acquire(pass_extent(EEVEE_RENDER_PASS_NORMAL), color_format); + diffuse_color_tx.acquire(pass_extent(EEVEE_RENDER_PASS_DIFFUSE_COLOR), color_format); + specular_color_tx.acquire(pass_extent(EEVEE_RENDER_PASS_SPECULAR_COLOR), color_format); + volume_light_tx.acquire(pass_extent(EEVEE_RENDER_PASS_VOLUME_LIGHT), color_format); + emission_tx.acquire(pass_extent(EEVEE_RENDER_PASS_EMIT), color_format); + environment_tx.acquire(pass_extent(EEVEE_RENDER_PASS_ENVIRONMENT), color_format); + shadow_tx.acquire(pass_extent(EEVEE_RENDER_PASS_SHADOW), float_format); + ambient_occlusion_tx.acquire(pass_extent(EEVEE_RENDER_PASS_AO), float_format); + + light_tx.ensure_2d_array(color_format, + max_light_color_layer > 0 ? extent : int2(1), + max_ii(1, max_light_color_layer)); + + const AOVsInfoData &aovs = inst_.film.aovs_info; + aov_color_tx.ensure_2d_array( + color_format, (aovs.color_len > 0) ? extent : int2(1), max_ii(1, aovs.color_len)); + aov_value_tx.ensure_2d_array( + float_format, (aovs.value_len > 0) ? 
extent : int2(1), max_ii(1, aovs.value_len)); + + eGPUTextureFormat cryptomatte_format = GPU_R32F; + const int cryptomatte_layer_len = inst_.film.cryptomatte_layer_max_get(); + if (cryptomatte_layer_len == 2) { + cryptomatte_format = GPU_RG32F; + } + else if (cryptomatte_layer_len == 3) { + cryptomatte_format = GPU_RGBA32F; + } + cryptomatte_tx.acquire( + pass_extent(static_cast<eViewLayerEEVEEPassType>(EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT | + EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET | + EEVEE_RENDER_PASS_CRYPTOMATTE_MATERIAL)), + cryptomatte_format); +} + +void RenderBuffers::release() +{ + depth_tx.release(); + combined_tx.release(); + + normal_tx.release(); + vector_tx.release(); + diffuse_color_tx.release(); + specular_color_tx.release(); + volume_light_tx.release(); + emission_tx.release(); + environment_tx.release(); + shadow_tx.release(); + ambient_occlusion_tx.release(); + cryptomatte_tx.release(); +} + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_renderbuffers.hh b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.hh new file mode 100644 index 00000000000..ae5d7fbae5c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_renderbuffers.hh @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Render buffers are textures that are filled during a view rendering. + * Their content is then added to the accumulation buffers of the film class. + * They are short lived and can be reused when doing multi view rendering. + */ + +#pragma once + +#include "DRW_render.h" + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +class RenderBuffers { + public: + TextureFromPool depth_tx; + TextureFromPool combined_tx; + + // TextureFromPool mist_tx; /* Derived from depth_tx during accumulation. 
*/ + TextureFromPool normal_tx; + TextureFromPool vector_tx; + TextureFromPool diffuse_color_tx; + TextureFromPool specular_color_tx; + TextureFromPool volume_light_tx; + TextureFromPool emission_tx; + TextureFromPool environment_tx; + TextureFromPool shadow_tx; + TextureFromPool ambient_occlusion_tx; + TextureFromPool cryptomatte_tx; + /* TODO(fclem): Use texture from pool once they support texture array. */ + Texture light_tx; + Texture aov_color_tx; + Texture aov_value_tx; + + private: + Instance &inst_; + + public: + RenderBuffers(Instance &inst) : inst_(inst){}; + + /* Acquires (also ensures) the render buffer before rendering to them. */ + void acquire(int2 extent); + void release(); +}; + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_sampling.cc b/source/blender/draw/engines/eevee_next/eevee_sampling.cc new file mode 100644 index 00000000000..76a0e98638b --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_sampling.cc @@ -0,0 +1,268 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Random number generator, contains persistent state and sample count logic. + */ + +#include "BLI_rand.h" + +#include "eevee_instance.hh" +#include "eevee_sampling.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name Sampling + * \{ */ + +void Sampling::init(const Scene *scene) +{ + sample_count_ = inst_.is_viewport() ? scene->eevee.taa_samples : scene->eevee.taa_render_samples; + + if (sample_count_ == 0) { + BLI_assert(inst_.is_viewport()); + sample_count_ = infinite_sample_count_; + } + + motion_blur_steps_ = !inst_.is_viewport() ? 
scene->eevee.motion_blur_steps : 1; + sample_count_ = divide_ceil_u(sample_count_, motion_blur_steps_); + + if (scene->eevee.flag & SCE_EEVEE_DOF_JITTER) { + if (sample_count_ == infinite_sample_count_) { + /* Special case for viewport continuous rendering. We clamp to a max sample + * to avoid the jittered dof never converging. */ + dof_ring_count_ = 6; + } + else { + dof_ring_count_ = sampling_web_ring_count_get(dof_web_density_, sample_count_); + } + dof_sample_count_ = sampling_web_sample_count_get(dof_web_density_, dof_ring_count_); + /* Change total sample count to fill the web pattern entirely. */ + sample_count_ = divide_ceil_u(sample_count_, dof_sample_count_) * dof_sample_count_; + } + else { + dof_ring_count_ = 0; + dof_sample_count_ = 1; + } + + /* Only multiply after to have full the full DoF web pattern for each time steps. */ + sample_count_ *= motion_blur_steps_; +} + +void Sampling::end_sync() +{ + if (reset_) { + viewport_sample_ = 0; + } + + if (inst_.is_viewport()) { + + interactive_mode_ = viewport_sample_ < interactive_mode_threshold; + + bool interactive_mode_disabled = (inst_.scene->eevee.flag & SCE_EEVEE_TAA_REPROJECTION) == 0; + if (interactive_mode_disabled) { + interactive_mode_ = false; + sample_ = viewport_sample_; + } + else if (interactive_mode_) { + int interactive_sample_count = min_ii(interactive_sample_max_, sample_count_); + + if (viewport_sample_ < interactive_sample_count) { + /* Loop over the same starting samples. */ + sample_ = sample_ % interactive_sample_count; + } + else { + /* Break out of the loop and resume normal pattern. */ + sample_ = interactive_sample_count; + } + } + } +} + +void Sampling::step() +{ + { + /* TODO(fclem) we could use some persistent states to speedup the computation. */ + double2 r, offset = {0, 0}; + /* Using 2,3 primes as per UE4 Temporal AA presentation. 
+ * http://advances.realtimerendering.com/s2014/epic/TemporalAA.pptx (slide 14) */ + uint2 primes = {2, 3}; + BLI_halton_2d(primes, offset, sample_ + 1, r); + /* WORKAROUND: We offset the distribution to make the first sample (0,0). This way, we are + * assured that at least one of the samples inside the TAA rotation will match the one from the + * draw manager. This makes sure overlays are correctly composited in static scene. */ + data_.dimensions[SAMPLING_FILTER_U] = fractf(r[0] + (1.0 / 2.0)); + data_.dimensions[SAMPLING_FILTER_V] = fractf(r[1] + (2.0 / 3.0)); + /* TODO de-correlate. */ + data_.dimensions[SAMPLING_TIME] = r[0]; + data_.dimensions[SAMPLING_CLOSURE] = r[1]; + data_.dimensions[SAMPLING_RAYTRACE_X] = r[0]; + } + { + double2 r, offset = {0, 0}; + uint2 primes = {5, 7}; + BLI_halton_2d(primes, offset, sample_ + 1, r); + data_.dimensions[SAMPLING_LENS_U] = r[0]; + data_.dimensions[SAMPLING_LENS_V] = r[1]; + /* TODO de-correlate. */ + data_.dimensions[SAMPLING_LIGHTPROBE] = r[0]; + data_.dimensions[SAMPLING_TRANSPARENCY] = r[1]; + } + { + /* Using leaped Halton sequence so we can reused the same primes as lens. */ + double3 r, offset = {0, 0, 0}; + uint64_t leap = 11; + uint3 primes = {5, 4, 7}; + BLI_halton_3d(primes, offset, sample_ * leap, r); + data_.dimensions[SAMPLING_SHADOW_U] = r[0]; + data_.dimensions[SAMPLING_SHADOW_V] = r[1]; + data_.dimensions[SAMPLING_SHADOW_W] = r[2]; + /* TODO de-correlate. */ + data_.dimensions[SAMPLING_RAYTRACE_U] = r[0]; + data_.dimensions[SAMPLING_RAYTRACE_V] = r[1]; + data_.dimensions[SAMPLING_RAYTRACE_W] = r[2]; + } + { + /* Using leaped Halton sequence so we can reused the same primes. */ + double2 r, offset = {0, 0}; + uint64_t leap = 5; + uint2 primes = {2, 3}; + BLI_halton_2d(primes, offset, sample_ * leap, r); + data_.dimensions[SAMPLING_SHADOW_X] = r[0]; + data_.dimensions[SAMPLING_SHADOW_Y] = r[1]; + /* TODO de-correlate. 
*/ + data_.dimensions[SAMPLING_SSS_U] = r[0]; + data_.dimensions[SAMPLING_SSS_V] = r[1]; + } + + data_.push_update(); + + viewport_sample_++; + sample_++; + + reset_ = false; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Sampling patterns + * \{ */ + +float3 Sampling::sample_ball(const float3 &rand) +{ + float3 sample; + sample.z = rand.x * 2.0f - 1.0f; /* cos theta */ + + float r = sqrtf(fmaxf(0.0f, 1.0f - square_f(sample.z))); /* sin theta */ + + float omega = rand.y * 2.0f * M_PI; + sample.x = r * cosf(omega); + sample.y = r * sinf(omega); + + sample *= sqrtf(sqrtf(rand.z)); + return sample; +} + +float2 Sampling::sample_disk(const float2 &rand) +{ + float omega = rand.y * 2.0f * M_PI; + return sqrtf(rand.x) * float2(cosf(omega), sinf(omega)); +} + +float2 Sampling::sample_spiral(const float2 &rand) +{ + /* Fibonacci spiral. */ + float omega = 4.0f * M_PI * (1.0f + sqrtf(5.0f)) * rand.x; + float r = sqrtf(rand.x); + /* Random rotation. */ + omega += rand.y * 2.0f * M_PI; + return r * float2(cosf(omega), sinf(omega)); +} + +void Sampling::dof_disk_sample_get(float *r_radius, float *r_theta) const +{ + if (dof_ring_count_ == 0) { + *r_radius = *r_theta = 0.0f; + return; + } + + int s = sample_ - 1; + int ring = 0; + int ring_sample_count = 1; + int ring_sample = 1; + + s = s * (dof_web_density_ - 1); + s = s % dof_sample_count_; + + /* Choosing sample to we get faster convergence. + * The issue here is that we cannot map a low discrepancy sequence to this sampling pattern + * because the same sample could be chosen twice in relatively short intervals. */ + /* For now just use an ascending sequence with an offset. This gives us relatively quick + * initial coverage and relatively high distance between samples. */ + /* TODO(@fclem) We can try to order samples based on a LDS into a table to avoid duplicates. + * The drawback would be some memory consumption and initialize time. 
*/ + int samples_passed = 1; + while (s >= samples_passed) { + ring++; + ring_sample_count = ring * dof_web_density_; + ring_sample = s - samples_passed; + ring_sample = (ring_sample + 1) % ring_sample_count; + samples_passed += ring_sample_count; + } + + *r_radius = ring / (float)dof_ring_count_; + *r_theta = 2.0f * M_PI * ring_sample / (float)ring_sample_count; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Cumulative Distribution Function (CDF) + * \{ */ + +/* Creates a discrete cumulative distribution function table from a given curvemapping. + * Output cdf vector is expected to already be sized according to the wanted resolution. */ +void Sampling::cdf_from_curvemapping(const CurveMapping &curve, Vector<float> &cdf) +{ + BLI_assert(cdf.size() > 1); + cdf[0] = 0.0f; + /* Actual CDF evaluation. */ + for (int u : IndexRange(cdf.size() - 1)) { + float x = (float)(u + 1) / (float)(cdf.size() - 1); + cdf[u + 1] = cdf[u] + BKE_curvemapping_evaluateF(&curve, 0, x); + } + /* Normalize the CDF. */ + for (int u : cdf.index_range()) { + cdf[u] /= cdf.last(); + } + /* Just to make sure. */ + cdf.last() = 1.0f; +} + +/* Inverts a cumulative distribution function. + * Output vector is expected to already be sized according to the wanted resolution. 
*/ +void Sampling::cdf_invert(Vector<float> &cdf, Vector<float> &inverted_cdf) +{ + for (int u : inverted_cdf.index_range()) { + float x = (float)u / (float)(inverted_cdf.size() - 1); + for (int i : cdf.index_range()) { + if (i == cdf.size() - 1) { + inverted_cdf[u] = 1.0f; + } + else if (cdf[i] >= x) { + float t = (x - cdf[i]) / (cdf[i + 1] - cdf[i]); + inverted_cdf[u] = ((float)i + t) / (float)(cdf.size() - 1); + break; + } + } + } +} + +/** \} */ + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_sampling.hh b/source/blender/draw/engines/eevee_next/eevee_sampling.hh new file mode 100644 index 00000000000..c2bf23d20fc --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_sampling.hh @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Random number generator, contains persistent state and sample count logic. + */ + +#pragma once + +#include "BKE_colortools.h" +#include "BLI_system.h" +#include "BLI_vector.hh" +#include "DNA_scene_types.h" +#include "DRW_render.h" + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +class Sampling { + private: + Instance &inst_; + + /* Number of samples in the first ring of jittered depth of field. */ + static constexpr uint64_t dof_web_density_ = 6; + /* High number of sample for viewport infinite rendering. */ + static constexpr uint64_t infinite_sample_count_ = 0xFFFFFFu; + /* During interactive rendering, loop over the first few samples. */ + static constexpr uint64_t interactive_sample_max_ = 8; + + /** 0 based current sample. Might not increase sequentially in viewport. */ + uint64_t sample_ = 0; + /** Target sample count. */ + uint64_t sample_count_ = 64; + /** Number of ring in the web pattern of the jittered Depth of Field. */ + uint64_t dof_ring_count_ = 0; + /** Number of samples in the web pattern of the jittered Depth of Field. 
*/ + uint64_t dof_sample_count_ = 1; + /** Motion blur steps. */ + uint64_t motion_blur_steps_ = 1; + /** Increases if the view and the scene is static. Does increase sequentially. */ + int64_t viewport_sample_ = 0; + /** Tag to reset sampling for the next sample. */ + bool reset_ = false; + /** + * Switch between interactive and static accumulation. + * In interactive mode, image stability is prioritized over quality. + */ + bool interactive_mode_ = false; + /** + * Sample count after which we use the static accumulation. + * Interactive sampling from sample 0 to (interactive_mode_threshold - 1). + * Accumulation sampling from sample interactive_mode_threshold to sample_count_. + */ + static constexpr int interactive_mode_threshold = 3; + + SamplingDataBuf data_; + + public: + Sampling(Instance &inst) : inst_(inst){}; + ~Sampling(){}; + + void init(const Scene *scene); + void end_sync(); + void step(); + + /* Viewport Only: Function to call to notify something in the scene changed. + * This will reset accumulation. Do not call after end_sync() or during sample rendering. */ + void reset() + { + reset_ = true; + } + + /* Viewport Only: true if an update happened in the scene and accumulation needs reset. */ + bool is_reset() const + { + return reset_; + } + + void bind_resources(DRWShadingGroup *grp) + { + DRW_shgroup_storage_block_ref(grp, "sampling_buf", &data_); + } + + template<typename T> void bind_resources(draw::detail::PassBase<T> *pass) + { + /* Storage Buf. */ + pass->bind_ssbo(SAMPLING_BUF_SLOT, &data_); + } + + /* Returns a pseudo random number in [0..1] range. Each dimension are de-correlated. */ + float rng_get(eSamplingDimension dimension) const + { + return data_.dimensions[dimension]; + } + + /* Returns a pseudo random number in [0..1] range. Each dimension are de-correlated. 
*/ + float2 rng_2d_get(eSamplingDimension starting_dimension) const + { + return *reinterpret_cast<const float2 *>(&data_.dimensions[starting_dimension]); + } + + /* Returns a pseudo random number in [0..1] range. Each dimension are de-correlated. */ + float3 rng_3d_get(eSamplingDimension starting_dimension) const + { + return *reinterpret_cast<const float3 *>(&data_.dimensions[starting_dimension]); + } + + /* Returns true if rendering has finished. */ + bool finished() const + { + return (sample_ >= sample_count_); + } + + /* Returns true if viewport smoothing and sampling has finished. */ + bool finished_viewport() const + { + return (viewport_sample_ >= sample_count_) && !interactive_mode_; + } + + /* Returns true if viewport renderer is in interactive mode and should use TAA. */ + bool interactive_mode() const + { + return interactive_mode_; + } + + uint64_t sample_count() const + { + return sample_count_; + } + + /* Return true if we are starting a new motion blur step. We need to run sync again since + * depsgraph was updated by MotionBlur::step(). */ + bool do_render_sync() const + { + return ((sample_ % (sample_count_ / motion_blur_steps_)) == 0); + } + + /** + * Special ball distribution: + * Point are distributed in a way that when they are orthogonally + * projected into any plane, the resulting distribution is (close to) + * a uniform disc distribution. + * \a rand is 3 random float in the [0..1] range. + * Returns point in a ball of radius 1 and centered on the origin. + */ + static float3 sample_ball(const float3 &rand); + + /** + * Uniform disc distribution. + * \a rand is 2 random float in the [0..1] range. + * Returns point in a disk of radius 1 and centered on the origin. + */ + static float2 sample_disk(const float2 &rand); + + /** + * Uniform disc distribution using Fibonacci spiral sampling. + * \a rand is 2 random float in the [0..1] range. + * Returns point in a disk of radius 1 and centered on the origin. 
+ */ + static float2 sample_spiral(const float2 &rand); + + /** + * Special RNG for depth of field. + * Returns \a radius and \a theta angle offset to apply to the web sampling pattern. + */ + void dof_disk_sample_get(float *r_radius, float *r_theta) const; + + /** + * Returns sample count inside the jittered depth of field web pattern. + */ + uint64_t dof_ring_count_get() const + { + return dof_ring_count_; + } + + /** + * Returns sample count inside the jittered depth of field web pattern. + */ + uint64_t dof_sample_count_get() const + { + return dof_sample_count_; + } + + /* Cumulative Distribution Function Utils. */ + static void cdf_from_curvemapping(const CurveMapping &curve, Vector<float> &cdf); + static void cdf_invert(Vector<float> &cdf, Vector<float> &inverted_cdf); +}; + +} // namespace blender::eevee diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc index 09aa97e49e9..64b1d4891a9 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.cc +++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc @@ -9,6 +9,8 @@ * and static shader usage. 
*/ +#include "GPU_capabilities.h" + #include "gpu_shader_create_info.hh" #include "eevee_shader.hh" @@ -78,8 +80,68 @@ ShaderModule::~ShaderModule() const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_type) { switch (shader_type) { - case VELOCITY_RESOLVE: - return "eevee_velocity_resolve"; + case FILM_FRAG: + return "eevee_film_frag"; + case FILM_COMP: + return "eevee_film_comp"; + case FILM_CRYPTOMATTE_POST: + return "eevee_film_cryptomatte_post"; + case HIZ_DEBUG: + return "eevee_hiz_debug"; + case HIZ_UPDATE: + return "eevee_hiz_update"; + case MOTION_BLUR_GATHER: + return "eevee_motion_blur_gather"; + case MOTION_BLUR_TILE_DILATE: + return "eevee_motion_blur_tiles_dilate"; + case MOTION_BLUR_TILE_FLATTEN_RENDER: + return "eevee_motion_blur_tiles_flatten_render"; + case MOTION_BLUR_TILE_FLATTEN_VIEWPORT: + return "eevee_motion_blur_tiles_flatten_viewport"; + case DOF_BOKEH_LUT: + return "eevee_depth_of_field_bokeh_lut"; + case DOF_DOWNSAMPLE: + return "eevee_depth_of_field_downsample"; + case DOF_FILTER: + return "eevee_depth_of_field_filter"; + case DOF_GATHER_FOREGROUND_LUT: + return "eevee_depth_of_field_gather_foreground_lut"; + case DOF_GATHER_FOREGROUND: + return "eevee_depth_of_field_gather_foreground_no_lut"; + case DOF_GATHER_BACKGROUND_LUT: + return "eevee_depth_of_field_gather_background_lut"; + case DOF_GATHER_BACKGROUND: + return "eevee_depth_of_field_gather_background_no_lut"; + case DOF_GATHER_HOLE_FILL: + return "eevee_depth_of_field_hole_fill"; + case DOF_REDUCE: + return "eevee_depth_of_field_reduce"; + case DOF_RESOLVE: + return "eevee_depth_of_field_resolve_no_lut"; + case DOF_RESOLVE_LUT: + return "eevee_depth_of_field_resolve_lut"; + case DOF_SETUP: + return "eevee_depth_of_field_setup"; + case DOF_SCATTER: + return "eevee_depth_of_field_scatter"; + case DOF_STABILIZE: + return "eevee_depth_of_field_stabilize"; + case DOF_TILES_DILATE_MINABS: + return "eevee_depth_of_field_tiles_dilate_minabs"; + case 
DOF_TILES_DILATE_MINMAX: + return "eevee_depth_of_field_tiles_dilate_minmax"; + case DOF_TILES_FLATTEN: + return "eevee_depth_of_field_tiles_flatten"; + case LIGHT_CULLING_DEBUG: + return "eevee_light_culling_debug"; + case LIGHT_CULLING_SELECT: + return "eevee_light_culling_select"; + case LIGHT_CULLING_SORT: + return "eevee_light_culling_sort"; + case LIGHT_CULLING_TILE: + return "eevee_light_culling_tile"; + case LIGHT_CULLING_ZBIN: + return "eevee_light_culling_zbin"; /* To avoid compiler warning about missing case. */ case MAX_SHADER_TYPE: return ""; @@ -122,11 +184,41 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu GPUCodegenOutput &codegen = *codegen_; ShaderCreateInfo &info = *reinterpret_cast<ShaderCreateInfo *>(codegen.create_info); - info.auto_resource_location(true); + /* WORKAROUND: Replace by new ob info. */ + int64_t ob_info_index = info.additional_infos_.first_index_of_try("draw_object_infos"); + if (ob_info_index != -1) { + info.additional_infos_[ob_info_index] = "draw_object_infos_new"; + } + + /* WORKAROUND: Add new ob attr buffer. */ + if (GPU_material_uniform_attributes(gpumat) != nullptr) { + info.additional_info("draw_object_attribute_new"); + } + + /* WORKAROUND: Avoid utility texture merge error. TODO: find a cleaner fix. */ + for (auto &resource : info.batch_resources_) { + if (resource.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) { + if (resource.slot == RBUFS_UTILITY_TEX_SLOT) { + resource.slot = GPU_max_textures_frag() - 1; + } + } + } if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) { info.define("MAT_TRANSPARENT"); + /* Transparent material do not have any velocity specific pipeline. */ + if (pipeline_type == MAT_PIPE_FORWARD_PREPASS_VELOCITY) { + pipeline_type = MAT_PIPE_FORWARD_PREPASS; + } } + + if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT) == false && + pipeline_type == MAT_PIPE_FORWARD) { + /* Opaque forward do support AOVs and render pass. 
*/ + info.additional_info("eevee_aov_out"); + info.additional_info("eevee_render_pass_out"); + } + if (GPU_material_flag_get(gpumat, GPU_MATFLAG_BARYCENTRIC)) { switch (geometry_type) { case MAT_GEOM_MESH: @@ -161,7 +253,6 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu } } info.vertex_inputs_.clear(); - info.additional_info("draw_curves_infos"); break; case MAT_GEOM_WORLD: /** diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.hh b/source/blender/draw/engines/eevee_next/eevee_shader.hh index 0f42e880a10..88538557c07 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.hh +++ b/source/blender/draw/engines/eevee_next/eevee_shader.hh @@ -26,7 +26,41 @@ namespace blender::eevee { /* Keep alphabetical order and clean prefix. */ enum eShaderType { - VELOCITY_RESOLVE = 0, + FILM_FRAG = 0, + FILM_COMP, + FILM_CRYPTOMATTE_POST, + + DOF_BOKEH_LUT, + DOF_DOWNSAMPLE, + DOF_FILTER, + DOF_GATHER_BACKGROUND_LUT, + DOF_GATHER_BACKGROUND, + DOF_GATHER_FOREGROUND_LUT, + DOF_GATHER_FOREGROUND, + DOF_GATHER_HOLE_FILL, + DOF_REDUCE, + DOF_RESOLVE_LUT, + DOF_RESOLVE, + DOF_SCATTER, + DOF_SETUP, + DOF_STABILIZE, + DOF_TILES_DILATE_MINABS, + DOF_TILES_DILATE_MINMAX, + DOF_TILES_FLATTEN, + + HIZ_UPDATE, + HIZ_DEBUG, + + LIGHT_CULLING_DEBUG, + LIGHT_CULLING_SELECT, + LIGHT_CULLING_SORT, + LIGHT_CULLING_TILE, + LIGHT_CULLING_ZBIN, + + MOTION_BLUR_GATHER, + MOTION_BLUR_TILE_DILATE, + MOTION_BLUR_TILE_FLATTEN_RENDER, + MOTION_BLUR_TILE_FLATTEN_VIEWPORT, MAX_SHADER_TYPE, }; diff --git a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh index eb409f076f3..8e96445d6b9 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh +++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh @@ -12,22 +12,132 @@ # include "BLI_memory_utils.hh" # include "DRW_gpu_wrapper.hh" -// # include "eevee_defines.hh" +# include "draw_manager.hh" +# 
include "draw_pass.hh" + +# include "eevee_defines.hh" # include "GPU_shader_shared.h" namespace blender::eevee { -using draw::Framebuffer; -using draw::SwapChain; -using draw::Texture; -using draw::TextureFromPool; +using namespace draw; + +constexpr eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT; +constexpr eGPUSamplerState with_filter = GPU_SAMPLER_FILTER; #endif #define UBO_MIN_MAX_SUPPORTED_SIZE 1 << 14 /* -------------------------------------------------------------------- */ +/** \name Debug Mode + * \{ */ + +/** These are just to make more sense of G.debug_value's values. Reserved range is 1-30. */ +enum eDebugMode : uint32_t { + DEBUG_NONE = 0u, + /** + * Gradient showing light evaluation hot-spots. + */ + DEBUG_LIGHT_CULLING = 1u, + /** + * Show incorrectly downsample tiles in red. + */ + DEBUG_HIZ_VALIDATION = 2u, + /** + * Tile-maps to screen. Is also present in other modes. + * - Black pixels, no pages allocated. + * - Green pixels, pages cached. + * - Red pixels, pages allocated. + */ + DEBUG_SHADOW_TILEMAPS = 10u, + /** + * Random color per pages. Validates page density allocation and sampling. + */ + DEBUG_SHADOW_PAGES = 11u, + /** + * Outputs random color per tile-map (or tile-map level). Validates tile-maps coverage. + * Black means not covered by any tile-maps LOD of the shadow. + */ + DEBUG_SHADOW_LOD = 12u, + /** + * Outputs white pixels for pages allocated and black pixels for unused pages. + * This needs DEBUG_SHADOW_PAGE_ALLOCATION_ENABLED defined in order to work. + */ + DEBUG_SHADOW_PAGE_ALLOCATION = 13u, + /** + * Outputs the tile-map atlas. Default tile-map is too big for the usual screen resolution. + * Try lowering SHADOW_TILEMAP_PER_ROW and SHADOW_MAX_TILEMAP before using this option. + */ + DEBUG_SHADOW_TILE_ALLOCATION = 14u, + /** + * Visualize linear depth stored in the atlas regions of the active light. + * This way, one can check if the rendering, the copying and the shadow sampling functions works. 
+ */ + DEBUG_SHADOW_SHADOW_DEPTH = 15u +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Sampling + * \{ */ + +enum eSamplingDimension : uint32_t { + SAMPLING_FILTER_U = 0u, + SAMPLING_FILTER_V = 1u, + SAMPLING_LENS_U = 2u, + SAMPLING_LENS_V = 3u, + SAMPLING_TIME = 4u, + SAMPLING_SHADOW_U = 5u, + SAMPLING_SHADOW_V = 6u, + SAMPLING_SHADOW_W = 7u, + SAMPLING_SHADOW_X = 8u, + SAMPLING_SHADOW_Y = 9u, + SAMPLING_CLOSURE = 10u, + SAMPLING_LIGHTPROBE = 11u, + SAMPLING_TRANSPARENCY = 12u, + SAMPLING_SSS_U = 13u, + SAMPLING_SSS_V = 14u, + SAMPLING_RAYTRACE_U = 15u, + SAMPLING_RAYTRACE_V = 16u, + SAMPLING_RAYTRACE_W = 17u, + SAMPLING_RAYTRACE_X = 18u +}; + +/** + * IMPORTANT: Make sure the array can contain all sampling dimensions. + * Also note that it needs to be multiple of 4. + */ +#define SAMPLING_DIMENSION_COUNT 20 + +/* NOTE(@fclem): Needs to be used in #StorageBuffer because of arrays of scalar. */ +struct SamplingData { + /** Array containing random values from Low Discrepancy Sequence in [0..1) range. */ + float dimensions[SAMPLING_DIMENSION_COUNT]; +}; +BLI_STATIC_ASSERT_ALIGN(SamplingData, 16) + +/* Returns total sample count in a web pattern of the given size. */ +static inline int sampling_web_sample_count_get(int web_density, int ring_count) +{ + return ((ring_count * ring_count + ring_count) / 2) * web_density + 1; +} + +/* Returns lowest possible ring count that contains at least sample_count samples. */ +static inline int sampling_web_ring_count_get(int web_density, int sample_count) +{ + /* Inversion of web_sample_count_get(). */ + float x = 2.0f * (float(sample_count) - 1.0f) / float(web_density); + /* Solving polynomial. We only search positive solution. 
*/ + float discriminant = 1.0f + 4.0f * x; + return int(ceilf(0.5f * (sqrtf(discriminant) - 1.0f))); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Camera * \{ */ @@ -65,15 +175,176 @@ struct CameraData { /** Clipping distances. */ float clip_near; float clip_far; - /** Film pixel filter radius. */ - float filter_size; eCameraType type; + + bool1 initialized; + +#ifdef __cplusplus + /* Small constructor to allow detecting new buffers. */ + CameraData() : initialized(false){}; +#endif }; BLI_STATIC_ASSERT_ALIGN(CameraData, 16) /** \} */ /* -------------------------------------------------------------------- */ +/** \name Film + * \{ */ + +#define FILM_PRECOMP_SAMPLE_MAX 16 + +enum eFilmWeightLayerIndex : uint32_t { + FILM_WEIGHT_LAYER_ACCUMULATION = 0u, + FILM_WEIGHT_LAYER_DISTANCE = 1u, +}; + +enum ePassStorageType : uint32_t { + PASS_STORAGE_COLOR = 0u, + PASS_STORAGE_VALUE = 1u, + PASS_STORAGE_CRYPTOMATTE = 2u, +}; + +struct FilmSample { + int2 texel; + float weight; + /** Used for accumulation. */ + float weight_sum_inv; +}; +BLI_STATIC_ASSERT_ALIGN(FilmSample, 16) + +struct FilmData { + /** Size of the film in pixels. */ + int2 extent; + /** Offset of the film in the full-res frame, in pixels. */ + int2 offset; + /** Extent used by the render buffers when rendering the main views. */ + int2 render_extent; + /** Sub-pixel offset applied to the window matrix. + * NOTE: In final film pixel unit. + * NOTE: Positive values makes the view translate in the negative axes direction. + * NOTE: The origin is the center of the lower left film pixel of the area covered by a render + * pixel if using scaled resolution rendering. + */ + float2 subpixel_offset; + /** Scaling factor to convert texel to uvs. */ + float2 extent_inv; + /** Is true if history is valid and can be sampled. Bypass history to resets accumulation. 
*/ + bool1 use_history; + /** Is true if combined buffer is valid and can be re-projected to reduce variance. */ + bool1 use_reprojection; + /** Is true if accumulation of non-filtered passes is needed. */ + bool1 has_data; + /** Is true if accumulation of filtered passes is needed. */ + bool1 any_render_pass_1; + bool1 any_render_pass_2; + /** Controlled by user in lookdev mode or by render settings. */ + float background_opacity; + float _pad0; + /** Output counts per type. */ + int color_len, value_len; + /** Index in color_accum_img or value_accum_img of each pass. -1 if pass is not enabled. */ + int mist_id; + int normal_id; + int vector_id; + int diffuse_light_id; + int diffuse_color_id; + int specular_light_id; + int specular_color_id; + int volume_light_id; + int emission_id; + int environment_id; + int shadow_id; + int ambient_occlusion_id; + /** Not indexed but still not -1 if enabled. */ + int depth_id; + int combined_id; + /** Id of the render-pass to be displayed. -1 for combined. */ + int display_id; + /** Storage type of the render-pass to be displayed. */ + ePassStorageType display_storage_type; + /** True if we bypass the accumulation and directly output the accumulation buffer. */ + bool1 display_only; + /** Start of AOVs and number of aov. */ + int aov_color_id, aov_color_len; + int aov_value_id, aov_value_len; + /** Start of cryptomatte per layer (-1 if pass is not enabled). */ + int cryptomatte_object_id; + int cryptomatte_asset_id; + int cryptomatte_material_id; + /** Max number of samples stored per layer (is even number). */ + int cryptomatte_samples_len; + /** Settings to render mist pass */ + float mist_scale, mist_bias, mist_exponent; + /** Scene exposure used for better noise reduction. */ + float exposure_scale; + /** Scaling factor for scaled resolution rendering. */ + int scaling_factor; + /** Film pixel filter radius. */ + float filter_radius; + /** Precomputed samples. First in the table is the closest one. The rest is unordered. 
*/ + int samples_len; + /** Sum of the weights of all samples in the sample table. */ + float samples_weight_total; + FilmSample samples[FILM_PRECOMP_SAMPLE_MAX]; +}; +BLI_STATIC_ASSERT_ALIGN(FilmData, 16) + +static inline float film_filter_weight(float filter_radius, float sample_distance_sqr) +{ +#if 1 /* Faster */ + /* Gaussian fitted to Blackman-Harris. */ + float r = sample_distance_sqr / (filter_radius * filter_radius); + const float sigma = 0.284; + const float fac = -0.5 / (sigma * sigma); + float weight = expf(fac * r); +#else + /* Blackman-Harris filter. */ + float r = M_2PI * saturate(0.5 + sqrtf(sample_distance_sqr) / (2.0 * filter_radius)); + float weight = 0.35875 - 0.48829 * cosf(r) + 0.14128 * cosf(2.0 * r) - 0.01168 * cosf(3.0 * r); +#endif + return weight; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Render passes + * \{ */ + +enum eRenderPassLayerIndex : uint32_t { + RENDER_PASS_LAYER_DIFFUSE_LIGHT = 0u, + RENDER_PASS_LAYER_SPECULAR_LIGHT = 1u, +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Arbitrary Output Variables + * \{ */ + +/* Theoretical max is 128 as we are using texture array and VRAM usage. + * However, the output_aov() function perform a linear search inside all the hashes. + * If we find a way to avoid this we could bump this number up. */ +#define AOV_MAX 16 + +/* NOTE(@fclem): Needs to be used in #StorageBuffer because of arrays of scalar. */ +struct AOVsInfoData { + uint hash_value[AOV_MAX]; + uint hash_color[AOV_MAX]; + /* Length of used data. */ + uint color_len; + uint value_len; + /** Id of the AOV to be displayed (from the start of the AOV array). -1 for combined. */ + int display_id; + /** True if the AOV to be displayed is from the value accum buffer. 
*/ + bool1 display_is_value; +}; +BLI_STATIC_ASSERT_ALIGN(AOVsInfoData, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name VelocityModule * \{ */ @@ -122,6 +393,272 @@ BLI_STATIC_ASSERT_ALIGN(VelocityGeometryIndex, 16) /** \} */ /* -------------------------------------------------------------------- */ +/** \name Motion Blur + * \{ */ + +#define MOTION_BLUR_TILE_SIZE 32 +#define MOTION_BLUR_MAX_TILE 512 /* 16384 / MOTION_BLUR_TILE_SIZE */ +struct MotionBlurData { + /** As the name suggests. Used to avoid a division in the sampling. */ + float2 target_size_inv; + /** Viewport motion scaling factor. Make blur relative to frame time not render time. */ + float2 motion_scale; + /** Depth scaling factor. Avoid blurring background behind moving objects. */ + float depth_scale; + + float _pad0, _pad1, _pad2; +}; +BLI_STATIC_ASSERT_ALIGN(MotionBlurData, 16) + +/* For some reasons some GLSL compilers do not like this struct. + * So we declare it as a uint array instead and do indexing ourselves. */ +#ifdef __cplusplus +struct MotionBlurTileIndirection { + /** + * Stores indirection to the tile with the highest velocity covering each tile. + * This is stored using velocity in the MSB to be able to use atomicMax operations. + */ + uint prev[MOTION_BLUR_MAX_TILE][MOTION_BLUR_MAX_TILE]; + uint next[MOTION_BLUR_MAX_TILE][MOTION_BLUR_MAX_TILE]; +}; +BLI_STATIC_ASSERT_ALIGN(MotionBlurTileIndirection, 16) +#endif + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Depth of field + * \{ */ + +/* 5% error threshold. */ +#define DOF_FAST_GATHER_COC_ERROR 0.05 +#define DOF_GATHER_RING_COUNT 5 +#define DOF_DILATE_RING_COUNT 3 + +struct DepthOfFieldData { + /** Size of the render targets for gather & scatter passes. */ + int2 extent; + /** Size of a pixel in uv space (1.0 / extent). */ + float2 texel_size; + /** Scale factor for anisotropic bokeh. 
*/ + float2 bokeh_anisotropic_scale; + float2 bokeh_anisotropic_scale_inv; + /* Correction factor to align main target pixels with the filtered mipmap chain texture. */ + float2 gather_uv_fac; + /** Scatter parameters. */ + float scatter_coc_threshold; + float scatter_color_threshold; + float scatter_neighbor_max_color; + int scatter_sprite_per_row; + /** Number of side the bokeh shape has. */ + float bokeh_blades; + /** Rotation of the bokeh shape. */ + float bokeh_rotation; + /** Multiplier and bias to apply to linear depth to Circle of confusion (CoC). */ + float coc_mul, coc_bias; + /** Maximum absolute allowed Circle of confusion (CoC). Min of computed max and user max. */ + float coc_abs_max; + /** Copy of camera type. */ + eCameraType camera_type; + /** Weights of spatial filtering in stabilize pass. Not array to avoid alignment restriction. */ + float4 filter_samples_weight; + float filter_center_weight; + /** Max number of sprite in the scatter pass for each ground. */ + int scatter_max_rect; + + int _pad0, _pad1; +}; +BLI_STATIC_ASSERT_ALIGN(DepthOfFieldData, 16) + +struct ScatterRect { + /** Color and CoC of the 4 pixels the scatter sprite represents. */ + float4 color_and_coc[4]; + /** Rect center position in half pixel space. */ + float2 offset; + /** Rect half extent in half pixel space. */ + float2 half_extent; +}; +BLI_STATIC_ASSERT_ALIGN(ScatterRect, 16) + +/** WORKAROUND(@fclem): This is because this file is included before common_math_lib.glsl. */ +#ifndef M_PI +# define EEVEE_PI +# define M_PI 3.14159265358979323846 /* pi */ +#endif + +static inline float coc_radius_from_camera_depth(DepthOfFieldData dof, float depth) +{ + depth = (dof.camera_type != CAMERA_ORTHO) ? 1.0f / depth : depth; + return dof.coc_mul * depth + dof.coc_bias; +} + +static inline float regular_polygon_side_length(float sides_count) +{ + return 2.0f * sinf(M_PI / sides_count); +} + +/* Returns intersection ratio between the radius edge at theta and the regular polygon edge. 
+ * Start first corners at theta == 0. */ +static inline float circle_to_polygon_radius(float sides_count, float theta) +{ + /* From Graphics Gems from CryENGINE 3 (Siggraph 2013) by Tiago Sousa (slide + * 36). */ + float side_angle = (2.0f * M_PI) / sides_count; + return cosf(side_angle * 0.5f) / + cosf(theta - side_angle * floorf((sides_count * theta + M_PI) / (2.0f * M_PI))); +} + +/* Remap input angle to have homogenous spacing of points along a polygon edge. + * Expects theta to be in [0..2pi] range. */ +static inline float circle_to_polygon_angle(float sides_count, float theta) +{ + float side_angle = (2.0f * M_PI) / sides_count; + float halfside_angle = side_angle * 0.5f; + float side = floorf(theta / side_angle); + /* Length of segment from center to the middle of polygon side. */ + float adjacent = circle_to_polygon_radius(sides_count, 0.0f); + + /* This is the relative position of the sample on the polygon half side. */ + float local_theta = theta - side * side_angle; + float ratio = (local_theta - halfside_angle) / halfside_angle; + + float halfside_len = regular_polygon_side_length(sides_count) * 0.5f; + float opposite = ratio * halfside_len; + + /* NOTE: atan(y_over_x) has output range [-M_PI_2..M_PI_2]. */ + float final_local_theta = atanf(opposite / adjacent); + + return side * side_angle + final_local_theta; +} + +#ifdef EEVEE_PI +# undef M_PI +#endif + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Light Culling + * \{ */ + +/* Number of items we can cull. Limited by how we store CullingZBin. */ +#define CULLING_MAX_ITEM 65536 +/* Fine grained subdivision in the Z direction. Limited by the LDS in z-binning compute shader. */ +#define CULLING_ZBIN_COUNT 4096 +/* Max tile map resolution per axes. */ +#define CULLING_TILE_RES 16 + +struct LightCullingData { + /** Scale applied to tile pixel coordinates to get target UV coordinate. 
*/ + float2 tile_to_uv_fac; + /** Scale and bias applied to linear Z to get zbin. */ + float zbin_scale; + float zbin_bias; + /** Valid item count in the source data array. */ + uint items_count; + /** Items that are processed by the 2.5D culling. */ + uint local_lights_len; + /** Items that are **NOT** processed by the 2.5D culling (i.e: Sun Lights). */ + uint sun_lights_len; + /** Number of items that passes the first culling test. */ + uint visible_count; + /** Extent of one square tile in pixels. */ + float tile_size; + /** Number of tiles on the X/Y axis. */ + uint tile_x_len; + uint tile_y_len; + /** Number of word per tile. Depends on the maximum number of lights. */ + uint tile_word_len; +}; +BLI_STATIC_ASSERT_ALIGN(LightCullingData, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Lights + * \{ */ + +#define LIGHT_NO_SHADOW -1 + +enum eLightType : uint32_t { + LIGHT_SUN = 0u, + LIGHT_POINT = 1u, + LIGHT_SPOT = 2u, + LIGHT_RECT = 3u, + LIGHT_ELLIPSE = 4u +}; + +static inline bool is_area_light(eLightType type) +{ + return type >= LIGHT_RECT; +} + +struct LightData { + /** Normalized object matrix. Last column contains data accessible using the following macros. */ + float4x4 object_mat; + /** Packed data in the last column of the object_mat. */ +#define _area_size_x object_mat[0][3] +#define _area_size_y object_mat[1][3] +#define _radius _area_size_x +#define _spot_mul object_mat[2][3] +#define _spot_bias object_mat[3][3] + /** Aliases for axes. */ +#ifndef USE_GPU_SHADER_CREATE_INFO +# define _right object_mat[0] +# define _up object_mat[1] +# define _back object_mat[2] +# define _position object_mat[3] +#else +# define _right object_mat[0].xyz +# define _up object_mat[1].xyz +# define _back object_mat[2].xyz +# define _position object_mat[3].xyz +#endif + /** Influence radius (inverted and squared) adjusted for Surface / Volume power. 
*/ + float influence_radius_invsqr_surface; + float influence_radius_invsqr_volume; + /** Maximum influence radius. Used for culling. */ + float influence_radius_max; + /** Index of the shadow struct on CPU. -1 means no shadow. */ + int shadow_id; + /** NOTE: It is ok to use float3 here. A float is declared right after it. + * float3 is also aligned to 16 bytes. */ + float3 color; + /** Power depending on shader type. */ + float diffuse_power; + float specular_power; + float volume_power; + float transmit_power; + /** Special radius factor for point lighting. */ + float radius_squared; + /** Light Type. */ + eLightType type; + /** Spot angle tangent. */ + float spot_tan; + /** Spot size. Aligned to size of float2. */ + float2 spot_size_inv; + /** Associated shadow data. Only valid if shadow_id is not LIGHT_NO_SHADOW. */ + // ShadowData shadow_data; +}; +BLI_STATIC_ASSERT_ALIGN(LightData, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Hierarchical-Z Buffer + * \{ */ + +struct HiZData { + /** Scale factor to remove HiZBuffer padding. */ + float2 uv_scale; + + float2 _pad0; +}; +BLI_STATIC_ASSERT_ALIGN(HiZData, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Ray-Tracing * \{ */ @@ -142,6 +679,34 @@ enum eClosureBits : uint32_t { /** \} */ /* -------------------------------------------------------------------- */ +/** \name Subsurface + * \{ */ + +#define SSS_SAMPLE_MAX 64 +#define SSS_BURLEY_TRUNCATE 16.0 +#define SSS_BURLEY_TRUNCATE_CDF 0.9963790093708328 +#define SSS_TRANSMIT_LUT_SIZE 64.0 +#define SSS_TRANSMIT_LUT_RADIUS 1.218 +#define SSS_TRANSMIT_LUT_SCALE ((SSS_TRANSMIT_LUT_SIZE - 1.0) / float(SSS_TRANSMIT_LUT_SIZE)) +#define SSS_TRANSMIT_LUT_BIAS (0.5 / float(SSS_TRANSMIT_LUT_SIZE)) +#define SSS_TRANSMIT_LUT_STEP_RES 64.0 + +struct SubsurfaceData { + /** xy: 2D sample position [-1..1], zw: sample_bounds. 
*/ + /* NOTE(fclem) Using float4 for alignment. */ + float4 samples[SSS_SAMPLE_MAX]; + /** Sample index after which samples are not randomly rotated anymore. */ + int jitter_threshold; + /** Number of samples precomputed in the set. */ + int sample_len; + int _pad0; + int _pad1; +}; +BLI_STATIC_ASSERT_ALIGN(SubsurfaceData, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Utility Texture * \{ */ @@ -178,10 +743,26 @@ float4 utility_tx_sample(sampler2DArray util_tx, float2 uv, float layer) #ifdef __cplusplus +using AOVsInfoDataBuf = draw::StorageBuffer<AOVsInfoData>; using CameraDataBuf = draw::UniformBuffer<CameraData>; +using DepthOfFieldDataBuf = draw::UniformBuffer<DepthOfFieldData>; +using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer<ScatterRect, 16, true>; +using DrawIndirectBuf = draw::StorageBuffer<DrawCommand, true>; +using FilmDataBuf = draw::UniformBuffer<FilmData>; +using HiZDataBuf = draw::UniformBuffer<HiZData>; +using LightCullingDataBuf = draw::StorageBuffer<LightCullingData>; +using LightCullingKeyBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>; +using LightCullingTileBuf = draw::StorageArrayBuffer<uint, LIGHT_CHUNK, true>; +using LightCullingZbinBuf = draw::StorageArrayBuffer<uint, CULLING_ZBIN_COUNT, true>; +using LightCullingZdistBuf = draw::StorageArrayBuffer<float, LIGHT_CHUNK, true>; +using LightDataBuf = draw::StorageArrayBuffer<LightData, LIGHT_CHUNK>; +using MotionBlurDataBuf = draw::UniformBuffer<MotionBlurData>; +using MotionBlurTileIndirectionBuf = draw::StorageBuffer<MotionBlurTileIndirection, true>; +using SamplingDataBuf = draw::StorageBuffer<SamplingData>; +using VelocityGeometryBuf = draw::StorageArrayBuffer<float4, 16, true>; using VelocityIndexBuf = draw::StorageArrayBuffer<VelocityIndex, 16>; using VelocityObjectBuf = draw::StorageArrayBuffer<float4x4, 16>; -using VelocityGeometryBuf = draw::StorageArrayBuffer<float4, 16, true>; +using CryptomatteObjectBuf = 
draw::StorageArrayBuffer<float2, 16>; } // namespace blender::eevee #endif diff --git a/source/blender/draw/engines/eevee_next/eevee_sync.cc b/source/blender/draw/engines/eevee_next/eevee_sync.cc index 42af251d770..09ea7c9ec3d 100644 --- a/source/blender/draw/engines/eevee_next/eevee_sync.cc +++ b/source/blender/draw/engines/eevee_next/eevee_sync.cc @@ -47,7 +47,7 @@ ObjectHandle &SyncModule::sync_object(Object *ob) const int recalc_flags = ID_RECALC_COPY_ON_WRITE | ID_RECALC_TRANSFORM | ID_RECALC_SHADING | ID_RECALC_GEOMETRY; if ((eevee_dd.recalc & recalc_flags) != 0) { - // inst_.sampling.reset(); + inst_.sampling.reset(); UNUSED_VARS(inst_); } @@ -63,7 +63,7 @@ WorldHandle &SyncModule::sync_world(::World *world) const int recalc_flags = ID_RECALC_ALL; if ((eevee_dd.recalc & recalc_flags) != 0) { - // inst_.sampling.reset(); + inst_.sampling.reset(); } return eevee_dd; } @@ -74,25 +74,12 @@ WorldHandle &SyncModule::sync_world(::World *world) /** \name Common * \{ */ -static inline void shgroup_geometry_call(DRWShadingGroup *grp, - Object *ob, - GPUBatch *geom, - int v_first = -1, - int v_count = -1, - bool use_instancing = false) +static inline void geometry_call(PassMain::Sub *sub_pass, + GPUBatch *geom, + ResourceHandle resource_handle) { - if (grp == nullptr) { - return; - } - - if (v_first == -1) { - DRW_shgroup_call(grp, geom, ob); - } - else if (use_instancing) { - DRW_shgroup_call_instance_range(grp, ob, geom, v_first, v_count); - } - else { - DRW_shgroup_call_range(grp, ob, geom, v_first, v_count); + if (sub_pass != nullptr) { + sub_pass->draw(geom, resource_handle); } } @@ -102,9 +89,13 @@ static inline void shgroup_geometry_call(DRWShadingGroup *grp, /** \name Mesh * \{ */ -void SyncModule::sync_mesh(Object *ob, ObjectHandle &ob_handle) +void SyncModule::sync_mesh(Object *ob, + ObjectHandle &ob_handle, + ResourceHandle res_handle, + const ObjectRef &ob_ref) { - bool has_motion = inst_.velocity.step_object_sync(ob, ob_handle.object_key, 
ob_handle.recalc); + bool has_motion = inst_.velocity.step_object_sync( + ob, ob_handle.object_key, res_handle, ob_handle.recalc); MaterialArray &material_array = inst_.materials.material_array_get(ob, has_motion); @@ -123,14 +114,20 @@ void SyncModule::sync_mesh(Object *ob, ObjectHandle &ob_handle) continue; } Material *material = material_array.materials[i]; - shgroup_geometry_call(material->shading.shgrp, ob, geom); - shgroup_geometry_call(material->prepass.shgrp, ob, geom); - shgroup_geometry_call(material->shadow.shgrp, ob, geom); + geometry_call(material->shading.sub_pass, geom, res_handle); + geometry_call(material->prepass.sub_pass, geom, res_handle); + geometry_call(material->shadow.sub_pass, geom, res_handle); - is_shadow_caster = is_shadow_caster || material->shadow.shgrp != nullptr; + is_shadow_caster = is_shadow_caster || material->shadow.sub_pass != nullptr; is_alpha_blend = is_alpha_blend || material->is_alpha_blend_transparent; + + GPUMaterial *gpu_material = material_array.gpu_materials[i]; + ::Material *mat = GPU_material_get_material(gpu_material); + inst_.cryptomatte.sync_material(mat); } + inst_.manager->extract_object_attributes(res_handle, ob_ref, material_array.gpu_materials); + inst_.cryptomatte.sync_object(ob, res_handle); // shadows.sync_object(ob, ob_handle, is_shadow_caster, is_alpha_blend); } @@ -155,11 +152,13 @@ struct gpIterData { int vcount = 0; bool instancing = false; - gpIterData(Instance &inst_, Object *ob_, ObjectHandle &ob_handle) + gpIterData(Instance &inst_, Object *ob_, ObjectHandle &ob_handle, ResourceHandle resource_handle) : inst(inst_), ob(ob_), material_array(inst_.materials.material_array_get( - ob_, inst_.velocity.step_object_sync(ob, ob_handle.object_key, ob_handle.recalc))) + ob_, + inst_.velocity.step_object_sync( + ob, ob_handle.object_key, resource_handle, ob_handle.recalc))) { cfra = DEG_get_ctime(inst.depsgraph); }; @@ -167,26 +166,28 @@ struct gpIterData { static void gpencil_drawcall_flush(gpIterData &iter) 
{ +#if 0 /* Incompatible with new draw manager. */ if (iter.geom != nullptr) { - shgroup_geometry_call(iter.material->shading.shgrp, + geometry_call(iter.material->shading.sub_pass, iter.ob, iter.geom, iter.vfirst, iter.vcount, iter.instancing); - shgroup_geometry_call(iter.material->prepass.shgrp, + geometry_call(iter.material->prepass.sub_pass, iter.ob, iter.geom, iter.vfirst, iter.vcount, iter.instancing); - shgroup_geometry_call(iter.material->shadow.shgrp, + geometry_call(iter.material->shadow.sub_pass, iter.ob, iter.geom, iter.vfirst, iter.vcount, iter.instancing); } +#endif iter.geom = nullptr; iter.vfirst = -1; iter.vcount = 0; @@ -250,18 +251,22 @@ static void gpencil_stroke_sync(bGPDlayer *UNUSED(gpl), } } -void SyncModule::sync_gpencil(Object *ob, ObjectHandle &ob_handle) +void SyncModule::sync_gpencil(Object *ob, ObjectHandle &ob_handle, ResourceHandle res_handle) { /* TODO(fclem): Waiting for a user option to use the render engine instead of gpencil engine. */ - return; + if (true) { + inst_.gpencil_engine_enabled = true; + return; + } + UNUSED_VARS(res_handle); - gpIterData iter(inst_, ob, ob_handle); + gpIterData iter(inst_, ob, ob_handle, res_handle); BKE_gpencil_visible_stroke_iter((bGPdata *)ob->data, nullptr, gpencil_stroke_sync, &iter); gpencil_drawcall_flush(iter); - // bool is_caster = true; /* TODO material.shadow.shgrp. */ + // bool is_caster = true; /* TODO material.shadow.sub_pass. */ // bool is_alpha_blend = true; /* TODO material.is_alpha_blend. 
*/ // shadows.sync_object(ob, ob_handle, is_caster, is_alpha_blend); } @@ -277,14 +282,24 @@ static void shgroup_curves_call(MaterialPass &matpass, ParticleSystem *part_sys = nullptr, ModifierData *modifier_data = nullptr) { - if (matpass.shgrp == nullptr) { + UNUSED_VARS(ob, modifier_data); + if (matpass.sub_pass == nullptr) { return; } - DRW_shgroup_hair_create_sub(ob, part_sys, modifier_data, matpass.shgrp, matpass.gpumat); + if (part_sys != nullptr) { + // DRW_shgroup_hair_create_sub(ob, part_sys, modifier_data, matpass.sub_pass, matpass.gpumat); + } + else { + // DRW_shgroup_curves_create_sub(ob, matpass.sub_pass, matpass.gpumat); + } } -void SyncModule::sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData *modifier_data) +void SyncModule::sync_curves(Object *ob, + ObjectHandle &ob_handle, + ResourceHandle res_handle, + ModifierData *modifier_data) { + UNUSED_VARS(res_handle); int mat_nr = CURVES_MATERIAL_NR; ParticleSystem *part_sys = nullptr; @@ -309,10 +324,16 @@ void SyncModule::sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData * shgroup_curves_call(material.prepass, ob, part_sys, modifier_data); shgroup_curves_call(material.shadow, ob, part_sys, modifier_data); + inst_.cryptomatte.sync_object(ob, res_handle); + GPUMaterial *gpu_material = + inst_.materials.material_array_get(ob, has_motion).gpu_materials[mat_nr - 1]; + ::Material *mat = GPU_material_get_material(gpu_material); + inst_.cryptomatte.sync_material(mat); + /* TODO(fclem) Hair velocity. 
*/ // shading_passes.velocity.gpencil_add(ob, ob_handle); - // bool is_caster = material.shadow.shgrp != nullptr; + // bool is_caster = material.shadow.sub_pass != nullptr; // bool is_alpha_blend = material.is_alpha_blend_transparent; // shadows.sync_object(ob, ob_handle, is_caster, is_alpha_blend); } diff --git a/source/blender/draw/engines/eevee_next/eevee_sync.hh b/source/blender/draw/engines/eevee_next/eevee_sync.hh index bd8147a2882..ab883ce44c2 100644 --- a/source/blender/draw/engines/eevee_next/eevee_sync.hh +++ b/source/blender/draw/engines/eevee_next/eevee_sync.hh @@ -150,9 +150,15 @@ class SyncModule { ObjectHandle &sync_object(Object *ob); WorldHandle &sync_world(::World *world); - void sync_mesh(Object *ob, ObjectHandle &ob_handle); - void sync_gpencil(Object *ob, ObjectHandle &ob_handle); - void sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData *modifier_data = nullptr); + void sync_mesh(Object *ob, + ObjectHandle &ob_handle, + ResourceHandle res_handle, + const ObjectRef &ob_ref); + void sync_gpencil(Object *ob, ObjectHandle &ob_handle, ResourceHandle res_handle); + void sync_curves(Object *ob, + ObjectHandle &ob_handle, + ResourceHandle res_handle, + ModifierData *modifier_data = nullptr); }; /** \} */ diff --git a/source/blender/draw/engines/eevee_next/eevee_velocity.cc b/source/blender/draw/engines/eevee_next/eevee_velocity.cc index ceae9df44d0..7af311a8ccc 100644 --- a/source/blender/draw/engines/eevee_next/eevee_velocity.cc +++ b/source/blender/draw/engines/eevee_next/eevee_velocity.cc @@ -9,10 +9,6 @@ * temporal re-projection or motion blur. * * It is the module that tracks the objects between frames updates. - * - * #VelocityModule contains all motion steps data and logic. - * #VelocityPass contains the resolve pass for static geometry. - * #VelocityView is a per view instance that contain the velocity buffer. 
*/ #include "BKE_duplilist.h" @@ -36,16 +32,21 @@ namespace blender::eevee { void VelocityModule::init() { -#if 0 /* TODO renderpasses */ - if (inst_.render && (inst_.render_passes.vector != nullptr)) { - /* No motion blur and the vector pass was requested. Do the step sync here. */ + if (inst_.render && (inst_.film.enabled_passes_get() & EEVEE_RENDER_PASS_VECTOR) != 0) { + /* No motion blur and the vector pass was requested. Do the steps sync here. */ const Scene *scene = inst_.scene; float initial_time = scene->r.cfra + scene->r.subframe; step_sync(STEP_PREVIOUS, initial_time - 1.0f); step_sync(STEP_NEXT, initial_time + 1.0f); + inst_.set_time(initial_time); + step_ = STEP_CURRENT; + /* Let the main sync loop handle the current step. */ } -#endif + + /* For viewport, only previous motion is supported. + * Still bind previous step to avoid undefined behavior. */ + next_step_ = inst_.is_viewport() ? STEP_PREVIOUS : STEP_NEXT; } static void step_object_sync_render(void *velocity, @@ -54,7 +55,9 @@ static void step_object_sync_render(void *velocity, Depsgraph *UNUSED(depsgraph)) { ObjectKey object_key(ob); - reinterpret_cast<VelocityModule *>(velocity)->step_object_sync(ob, object_key); + /* NOTE: Dummy resource handle since this will not be used for drawing. */ + ResourceHandle resource_handle(0); + reinterpret_cast<VelocityModule *>(velocity)->step_object_sync(ob, object_key, resource_handle); } void VelocityModule::step_sync(eVelocityStep step, float time) @@ -70,10 +73,18 @@ void VelocityModule::step_camera_sync() { inst_.camera.sync(); *camera_steps[step_] = inst_.camera.data_get(); + step_time[step_] = inst_.scene->r.cfra + inst_.scene->r.subframe; + /* Fix undefined camera steps when rendering is starting. 
*/ + if ((step_ == STEP_CURRENT) && (camera_steps[STEP_PREVIOUS]->initialized == false)) { + *camera_steps[STEP_PREVIOUS] = *static_cast<CameraData *>(camera_steps[step_]); + camera_steps[STEP_PREVIOUS]->initialized = true; + step_time[STEP_PREVIOUS] = step_time[step_]; + } } bool VelocityModule::step_object_sync(Object *ob, ObjectKey &object_key, + ResourceHandle resource_handle, int /*IDRecalcFlag*/ recalc) { bool has_motion = object_has_velocity(ob) || (recalc & ID_RECALC_TRANSFORM); @@ -85,8 +96,6 @@ bool VelocityModule::step_object_sync(Object *ob, return false; } - uint32_t resource_id = DRW_object_resource_id_get(ob); - /* Object motion. */ /* FIXME(fclem) As we are using original objects pointers, there is a chance the previous * object key matches a totally different object if the scene was changed by user or python @@ -95,7 +104,7 @@ bool VelocityModule::step_object_sync(Object *ob, * We live with that until we have a correct way of identifying new objects. */ VelocityObjectData &vel = velocity_map.lookup_or_add_default(object_key); vel.obj.ofs[step_] = object_steps_usage[step_]++; - vel.obj.resource_id = resource_id; + vel.obj.resource_id = resource_handle.resource_index(); vel.id = (ID *)ob->data; object_steps[step_]->get_or_resize(vel.obj.ofs[step_]) = ob->obmat; if (step_ == STEP_CURRENT) { @@ -162,7 +171,7 @@ bool VelocityModule::step_object_sync(Object *ob, } /* TODO(@fclem): Reset sampling here? Should ultimately be covered by depsgraph update tags. 
*/ - // inst_.sampling.reset(); + inst_.sampling.reset(); return true; } @@ -213,6 +222,7 @@ void VelocityModule::step_swap() SWAP(VelocityObjectBuf *, object_steps[step_a], object_steps[step_b]); SWAP(VelocityGeometryBuf *, geometry_steps[step_a], geometry_steps[step_b]); SWAP(CameraDataBuf *, camera_steps[step_a], camera_steps[step_b]); + SWAP(float, step_time[step_a], step_time[step_b]); for (VelocityObjectData &vel : velocity_map.values()) { vel.obj.ofs[step_a] = vel.obj.ofs[step_b]; @@ -239,10 +249,7 @@ void VelocityModule::step_swap() void VelocityModule::begin_sync() { - if (inst_.is_viewport()) { - /* Viewport always evaluate current step. */ - step_ = STEP_CURRENT; - } + step_ = STEP_CURRENT; step_camera_sync(); object_steps_usage[step_] = 0; } @@ -255,7 +262,7 @@ void VelocityModule::end_sync() uint32_t max_resource_id_ = 0u; for (Map<ObjectKey, VelocityObjectData>::Item item : velocity_map.items()) { - if (item.value.obj.resource_id == (uint)-1) { + if (item.value.obj.resource_id == (uint32_t)-1) { deleted_obj.append(item.key); } else { @@ -264,14 +271,18 @@ void VelocityModule::end_sync() } if (deleted_obj.size() > 0) { - // inst_.sampling.reset(); + inst_.sampling.reset(); + } + + if (inst_.is_viewport() && camera_has_motion()) { + inst_.sampling.reset(); } - for (auto key : deleted_obj) { + for (auto &key : deleted_obj) { velocity_map.remove(key); } - indirection_buf.resize(power_of_2_max_u(max_resource_id_ + 1)); + indirection_buf.resize(ceil_to_multiple_u(max_resource_id_, 128)); /* Avoid uploading more data to the GPU as well as an extra level of * indirection on the GPU by copying back offsets the to VelocityIndex. 
*/ @@ -300,19 +311,6 @@ void VelocityModule::end_sync() camera_steps[STEP_CURRENT]->push_update(); camera_steps[STEP_NEXT]->push_update(); indirection_buf.push_update(); - - { - resolve_ps_ = DRW_pass_create("Velocity.Resolve", (DRWState)0); - GPUShader *sh = inst_.shaders.static_shader_get(VELOCITY_RESOLVE); - DRWShadingGroup *grp = DRW_shgroup_create(sh, resolve_ps_); - DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &input_depth_tx_); - DRW_shgroup_uniform_image_ref(grp, "velocity_view_img", &velocity_view_tx_); - DRW_shgroup_uniform_image_ref(grp, "velocity_camera_img", &velocity_camera_tx_); - DRW_shgroup_uniform_block(grp, "camera_prev", *camera_steps[STEP_PREVIOUS]); - DRW_shgroup_uniform_block(grp, "camera_curr", *camera_steps[STEP_CURRENT]); - DRW_shgroup_uniform_block(grp, "camera_next", *camera_steps[STEP_NEXT]); - DRW_shgroup_call_compute_ref(grp, resolve_dispatch_size_); - } } bool VelocityModule::object_has_velocity(const Object *ob) @@ -359,60 +357,30 @@ void VelocityModule::bind_resources(DRWShadingGroup *grp) DRW_shgroup_storage_block_ref(grp, "velocity_indirection_buf", &indirection_buf); } -/* Resolve pass for static geometry and to camera space projection. */ -void VelocityModule::resolve_camera_motion(GPUTexture *depth_tx, - GPUTexture *velocity_view_tx, - GPUTexture *velocity_camera_tx) +bool VelocityModule::camera_has_motion() const { - input_depth_tx_ = depth_tx; - velocity_view_tx_ = velocity_view_tx; - velocity_camera_tx_ = velocity_camera_tx; - - resolve_dispatch_size_.x = divide_ceil_u(GPU_texture_width(depth_tx), 8); - resolve_dispatch_size_.y = divide_ceil_u(GPU_texture_height(depth_tx), 8); - - DRW_draw_pass(resolve_ps_); -} - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Velocity View - * \{ */ - -void VelocityView::sync() -{ - /* TODO: Remove. 
*/ - velocity_view_tx_.sync(); - velocity_camera_tx_.sync(); -} - -void VelocityView::acquire(int2 extent) -{ - /* WORKAROUND: View name should be unique and static. - * With this, we can reuse the same texture across views. */ - DrawEngineType *owner = (DrawEngineType *)view_name_.c_str(); - - /* Only RG16F when only doing only reprojection or motion blur. */ - eGPUTextureFormat format = inst_.is_viewport() ? GPU_RG16F : GPU_RGBA16F; - velocity_view_tx_.acquire(extent, format, owner); - if (false /* TODO(fclem): Panoramic camera. */) { - velocity_camera_tx_.acquire(extent, format, owner); - } - else { - velocity_camera_tx_.acquire(int2(1), format, owner); + /* Only valid after sync. */ + if (inst_.is_viewport()) { + /* Viewport has no next step. */ + return *camera_steps[STEP_PREVIOUS] != *camera_steps[STEP_CURRENT]; } + return *camera_steps[STEP_PREVIOUS] != *camera_steps[STEP_CURRENT] && + *camera_steps[STEP_NEXT] != *camera_steps[STEP_CURRENT]; } -void VelocityView::resolve(GPUTexture *depth_tx) +bool VelocityModule::camera_changed_projection() const { - inst_.velocity.resolve_camera_motion(depth_tx, velocity_view_tx_, velocity_camera_tx_); + /* Only valid after sync. */ + if (inst_.is_viewport()) { + return camera_steps[STEP_PREVIOUS]->type != camera_steps[STEP_CURRENT]->type; + } + /* Cannot happen in render mode since we set the type during the init phase. 
*/ + return false; } -void VelocityView::release() +float VelocityModule::step_time_delta_get(eVelocityStep start, eVelocityStep end) const { - velocity_view_tx_.release(); - velocity_camera_tx_.release(); + return step_time[end] - step_time[start]; } /** \} */ diff --git a/source/blender/draw/engines/eevee_next/eevee_velocity.hh b/source/blender/draw/engines/eevee_next/eevee_velocity.hh index e2606c061e1..6f18b05d476 100644 --- a/source/blender/draw/engines/eevee_next/eevee_velocity.hh +++ b/source/blender/draw/engines/eevee_next/eevee_velocity.hh @@ -27,8 +27,6 @@ namespace blender::eevee { /** Container for scene velocity data. */ class VelocityModule { - friend class VelocityView; - public: struct VelocityObjectData : public VelocityIndex { /** ID to retrieve the corresponding #VelocityGeometryData after copy. */ @@ -58,6 +56,8 @@ class VelocityModule { int3 object_steps_usage = int3(0); /** Buffer of all #VelocityIndex used in this frame. Indexed by draw manager resource id. */ VelocityIndexBuf indirection_buf; + /** Frame time at which each steps were evaluated. */ + float3 step_time; /** * Copies of camera data. One for previous and one for next time step. @@ -67,16 +67,10 @@ class VelocityModule { private: Instance &inst_; + /** Step being synced. */ eVelocityStep step_ = STEP_CURRENT; - - DRWPass *resolve_ps_ = nullptr; - - /** Reference only. Not owned. */ - GPUTexture *input_depth_tx_; - GPUTexture *velocity_view_tx_; - GPUTexture *velocity_camera_tx_; - - int3 resolve_dispatch_size_ = int3(1, 1, 1); + /** Step referenced as next step. */ + eVelocityStep next_step_ = STEP_NEXT; public: VelocityModule(Instance &inst) : inst_(inst) @@ -111,7 +105,10 @@ class VelocityModule { void step_sync(eVelocityStep step, float time); /* Gather motion data. Returns true if the object **can** have motion. 
*/ - bool step_object_sync(Object *ob, ObjectKey &object_key, int recalc = 0); + bool step_object_sync(Object *ob, + ObjectKey &object_key, + ResourceHandle resource_handle, + int recalc = 0); /* Moves next frame data to previous frame data. Nullify next frame data. */ void step_swap(); @@ -121,56 +118,29 @@ class VelocityModule { void bind_resources(DRWShadingGroup *grp); - private: - bool object_has_velocity(const Object *ob); - bool object_is_deform(const Object *ob); - - void resolve_camera_motion(GPUTexture *depth_tx, - GPUTexture *velocity_view_tx, - GPUTexture *velocity_camera_tx); -}; + template<typename T> void bind_resources(draw::detail::Pass<T> *pass) + { + /* Storage Buf. */ + pass->bind_ssbo(VELOCITY_OBJ_PREV_BUF_SLOT, &(*object_steps[STEP_PREVIOUS])); + pass->bind_ssbo(VELOCITY_OBJ_NEXT_BUF_SLOT, &(*object_steps[next_step_])); + pass->bind_ssbo(VELOCITY_GEO_PREV_BUF_SLOT, &(*geometry_steps[STEP_PREVIOUS])); + pass->bind_ssbo(VELOCITY_GEO_NEXT_BUF_SLOT, &(*geometry_steps[next_step_])); + pass->bind_ssbo(VELOCITY_INDIRECTION_BUF_SLOT, &indirection_buf); + /* Uniform Buf. */ + pass->bind_ubo(VELOCITY_CAMERA_PREV_BUF, &(*camera_steps[STEP_PREVIOUS])); + pass->bind_ubo(VELOCITY_CAMERA_CURR_BUF, &(*camera_steps[STEP_CURRENT])); + pass->bind_ubo(VELOCITY_CAMERA_NEXT_BUF, &(*camera_steps[next_step_])); + } -/** \} */ + bool camera_has_motion() const; + bool camera_changed_projection() const; -/* -------------------------------------------------------------------- */ -/** \name Velocity - * - * \{ */ + /* Returns frame time difference between two steps. */ + float step_time_delta_get(eVelocityStep start, eVelocityStep end) const; -/** - * Per view module. 
- */ -class VelocityView { private: - Instance &inst_; - - StringRefNull view_name_; - - TextureFromPool velocity_camera_tx_ = {"velocity_camera_tx_"}; - TextureFromPool velocity_view_tx_ = {"velocity_view_tx_"}; - - public: - VelocityView(Instance &inst, const char *name) : inst_(inst), view_name_(name){}; - ~VelocityView(){}; - - void sync(); - - void acquire(int2 extent); - void release(); - - void resolve(GPUTexture *depth_tx); - - /** - * Getters - **/ - GPUTexture *view_vectors_get() const - { - return velocity_view_tx_; - } - GPUTexture *camera_vectors_get() const - { - return (velocity_camera_tx_.is_valid()) ? velocity_camera_tx_ : velocity_view_tx_; - } + bool object_has_velocity(const Object *ob); + bool object_is_deform(const Object *ob); }; /** \} */ diff --git a/source/blender/draw/engines/eevee_next/eevee_view.cc b/source/blender/draw/engines/eevee_next/eevee_view.cc index e21342c5ef6..48951c2bae7 100644 --- a/source/blender/draw/engines/eevee_next/eevee_view.cc +++ b/source/blender/draw/engines/eevee_next/eevee_view.cc @@ -34,17 +34,19 @@ void ShadingView::init() // mb_.init(); } -void ShadingView::sync(int2 render_extent_) +void ShadingView::sync() { + int2 render_extent = inst_.film.render_extent_get(); + if (false /* inst_.camera.is_panoramic() */) { - int64_t render_pixel_count = render_extent_.x * (int64_t)render_extent_.y; + int64_t render_pixel_count = render_extent.x * (int64_t)render_extent.y; /* Divide pixel count between the 6 views. Rendering to a square target. */ extent_[0] = extent_[1] = ceilf(sqrtf(1 + (render_pixel_count / 6))); /* TODO(@fclem): Clip unused views here. */ is_enabled_ = true; } else { - extent_ = render_extent_; + extent_ = render_extent; /* Only enable -Z view. */ is_enabled_ = (StringRefNull(name_) == "negZ_view"); } @@ -54,47 +56,34 @@ void ShadingView::sync(int2 render_extent_) } /* Create views. 
*/ - // const CameraData &data = inst_.camera.data_get(); + const CameraData &cam = inst_.camera.data_get(); float4x4 viewmat, winmat; const float(*viewmat_p)[4] = viewmat.ptr(), (*winmat_p)[4] = winmat.ptr(); -#if 0 if (false /* inst_.camera.is_panoramic() */) { /* TODO(@fclem) Over-scans. */ /* For now a mandatory 5% over-scan for DoF. */ - float side = data.clip_near * 1.05f; - float near = data.clip_near; - float far = data.clip_far; + float side = cam.clip_near * 1.05f; + float near = cam.clip_near; + float far = cam.clip_far; perspective_m4(winmat.ptr(), -side, side, -side, side, near, far); - viewmat = face_matrix_ * data.viewmat; + viewmat = face_matrix_ * cam.viewmat; } else { - viewmat_p = data.viewmat.ptr(); - winmat_p = data.winmat.ptr(); + viewmat_p = cam.viewmat.ptr(); + winmat_p = cam.winmat.ptr(); } -#else - /* TEMP */ - UNUSED_VARS(face_matrix_); - const DRWView *default_view = DRW_view_default_get(); - DRW_view_winmat_get(default_view, winmat.ptr(), false); - DRW_view_viewmat_get(default_view, viewmat.ptr(), false); -#endif main_view_ = DRW_view_create(viewmat_p, winmat_p, nullptr, nullptr, nullptr); sub_view_ = DRW_view_create_sub(main_view_, viewmat_p, winmat_p); render_view_ = DRW_view_create_sub(main_view_, viewmat_p, winmat_p); // dof_.sync(winmat_p, extent_); - // mb_.sync(extent_); - velocity_.sync(); // rt_buffer_opaque_.sync(extent_); // rt_buffer_refract_.sync(extent_); // inst_.hiz_back.view_sync(extent_); // inst_.hiz_front.view_sync(extent_); // inst_.gbuffer.view_sync(extent_); - - combined_tx_.sync(); - postfx_tx_.sync(); } void ShadingView::render() @@ -103,29 +92,25 @@ void ShadingView::render() return; } - /* Query temp textures and create framebuffers. */ - /* HACK: View name should be unique and static. - * With this, we can reuse the same texture across views. 
*/ - DrawEngineType *owner = (DrawEngineType *)name_; - - DefaultTextureList *dtxl = DRW_viewport_texture_list_get(); - - depth_tx_.ensure_2d(GPU_DEPTH24_STENCIL8, extent_); - combined_tx_.acquire(extent_, GPU_RGBA16F, owner); - velocity_.acquire(extent_); - // combined_fb_.ensure(GPU_ATTACHMENT_TEXTURE(depth_tx_), GPU_ATTACHMENT_TEXTURE(combined_tx_)); - // prepass_fb_.ensure(GPU_ATTACHMENT_TEXTURE(depth_tx_), - // GPU_ATTACHMENT_TEXTURE(velocity_.view_vectors_get())); - combined_fb_.ensure(GPU_ATTACHMENT_TEXTURE(dtxl->depth), GPU_ATTACHMENT_TEXTURE(dtxl->color)); - prepass_fb_.ensure(GPU_ATTACHMENT_TEXTURE(dtxl->depth), - GPU_ATTACHMENT_TEXTURE(velocity_.view_vectors_get())); + /* Query temp textures and create frame-buffers. */ + RenderBuffers &rbufs = inst_.render_buffers; + rbufs.acquire(extent_); + combined_fb_.ensure(GPU_ATTACHMENT_TEXTURE(rbufs.depth_tx), + GPU_ATTACHMENT_TEXTURE(rbufs.combined_tx)); + prepass_fb_.ensure(GPU_ATTACHMENT_TEXTURE(rbufs.depth_tx), + GPU_ATTACHMENT_TEXTURE(rbufs.vector_tx)); update_view(); + inst_.hiz_buffer.set_dirty(); + DRW_stats_group_start(name_); - // DRW_view_set_active(render_view_); + DRW_view_set_active(render_view_); + + /* If camera has any motion, compute motion vector in the film pass. Otherwise, we avoid float + * precision issue by setting the motion of all static geometry to 0. */ + float4 clear_velocity = float4(inst_.velocity.camera_has_motion() ? VELOCITY_INVALID : 0.0f); - float4 clear_velocity(VELOCITY_INVALID); GPU_framebuffer_bind(prepass_fb_); GPU_framebuffer_clear_color(prepass_fb_, clear_velocity); /* Alpha stores transmittance. So start at 1. */ @@ -133,7 +118,10 @@ void ShadingView::render() GPU_framebuffer_bind(combined_fb_); GPU_framebuffer_clear_color_depth(combined_fb_, clear_color, 1.0f); - inst_.pipelines.world.render(); + inst_.pipelines.world.render(render_view_new_); + + /* TODO(fclem): Move it after the first prepass (and hiz update) once pipeline is stabilized. 
*/ + inst_.lights.set_view(render_view_new_, extent_); // inst_.pipelines.deferred.render( // render_view_, rt_buffer_opaque_, rt_buffer_refract_, depth_tx_, combined_tx_); @@ -142,52 +130,36 @@ void ShadingView::render() // inst_.lookdev.render_overlay(view_fb_); - inst_.pipelines.forward.render(render_view_, prepass_fb_, combined_fb_, depth_tx_, combined_tx_); + inst_.pipelines.forward.render(render_view_new_, prepass_fb_, combined_fb_, rbufs.combined_tx); - // inst_.lights.debug_draw(view_fb_); - // inst_.shadows.debug_draw(view_fb_); + inst_.lights.debug_draw(render_view_new_, combined_fb_); + inst_.hiz_buffer.debug_draw(render_view_new_, combined_fb_); - // velocity_.resolve(depth_tx_); - velocity_.resolve(dtxl->depth); + GPUTexture *combined_final_tx = render_postfx(rbufs.combined_tx); - // if (inst_.render_passes.vector) { - // inst_.render_passes.vector->accumulate(velocity_.camera_vectors_get(), sub_view_); - // } + inst_.film.accumulate(sub_view_, combined_final_tx); - // GPUTexture *final_radiance_tx = render_post(combined_tx_); + // inst_.shadows.debug_draw(); - // if (inst_.render_passes.combined) { - // inst_.render_passes.combined->accumulate(final_radiance_tx, sub_view_); - // } - - // if (inst_.render_passes.depth) { - // inst_.render_passes.depth->accumulate(depth_tx_, sub_view_); - // } + rbufs.release(); + postfx_tx_.release(); DRW_stats_group_end(); - - combined_tx_.release(); - postfx_tx_.release(); - velocity_.release(); } -GPUTexture *ShadingView::render_post(GPUTexture *input_tx) +GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx) { -#if 0 - if (!dof_.postfx_enabled() && !mb_.enabled()) { + if (!inst_.depth_of_field.postfx_enabled() && !inst_.motion_blur.postfx_enabled()) { return input_tx; } - /* HACK: View name should be unique and static. - * With this, we can reuse the same texture across views. 
*/ - postfx_tx_.acquire(extent_, GPU_RGBA16F, (void *)name_); + postfx_tx_.acquire(extent_, GPU_RGBA16F); - GPUTexture *velocity_tx = velocity_.view_vectors_get(); GPUTexture *output_tx = postfx_tx_; /* Swapping is done internally. Actual output is set to the next input. */ - dof_.render(depth_tx_, &input_tx, &output_tx); - mb_.render(depth_tx_, velocity_tx, &input_tx, &output_tx); -#endif + inst_.depth_of_field.render(render_view_new_, &input_tx, &output_tx, dof_buffer_); + inst_.motion_blur.render(render_view_new_, &input_tx, &output_tx); + return input_tx; } @@ -197,20 +169,25 @@ void ShadingView::update_view() DRW_view_viewmat_get(main_view_, viewmat.ptr(), false); DRW_view_winmat_get(main_view_, winmat.ptr(), false); + /* TODO(fclem): Mixed-resolution rendering: We need to make sure we render with exactly the same + * distances between pixels to line up render samples and target pixels. + * So if the target resolution is not a multiple of the resolution divisor, we need to make the + * projection window bigger in the +X and +Y directions. */ + /* Anti-Aliasing / Super-Sampling jitter. */ - // float jitter_u = 2.0f * (inst_.sampling.rng_get(SAMPLING_FILTER_U) - 0.5f) / extent_[0]; - // float jitter_v = 2.0f * (inst_.sampling.rng_get(SAMPLING_FILTER_V) - 0.5f) / extent_[1]; + float2 jitter = inst_.film.pixel_jitter_get() / float2(extent_); + /* Transform to NDC space. */ + jitter *= 2.0f; - // window_translate_m4(winmat.ptr(), winmat.ptr(), jitter_u, jitter_v); + window_translate_m4(winmat.ptr(), winmat.ptr(), UNPACK2(jitter)); DRW_view_update_sub(sub_view_, viewmat.ptr(), winmat.ptr()); - /* FIXME(fclem): The offset may be is noticeably large and the culling might make object pop + /* FIXME(fclem): The offset may be noticeably large and the culling might make object pop * out of the blurring radius. To fix this, use custom enlarged culling matrix. 
*/ - // dof_.jitter_apply(winmat, viewmat); + inst_.depth_of_field.jitter_apply(winmat, viewmat); DRW_view_update_sub(render_view_, viewmat.ptr(), winmat.ptr()); - // inst_.lightprobes.set_view(render_view_, extent_); - // inst_.lights.set_view(render_view_, extent_, !inst_.use_scene_lights()); + render_view_new_.sync(viewmat, winmat); } /** \} */ diff --git a/source/blender/draw/engines/eevee_next/eevee_view.hh b/source/blender/draw/engines/eevee_next/eevee_view.hh index fb74412f557..74e513357cd 100644 --- a/source/blender/draw/engines/eevee_next/eevee_view.hh +++ b/source/blender/draw/engines/eevee_next/eevee_view.hh @@ -41,19 +41,13 @@ class ShadingView { /** Matrix to apply to the viewmat. */ const float (*face_matrix_)[4]; - /** Post-FX modules. */ - // DepthOfField dof_; - // MotionBlur mb_; - VelocityView velocity_; - /** Raytracing persistent buffers. Only opaque and refraction can have surface tracing. */ // RaytraceBuffer rt_buffer_opaque_; // RaytraceBuffer rt_buffer_refract_; + DepthOfFieldBuffer dof_buffer_; Framebuffer prepass_fb_; Framebuffer combined_fb_; - Texture depth_tx_; - TextureFromPool combined_tx_; TextureFromPool postfx_tx_; /** Main views is created from the camera (or is from the viewport). It is not jittered. */ @@ -63,6 +57,7 @@ class ShadingView { DRWView *sub_view_ = nullptr; /** Same as sub_view_ but has Depth Of Field jitter applied. */ DRWView *render_view_ = nullptr; + View render_view_new_; /** Render size of the view. Can change between scene sample eval. 
*/ int2 extent_ = {-1, -1}; @@ -71,17 +66,17 @@ class ShadingView { public: ShadingView(Instance &inst, const char *name, const float (*face_matrix)[4]) - : inst_(inst), name_(name), face_matrix_(face_matrix), velocity_(inst, name){}; + : inst_(inst), name_(name), face_matrix_(face_matrix), render_view_new_(name){}; ~ShadingView(){}; void init(); - void sync(int2 render_extent_); + void sync(); void render(); - GPUTexture *render_post(GPUTexture *input_tx); + GPUTexture *render_postfx(GPUTexture *input_tx); private: void update_view(); @@ -94,7 +89,7 @@ class ShadingView { * * Container for all views needed to render the final image. * We might need up to 6 views for panoramic cameras. - * All views are always available but only enabled for if need. + * All views are always available but only enabled for if needed. * \{ */ class MainView { @@ -109,8 +104,6 @@ class MainView { ShadingView shading_views_4; ShadingView shading_views_5; #define shading_views_ (&shading_views_0) - /** Internal render size. */ - int render_extent_[2]; public: MainView(Instance &inst) @@ -123,15 +116,8 @@ class MainView { { } - void init(const int2 full_extent_) + void init() { - /* TODO(fclem) parameter hidden in experimental. We need to figure out mipmap bias to preserve - * texture crispiness. 
*/ - float resolution_scale = 1.0f; - for (int i = 0; i < 2; i++) { - render_extent_[i] = max_ii(1, roundf(full_extent_[i] * resolution_scale)); - } - for (auto i : IndexRange(6)) { shading_views_[i].init(); } @@ -140,7 +126,7 @@ class MainView { void sync() { for (auto i : IndexRange(6)) { - shading_views_[i].sync(render_extent_); + shading_views_[i].sync(); } } diff --git a/source/blender/draw/engines/eevee_next/eevee_world.cc b/source/blender/draw/engines/eevee_next/eevee_world.cc index b9cb24fe30a..313c0bda42e 100644 --- a/source/blender/draw/engines/eevee_next/eevee_world.cc +++ b/source/blender/draw/engines/eevee_next/eevee_world.cc @@ -42,10 +42,10 @@ DefaultWorldNodeTree::~DefaultWorldNodeTree() MEM_SAFE_FREE(ntree_); } -/* Configure a default nodetree with the given world. */ +/* Configure a default node-tree with the given world. */ bNodeTree *DefaultWorldNodeTree::nodetree_get(::World *wo) { - /* WARNING: This function is not threadsafe. Which is not a problem for the moment. */ + /* WARNING: This function is not thread-safe. Which is not a problem for the moment. */ copy_v3_fl3(color_socket_->value, wo->horr, wo->horg, wo->horb); return ntree_; } @@ -79,7 +79,7 @@ void World::sync() /* TODO(fclem) This should be detected to scene level. */ ::World *orig_world = (::World *)DEG_get_original_id(&bl_world->id); if (assign_if_different(prev_original_world, orig_world)) { - // inst_.sampling.reset(); + inst_.sampling.reset(); } bNodeTree *ntree = (bl_world->nodetree && bl_world->use_nodes) ? 
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl index a65bb7decb6..6fe5fa01fa3 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_attributes_lib.glsl @@ -3,6 +3,8 @@ #pragma BLENDER_REQUIRE(common_math_lib.glsl) #pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl) +#define EEVEE_ATTRIBUTE_LIB + #if defined(MAT_GEOM_MESH) /* -------------------------------------------------------------------- */ @@ -131,7 +133,7 @@ int g_curves_attr_id = 0; int curves_attribute_element_id() { int id = interp.curves_strand_id; - if (drw_curves.is_point_attribute[g_curves_attr_id] != 0) { + if (drw_curves.is_point_attribute[g_curves_attr_id][0] != 0) { # ifdef COMMON_HAIR_LIB id = hair_get_base_id(); # endif @@ -282,43 +284,3 @@ vec3 attr_load_uv(vec3 attr) /** \} */ #endif - -/* -------------------------------------------------------------------- */ -/** \name Volume Attribute post - * - * TODO(@fclem): These implementation details should concern the DRWManager and not be a fix on - * the engine side. But as of now, the engines are responsible for loading the attributes. - * - * \{ */ - -#if defined(MAT_GEOM_VOLUME) - -float attr_load_temperature_post(float attr) -{ - /* Bring the into standard range without having to modify the grid values */ - attr = (attr > 0.01) ? (attr * drw_volume.temperature_mul + drw_volume.temperature_bias) : 0.0; - return attr; -} -vec4 attr_load_color_post(vec4 attr) -{ - /* Density is premultiplied for interpolation, divide it out here. */ - attr.rgb *= safe_rcp(attr.a); - attr.rgb *= drw_volume.color_mul.rgb; - attr.a = 1.0; - return attr; -} - -#else /* Noop for any other surface. 
*/ - -float attr_load_temperature_post(float attr) -{ - return attr; -} -vec4 attr_load_color_post(vec4 attr) -{ - return attr; -} - -#endif - -/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl index f79e9102d76..2611f714b59 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_camera_lib.glsl @@ -143,24 +143,10 @@ vec2 camera_uv_from_view(CameraData cam, vec3 vV) } } -vec2 camera_uv_from_world(CameraData cam, vec3 V) +vec2 camera_uv_from_world(CameraData cam, vec3 P) { - vec3 vV = transform_point(cam.viewmat, V); - switch (cam.type) { - default: - case CAMERA_ORTHO: - return camera_uv_from_view(cam.persmat, false, V); - case CAMERA_PERSP: - return camera_uv_from_view(cam.persmat, true, V); - case CAMERA_PANO_EQUIRECT: - return camera_equirectangular_from_direction(cam, vV); - case CAMERA_PANO_EQUISOLID: - /* ATTR_FALLTHROUGH; */ - case CAMERA_PANO_EQUIDISTANT: - return camera_fisheye_from_direction(cam, vV); - case CAMERA_PANO_MIRROR: - return camera_mirror_ball_from_direction(cam, vV); - } + vec3 vV = transform_direction(cam.viewmat, normalize(P)); + return camera_uv_from_view(cam, vV); } /** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl new file mode 100644 index 00000000000..d5fdaae6fc1 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_colorspace_lib.glsl @@ -0,0 +1,37 @@ + +/* -------------------------------------------------------------------- */ +/** \name YCoCg + * \{ */ + +vec3 colorspace_YCoCg_from_scene_linear(vec3 rgb_color) +{ + const mat3 colorspace_tx = transpose(mat3(vec3(1, 2, 1), /* Y */ + vec3(2, 0, -2), /* Co */ + vec3(-1, 2, -1))); /* Cg */ + return colorspace_tx * rgb_color; +} + +vec4 
colorspace_YCoCg_from_scene_linear(vec4 rgba_color) +{ + return vec4(colorspace_YCoCg_from_scene_linear(rgba_color.rgb), rgba_color.a); +} + +vec3 colorspace_scene_linear_from_YCoCg(vec3 ycocg_color) +{ + float Y = ycocg_color.x; + float Co = ycocg_color.y; + float Cg = ycocg_color.z; + + vec3 rgb_color; + rgb_color.r = Y + Co - Cg; + rgb_color.g = Y + Cg; + rgb_color.b = Y - Co - Cg; + return rgb_color * 0.25; +} + +vec4 colorspace_scene_linear_from_YCoCg(vec4 ycocg_color) +{ + return vec4(colorspace_scene_linear_from_YCoCg(ycocg_color.rgb), ycocg_color.a); +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_cryptomatte_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_cryptomatte_lib.glsl new file mode 100644 index 00000000000..e874a6b56ea --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_cryptomatte_lib.glsl @@ -0,0 +1,70 @@ +/** Storing/merging and sorting cryptomatte samples. */ + +bool cryptomatte_can_merge_sample(vec2 dst, vec2 src) +{ + if (dst == vec2(0.0, 0.0)) { + return true; + } + if (dst.x == src.x) { + return true; + } + return false; +} + +vec2 cryptomatte_merge_sample(vec2 dst, vec2 src) +{ + return vec2(src.x, dst.y + src.y); +} + +vec4 cryptomatte_false_color(float hash) +{ + uint m3hash = floatBitsToUint(hash); + return vec4(hash, + float(m3hash << 8) / float(0xFFFFFFFFu), + float(m3hash << 16) / float(0xFFFFFFFFu), + 1.0); +} + +void cryptomatte_clear_samples(FilmSample dst) +{ + int layer_len = imageSize(cryptomatte_img).z; + for (int i = 0; i < layer_len; i++) { + imageStore(cryptomatte_img, ivec3(dst.texel, i), vec4(0.0)); + } +} + +void cryptomatte_store_film_sample(FilmSample dst, + int cryptomatte_layer_id, + vec2 crypto_sample, + out vec4 out_color) +{ + if (crypto_sample.y == 0.0) { + return; + } + for (int i = 0; i < film_buf.cryptomatte_samples_len / 2; i++) { + ivec3 img_co = ivec3(dst.texel, cryptomatte_layer_id + i); + vec4 sample_pair = imageLoad(cryptomatte_img, 
img_co); + if (cryptomatte_can_merge_sample(sample_pair.xy, crypto_sample)) { + sample_pair.xy = cryptomatte_merge_sample(sample_pair.xy, crypto_sample); + /* In viewport only one layer is active. */ + /* TODO(jbakker): we are displaying the first sample, but we should display the highest + * weighted one. */ + if (cryptomatte_layer_id + i == 0) { + out_color = cryptomatte_false_color(sample_pair.x); + } + } + else if (cryptomatte_can_merge_sample(sample_pair.zw, crypto_sample)) { + sample_pair.zw = cryptomatte_merge_sample(sample_pair.zw, crypto_sample); + } + else if (i == film_buf.cryptomatte_samples_len / 2 - 1) { + /* TODO(jbakker): New hash detected, but there is no space left to store it. Currently we + * will ignore this sample, but ideally we could replace a sample with a lowest weight. */ + continue; + } + else { + continue; + } + imageStore(cryptomatte_img, img_co, sample_pair); + break; + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl new file mode 100644 index 00000000000..99a47c541e9 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl @@ -0,0 +1,680 @@ + +/** + * Depth of Field Gather accumulator. + * We currently have only 2 which are very similar. + * One is for the halfres gather passes and the other one for slight in focus regions. + **/ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/* -------------------------------------------------------------------- */ +/** \name Options. + * \{ */ + +/* Quality options */ +#ifdef DOF_HOLEFILL_PASS +/* No need for very high density for hole_fill. 
*/ +const int gather_ring_count = 3; +const int gather_ring_density = 3; +const int gather_max_density_change = 0; +const int gather_density_change_ring = 1; +#else +const int gather_ring_count = DOF_GATHER_RING_COUNT; +const int gather_ring_density = 3; +const int gather_max_density_change = 50; /* Dictates the maximum good quality blur. */ +const int gather_density_change_ring = 1; +#endif + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Constants. + * \{ */ + +const float unit_ring_radius = 1.0 / float(gather_ring_count); +const float unit_sample_radius = 1.0 / float(gather_ring_count + 0.5); +const float large_kernel_radius = 0.5 + float(gather_ring_count); +const float smaller_kernel_radius = 0.5 + float(gather_ring_count - gather_density_change_ring); +/* NOTE(fclem) the bias is reducing issues with density change visible transition. */ +const float radius_downscale_factor = smaller_kernel_radius / large_kernel_radius; +const int change_density_at_ring = (gather_ring_count - gather_density_change_ring + 1); +const float coc_radius_error = 2.0; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gather common. + * \{ */ + +struct DofGatherData { + vec4 color; + float weight; + float dist; /* TODO remove */ + /* For scatter occlusion. */ + float coc; + float coc_sqr; + /* For ring bucket merging. */ + float transparency; + + float layer_opacity; +}; + +#define GATHER_DATA_INIT DofGatherData(vec4(0.0), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) + +/* Intersection with the center of the kernel. */ +float dof_intersection_weight(float coc, float distance_from_center, float intersection_multiplier) +{ + if (no_smooth_intersection) { + return step(0.0, (abs(coc) - distance_from_center)); + } + else { + /* (Slide 64). 
*/ + return saturate((abs(coc) - distance_from_center) * intersection_multiplier + 0.5); + } +} + +/* Returns weight of the sample for the outer bucket (containing previous + * rings). */ +float dof_gather_accum_weight(float coc, float bordering_radius, bool first_ring) +{ + /* First ring has nothing to be mixed against. */ + if (first_ring) { + return 0.0; + } + return saturate(coc - bordering_radius); +} + +void dof_gather_ammend_weight(inout DofGatherData sample_data, float weight) +{ + sample_data.color *= weight; + sample_data.coc *= weight; + sample_data.coc_sqr *= weight; + sample_data.weight *= weight; +} + +void dof_gather_accumulate_sample(DofGatherData sample_data, + float weight, + inout DofGatherData accum_data) +{ + accum_data.color += sample_data.color * weight; + accum_data.coc += sample_data.coc * weight; + accum_data.coc_sqr += sample_data.coc * (sample_data.coc * weight); + accum_data.weight += weight; +} + +void dof_gather_accumulate_sample_pair(DofGatherData pair_data[2], + float bordering_radius, + float intersection_multiplier, + bool first_ring, + const bool do_fast_gather, + const bool is_foreground, + inout DofGatherData ring_data, + inout DofGatherData accum_data) +{ + if (do_fast_gather) { + for (int i = 0; i < 2; i++) { + dof_gather_accumulate_sample(pair_data[i], 1.0, accum_data); + accum_data.layer_opacity += 1.0; + } + return; + } + +#if 0 + const float mirroring_threshold = -dof_layer_threshold - dof_layer_offset; + /* TODO(fclem) Promote to parameter? dither with Noise? 
*/ + const float mirroring_min_distance = 15.0; + if (pair_data[0].coc < mirroring_threshold && + (pair_data[1].coc - mirroring_min_distance) > pair_data[0].coc) { + pair_data[1].coc = pair_data[0].coc; + } + else if (pair_data[1].coc < mirroring_threshold && + (pair_data[0].coc - mirroring_min_distance) > pair_data[1].coc) { + pair_data[0].coc = pair_data[1].coc; + } +#endif + + for (int i = 0; i < 2; i++) { + float sample_weight = dof_sample_weight(pair_data[i].coc); + float layer_weight = dof_layer_weight(pair_data[i].coc, is_foreground); + float inter_weight = dof_intersection_weight( + pair_data[i].coc, pair_data[i].dist, intersection_multiplier); + float weight = inter_weight * layer_weight * sample_weight; + + /** + * If a CoC is larger than bordering radius we accumulate it to the general accumulator. + * If not, we accumulate to the ring bucket. This is to have more consistent sample occlusion. + **/ + float accum_weight = dof_gather_accum_weight(pair_data[i].coc, bordering_radius, first_ring); + dof_gather_accumulate_sample(pair_data[i], weight * accum_weight, accum_data); + dof_gather_accumulate_sample(pair_data[i], weight * (1.0 - accum_weight), ring_data); + + accum_data.layer_opacity += layer_weight; + + if (is_foreground) { + ring_data.transparency += 1.0 - inter_weight * layer_weight; + } + else { + float coc = is_foreground ? -pair_data[i].coc : pair_data[i].coc; + ring_data.transparency += saturate(coc - bordering_radius); + } + } +} + +void dof_gather_accumulate_sample_ring(DofGatherData ring_data, + int sample_count, + bool first_ring, + const bool do_fast_gather, + /* accum_data occludes the ring_data if true. */ + const bool reversed_occlusion, + inout DofGatherData accum_data) +{ + if (do_fast_gather) { + /* Do nothing as ring_data contains nothing. All samples are already in + * accum_data. */ + return; + } + + if (first_ring) { + /* Layer opacity is directly accumulated into accum_data data. 
*/ + accum_data.color = ring_data.color; + accum_data.coc = ring_data.coc; + accum_data.coc_sqr = ring_data.coc_sqr; + accum_data.weight = ring_data.weight; + + accum_data.transparency = ring_data.transparency / float(sample_count); + return; + } + + if (ring_data.weight == 0.0) { + return; + } + + float ring_avg_coc = ring_data.coc / ring_data.weight; + float accum_avg_coc = accum_data.coc / accum_data.weight; + + /* Smooth test to set opacity to see if the ring average coc occludes the + * accumulation. Test is reversed to be multiplied against opacity. */ + float ring_occlu = saturate(accum_avg_coc - ring_avg_coc); + /* The bias here is arbitrary. Seems to avoid weird looking foreground in most + * cases. We might need to make it a parameter or find a relative bias. */ + float accum_occlu = saturate((ring_avg_coc - accum_avg_coc) * 0.1 - 1.0); + + if (is_resolve) { + ring_occlu = accum_occlu = 0.0; + } + + if (no_gather_occlusion) { + ring_occlu = 0.0; + accum_occlu = 0.0; + } + + /* (Slide 40) */ + float ring_opacity = saturate(1.0 - ring_data.transparency / float(sample_count)); + float accum_opacity = 1.0 - accum_data.transparency; + + if (reversed_occlusion) { + /* Accum_data occludes the ring. */ + float alpha = (accum_data.weight == 0.0) ? 0.0 : accum_opacity * accum_occlu; + float one_minus_alpha = 1.0 - alpha; + + accum_data.color += ring_data.color * one_minus_alpha; + accum_data.coc += ring_data.coc * one_minus_alpha; + accum_data.coc_sqr += ring_data.coc_sqr * one_minus_alpha; + accum_data.weight += ring_data.weight * one_minus_alpha; + + accum_data.transparency *= 1.0 - ring_opacity; + } + else { + /* Ring occludes the accum_data (Same as reference). */ + float alpha = (accum_data.weight == 0.0) ? 
1.0 : (ring_opacity * ring_occlu); + float one_minus_alpha = 1.0 - alpha; + + accum_data.color = accum_data.color * one_minus_alpha + ring_data.color; + accum_data.coc = accum_data.coc * one_minus_alpha + ring_data.coc; + accum_data.coc_sqr = accum_data.coc_sqr * one_minus_alpha + ring_data.coc_sqr; + accum_data.weight = accum_data.weight * one_minus_alpha + ring_data.weight; + } +} + +/* FIXME(fclem) Seems to be wrong since it needs ringcount+1 as input for + * slightfocus gather. */ +/* This should be replaced by web_sample_count_get() but doing so is breaking other things. */ +int dof_gather_total_sample_count(const int ring_count, const int ring_density) +{ + return (ring_count * ring_count - ring_count) * ring_density + 1; +} + +void dof_gather_accumulate_center_sample(DofGatherData center_data, + float bordering_radius, + int i_radius, + const bool do_fast_gather, + const bool is_foreground, + const bool is_resolve, + inout DofGatherData accum_data) +{ + float layer_weight = dof_layer_weight(center_data.coc, is_foreground); + float sample_weight = dof_sample_weight(center_data.coc); + float weight = layer_weight * sample_weight; + float accum_weight = dof_gather_accum_weight(center_data.coc, bordering_radius, false); + + if (do_fast_gather) { + /* Hope for the compiler to optimize the above. */ + layer_weight = 1.0; + sample_weight = 1.0; + accum_weight = 1.0; + weight = 1.0; + } + + center_data.transparency = 1.0 - weight; + + dof_gather_accumulate_sample(center_data, weight * accum_weight, accum_data); + + if (!do_fast_gather) { + if (is_resolve) { + /* NOTE(fclem): Hack to smooth transition to full in-focus opacity. 
*/ + int total_sample_count = dof_gather_total_sample_count(i_radius + 1, + DOF_SLIGHT_FOCUS_DENSITY); + float fac = saturate(1.0 - abs(center_data.coc) / float(dof_layer_threshold)); + accum_data.layer_opacity += float(total_sample_count) * fac * fac; + } + accum_data.layer_opacity += layer_weight; + + /* Logic of dof_gather_accumulate_sample(). */ + weight *= (1.0 - accum_weight); + center_data.coc_sqr = center_data.coc * (center_data.coc * weight); + center_data.color *= weight; + center_data.coc *= weight; + center_data.weight = weight; + + if (is_foreground && !is_resolve) { + /* Reduce issue with closer foreground over distant foreground. */ + float ring_area = sqr(bordering_radius); + dof_gather_ammend_weight(center_data, ring_area); + } + + /* Accumulate center as its own ring. */ + dof_gather_accumulate_sample_ring( + center_data, 1, false, do_fast_gather, is_foreground, accum_data); + } +} + +int dof_gather_total_sample_count_with_density_change(const int ring_count, + const int ring_density, + int density_change) +{ + int sample_count_per_density_change = dof_gather_total_sample_count(ring_count, ring_density) - + dof_gather_total_sample_count( + ring_count - gather_density_change_ring, ring_density); + + return dof_gather_total_sample_count(ring_count, ring_density) + + sample_count_per_density_change * density_change; +} + +void dof_gather_accumulate_resolve(int total_sample_count, + DofGatherData accum_data, + out vec4 out_col, + out float out_weight, + out vec2 out_occlusion) +{ + float weight_inv = safe_rcp(accum_data.weight); + out_col = accum_data.color * weight_inv; + out_occlusion = vec2(abs(accum_data.coc), accum_data.coc_sqr) * weight_inv; + + if (is_foreground) { + out_weight = 1.0 - accum_data.transparency; + } + else if (accum_data.weight > 0.0) { + out_weight = accum_data.layer_opacity / float(total_sample_count); + } + else { + out_weight = 0.0; + } + /* Gathering may not accumulate to 1.0 alpha because of float precision. 
*/ + if (out_weight > 0.99) { + out_weight = 1.0; + } + else if (out_weight < 0.01) { + out_weight = 0.0; + } + /* Same thing for alpha channel. */ + if (out_col.a > 0.993) { + out_col.a = 1.0; + } + else if (out_col.a < 0.003) { + out_col.a = 0.0; + } +} + +float dof_load_gather_coc(sampler2D gather_input_coc_tx, vec2 uv, float lod) +{ + float coc = textureLod(gather_input_coc_tx, uv, lod).r; + /* We gather at halfres. CoC must be divided by 2 to be compared against radii. */ + return coc * 0.5; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Common Gather accumulator. + * \{ */ + +/* Radii needs to be halfres CoC sizes. */ +bool dof_do_density_change(float base_radius, float min_intersectable_radius) +{ + /* Reduce artifact for very large blur. */ + min_intersectable_radius *= 0.1; + + bool need_new_density = (base_radius * unit_ring_radius > min_intersectable_radius); + bool larger_than_min_density = (base_radius * radius_downscale_factor > + float(gather_ring_count)); + + return need_new_density && larger_than_min_density; +} + +void dof_gather_init(float base_radius, + vec2 noise, + out vec2 center_co, + out float lod, + out float intersection_multiplier) +{ + /* Jitter center half a ring to reduce undersampling. */ + vec2 jitter_ofs = 0.499 * sample_disk(noise); + if (DOF_BOKEH_TEXTURE) { + jitter_ofs *= dof_buf.bokeh_anisotropic_scale; + } + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5; + center_co = frag_coord + jitter_ofs * base_radius * unit_sample_radius; + + /* TODO(fclem) Seems like the default lod selection is too big. Bias to avoid blocky moving out + * of focus shapes. */ + const float lod_bias = -2.0; + lod = max(floor(log2(base_radius * unit_sample_radius) + 0.5) + lod_bias, 0.0); + + if (no_gather_mipmaps) { + lod = 0.0; + } + /* (Slide 64). 
*/ + intersection_multiplier = pow(0.5, lod); +} + +void dof_gather_accumulator(sampler2D color_tx, + sampler2D color_bilinear_tx, + sampler2D coc_tx, + sampler2D bkh_lut_tx, /* Renamed because of ugly macro. */ + float base_radius, + float min_intersectable_radius, + const bool do_fast_gather, + const bool do_density_change, + out vec4 out_color, + out float out_weight, + out vec2 out_occlusion) +{ + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy); + vec2 noise_offset = sampling_rng_2D_get(SAMPLING_LENS_U); + vec2 noise = no_gather_random ? vec2(0.0, 0.0) : + vec2(interlieved_gradient_noise(frag_coord, 0, noise_offset.x), + interlieved_gradient_noise(frag_coord, 1, noise_offset.y)); + + if (!do_fast_gather) { + /* Jitter the radius to reduce noticeable density changes. */ + base_radius += noise.x * unit_ring_radius * base_radius; + } + else { + /* Jittering the radius more than we need means we are going to feather the bokeh shape half a + * ring. So we need to compensate for fast gather that does not check CoC intersection. */ + base_radius += (0.5 - noise.x) * 1.5 * unit_ring_radius * base_radius; + } + /* TODO(fclem) another seed? For now Cranly-Partterson rotation with golden ratio. */ + noise.x = fract(noise.x * 6.1803398875); + + float lod, isect_mul; + vec2 center_co; + dof_gather_init(base_radius, noise, center_co, lod, isect_mul); + + bool first_ring = true; + + DofGatherData accum_data = GATHER_DATA_INIT; + + int density_change = 0; + for (int ring = gather_ring_count; ring > 0; ring--) { + int sample_pair_count = gather_ring_density * ring; + + float step_rot = M_PI / float(sample_pair_count); + mat2 step_rot_mat = rot2_from_angle(step_rot); + + float angle_offset = noise.y * step_rot; + vec2 offset = vec2(cos(angle_offset), sin(angle_offset)); + + float ring_radius = float(ring) * unit_sample_radius * base_radius; + + /* Slide 38. 
*/ + float bordering_radius = ring_radius + + (0.5 + coc_radius_error) * base_radius * unit_sample_radius; + DofGatherData ring_data = GATHER_DATA_INIT; + for (int sample_pair = 0; sample_pair < sample_pair_count; sample_pair++) { + offset = step_rot_mat * offset; + + DofGatherData pair_data[2]; + for (int i = 0; i < 2; i++) { + vec2 offset_co = ((i == 0) ? offset : -offset); + if (DOF_BOKEH_TEXTURE) { + /* Scaling to 0.25 for speed. Improves texture cache hit. */ + offset_co = texture(bkh_lut_tx, offset_co * 0.25 + 0.5).rg; + offset_co *= (is_foreground) ? -dof_buf.bokeh_anisotropic_scale : + dof_buf.bokeh_anisotropic_scale; + } + vec2 sample_co = center_co + offset_co * ring_radius; + vec2 sample_uv = sample_co * dof_buf.gather_uv_fac; + if (do_fast_gather) { + pair_data[i].color = textureLod(color_bilinear_tx, sample_uv, lod); + } + else { + pair_data[i].color = textureLod(color_tx, sample_uv, lod); + } + pair_data[i].coc = dof_load_gather_coc(coc_tx, sample_uv, lod); + pair_data[i].dist = ring_radius; + } + + dof_gather_accumulate_sample_pair(pair_data, + bordering_radius, + isect_mul, + first_ring, + do_fast_gather, + is_foreground, + ring_data, + accum_data); + } + + if (is_foreground) { + /* Reduce issue with closer foreground over distant foreground. */ + /* TODO(fclem) this seems to not be completely correct as the issue remains. 
*/ + float ring_area = (sqr(float(ring) + 0.5 + coc_radius_error) - + sqr(float(ring) - 0.5 + coc_radius_error)) * + sqr(base_radius * unit_sample_radius); + dof_gather_ammend_weight(ring_data, ring_area); + } + + dof_gather_accumulate_sample_ring( + ring_data, sample_pair_count * 2, first_ring, do_fast_gather, is_foreground, accum_data); + + first_ring = false; + + if (do_density_change && (ring == change_density_at_ring) && + (density_change < gather_max_density_change)) { + if (dof_do_density_change(base_radius, min_intersectable_radius)) { + base_radius *= radius_downscale_factor; + ring += gather_density_change_ring; + /* We need to account for the density change in the weights (slide 62). + * For that multiply old kernel data by its area divided by the new kernel area. */ + const float outer_rings_weight = 1.0 / (radius_downscale_factor * radius_downscale_factor); + /* Samples are already weighted per ring in foreground pass. */ + if (!is_foreground) { + dof_gather_ammend_weight(accum_data, outer_rings_weight); + } + /* Re-init kernel position & sampling parameters. */ + dof_gather_init(base_radius, noise, center_co, lod, isect_mul); + density_change++; + } + } + } + + { + /* Center sample. */ + vec2 sample_uv = center_co * dof_buf.gather_uv_fac; + DofGatherData center_data; + if (do_fast_gather) { + center_data.color = textureLod(color_bilinear_tx, sample_uv, lod); + } + else { + center_data.color = textureLod(color_tx, sample_uv, lod); + } + center_data.coc = dof_load_gather_coc(coc_tx, sample_uv, lod); + center_data.dist = 0.0; + + /* Slide 38. 
*/ + float bordering_radius = (0.5 + coc_radius_error) * base_radius * unit_sample_radius; + + dof_gather_accumulate_center_sample( + center_data, bordering_radius, 0, do_fast_gather, is_foreground, false, accum_data); + } + + int total_sample_count = dof_gather_total_sample_count_with_density_change( + gather_ring_count, gather_ring_density, density_change); + dof_gather_accumulate_resolve( + total_sample_count, accum_data, out_color, out_weight, out_occlusion); + + if (debug_gather_perf && density_change > 0) { + float fac = saturate(float(density_change) / float(10.0)); + out_color.rgb = avg(out_color.rgb) * neon_gradient(fac); + } + if (debug_gather_perf && do_fast_gather) { + out_color.rgb = avg(out_color.rgb) * vec3(0.0, 1.0, 0.0); + } + if (debug_scatter_perf) { + out_color.rgb = avg(out_color.rgb) * vec3(0.0, 1.0, 0.0); + } + + /* Output premultiplied color so we can use bilinear sampler in resolve pass. */ + out_color *= out_weight; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Slight focus accumulator. + * + * The full pixel neighborhood is gathered. + * \{ */ + +void dof_slight_focus_gather(sampler2D depth_tx, + sampler2D color_tx, + sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */ + float radius, + out vec4 out_color, + out float out_weight, + out float out_center_coc) +{ + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5; + vec2 noise_offset = sampling_rng_2D_get(SAMPLING_LENS_U); + vec2 noise = no_gather_random ? vec2(0.0) : + vec2(interlieved_gradient_noise(frag_coord, 3, noise_offset.x), + interlieved_gradient_noise(frag_coord, 5, noise_offset.y)); + + DofGatherData fg_accum = GATHER_DATA_INIT; + DofGatherData bg_accum = GATHER_DATA_INIT; + + int i_radius = clamp(int(radius), 0, int(dof_layer_threshold)); + + const float sample_count_max = float(DOF_SLIGHT_FOCUS_SAMPLE_MAX); + /* Scale by search area. 
*/ + float sample_count = sample_count_max * saturate(sqr(radius) / sqr(dof_layer_threshold)); + + bool first_ring = true; + + for (float s = 0.0; s < sample_count; s++) { + vec2 rand2 = fract(hammersley_2d(s, sample_count) + noise); + vec2 offset = sample_disk(rand2); + float ring_dist = sqrt(rand2.y); + + DofGatherData pair_data[2]; + for (int i = 0; i < 2; i++) { + vec2 sample_offset = ((i == 0) ? offset : -offset); + /* OPTI: could precompute the factor. */ + vec2 sample_uv = (frag_coord + sample_offset) / vec2(textureSize(depth_tx, 0)); + float depth = textureLod(depth_tx, sample_uv, 0.0).r; + pair_data[i].coc = dof_coc_from_depth(dof_buf, sample_uv, depth); + pair_data[i].color = safe_color(textureLod(color_tx, sample_uv, 0.0)); + pair_data[i].dist = ring_dist; + if (DOF_BOKEH_TEXTURE) { + /* Contains subpixel distance to bokeh shape. */ + ivec2 lut_texel = ivec2(round(sample_offset)) + dof_max_slight_focus_radius; + pair_data[i].dist = texelFetch(bkh_lut_tx, lut_texel, 0).r; + } + pair_data[i].coc = clamp(pair_data[i].coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max); + } + + float bordering_radius = ring_dist + 0.5; + const float isect_mul = 1.0; + DofGatherData bg_ring = GATHER_DATA_INIT; + dof_gather_accumulate_sample_pair( + pair_data, bordering_radius, isect_mul, first_ring, false, false, bg_ring, bg_accum); + /* Treat each sample as a ring. */ + dof_gather_accumulate_sample_ring(bg_ring, 2, first_ring, false, false, bg_accum); + + if (DOF_BOKEH_TEXTURE) { + /* Swap distances in order to flip bokeh shape for foreground. */ + float tmp = pair_data[0].dist; + pair_data[0].dist = pair_data[1].dist; + pair_data[1].dist = tmp; + } + DofGatherData fg_ring = GATHER_DATA_INIT; + dof_gather_accumulate_sample_pair( + pair_data, bordering_radius, isect_mul, first_ring, false, true, fg_ring, fg_accum); + /* Treat each sample as a ring. 
*/ + dof_gather_accumulate_sample_ring(fg_ring, 2, first_ring, false, true, fg_accum); + + first_ring = false; + } + + /* Center sample. */ + vec2 sample_uv = frag_coord / vec2(textureSize(depth_tx, 0)); + DofGatherData center_data; + center_data.color = safe_color(textureLod(color_tx, sample_uv, 0.0)); + center_data.coc = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r); + center_data.coc = clamp(center_data.coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max); + center_data.dist = 0.0; + + out_center_coc = center_data.coc; + + /* Slide 38. */ + float bordering_radius = 0.5; + + dof_gather_accumulate_center_sample( + center_data, bordering_radius, i_radius, false, true, true, fg_accum); + dof_gather_accumulate_center_sample( + center_data, bordering_radius, i_radius, false, false, true, bg_accum); + + vec4 bg_col, fg_col; + float bg_weight, fg_weight; + vec2 unused_occlusion; + + int total_sample_count = int(sample_count) * 2 + 1; + dof_gather_accumulate_resolve(total_sample_count, bg_accum, bg_col, bg_weight, unused_occlusion); + dof_gather_accumulate_resolve(total_sample_count, fg_accum, fg_col, fg_weight, unused_occlusion); + + /* Fix weighting issues on perfectly focus to slight focus transitioning areas. 
*/ + if (abs(center_data.coc) < 0.5) { + bg_col = center_data.color; + bg_weight = 1.0; + } + + /* Alpha Over */ + float alpha = 1.0 - fg_weight; + out_weight = bg_weight * alpha + fg_weight; + out_color = bg_col * bg_weight * alpha + fg_col * fg_weight; +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl new file mode 100644 index 00000000000..26a597b04e8 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl @@ -0,0 +1,55 @@ + +/** + * Bokeh Look Up Table: This outputs a radius multiplier to shape the sampling in gather pass or + * the scatter sprite appearance. This is only used if bokeh shape is either anamorphic or is not + * a perfect circle. + * We correct samples spacing for polygonal bokeh shapes. However, we do not for anamorphic bokeh + * as it is way more complex and expensive to do. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +void main() +{ + vec2 gather_uv = ((vec2(gl_GlobalInvocationID.xy) + 0.5) / float(DOF_BOKEH_LUT_SIZE)); + /* Center uv in range [-1..1]. */ + gather_uv = gather_uv * 2.0 - 1.0; + + vec2 slight_focus_texel = vec2(gl_GlobalInvocationID.xy) - float(dof_max_slight_focus_radius); + + float radius = length(gather_uv); + + if (dof_buf.bokeh_blades > 0.0) { + /* NOTE: atan(y,x) has output range [-M_PI..M_PI], so add 2pi to avoid negative angles. 
*/ + float theta = atan(gather_uv.y, gather_uv.x) + M_2PI; + float r = length(gather_uv); + + radius /= circle_to_polygon_radius(dof_buf.bokeh_blades, theta - dof_buf.bokeh_rotation); + + float theta_new = circle_to_polygon_angle(dof_buf.bokeh_blades, theta); + float r_new = circle_to_polygon_radius(dof_buf.bokeh_blades, theta_new); + + theta_new -= dof_buf.bokeh_rotation; + + gather_uv = r_new * vec2(-cos(theta_new), sin(theta_new)); + + { + /* Slight focus distance */ + slight_focus_texel *= dof_buf.bokeh_anisotropic_scale_inv; + float theta = atan(slight_focus_texel.y, -slight_focus_texel.x) + M_2PI; + slight_focus_texel /= circle_to_polygon_radius(dof_buf.bokeh_blades, + theta + dof_buf.bokeh_rotation); + } + } + else { + gather_uv *= safe_rcp(length(gather_uv)); + } + + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + /* For gather store the normalized UV. */ + imageStore(out_gather_lut_img, texel, gather_uv.xyxy); + /* For scatter store distance. LUT will be scaled by COC. */ + imageStore(out_scatter_lut_img, texel, vec4(radius)); + /* For slight focus gather store pixel perfect distance. */ + imageStore(out_resolve_lut_img, texel, vec4(length(slight_focus_texel))); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl new file mode 100644 index 00000000000..3d45f285da9 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl @@ -0,0 +1,32 @@ + +/** + * Downsample pass: CoC aware downsample to quarter resolution. + * + * Pretty much identical to the setup pass but get CoC from buffer. + * Also does not weight luma for the bilateral weights. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +void main() +{ + vec2 halfres_texel_size = 1.0 / vec2(textureSize(color_tx, 0).xy); + /* Center uv around the 4 halfres pixels. 
*/ + vec2 quad_center = vec2(gl_GlobalInvocationID * 2 + 1) * halfres_texel_size; + + vec4 colors[4]; + vec4 cocs; + for (int i = 0; i < 4; i++) { + vec2 sample_uv = quad_center + quad_offsets[i] * halfres_texel_size; + colors[i] = textureLod(color_tx, sample_uv, 0.0); + cocs[i] = textureLod(coc_tx, sample_uv, 0.0).r; + } + + vec4 weights = dof_bilateral_coc_weights(cocs); + /* Normalize so that the sum is 1. */ + weights *= safe_rcp(sum(weights)); + + vec4 out_color = weighted_sum_array(colors, weights); + + imageStore(out_color_img, ivec2(gl_GlobalInvocationID.xy), out_color); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl new file mode 100644 index 00000000000..49c93ca63cd --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl @@ -0,0 +1,163 @@ + +/** + * Gather Filter pass: Filter the gather pass result to reduce noise. + * + * This is a simple 3x3 median filter to avoid dilating highlights with a 3x3 max filter even if + * cheaper. + */ + +struct FilterSample { + vec4 color; + float weight; +}; + +/* -------------------------------------------------------------------- */ +/** \name Pixel cache. + * \{ */ + +const uint cache_size = gl_WorkGroupSize.x + 2; +shared vec4 color_cache[cache_size][cache_size]; +shared float weight_cache[cache_size][cache_size]; + +void cache_init() +{ + /** + * Load enough values into LDS to perform the filter. + * + * ┌──────────────────────────────┐ + * │ │ < Border texels that needs to be loaded. + * │ x x x x x x x x │ ─┐ + * │ x x x x x x x x │ │ + * │ x x x x x x x x │ │ + * │ x x x x x x x x │ │ Thread Group Size 8x8. + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ ─┘ + * │ L L L L L │ < Border texels that needs to be loaded. 
+ * └──────────────────────────────┘ + * └───────────┘ + * Load using 5x5 threads. + */ + + ivec2 texel = ivec2(gl_GlobalInvocationID.xy) - 1; + if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) { + for (int y = 0; y < 2; y++) { + for (int x = 0; x < 2; x++) { + ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u); + ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset; + ivec2 load_texel = clamp(texel + offset, ivec2(0), textureSize(color_tx, 0) - 1); + + color_cache[cache_texel.y][cache_texel.x] = texelFetch(color_tx, load_texel, 0); + weight_cache[cache_texel.y][cache_texel.x] = texelFetch(weight_tx, load_texel, 0).r; + } + } + } + barrier(); +} + +FilterSample cache_sample(int x, int y) +{ + return FilterSample(color_cache[y][x], weight_cache[y][x]); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Median filter + * From: + * Implementing Median Filters in XC4000E FPGAs + * JOHN L. SMITH, Univision Technologies Inc., Billerica, MA + * http://users.utcluj.ro/~baruch/resources/Image/xl23_16.pdf + * Figure 1 + * \{ */ + +FilterSample filter_min(FilterSample a, FilterSample b) +{ + return FilterSample(min(a.color, b.color), min(a.weight, b.weight)); +} + +FilterSample filter_max(FilterSample a, FilterSample b) +{ + return FilterSample(max(a.color, b.color), max(a.weight, b.weight)); +} + +FilterSample filter_min(FilterSample a, FilterSample b, FilterSample c) +{ + return FilterSample(min(a.color, min(c.color, b.color)), min(a.weight, min(c.weight, b.weight))); +} + +FilterSample filter_max(FilterSample a, FilterSample b, FilterSample c) +{ + return FilterSample(max(a.color, max(c.color, b.color)), max(a.weight, max(c.weight, b.weight))); +} + +FilterSample filter_median(FilterSample s1, FilterSample s2, FilterSample s3) +{ + /* From diagram, with nodes numbered from top to bottom. 
*/ + FilterSample l1 = filter_min(s2, s3); + FilterSample h1 = filter_max(s2, s3); + FilterSample h2 = filter_max(s1, l1); + FilterSample l3 = filter_min(h2, h1); + return l3; +} + +struct FilterLmhResult { + FilterSample low; + FilterSample median; + FilterSample high; +}; + +FilterLmhResult filter_lmh(FilterSample s1, FilterSample s2, FilterSample s3) +{ + /* From diagram, with nodes numbered from top to bottom. */ + FilterSample h1 = filter_max(s2, s3); + FilterSample l1 = filter_min(s2, s3); + + FilterSample h2 = filter_max(s1, l1); + FilterSample l2 = filter_min(s1, l1); + + FilterSample h3 = filter_max(h2, h1); + FilterSample l3 = filter_min(h2, h1); + + FilterLmhResult result; + result.low = l2; + result.median = l3; + result.high = h3; + + return result; +} + +/** \} */ + +void main() +{ + /** + * NOTE: We can **NOT** optimize by discarding some tiles as the result is sampled using bilinear + * filtering in the resolve pass. Not outputting to a tile means that border texels have + * undefined value and tile border will be noticeable in the final image. + */ + + cache_init(); + + ivec2 texel = ivec2(gl_LocalInvocationID.xy); + + FilterLmhResult rows[3]; + for (int y = 0; y < 3; y++) { + rows[y] = filter_lmh(cache_sample(texel.x + 0, texel.y + y), + cache_sample(texel.x + 1, texel.y + y), + cache_sample(texel.x + 2, texel.y + y)); + } + /* Left nodes. */ + FilterSample high = filter_max(rows[0].low, rows[1].low, rows[2].low); + /* Right nodes. */ + FilterSample low = filter_min(rows[0].high, rows[1].high, rows[2].high); + /* Center nodes. */ + FilterSample median = filter_median(rows[0].median, rows[1].median, rows[2].median); + /* Last bottom nodes. 
*/ + median = filter_median(low, median, high); + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + imageStore(out_color_img, out_texel, median.color); + imageStore(out_weight_img, out_texel, vec4(median.weight)); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl new file mode 100644 index 00000000000..cf8dd7a36e6 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl @@ -0,0 +1,99 @@ + +/** + * Gather pass: Convolve foreground and background parts in separate passes. + * + * Using the min&max CoC tile buffer, we select the best appropriate method to blur the scene + *color. A fast gather path is taken if there is not many CoC variation inside the tile. + * + * We sample using an octaweb sampling pattern. We randomize the kernel center and each ring + * rotation to ensure maximum coverage. + * + * Outputs: + * - Color * Weight, Weight, Occlusion 'CoC' Depth (mean and variance) + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl) + +void main() +{ + ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy / DOF_TILES_SIZE); + CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co); + CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile); + + float base_radius, min_radius, min_intersectable_radius; + bool can_early_out; + if (is_foreground) { + base_radius = -coc_tile.fg_min_coc; + min_radius = -coc_tile.fg_max_coc; + min_intersectable_radius = -coc_tile.fg_max_intersectable_coc; + can_early_out = !prediction.do_foreground; + } + else { + base_radius = coc_tile.bg_max_coc; + min_radius = coc_tile.bg_min_coc; + min_intersectable_radius = coc_tile.bg_min_intersectable_coc; + can_early_out = !prediction.do_background; + } + + bool do_fast_gather = dof_do_fast_gather(base_radius, min_radius, is_foreground); + + /* Gather at half 
resolution. Divide CoC by 2. */ + base_radius *= 0.5; + min_intersectable_radius *= 0.5; + + bool do_density_change = dof_do_density_change(base_radius, min_intersectable_radius); + + vec4 out_color; + float out_weight; + vec2 out_occlusion; + + if (can_early_out) { + out_color = vec4(0.0); + out_weight = 0.0; + out_occlusion = vec2(0.0, 0.0); + } + else if (do_fast_gather) { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + bokeh_lut_tx, + base_radius, + min_intersectable_radius, + true, + false, + out_color, + out_weight, + out_occlusion); + } + else if (do_density_change) { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + bokeh_lut_tx, + base_radius, + min_intersectable_radius, + false, + true, + out_color, + out_weight, + out_occlusion); + } + else { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + bokeh_lut_tx, + base_radius, + min_intersectable_radius, + false, + false, + out_color, + out_weight, + out_occlusion); + } + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + imageStore(out_color_img, out_texel, out_color); + imageStore(out_weight_img, out_texel, vec4(out_weight)); + imageStore(out_occlusion_img, out_texel, out_occlusion.xyxy); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl new file mode 100644 index 00000000000..5cdabbc2d4b --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl @@ -0,0 +1,70 @@ + +/** + * Holefill pass: Gather background parts where foreground is present. + * + * Using the min&max CoC tile buffer, we select the best appropriate method to blur the scene + *color. A fast gather path is taken if there is not many CoC variation inside the tile. + * + * We sample using an octaweb sampling pattern. 
We randomize the kernel center and each ring + * rotation to ensure maximum coverage. + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl) + +void main() +{ + ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy / DOF_TILES_SIZE); + CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co); + CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile); + + float base_radius = -coc_tile.fg_min_coc; + float min_radius = -coc_tile.fg_max_coc; + float min_intersectable_radius = dof_tile_large_coc; + bool can_early_out = !prediction.do_hole_fill; + + bool do_fast_gather = dof_do_fast_gather(base_radius, min_radius, is_foreground); + + /* Gather at half resolution. Divide CoC by 2. */ + base_radius *= 0.5; + min_intersectable_radius *= 0.5; + + bool do_density_change = dof_do_density_change(base_radius, min_intersectable_radius); + + vec4 out_color = vec4(0.0); + float out_weight = 0.0; + vec2 unused_occlusion = vec2(0.0, 0.0); + + if (can_early_out) { + /* Early out. */ + } + else if (do_fast_gather) { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + coc_tx, + base_radius, + min_intersectable_radius, + true, + false, + out_color, + out_weight, + unused_occlusion); + } + else { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + coc_tx, + base_radius, + min_intersectable_radius, + false, + false, + out_color, + out_weight, + unused_occlusion); + } + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + imageStore(out_color_img, out_texel, out_color); + imageStore(out_weight_img, out_texel, vec4(out_weight)); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl new file mode 100644 index 00000000000..f89da641446 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl @@ -0,0 +1,327 @@ + +/** + * Depth of Field utils. 
+ **/ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +/* -------------------------------------------------------------------- */ +/** \name Constants. + * \{ */ + +#ifndef DOF_SLIGHT_FOCUS_DENSITY +# define DOF_SLIGHT_FOCUS_DENSITY 2 +#endif + +#ifdef DOF_RESOLVE_PASS +const bool is_resolve = true; +#else +const bool is_resolve = false; +#endif +#ifdef DOF_FOREGROUND_PASS +const bool is_foreground = DOF_FOREGROUND_PASS; +#else +const bool is_foreground = false; +#endif +/* Debug options */ +const bool debug_gather_perf = false; +const bool debug_scatter_perf = false; +const bool debug_resolve_perf = false; + +const bool no_smooth_intersection = false; +const bool no_gather_occlusion = false; +const bool no_gather_mipmaps = false; +const bool no_gather_random = false; +const bool no_gather_filtering = false; +const bool no_scatter_occlusion = false; +const bool no_scatter_pass = false; +const bool no_foreground_pass = false; +const bool no_background_pass = false; +const bool no_slight_focus_pass = false; +const bool no_focus_pass = false; +const bool no_hole_fill_pass = false; + +/* Distribute weights between near/slightfocus/far fields (slide 117). */ +const float dof_layer_threshold = 4.0; +/* Make sure it overlaps. */ +const float dof_layer_offset_fg = 0.5 + 1.0; +/* Extra offset for convolution layers to avoid light leaking from background. */ +const float dof_layer_offset = 0.5 + 0.5; + +const int dof_max_slight_focus_radius = DOF_MAX_SLIGHT_FOCUS_RADIUS; + +const vec2 quad_offsets[4] = vec2[4]( + vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(0.5, -0.5), vec2(-0.5, -0.5)); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Weighting and downsampling utils. + * \{ */ + +float dof_hdr_color_weight(vec4 color) +{ + /* Very fast "luma" weighting. */ + float luma = (color.g * 2.0) + (color.r + color.b); + /* TODO(fclem) Pass correct exposure. 
*/ + const float exposure = 1.0; + return 1.0 / (luma * exposure + 4.0); +} + +float dof_coc_select(vec4 cocs) +{ + /* Select biggest coc. */ + float selected_coc = cocs.x; + if (abs(cocs.y) > abs(selected_coc)) { + selected_coc = cocs.y; + } + if (abs(cocs.z) > abs(selected_coc)) { + selected_coc = cocs.z; + } + if (abs(cocs.w) > abs(selected_coc)) { + selected_coc = cocs.w; + } + return selected_coc; +} + +/* NOTE: Do not forget to normalize weights afterwards. */ +vec4 dof_bilateral_coc_weights(vec4 cocs) +{ + float chosen_coc = dof_coc_select(cocs); + + const float scale = 4.0; /* TODO(fclem) revisit. */ + /* NOTE: The difference between the cocs should be inside a abs() function, + * but we follow UE4 implementation to improve how dithered transparency looks (see slide 19). */ + return saturate(1.0 - (chosen_coc - cocs) * scale); +} + +/* NOTE: Do not forget to normalize weights afterwards. */ +vec4 dof_bilateral_color_weights(vec4 colors[4]) +{ + vec4 weights; + for (int i = 0; i < 4; i++) { + weights[i] = dof_hdr_color_weight(colors[i]); + } + return weights; +} + +/* Returns signed Circle of confusion radius (in pixel) based on depth buffer value [0..1]. */ +float dof_coc_from_depth(DepthOfFieldData dof_data, vec2 uv, float depth) +{ + if (is_panoramic(dof_data.camera_type)) { + /* Use radial depth. */ + depth = -length(get_view_space_from_depth(uv, depth)); + } + else { + depth = get_view_z_from_depth(depth); + } + return coc_radius_from_camera_depth(dof_data, depth); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gather & Scatter Weighting + * \{ */ + +float dof_layer_weight(float coc, const bool is_foreground) +{ + /* NOTE: These are fullres pixel CoC value. */ + if (is_resolve) { + return saturate(-abs(coc) + dof_layer_threshold + dof_layer_offset) * + float(is_foreground ? (coc <= 0.5) : (coc > -0.5)); + } + else { + coc *= 2.0; /* Account for half pixel gather. 
*/ + float threshold = dof_layer_threshold - + ((is_foreground) ? dof_layer_offset_fg : dof_layer_offset); + return saturate(((is_foreground) ? -coc : coc) - threshold); + } +} +vec4 dof_layer_weight(vec4 coc) +{ + /* NOTE: Used for scatter pass which already flipped the sign correctly. */ + coc *= 2.0; /* Account for half pixel gather. */ + return saturate(coc - dof_layer_threshold + dof_layer_offset); +} + +/* NOTE: This is halfres CoC radius. */ +float dof_sample_weight(float coc) +{ +#if 1 /* Optimized */ + return min(1.0, 1.0 / sqr(coc)); +#else + /* Full intensity if CoC radius is below the pixel footprint. */ + const float min_coc = 1.0; + coc = max(min_coc, abs(coc)); + return (M_PI * min_coc * min_coc) / (M_PI * coc * coc); +#endif +} +vec4 dof_sample_weight(vec4 coc) +{ +#if 1 /* Optimized */ + return min(vec4(1.0), 1.0 / sqr(coc)); +#else + /* Full intensity if CoC radius is below the pixel footprint. */ + const float min_coc = 1.0; + coc = max(vec4(min_coc), abs(coc)); + return (M_PI * min_coc * min_coc) / (M_PI * coc * coc); +#endif +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Circle of Confusion tiles + * \{ */ + +struct CocTile { + float fg_min_coc; + float fg_max_coc; + float fg_max_intersectable_coc; + float bg_min_coc; + float bg_max_coc; + float bg_min_intersectable_coc; +}; + +/* WATCH: Might have to change depending on the texture format. */ +const float dof_tile_large_coc = 1024.0; + +/* Init a CoC tile for reduction algorithms. 
*/ +CocTile dof_coc_tile_init() +{ + CocTile tile; + tile.fg_min_coc = 0.0; + tile.fg_max_coc = -dof_tile_large_coc; + tile.fg_max_intersectable_coc = dof_tile_large_coc; + tile.bg_min_coc = dof_tile_large_coc; + tile.bg_max_coc = 0.0; + tile.bg_min_intersectable_coc = dof_tile_large_coc; + return tile; +} + +CocTile dof_coc_tile_unpack(vec3 fg, vec3 bg) +{ + CocTile tile; + tile.fg_min_coc = -fg.x; + tile.fg_max_coc = -fg.y; + tile.fg_max_intersectable_coc = -fg.z; + tile.bg_min_coc = bg.x; + tile.bg_max_coc = bg.y; + tile.bg_min_intersectable_coc = bg.z; + return tile; +} + +/* WORKAROUND(fclem): GLSL compilers differ in what qualifiers are required to pass images as + * parameters. Workaround by using defines. */ +#define dof_coc_tile_load(tiles_fg_img_, tiles_bg_img_, texel_) \ + dof_coc_tile_unpack( \ + imageLoad(tiles_fg_img_, clamp(texel_, ivec2(0), imageSize(tiles_fg_img_) - 1)).xyz, \ + imageLoad(tiles_bg_img_, clamp(texel_, ivec2(0), imageSize(tiles_bg_img_) - 1)).xyz) + +void dof_coc_tile_pack(CocTile tile, out vec3 out_fg, out vec3 out_bg) +{ + out_fg.x = -tile.fg_min_coc; + out_fg.y = -tile.fg_max_coc; + out_fg.z = -tile.fg_max_intersectable_coc; + out_bg.x = tile.bg_min_coc; + out_bg.y = tile.bg_max_coc; + out_bg.z = tile.bg_min_intersectable_coc; +} + +#define dof_coc_tile_store(tiles_fg_img_, tiles_bg_img_, texel_out_, tile_data_) \ + if (true) { \ + vec3 out_fg; \ + vec3 out_bg; \ + dof_coc_tile_pack(tile_data_, out_fg, out_bg); \ + imageStore(tiles_fg_img_, texel_out_, out_fg.xyzz); \ + imageStore(tiles_bg_img_, texel_out_, out_bg.xyzz); \ + } + +bool dof_do_fast_gather(float max_absolute_coc, float min_absolute_coc, const bool is_foreground) +{ + float min_weight = dof_layer_weight((is_foreground) ? -min_absolute_coc : min_absolute_coc, + is_foreground); + if (min_weight < 1.0) { + return false; + } + /* FIXME(fclem): This is a workaround to fast gather triggering too early. 
Since we use custom + * opacity mask, the opacity is not given to be 100% even for after normal threshold. */ + if (is_foreground && min_absolute_coc < dof_layer_threshold) { + return false; + } + return (max_absolute_coc - min_absolute_coc) < (DOF_FAST_GATHER_COC_ERROR * max_absolute_coc); +} + +struct CocTilePrediction { + bool do_foreground; + bool do_slight_focus; + bool do_focus; + bool do_background; + bool do_hole_fill; +}; + +/** + * Using the tile CoC infos, predict which convolutions are required and the ones that can be + * skipped. + */ +CocTilePrediction dof_coc_tile_prediction_get(CocTile tile) +{ + /* Based on tile value, predict what pass we need to load. */ + CocTilePrediction predict; + + predict.do_foreground = (-tile.fg_min_coc > dof_layer_threshold - dof_layer_offset_fg); + bool fg_fully_opaque = predict.do_foreground && + dof_do_fast_gather(-tile.fg_min_coc, -tile.fg_max_coc, true); + predict.do_background = !fg_fully_opaque && + (tile.bg_max_coc > dof_layer_threshold - dof_layer_offset); + bool bg_fully_opaque = predict.do_background && + dof_do_fast_gather(-tile.bg_max_coc, tile.bg_min_coc, false); + predict.do_hole_fill = !fg_fully_opaque && -tile.fg_min_coc > 0.0; + predict.do_focus = !fg_fully_opaque; + predict.do_slight_focus = !fg_fully_opaque; + +#if 0 /* Debug */ + predict.do_foreground = predict.do_background = predict.do_hole_fill = true; +#endif + return predict; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gathering + * \{ */ + +/** + * Generate samples in a square pattern with the ring radius. X is the center tile. + * + * Dist1 Dist2 + * 6 5 4 3 2 + * 3 2 1 7 1 + * . X 0 . X 0 + * . . . . . + * . . . . . + * + * Samples are expected to be mirrored to complete the pattern. 
+ **/ +ivec2 dof_square_ring_sample_offset(int ring_distance, int sample_id) +{ + ivec2 offset; + if (sample_id < ring_distance) { + offset.x = ring_distance; + offset.y = sample_id; + } + else if (sample_id < ring_distance * 3) { + offset.x = ring_distance - sample_id + ring_distance; + offset.y = ring_distance; + } + else { + offset.x = -ring_distance; + offset.y = ring_distance - sample_id + 3 * ring_distance; + } + return offset; +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl new file mode 100644 index 00000000000..a6426cd06e4 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl @@ -0,0 +1,247 @@ + +/** + * Reduce copy pass: filter fireflies and split color between scatter and gather input. + * + * NOTE: The texture can end up being too big because of the mipmap padding. We correct for + * that during the convolution phase. + * + * Inputs: + * - Output of setup pass (halfres) and reduce downsample pass (quarter res). + * Outputs: + * - Halfres padded to avoid mipmap misalignment (so possibly not matching input size). + * - Gather input color (whole mip chain), Scatter rect list, Signed CoC (whole mip chain). + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/* NOTE: Do not compare alpha as it is not scattered by the scatter pass. */ +float dof_scatter_neighborhood_rejection(vec3 color) +{ + color = min(vec3(dof_buf.scatter_neighbor_max_color), color); + + float validity = 0.0; + + /* Centered in the middle of 4 quarter res texel. 
*/ + vec2 texel_size = 1.0 / vec2(textureSize(downsample_tx, 0).xy); + vec2 uv = ((vec2(gl_GlobalInvocationID.xy) + 0.5) * 0.5) * texel_size; + + vec3 max_diff = vec3(0.0); + for (int i = 0; i < 4; i++) { + vec2 sample_uv = uv + quad_offsets[i] * texel_size; + vec3 ref = textureLod(downsample_tx, sample_uv, 0.0).rgb; + + ref = min(vec3(dof_buf.scatter_neighbor_max_color), ref); + float diff = max_v3(max(vec3(0.0), abs(ref - color))); + + const float rejection_threshold = 0.7; + diff = saturate(diff / rejection_threshold - 1.0); + validity = max(validity, diff); + } + + return validity; +} + +/* This avoids Bokeh sprite popping in and out at the screen border and + * drawing Bokeh sprites larger than the screen. */ +float dof_scatter_screen_border_rejection(float coc, ivec2 texel) +{ + vec2 screen_size = vec2(imageSize(inout_color_lod0_img)); + vec2 uv = (vec2(texel) + 0.5) / screen_size; + vec2 screen_pos = uv * screen_size; + float min_screen_border_distance = min_v2(min(screen_pos, screen_size - screen_pos)); + /* Fullres to halfres CoC. */ + coc *= 0.5; + /* Allow 10px transition. 
*/ + const float rejection_hardeness = 1.0 / 10.0; + return saturate((min_screen_border_distance - abs(coc)) * rejection_hardeness + 1.0); +} + +float dof_scatter_luminosity_rejection(vec3 color) +{ + const float rejection_hardness = 1.0; + return saturate(max_v3(color - dof_buf.scatter_color_threshold) * rejection_hardness); +} + +float dof_scatter_coc_radius_rejection(float coc) +{ + const float rejection_hardness = 0.3; + return saturate((abs(coc) - dof_buf.scatter_coc_threshold) * rejection_hardness); +} + +float fast_luma(vec3 color) +{ + return (2.0 * color.g) + color.r + color.b; +} + +const uint cache_size = gl_WorkGroupSize.x; +shared vec4 color_cache[cache_size][cache_size]; +shared float coc_cache[cache_size][cache_size]; +shared float do_scatter[cache_size][cache_size]; + +void main() +{ + ivec2 texel = min(ivec2(gl_GlobalInvocationID.xy), imageSize(inout_color_lod0_img) - 1); + uvec2 texel_local = gl_LocalInvocationID.xy; + /* Increase readability. */ +#define LOCAL_INDEX texel_local.y][texel_local.x +#define LOCAL_OFFSET(x_, y_) texel_local.y + (y_)][texel_local.x + (x_) + + /* Load level 0 into cache. */ + color_cache[LOCAL_INDEX] = imageLoad(inout_color_lod0_img, texel); + coc_cache[LOCAL_INDEX] = imageLoad(in_coc_lod0_img, texel).r; + + /* Only scatter if luminous enough. */ + do_scatter[LOCAL_INDEX] = dof_scatter_luminosity_rejection(color_cache[LOCAL_INDEX].rgb); + /* Only scatter if CoC is big enough. */ + do_scatter[LOCAL_INDEX] *= dof_scatter_coc_radius_rejection(coc_cache[LOCAL_INDEX]); + /* Only scatter if CoC is not too big to avoid performance issues. */ + do_scatter[LOCAL_INDEX] *= dof_scatter_screen_border_rejection(coc_cache[LOCAL_INDEX], texel); + /* Only scatter if neighborhood is different enough. */ + do_scatter[LOCAL_INDEX] *= dof_scatter_neighborhood_rejection(color_cache[LOCAL_INDEX].rgb); + /* For debugging. 
*/ + if (no_scatter_pass) { + do_scatter[LOCAL_INDEX] = 0.0; + } + + barrier(); + + /* Add a scatter sprite for each 2x2 pixel neighborhood passing the threshold. */ + if (all(equal(texel_local & 1u, uvec2(0)))) { + vec4 do_scatter4; + /* Follows quad_offsets order. */ + do_scatter4.x = do_scatter[LOCAL_OFFSET(0, 1)]; + do_scatter4.y = do_scatter[LOCAL_OFFSET(1, 1)]; + do_scatter4.z = do_scatter[LOCAL_OFFSET(1, 0)]; + do_scatter4.w = do_scatter[LOCAL_OFFSET(0, 0)]; + if (any(greaterThan(do_scatter4, vec4(0.0)))) { + /* Apply energy conservation to anamorphic scattered bokeh. */ + do_scatter4 *= max_v2(dof_buf.bokeh_anisotropic_scale_inv); + + /* Circle of Confusion. */ + vec4 coc4; + coc4.x = coc_cache[LOCAL_OFFSET(0, 1)]; + coc4.y = coc_cache[LOCAL_OFFSET(1, 1)]; + coc4.z = coc_cache[LOCAL_OFFSET(1, 0)]; + coc4.w = coc_cache[LOCAL_OFFSET(0, 0)]; + /* We are scattering at half resolution, so divide CoC by 2. */ + coc4 *= 0.5; + /* Sprite center position. Center sprite around the 4 texture taps. */ + vec2 offset = vec2(gl_GlobalInvocationID.xy) + 1; + /* Add 2.5 to max_coc because the max_coc may not be centered on the sprite origin + * and because we smooth the bokeh shape a bit in the pixel shader. */ + vec2 half_extent = max_v4(abs(coc4)) * dof_buf.bokeh_anisotropic_scale + 2.5; + /* Issue a sprite for each field if any CoC matches. */ + if (any(lessThan(do_scatter4 * sign(coc4), vec4(0.0)))) { + /* Same value for all threads. Not an issue if we don't sync access to it. */ + scatter_fg_indirect_buf.vertex_len = 4u; + /* Issue 1 strip instance per sprite. */ + uint rect_id = atomicAdd(scatter_fg_indirect_buf.instance_len, 1u); + if (rect_id < dof_buf.scatter_max_rect) { + + vec4 coc4_fg = max(vec4(0.0), -coc4); + vec4 fg_weights = dof_layer_weight(coc4_fg) * dof_sample_weight(coc4_fg) * do_scatter4; + /* Filter NaNs. 
*/ + fg_weights = select(fg_weights, vec4(0.0), equal(coc4_fg, vec4(0.0))); + + ScatterRect rect_fg; + rect_fg.offset = offset; + /* Negate extent to flip the sprite. Mimics optical phenomenon. */ + rect_fg.half_extent = -half_extent; + /* NOTE: Since we fliped the quad along (1,-1) line, we need to also swap the (1,1) and + * (0,0) values so that quad_offsets is in the right order in the vertex shader. */ + + /* Circle of Confusion absolute radius in halfres pixels. */ + rect_fg.color_and_coc[0].a = coc4_fg[0]; + rect_fg.color_and_coc[1].a = coc4_fg[3]; + rect_fg.color_and_coc[2].a = coc4_fg[2]; + rect_fg.color_and_coc[3].a = coc4_fg[1]; + /* Apply weights. */ + rect_fg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * fg_weights[0]; + rect_fg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * fg_weights[3]; + rect_fg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * fg_weights[2]; + rect_fg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * fg_weights[1]; + + scatter_fg_list_buf[rect_id] = rect_fg; + } + } + if (any(greaterThan(do_scatter4 * sign(coc4), vec4(0.0)))) { + /* Same value for all threads. Not an issue if we don't sync access to it. */ + scatter_bg_indirect_buf.vertex_len = 4u; + /* Issue 1 strip instance per sprite. */ + uint rect_id = atomicAdd(scatter_bg_indirect_buf.instance_len, 1u); + if (rect_id < dof_buf.scatter_max_rect) { + vec4 coc4_bg = max(vec4(0.0), coc4); + vec4 bg_weights = dof_layer_weight(coc4_bg) * dof_sample_weight(coc4_bg) * do_scatter4; + /* Filter NaNs. */ + bg_weights = select(bg_weights, vec4(0.0), equal(coc4_bg, vec4(0.0))); + + ScatterRect rect_bg; + rect_bg.offset = offset; + rect_bg.half_extent = half_extent; + + /* Circle of Confusion absolute radius in halfres pixels. */ + rect_bg.color_and_coc[0].a = coc4_bg[0]; + rect_bg.color_and_coc[1].a = coc4_bg[1]; + rect_bg.color_and_coc[2].a = coc4_bg[2]; + rect_bg.color_and_coc[3].a = coc4_bg[3]; + /* Apply weights. 
*/ + rect_bg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * bg_weights[0]; + rect_bg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * bg_weights[1]; + rect_bg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * bg_weights[2]; + rect_bg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * bg_weights[3]; + + scatter_bg_list_buf[rect_id] = rect_bg; + } + } + } + } + + /* Remove scatter color from gather. */ + color_cache[LOCAL_INDEX].rgb *= 1.0 - do_scatter[LOCAL_INDEX]; + imageStore(inout_color_lod0_img, texel, color_cache[LOCAL_INDEX]); + + /* Recursive downsample. */ + for (uint i = 1u; i < DOF_MIP_COUNT; i++) { + barrier(); + uint mask = ~(~0u << i); + if (all(equal(gl_LocalInvocationID.xy & mask, uvec2(0)))) { + uint ofs = 1u << (i - 1u); + + /* TODO(fclem): Could use wave shuffle intrinsics to avoid LDS as suggested by the paper. */ + vec4 coc4; + coc4.x = coc_cache[LOCAL_OFFSET(0, ofs)]; + coc4.y = coc_cache[LOCAL_OFFSET(ofs, ofs)]; + coc4.z = coc_cache[LOCAL_OFFSET(ofs, 0)]; + coc4.w = coc_cache[LOCAL_OFFSET(0, 0)]; + + vec4 colors[4]; + colors[0] = color_cache[LOCAL_OFFSET(0, ofs)]; + colors[1] = color_cache[LOCAL_OFFSET(ofs, ofs)]; + colors[2] = color_cache[LOCAL_OFFSET(ofs, 0)]; + colors[3] = color_cache[LOCAL_OFFSET(0, 0)]; + + vec4 weights = dof_bilateral_coc_weights(coc4); + weights *= dof_bilateral_color_weights(colors); + /* Normalize so that the sum is 1. 
*/ + weights *= safe_rcp(sum(weights)); + + color_cache[LOCAL_INDEX] = weighted_sum_array(colors, weights); + coc_cache[LOCAL_INDEX] = dot(coc4, weights); + + ivec2 texel = ivec2(gl_GlobalInvocationID.xy >> i); + + if (i == 1) { + imageStore(out_color_lod1_img, texel, color_cache[LOCAL_INDEX]); + imageStore(out_coc_lod1_img, texel, vec4(coc_cache[LOCAL_INDEX])); + } + else if (i == 2) { + imageStore(out_color_lod2_img, texel, color_cache[LOCAL_INDEX]); + imageStore(out_coc_lod2_img, texel, vec4(coc_cache[LOCAL_INDEX])); + } + else /* if (i == 3) */ { + imageStore(out_color_lod3_img, texel, color_cache[LOCAL_INDEX]); + imageStore(out_coc_lod3_img, texel, vec4(coc_cache[LOCAL_INDEX])); + } + } + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl new file mode 100644 index 00000000000..5123eb0c238 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl @@ -0,0 +1,178 @@ + +/** + * Recombine Pass: Load separate convolution layer and composite with self + * slight defocus convolution and in-focus fields. + * + * The halfres gather methods are fast but lack precision for small CoC areas. + * To fix this we do a bruteforce gather to have a smooth transition between + * in-focus and defocus regions. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl) + +shared uint shared_max_slight_focus_abs_coc; + +/** + * Returns The max CoC in the Slight Focus range inside this compute tile. + */ +float dof_slight_focus_coc_tile_get(vec2 frag_coord) +{ + if (all(equal(gl_LocalInvocationID, uvec3(0)))) { + shared_max_slight_focus_abs_coc = floatBitsToUint(0.0); + } + barrier(); + + float local_abs_max = 0.0; + /* Sample in a cross (X) pattern. This covers all pixels over the whole tile, as long as + * dof_max_slight_focus_radius is less than the group size. 
*/ + for (int i = 0; i < 4; i++) { + vec2 sample_uv = (frag_coord + quad_offsets[i] * 2.0 * dof_max_slight_focus_radius) / + vec2(textureSize(color_tx, 0)); + float coc = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r); + coc = clamp(coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max); + if (abs(coc) < dof_max_slight_focus_radius) { + local_abs_max = max(local_abs_max, abs(coc)); + } + } + /* Use atomic reduce operation. */ + atomicMax(shared_max_slight_focus_abs_coc, floatBitsToUint(local_abs_max)); + /* "Broadcast" result across all threads. */ + barrier(); + + return uintBitsToFloat(shared_max_slight_focus_abs_coc); +} + +vec3 dof_neighborhood_clamp(vec2 frag_coord, vec3 color, float center_coc, float weight) +{ + /* Stabilize color by clamping with the stable half res neighborhood. */ + vec3 neighbor_min, neighbor_max; + const vec2 corners[4] = vec2[4](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1)); + for (int i = 0; i < 4; i++) { + /** + * Visit the 4 half-res texels around (and containing) the fullres texel. + * Here a diagram of a fullscreen texel (f) in the bottom left corner of a half res texel. + * We sample the stable half-resolution texture at the 4 location denoted by (h). + * ┌───────┬───────┐ + * │ h │ h │ + * │ │ │ + * │ │ f │ + * ├───────┼───────┤ + * │ h │ h │ + * │ │ │ + * │ │ │ + * └───────┴───────┘ + */ + vec2 uv_sample = ((frag_coord + corners[i]) * 0.5) / vec2(textureSize(stable_color_tx, 0)); + /* Reminder: The content of this buffer is YCoCg + CoC. */ + vec3 ycocg_sample = textureLod(stable_color_tx, uv_sample, 0.0).rgb; + neighbor_min = (i == 0) ? ycocg_sample : min(neighbor_min, ycocg_sample); + neighbor_max = (i == 0) ? ycocg_sample : max(neighbor_max, ycocg_sample); + } + /* Pad the bounds in the near in focus region to get back a bit of detail. 
*/ + float padding = 0.125 * saturate(1.0 - sqr(center_coc) / sqr(8.0)); + neighbor_max += abs(neighbor_min) * padding; + neighbor_min -= abs(neighbor_min) * padding; + /* Progressively apply the clamp to avoid harsh transition. Also mask by weight. */ + float fac = saturate(sqr(center_coc) * 4.0) * weight; + /* Clamp in YCoCg space to avoid too much color drift. */ + color = colorspace_YCoCg_from_scene_linear(color); + color = mix(color, clamp(color, neighbor_min, neighbor_max), fac); + color = colorspace_scene_linear_from_YCoCg(color); + return color; +} + +void main() +{ + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5; + ivec2 tile_co = ivec2(frag_coord / float(DOF_TILES_SIZE * 2)); + + CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co); + CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile); + + vec2 uv = frag_coord / vec2(textureSize(color_tx, 0)); + vec2 uv_halfres = (frag_coord * 0.5) / vec2(textureSize(color_bg_tx, 0)); + + float slight_focus_max_coc = 0.0; + if (prediction.do_slight_focus) { + slight_focus_max_coc = dof_slight_focus_coc_tile_get(frag_coord); + prediction.do_slight_focus = slight_focus_max_coc >= 0.5; + if (prediction.do_slight_focus) { + prediction.do_focus = false; + } + } + + if (prediction.do_focus) { + float center_coc = (dof_coc_from_depth(dof_buf, uv, textureLod(depth_tx, uv, 0.0).r)); + prediction.do_focus = abs(center_coc) <= 0.5; + } + + vec4 out_color = vec4(0.0); + float weight = 0.0; + + vec4 layer_color; + float layer_weight; + + if (!no_hole_fill_pass && prediction.do_hole_fill) { + layer_color = textureLod(color_hole_fill_tx, uv_halfres, 0.0); + layer_weight = textureLod(weight_hole_fill_tx, uv_halfres, 0.0).r; + out_color = layer_color * safe_rcp(layer_weight); + weight = float(layer_weight > 0.0); + } + + if (!no_background_pass && prediction.do_background) { + layer_color = textureLod(color_bg_tx, uv_halfres, 0.0); + layer_weight = textureLod(weight_bg_tx, uv_halfres, 
0.0).r; + /* Always prefer background to hole_fill pass. */ + layer_color *= safe_rcp(layer_weight); + layer_weight = float(layer_weight > 0.0); + /* Composite background. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + /* Fill holes with the composited background. */ + out_color *= safe_rcp(weight); + weight = float(weight > 0.0); + } + + if (!no_slight_focus_pass && prediction.do_slight_focus) { + float center_coc; + dof_slight_focus_gather(depth_tx, + color_tx, + bokeh_lut_tx, + slight_focus_max_coc, + layer_color, + layer_weight, + center_coc); + + /* Composite slight defocus. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + + out_color.rgb = dof_neighborhood_clamp(frag_coord, out_color.rgb, center_coc, layer_weight); + } + + if (!no_focus_pass && prediction.do_focus) { + layer_color = safe_color(textureLod(color_tx, uv, 0.0)); + layer_weight = 1.0; + /* Composite in focus. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + } + + if (!no_foreground_pass && prediction.do_foreground) { + layer_color = textureLod(color_fg_tx, uv_halfres, 0.0); + layer_weight = textureLod(weight_fg_tx, uv_halfres, 0.0).r; + /* Composite foreground. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + } + + /* Fix float precision issue in alpha compositing. 
*/ + if (out_color.a > 0.99) { + out_color.a = 1.0; + } + + if (debug_resolve_perf && prediction.do_slight_focus) { + out_color.rgb *= vec3(1.0, 0.1, 0.1); + } + + imageStore(out_color_img, ivec2(gl_GlobalInvocationID.xy), out_color); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl new file mode 100644 index 00000000000..cfb7fd2568b --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl @@ -0,0 +1,62 @@ + +/** + * Scatter pass: Use sprites to scatter the color of very bright pixel to have higher quality blur. + * + * We only scatter one quad per sprite and one sprite per 4 pixels to reduce vertex shader + * invocations and overdraw. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +#define linearstep(p0, p1, v) (clamp(((v) - (p0)) / abs((p1) - (p0)), 0.0, 1.0)) + +void main() +{ + vec4 coc4 = vec4(interp.color_and_coc1.w, + interp.color_and_coc2.w, + interp.color_and_coc3.w, + interp.color_and_coc4.w); + vec4 shapes; + if (use_bokeh_lut) { + shapes = vec4(texture(bokeh_lut_tx, interp.rect_uv1).r, + texture(bokeh_lut_tx, interp.rect_uv2).r, + texture(bokeh_lut_tx, interp.rect_uv3).r, + texture(bokeh_lut_tx, interp.rect_uv4).r); + } + else { + shapes = vec4(length(interp.rect_uv1), + length(interp.rect_uv2), + length(interp.rect_uv3), + length(interp.rect_uv4)); + } + shapes *= interp.distance_scale; + /* Becomes signed distance field in pixel units. */ + shapes -= coc4; + /* Smooth the edges a bit to fade out the undersampling artifacts. */ + shapes = saturate(1.0 - linearstep(-0.8, 0.8, shapes)); + /* Outside of bokeh shape. Try to avoid overloading ROPs. */ + if (max_v4(shapes) == 0.0) { + discard; + } + + if (!no_scatter_occlusion) { + /* Works because target is the same size as occlusion_tx. 
*/ + vec2 uv = gl_FragCoord.xy / vec2(textureSize(occlusion_tx, 0).xy); + vec2 occlusion_data = texture(occlusion_tx, uv).rg; + /* Fix tiling artifacts. (Slide 90) */ + const float correction_fac = 1.0 - DOF_FAST_GATHER_COC_ERROR; + /* Occlude the sprite with geometry from the same field using a chebychev test (slide 85). */ + float mean = occlusion_data.x; + float variance = occlusion_data.y; + shapes *= variance * safe_rcp(variance + sqr(max(coc4 * correction_fac - mean, 0.0))); + } + + out_color = (interp.color_and_coc1 * shapes[0] + interp.color_and_coc2 * shapes[1] + + interp.color_and_coc3 * shapes[2] + interp.color_and_coc4 * shapes[3]); + /* Do not accumulate alpha. This has already been accumulated by the gather pass. */ + out_color.a = 0.0; + + if (debug_scatter_perf) { + out_color.rgb = avg(out_color.rgb) * vec3(1.0, 0.0, 0.0); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl new file mode 100644 index 00000000000..d870496a06c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl @@ -0,0 +1,45 @@ + +/** + * Scatter pass: Use sprites to scatter the color of very bright pixel to have higher quality blur. + * + * We only scatter one triangle per sprite and one sprite per 4 pixels to reduce vertex shader + * invocations and overdraw. + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +void main() +{ + ScatterRect rect = scatter_list_buf[gl_InstanceID]; + + interp.color_and_coc1 = rect.color_and_coc[0]; + interp.color_and_coc2 = rect.color_and_coc[1]; + interp.color_and_coc3 = rect.color_and_coc[2]; + interp.color_and_coc4 = rect.color_and_coc[3]; + + vec2 uv = vec2(gl_VertexID & 1, gl_VertexID >> 1) * 2.0 - 1.0; + uv = uv * rect.half_extent; + + gl_Position = vec4(uv + rect.offset, 0.0, 1.0); + /* NDC range [-1..1]. 
*/ + gl_Position.xy = (gl_Position.xy / vec2(textureSize(occlusion_tx, 0).xy)) * 2.0 - 1.0; + + if (use_bokeh_lut) { + /* Bias scale to avoid sampling at the texture's border. */ + interp.distance_scale = (float(DOF_BOKEH_LUT_SIZE) / float(DOF_BOKEH_LUT_SIZE - 1)); + vec2 uv_div = 1.0 / (interp.distance_scale * abs(rect.half_extent)); + interp.rect_uv1 = ((uv + quad_offsets[0]) * uv_div) * 0.5 + 0.5; + interp.rect_uv2 = ((uv + quad_offsets[1]) * uv_div) * 0.5 + 0.5; + interp.rect_uv3 = ((uv + quad_offsets[2]) * uv_div) * 0.5 + 0.5; + interp.rect_uv4 = ((uv + quad_offsets[3]) * uv_div) * 0.5 + 0.5; + /* Only for sampling. */ + interp.distance_scale *= max_v2(abs(rect.half_extent)); + } + else { + interp.distance_scale = 1.0; + interp.rect_uv1 = uv + quad_offsets[0]; + interp.rect_uv2 = uv + quad_offsets[1]; + interp.rect_uv3 = uv + quad_offsets[2]; + interp.rect_uv4 = uv + quad_offsets[3]; + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl new file mode 100644 index 00000000000..c017a5aa965 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl @@ -0,0 +1,46 @@ + +/** + * Setup pass: CoC and luma aware downsample to half resolution of the input scene color buffer. + * + * An addition to the downsample CoC, we output the maximum slight out of focus CoC to be + * sure we don't miss a pixel. + * + * Input: + * Full-resolution color & depth buffer + * Output: + * Half-resolution Color, signed CoC (out_coc.x), and max slight focus abs CoC (out_coc.y). + **/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +void main() +{ + vec2 fullres_texel_size = 1.0 / vec2(textureSize(color_tx, 0).xy); + /* Center uv around the 4 fullres pixels. 
*/ + vec2 quad_center = vec2(gl_GlobalInvocationID.xy * 2 + 1) * fullres_texel_size; + + vec4 colors[4]; + vec4 cocs; + for (int i = 0; i < 4; i++) { + vec2 sample_uv = quad_center + quad_offsets[i] * fullres_texel_size; + /* NOTE: We use samplers without filtering. */ + colors[i] = safe_color(textureLod(color_tx, sample_uv, 0.0)); + cocs[i] = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r); + } + + cocs = clamp(cocs, -dof_buf.coc_abs_max, dof_buf.coc_abs_max); + + vec4 weights = dof_bilateral_coc_weights(cocs); + weights *= dof_bilateral_color_weights(colors); + /* Normalize so that the sum is 1. */ + weights *= safe_rcp(sum(weights)); + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + vec4 out_color = weighted_sum_array(colors, weights); + imageStore(out_color_img, out_texel, out_color); + + float out_coc = dot(cocs, weights); + imageStore(out_coc_img, out_texel, vec4(out_coc)); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl new file mode 100644 index 00000000000..46a25b84840 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl @@ -0,0 +1,367 @@ + +/** + * Temporal Stabilization of the Depth of field input. + * Corresponds to the TAA pass in the paper. + * We actually duplicate the TAA logic but with a few changes: + * - We run this pass at half resolution. + * - We store CoC instead of Opacity in the alpha channel of the history. + * + * This is an adaptation of the code found in eevee_film_lib.glsl + * + * Inputs: + * - Output of setup pass (halfres). + * Outputs: + * - Stabilized Color and CoC (halfres). 
+ **/ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) + +struct DofSample { + vec4 color; + float coc; +}; + +/* -------------------------------------------------------------------- */ +/** \name LDS Cache + * \{ */ + +const uint cache_size = gl_WorkGroupSize.x + 2; +shared vec4 color_cache[cache_size][cache_size]; +shared float coc_cache[cache_size][cache_size]; +/* Need 2 pixel border for depth. */ +const uint cache_depth_size = gl_WorkGroupSize.x + 4; +shared float depth_cache[cache_depth_size][cache_depth_size]; + +void dof_cache_init() +{ + /** + * Load enough values into LDS to perform the filter. + * + * ┌──────────────────────────────┐ + * │ │ < Border texels that needs to be loaded. + * │ x x x x x x x x │ ─┐ + * │ x x x x x x x x │ │ + * │ x x x x x x x x │ │ + * │ x x x x x x x x │ │ Thread Group Size 8x8. + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ ─┘ + * │ L L L L L │ < Border texels that needs to be loaded. + * └──────────────────────────────┘ + * └───────────┘ + * Load using 5x5 threads. + */ + + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + for (int y = 0; y < 2; y++) { + for (int x = 0; x < 2; x++) { + /* 1 Pixel border. */ + if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_size / 2u)))) { + ivec2 offset = ivec2(x, y) * ivec2(cache_size / 2u); + ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset; + ivec2 load_texel = clamp(texel + offset - 1, ivec2(0), textureSize(color_tx, 0) - 1); + + vec4 color = texelFetch(color_tx, load_texel, 0); + color_cache[cache_texel.y][cache_texel.x] = colorspace_YCoCg_from_scene_linear(color); + coc_cache[cache_texel.y][cache_texel.x] = texelFetch(coc_tx, load_texel, 0).x; + } + /* 2 Pixels border. 
*/ + if (all(lessThan(gl_LocalInvocationID.xy, uvec2(cache_depth_size / 2u)))) { + ivec2 offset = ivec2(x, y) * ivec2(cache_depth_size / 2u); + ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + offset; + /* Depth is fullres. Load every 2 pixels. */ + ivec2 load_texel = clamp((texel + offset - 2) * 2, ivec2(0), textureSize(depth_tx, 0) - 1); + + depth_cache[cache_texel.y][cache_texel.x] = texelFetch(depth_tx, load_texel, 0).x; + } + } + } + barrier(); +} + +/* NOTE: Sample color space is already in YCoCg space. */ +DofSample dof_fetch_input_sample(ivec2 offset) +{ + ivec2 coord = offset + 1 + ivec2(gl_LocalInvocationID.xy); + return DofSample(color_cache[coord.y][coord.x], coc_cache[coord.y][coord.x]); +} + +float dof_fetch_half_depth(ivec2 offset) +{ + ivec2 coord = offset + 2 + ivec2(gl_LocalInvocationID.xy); + return depth_cache[coord.y][coord.x]; +} + +/** \} */ + +float dof_luma_weight(float luma) +{ + /* Slide 20 of "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014. */ + /* To preserve more details in dark areas, we use a bigger bias. */ + const float exposure_scale = 1.0; /* TODO. */ + return 1.0 / (4.0 + luma * exposure_scale); +} + +float dof_bilateral_weight(float reference_coc, float sample_coc) +{ + /* NOTE: The difference between the cocs should be inside a abs() function, + * but we follow UE4 implementation to improve how dithered transparency looks (see slide 19). + * Effectively bleed background into foreground. + * Compared to dof_bilateral_coc_weights() this saturates as 2x the reference CoC. */ + return saturate(1.0 - (sample_coc - reference_coc) / max(1.0, abs(reference_coc))); +} + +DofSample dof_spatial_filtering() +{ + /* Plus (+) shape offsets. 
*/ + const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1)); + DofSample center = dof_fetch_input_sample(ivec2(0)); + DofSample accum = DofSample(vec4(0.0), 0.0); + float accum_weight = 0.0; + for (int i = 0; i < 4; i++) { + DofSample samp = dof_fetch_input_sample(plus_offsets[i]); + float weight = dof_buf.filter_samples_weight[i] * dof_luma_weight(samp.color.x) * + dof_bilateral_weight(center.coc, samp.coc); + + accum.color += samp.color * weight; + accum.coc += samp.coc * weight; + accum_weight += weight; + } + /* Accumulate center sample last as it does not need bilateral_weights. */ + float weight = dof_buf.filter_center_weight * dof_luma_weight(center.color.x); + accum.color += center.color * weight; + accum.coc += center.coc * weight; + accum_weight += weight; + + float rcp_weight = 1.0 / accum_weight; + accum.color *= rcp_weight; + accum.coc *= rcp_weight; + return accum; +} + +struct DofNeighborhoodMinMax { + DofSample min; + DofSample max; +}; + +/* Return history clipping bounding box in YCoCg color space. */ +DofNeighborhoodMinMax dof_neighbor_boundbox() +{ + /* Plus (+) shape offsets. */ + const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1)); + /** + * Simple bounding box calculation in YCoCg as described in: + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 + */ + DofSample min_c = dof_fetch_input_sample(ivec2(0)); + DofSample max_c = min_c; + for (int i = 0; i < 4; i++) { + DofSample samp = dof_fetch_input_sample(plus_offsets[i]); + min_c.color = min(min_c.color, samp.color); + max_c.color = max(max_c.color, samp.color); + min_c.coc = min(min_c.coc, samp.coc); + max_c.coc = max(max_c.coc, samp.coc); + } + /* (Slide 32) Simple clamp to min/max of 8 neighbors results in 3x3 box artifacts. + * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. 
*/ + DofSample min_c_3x3 = min_c; + DofSample max_c_3x3 = max_c; + const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1)); + for (int i = 0; i < 4; i++) { + DofSample samp = dof_fetch_input_sample(corners[i]); + min_c_3x3.color = min(min_c_3x3.color, samp.color); + max_c_3x3.color = max(max_c_3x3.color, samp.color); + min_c_3x3.coc = min(min_c_3x3.coc, samp.coc); + max_c_3x3.coc = max(max_c_3x3.coc, samp.coc); + } + min_c.color = (min_c.color + min_c_3x3.color) * 0.5; + max_c.color = (max_c.color + max_c_3x3.color) * 0.5; + min_c.coc = (min_c.coc + min_c_3x3.coc) * 0.5; + max_c.coc = (max_c.coc + max_c_3x3.coc) * 0.5; + + return DofNeighborhoodMinMax(min_c, max_c); +} + +/* Returns motion in pixel space to retrieve the pixel history. */ +vec2 dof_pixel_history_motion_vector(ivec2 texel_sample) +{ + /** + * Dilate velocity by using the nearest pixel in a cross pattern. + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 27) + */ + const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2)); + float min_depth = dof_fetch_half_depth(ivec2(0)); + ivec2 nearest_texel = ivec2(0); + for (int i = 0; i < 4; i++) { + float depth = dof_fetch_half_depth(corners[i]); + if (min_depth > depth) { + min_depth = depth; + nearest_texel = corners[i]; + } + } + /* Convert to full resolution buffer pixel. */ + ivec2 velocity_texel = (texel_sample + nearest_texel) * 2; + velocity_texel = clamp(velocity_texel, ivec2(0), textureSize(velocity_tx, 0).xy - 1); + vec4 vector = velocity_resolve(velocity_tx, velocity_texel, min_depth); + /* Transform to **half** pixel space. */ + return vector.xy * vec2(textureSize(color_tx, 0)); +} + +/* Load color using a special filter to avoid losing detail. + * \a texel is sample position with subpixel accuracy. */ +DofSample dof_sample_history(vec2 input_texel) +{ +#if 1 /* Bilinar. 
*/ + vec2 uv = vec2(input_texel + 0.5) / textureSize(in_history_tx, 0); + vec4 color = textureLod(in_history_tx, uv, 0.0); + +#else /* Catmull Rom interpolation. 5 Bilinear Taps. */ + vec2 center_texel; + vec2 inter_texel = modf(input_texel, center_texel); + vec2 weights[4]; + film_get_catmull_rom_weights(inter_texel, weights); + + /** + * Use optimized version by leveraging bilinear filtering from hardware sampler and by removing + * corner taps. + * From "Filmic SMAA" by Jorge Jimenez at Siggraph 2016 + * http://advances.realtimerendering.com/s2016/Filmic%20SMAA%20v7.pptx + */ + center_texel += 0.5; + + /* Slide 92. */ + vec2 weight_12 = weights[1] + weights[2]; + vec2 uv_12 = (center_texel + weights[2] / weight_12) * film_buf.extent_inv; + vec2 uv_0 = (center_texel - 1.0) * film_buf.extent_inv; + vec2 uv_3 = (center_texel + 2.0) * film_buf.extent_inv; + + vec4 color; + vec4 weight_cross = weight_12.xyyx * vec4(weights[0].yx, weights[3].xy); + float weight_center = weight_12.x * weight_12.y; + + color = textureLod(in_history_tx, uv_12, 0.0) * weight_center; + color += textureLod(in_history_tx, vec2(uv_12.x, uv_0.y), 0.0) * weight_cross.x; + color += textureLod(in_history_tx, vec2(uv_0.x, uv_12.y), 0.0) * weight_cross.y; + color += textureLod(in_history_tx, vec2(uv_3.x, uv_12.y), 0.0) * weight_cross.z; + color += textureLod(in_history_tx, vec2(uv_12.x, uv_3.y), 0.0) * weight_cross.w; + /* Re-normalize for the removed corners. */ + color /= (weight_center + sum(weight_cross)); +#endif + /* NOTE(fclem): Opacity is wrong on purpose. Final Opacity does not rely on history. */ + return DofSample(color.xyzz, color.w); +} + +/* Modulate the history color to avoid ghosting artifact. */ +DofSample dof_amend_history(DofNeighborhoodMinMax bbox, DofSample history, DofSample src) +{ +#if 0 + /* Clip instead of clamping to avoid color accumulating in the AABB corners. 
*/ + vec3 clip_dir = src.color.rgb - history.color.rgb; + + float t = line_aabb_clipping_dist( + history.color.rgb, clip_dir, bbox.min.color.rgb, bbox.max.color.rgb); + history.color.rgb += clip_dir * saturate(t); +#else + /* More responsive. */ + history.color = clamp(history.color, bbox.min.color, bbox.max.color); +#endif + /* Clamp CoC to reduce convergence time. Otherwise the result is laggy. */ + history.coc = clamp(history.coc, bbox.min.coc, bbox.max.coc); + + return history; +} + +float dof_history_blend_factor( + float velocity, vec2 texel, DofNeighborhoodMinMax bbox, DofSample src, DofSample dst) +{ + float luma_min = bbox.min.color.x; + float luma_max = bbox.max.color.x; + float luma_incoming = src.color.x; + float luma_history = dst.color.x; + + /* 5% of incoming color by default. */ + float blend = 0.05; + /* Blend less history if the pixel has substantial velocity. */ + /* NOTE(fclem): velocity threshold multiplied by 2 because of half resolution. */ + blend = mix(blend, 0.20, saturate(velocity * 0.02 * 2.0)); + /** + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 43) + * Bias towards history if incoming pixel is near clamping. Reduces flicker. + */ + float distance_to_luma_clip = min_v2(vec2(luma_history - luma_min, luma_max - luma_history)); + /* Divide by bbox size to get a factor. 2 factor to compensate the line above. */ + distance_to_luma_clip *= 2.0 * safe_rcp(luma_max - luma_min); + /* Linearly blend when history gets below to 25% of the bbox size. */ + blend *= saturate(distance_to_luma_clip * 4.0 + 0.1); + /* Progressively discard history until history CoC is twice as big as the filtered CoC. + * Note we use absolute diff here because we are not comparing neighbors and thus do not risk to + * dilate thin features like hair (slide 19). */ + float coc_diff_ratio = saturate(abs(src.coc - dst.coc) / max(1.0, abs(src.coc))); + blend = mix(blend, 1.0, coc_diff_ratio); + /* Discard out of view history. 
*/ + if (any(lessThan(texel, vec2(0))) || + any(greaterThanEqual(texel, vec2(imageSize(out_history_img))))) { + blend = 1.0; + } + /* Discard history if invalid. */ + if (use_history == false) { + blend = 1.0; + } + return blend; +} + +void main() +{ + dof_cache_init(); + + ivec2 src_texel = ivec2(gl_GlobalInvocationID.xy); + + /** + * Naming convention is taken from the film implementation. + * SRC is incoming new data. + * DST is history data. + */ + DofSample src = dof_spatial_filtering(); + + /* Reproject by finding where this pixel was in the previous frame. */ + vec2 motion = dof_pixel_history_motion_vector(src_texel); + vec2 history_texel = vec2(src_texel) + motion; + + float velocity = length(motion); + + DofSample dst = dof_sample_history(history_texel); + + /* Get local color bounding box of source neighborhood. */ + DofNeighborhoodMinMax bbox = dof_neighbor_boundbox(); + + float blend = dof_history_blend_factor(velocity, history_texel, bbox, src, dst); + + dst = dof_amend_history(bbox, dst, src); + + /* Luma weighted blend to reduce flickering. */ + float weight_dst = dof_luma_weight(dst.color.x) * (1.0 - blend); + float weight_src = dof_luma_weight(src.color.x) * (blend); + + DofSample result; + /* Weighted blend. */ + result.color = vec4(dst.color.rgb, dst.coc) * weight_dst + + vec4(src.color.rgb, src.coc) * weight_src; + result.color /= weight_src + weight_dst; + + /* Save history for next iteration. Still in YCoCg space with CoC in alpha. */ + imageStore(out_history_img, src_texel, result.color); + + /* Un-swizzle. */ + result.coc = result.color.a; + /* Clamp opacity since we don't store it in history. 
*/ + result.color.a = clamp(src.color.a, bbox.min.color.a, bbox.max.color.a); + + result.color = colorspace_scene_linear_from_YCoCg(result.color); + + imageStore(out_color_img, src_texel, result.color); + imageStore(out_coc_img, src_texel, vec4(result.coc)); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl new file mode 100644 index 00000000000..dba8b5fd79d --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl @@ -0,0 +1,97 @@ + +/** + * Tile dilate pass: Takes the 8x8 Tiles buffer and dilates the tiles with large CoC to + * their neighborhood. This pass is repeated multiple times until the maximum CoC can be covered. + * + * Input & Output: + * - Separated foreground and background CoC. 1/8th of half-res resolution. So 1/16th of full-res. + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/* Error introduced by the random offset of the gathering kernel's center. */ +const float bluring_radius_error = 1.0 + 1.0 / (float(DOF_GATHER_RING_COUNT) + 0.5); +const float tile_to_fullres_factor = float(DOF_TILES_SIZE * 2); + +void main() +{ + ivec2 center_tile_pos = ivec2(gl_GlobalInvocationID.xy); + + CocTile ring_buckets[DOF_DILATE_RING_COUNT]; + + for (int ring = 0; ring < ring_count && ring < DOF_DILATE_RING_COUNT; ring++) { + ring_buckets[ring] = dof_coc_tile_init(); + + int ring_distance = ring + 1; + for (int sample_id = 0; sample_id < 4 * ring_distance; sample_id++) { + ivec2 offset = dof_square_ring_sample_offset(ring_distance, sample_id); + + offset *= ring_width_multiplier; + + for (int i = 0; i < 2; i++) { + ivec2 adj_tile_pos = center_tile_pos + ((i == 0) ? 
offset : -offset); + + CocTile adj_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, adj_tile_pos); + + if (DILATE_MODE_MIN_MAX) { + /* Actually gather the "absolute" biggest coc but keeping the sign. */ + ring_buckets[ring].fg_min_coc = min(ring_buckets[ring].fg_min_coc, adj_tile.fg_min_coc); + ring_buckets[ring].bg_max_coc = max(ring_buckets[ring].bg_max_coc, adj_tile.bg_max_coc); + } + else { /* DILATE_MODE_MIN_ABS */ + ring_buckets[ring].fg_max_coc = max(ring_buckets[ring].fg_max_coc, adj_tile.fg_max_coc); + ring_buckets[ring].bg_min_coc = min(ring_buckets[ring].bg_min_coc, adj_tile.bg_min_coc); + + /* Should be tight as possible to reduce gather overhead (see slide 61). */ + float closest_neighbor_distance = length(max(abs(vec2(offset)) - 1.0, 0.0)) * + tile_to_fullres_factor; + + ring_buckets[ring].fg_max_intersectable_coc = max( + ring_buckets[ring].fg_max_intersectable_coc, + adj_tile.fg_max_intersectable_coc + closest_neighbor_distance); + ring_buckets[ring].bg_min_intersectable_coc = min( + ring_buckets[ring].bg_min_intersectable_coc, + adj_tile.bg_min_intersectable_coc + closest_neighbor_distance); + } + } + } + } + + /* Load center tile. */ + CocTile out_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, center_tile_pos); + + for (int ring = 0; ring < ring_count && ring < DOF_DILATE_RING_COUNT; ring++) { + float ring_distance = float(ring + 1); + + ring_distance = (ring_distance * ring_width_multiplier - 1) * tile_to_fullres_factor; + + if (DILATE_MODE_MIN_MAX) { + /* NOTE(fclem): Unsure if both sides of the inequalities have the same unit. 
*/ + if (-ring_buckets[ring].fg_min_coc * bluring_radius_error > ring_distance) { + out_tile.fg_min_coc = min(out_tile.fg_min_coc, ring_buckets[ring].fg_min_coc); + } + + if (ring_buckets[ring].bg_max_coc * bluring_radius_error > ring_distance) { + out_tile.bg_max_coc = max(out_tile.bg_max_coc, ring_buckets[ring].bg_max_coc); + } + } + else { /* DILATE_MODE_MIN_ABS */ + /* Find minimum absolute CoC radii that will be intersected for the previously + * computed maximum CoC values. */ + if (-out_tile.fg_min_coc * bluring_radius_error > ring_distance) { + out_tile.fg_max_coc = max(out_tile.fg_max_coc, ring_buckets[ring].fg_max_coc); + out_tile.fg_max_intersectable_coc = max(out_tile.fg_max_intersectable_coc, + ring_buckets[ring].fg_max_intersectable_coc); + } + + if (out_tile.bg_max_coc * bluring_radius_error > ring_distance) { + out_tile.bg_min_coc = min(out_tile.bg_min_coc, ring_buckets[ring].bg_min_coc); + out_tile.bg_min_intersectable_coc = min(out_tile.bg_min_intersectable_coc, + ring_buckets[ring].bg_min_intersectable_coc); + } + } + } + + ivec2 texel_out = ivec2(gl_GlobalInvocationID.xy); + dof_coc_tile_store(out_tiles_fg_img, out_tiles_bg_img, texel_out, out_tile); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl new file mode 100644 index 00000000000..88737ade386 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl @@ -0,0 +1,78 @@ + +/** + * Tile flatten pass: Takes the halfres CoC buffer and converts it to 8x8 tiles. + * + * Output min and max values for each tile and for both foreground & background. + * Also outputs min intersectable CoC for the background, which is the minimum CoC + * that comes from the background pixels. + * + * Input: + * - Half-resolution Circle of confusion. Out of setup pass. 
+ * Output: + * - Separated foreground and background CoC. 1/8th of half-res resolution. So 1/16th of full-res. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/** + * In order to use atomic operations, we have to use uints. But this means having to deal with the + * negative number ourselves. Luckily, each group has a nicely defined range of values we can + * remap to positive float. + */ +shared uint fg_min_coc; +shared uint fg_max_coc; +shared uint fg_max_intersectable_coc; +shared uint bg_min_coc; +shared uint bg_max_coc; +shared uint bg_min_intersectable_coc; + +const uint dof_tile_large_coc_uint = floatBitsToUint(dof_tile_large_coc); + +void main() +{ + if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) { + /* NOTE: Min/Max flipped because of inverted fg_coc sign. */ + fg_min_coc = floatBitsToUint(0.0); + fg_max_coc = dof_tile_large_coc_uint; + fg_max_intersectable_coc = dof_tile_large_coc_uint; + bg_min_coc = dof_tile_large_coc_uint; + bg_max_coc = floatBitsToUint(0.0); + bg_min_intersectable_coc = dof_tile_large_coc_uint; + } + barrier(); + + ivec2 sample_texel = min(ivec2(gl_GlobalInvocationID.xy), textureSize(coc_tx, 0).xy - 1); + vec2 sample_data = texelFetch(coc_tx, sample_texel, 0).rg; + + float sample_coc = sample_data.x; + uint fg_coc = floatBitsToUint(max(-sample_coc, 0.0)); + /* NOTE: atomicMin/Max flipped because of inverted fg_coc sign. */ + atomicMax(fg_min_coc, fg_coc); + atomicMin(fg_max_coc, fg_coc); + atomicMin(fg_max_intersectable_coc, (sample_coc < 0.0) ? fg_coc : dof_tile_large_coc_uint); + + uint bg_coc = floatBitsToUint(max(sample_coc, 0.0)); + atomicMin(bg_min_coc, bg_coc); + atomicMax(bg_max_coc, bg_coc); + atomicMin(bg_min_intersectable_coc, (sample_coc > 0.0) ? 
bg_coc : dof_tile_large_coc_uint); + + barrier(); + + if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) { + if (fg_max_intersectable_coc == dof_tile_large_coc_uint) { + fg_max_intersectable_coc = floatBitsToUint(0.0); + } + + CocTile tile; + /* Foreground sign is flipped since we compare unsigned representation. */ + tile.fg_min_coc = -uintBitsToFloat(fg_min_coc); + tile.fg_max_coc = -uintBitsToFloat(fg_max_coc); + tile.fg_max_intersectable_coc = -uintBitsToFloat(fg_max_intersectable_coc); + tile.bg_min_coc = uintBitsToFloat(bg_min_coc); + tile.bg_max_coc = uintBitsToFloat(bg_max_coc); + tile.bg_min_intersectable_coc = uintBitsToFloat(bg_min_intersectable_coc); + + ivec2 tile_co = ivec2(gl_WorkGroupID.xy); + dof_coc_tile_store(out_tiles_fg_img, out_tiles_bg_img, tile_co, tile); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_comp.glsl new file mode 100644 index 00000000000..ce1f19edf53 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_comp.glsl @@ -0,0 +1,13 @@ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_film_lib.glsl) + +void main() +{ + ivec2 texel_film = ivec2(gl_GlobalInvocationID.xy); + /* Not used. 
*/ + vec4 out_color; + float out_depth; + + film_process_data(texel_film, out_color, out_depth); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_cryptomatte_post_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_cryptomatte_post_comp.glsl new file mode 100644 index 00000000000..120edd9c35e --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_cryptomatte_post_comp.glsl @@ -0,0 +1,77 @@ +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +#define CRYPTOMATTE_LEVELS_MAX 16 + +void cryptomatte_load_samples(ivec2 texel, int layer, out vec2 samples[CRYPTOMATTE_LEVELS_MAX]) +{ + int pass_len = divide_ceil(cryptomatte_samples_per_layer, 2); + int layer_id = layer * pass_len; + + /* Read all samples from the cryptomatte layer. */ + for (int p = 0; p < pass_len; p++) { + vec4 pass_sample = imageLoad(cryptomatte_img, ivec3(texel, p + layer_id)); + samples[p * 2] = pass_sample.xy; + samples[p * 2 + 1] = pass_sample.zw; + } + for (int i = pass_len * 2; i < CRYPTOMATTE_LEVELS_MAX; i++) { + samples[i] = vec2(0.0); + } +} + +void cryptomatte_sort_samples(inout vec2 samples[CRYPTOMATTE_LEVELS_MAX]) +{ + /* Sort samples. Lame implementation, can be replaced with a more efficient algorithm. 
*/ + for (int i = 0; i < cryptomatte_samples_per_layer - 1 && samples[i].y != 0.0; i++) { + int highest_index = i; + float highest_weight = samples[i].y; + for (int j = i + 1; j < cryptomatte_samples_per_layer && samples[j].y != 0.0; j++) { + if (samples[j].y > highest_weight) { + highest_index = j; + highest_weight = samples[j].y; + } + }; + + if (highest_index != i) { + vec2 tmp = samples[i]; + samples[i] = samples[highest_index]; + samples[highest_index] = tmp; + } + } +} +void cryptomatte_normalize_weight(float total_weight, inout vec2 samples[CRYPTOMATTE_LEVELS_MAX]) +{ + for (int i = 0; i < CRYPTOMATTE_LEVELS_MAX; i++) { + samples[i].y /= total_weight; + } +} + +void cryptomatte_store_samples(ivec2 texel, int layer, in vec2 samples[CRYPTOMATTE_LEVELS_MAX]) +{ + int pass_len = divide_ceil(cryptomatte_samples_per_layer, 2); + int layer_id = layer * pass_len; + + /* Store samples back to the cryptomatte layer. */ + for (int p = 0; p < pass_len; p++) { + vec4 pass_sample; + pass_sample.xy = samples[p * 2]; + pass_sample.zw = samples[p * 2 + 1]; + imageStore(cryptomatte_img, ivec3(texel, p + layer_id), pass_sample); + } +} + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + for (int layer = 0; layer < cryptomatte_layer_len; layer++) { + vec2 samples[CRYPTOMATTE_LEVELS_MAX]; + cryptomatte_load_samples(texel, layer, samples); + cryptomatte_sort_samples(samples); + /* Repeat texture coordinates as the weight can be optimized to a small portion of the film. 
*/ + float weight = imageLoad( + weight_img, + ivec3(texel % imageSize(weight_img).xy, FILM_WEIGHT_LAYER_ACCUMULATION)) + .x; + cryptomatte_normalize_weight(weight, samples); + cryptomatte_store_samples(texel, layer, samples); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_frag.glsl new file mode 100644 index 00000000000..e2aaf9128a5 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_frag.glsl @@ -0,0 +1,35 @@ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_film_lib.glsl) + +void main() +{ + ivec2 texel_film = ivec2(gl_FragCoord.xy) - film_buf.offset; + float out_depth; + + if (film_buf.display_only) { + out_depth = imageLoad(depth_img, texel_film).r; + + if (film_buf.display_id == -1) { + out_color = texelFetch(in_combined_tx, texel_film, 0); + } + else if (film_buf.display_storage_type == PASS_STORAGE_VALUE) { + out_color.rgb = imageLoad(value_accum_img, ivec3(texel_film, film_buf.display_id)).rrr; + out_color.a = 1.0; + } + else if (film_buf.display_storage_type == PASS_STORAGE_COLOR) { + out_color = imageLoad(color_accum_img, ivec3(texel_film, film_buf.display_id)); + } + else /* PASS_STORAGE_CRYPTOMATTE */ { + out_color = cryptomatte_false_color( + imageLoad(cryptomatte_img, ivec3(texel_film, film_buf.display_id)).r); + } + } + else { + film_process_data(texel_film, out_color, out_depth); + } + + gl_FragDepth = get_depth_from_view_z(-out_depth); + + gl_FragDepth = film_display_depth_ammend(texel_film, gl_FragDepth); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl new file mode 100644 index 00000000000..21b9a83abb9 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl @@ -0,0 +1,755 @@ + +/** + * Film accumulation utils functions. 
+ **/ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_camera_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_colorspace_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_cryptomatte_lib.glsl) + +/* Return scene linear Z depth from the camera or radial depth for panoramic cameras. */ +float film_depth_convert_to_scene(float depth) +{ + if (false /* Panoramic */) { + /* TODO */ + return 1.0; + } + return abs(get_view_z_from_depth(depth)); +} + +/* Load a texture sample in a specific format. Combined pass needs to use this. */ +vec4 film_texelfetch_as_YCoCg_opacity(sampler2D tx, ivec2 texel) +{ + vec4 color = texelFetch(combined_tx, texel, 0); + /* Convert transmittance to opacity. */ + color.a = saturate(1.0 - color.a); + /* Transform to YCoCg for accumulation. */ + color.rgb = colorspace_YCoCg_from_scene_linear(color.rgb); + return color; +} + +/* Returns a weight based on Luma to reduce the flickering introduced by high energy pixels. */ +float film_luma_weight(float luma) +{ + /* Slide 20 of "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014. */ + /* To preserve more details in dark areas, we use a bigger bias. */ + return 1.0 / (4.0 + luma * film_buf.exposure_scale); +} + +/* -------------------------------------------------------------------- */ +/** \name Filter + * \{ */ + +FilmSample film_sample_get(int sample_n, ivec2 texel_film) +{ +#ifdef PANORAMIC + /* TODO(fclem): Panoramic projection will be more complex. The samples will have to be retrieve + * at runtime, maybe by scanning a whole region. Offset and weight will have to be computed by + * reprojecting the incoming pixel data into film pixel space. 
*/ +#else + +# ifdef SCALED_RENDERING + texel_film /= film_buf.scaling_factor; +# endif + + FilmSample film_sample = film_buf.samples[sample_n]; + film_sample.texel += texel_film + film_buf.offset; + /* Use extend on borders. */ + film_sample.texel = clamp(film_sample.texel, ivec2(0, 0), film_buf.render_extent - 1); + + /* TODO(fclem): Panoramic projection will need to compute the sample weight in the shader + * instead of precomputing it on CPU. */ +# ifdef SCALED_RENDERING + /* We need to compute the real distance and weight since a sample + * can be used by many final pixel. */ + vec2 offset = film_buf.subpixel_offset - vec2(texel_film % film_buf.scaling_factor); + film_sample.weight = film_filter_weight(film_buf.filter_size, len_squared(offset)); +# endif + +#endif /* PANORAMIC */ + + /* Always return a weight above 0 to avoid blind spots between samples. */ + film_sample.weight = max(film_sample.weight, 1e-6); + + return film_sample; +} + +/* Returns the combined weights of all samples affecting this film pixel. */ +float film_weight_accumulation(ivec2 texel_film) +{ +#if 0 /* TODO(fclem): Reference implementation, also needed for panoramic cameras. 
*/ + float weight = 0.0; + for (int i = 0; i < film_buf.samples_len; i++) { + weight += film_sample_get(i, texel_film).weight; + } + return weight; +#endif + return film_buf.samples_weight_total; +} + +void film_sample_accum(FilmSample samp, int pass_id, sampler2D tex, inout vec4 accum) +{ + if (pass_id == -1) { + return; + } + accum += texelFetch(tex, samp.texel, 0) * samp.weight; +} + +void film_sample_accum(FilmSample samp, int pass_id, sampler2D tex, inout float accum) +{ + if (pass_id == -1) { + return; + } + accum += texelFetch(tex, samp.texel, 0).x * samp.weight; +} + +void film_sample_accum( + FilmSample samp, int pass_id, uint layer, sampler2DArray tex, inout vec4 accum) +{ + if (pass_id == -1) { + return; + } + accum += texelFetch(tex, ivec3(samp.texel, layer), 0) * samp.weight; +} + +void film_sample_accum(FilmSample samp, int pass_id, sampler2DArray tex, inout vec4 accum) +{ + film_sample_accum(samp, pass_id, pass_id, tex, accum); +} + +void film_sample_accum(FilmSample samp, int pass_id, sampler2DArray tex, inout float accum) +{ + if (pass_id == -1) { + return; + } + accum += texelFetch(tex, ivec3(samp.texel, pass_id), 0).x * samp.weight; +} + +void film_sample_accum_mist(FilmSample samp, inout float accum) +{ + if (film_buf.mist_id == -1) { + return; + } + float depth = texelFetch(depth_tx, samp.texel, 0).x; + vec2 uv = (vec2(samp.texel) + 0.5) / textureSize(depth_tx, 0).xy; + vec3 vP = get_view_space_from_depth(uv, depth); + bool is_persp = ProjectionMatrix[3][3] == 0.0; + float mist = (is_persp) ? length(vP) : abs(vP.z); + /* Remap to 0..1 range. */ + mist = saturate(mist * film_buf.mist_scale + film_buf.mist_bias); + /* Falloff. 
*/ + mist = pow(mist, film_buf.mist_exponent); + accum += mist * samp.weight; +} + +void film_sample_accum_combined(FilmSample samp, inout vec4 accum, inout float weight_accum) +{ + if (film_buf.combined_id == -1) { + return; + } + vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, samp.texel); + + /* Weight by luma to remove fireflies. */ + float weight = film_luma_weight(color.x) * samp.weight; + + accum += color * weight; + weight_accum += weight; +} + +void film_sample_cryptomatte_accum(FilmSample samp, + int layer, + sampler2D tex, + inout vec2 crypto_samples[4]) +{ + float hash = texelFetch(tex, samp.texel, 0)[layer]; + /* Find existing entry. */ + for (int i = 0; i < 4; i++) { + if (crypto_samples[i].x == hash) { + crypto_samples[i].y += samp.weight; + return; + } + } + /* Overwrite entry with less weight. */ + for (int i = 0; i < 4; i++) { + if (crypto_samples[i].y < samp.weight) { + crypto_samples[i] = vec2(hash, samp.weight); + return; + } + } +} + +void film_cryptomatte_layer_accum_and_store( + FilmSample dst, ivec2 texel_film, int pass_id, int layer_component, inout vec4 out_color) +{ + if (pass_id == -1) { + return; + } + /* x = hash, y = accumed weight. Only keep track of 4 highest weighted samples. */ + vec2 crypto_samples[4] = vec2[4](vec2(0.0), vec2(0.0), vec2(0.0), vec2(0.0)); + for (int i = 0; i < film_buf.samples_len; i++) { + FilmSample src = film_sample_get(i, texel_film); + film_sample_cryptomatte_accum(src, layer_component, cryptomatte_tx, crypto_samples); + } + for (int i = 0; i < 4; i++) { + cryptomatte_store_film_sample(dst, pass_id, crypto_samples[i], out_color); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Load/Store Data + * \{ */ + +/* Returns the distance used to store nearest interpolation data. */ +float film_distance_load(ivec2 texel) +{ + /* Repeat texture coordinates as the weight can be optimized to a small portion of the film. 
*/ + texel = texel % imageSize(in_weight_img).xy; + + if (!film_buf.use_history || film_buf.use_reprojection) { + return 1.0e16; + } + return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE)).x; +} + +float film_weight_load(ivec2 texel) +{ + /* Repeat texture coordinates as the weight can be optimized to a small portion of the film. */ + texel = texel % imageSize(in_weight_img).xy; + + if (!film_buf.use_history || film_buf.use_reprojection) { + return 0.0; + } + return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION)).x; +} + +/* Returns motion in pixel space to retrieve the pixel history. */ +vec2 film_pixel_history_motion_vector(ivec2 texel_sample) +{ + /** + * Dilate velocity by using the nearest pixel in a cross pattern. + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 27) + */ + const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2)); + float min_depth = texelFetch(depth_tx, texel_sample, 0).x; + ivec2 nearest_texel = texel_sample; + for (int i = 0; i < 4; i++) { + ivec2 texel = clamp(texel_sample + corners[i], ivec2(0), textureSize(depth_tx, 0).xy - 1); + float depth = texelFetch(depth_tx, texel, 0).x; + if (min_depth > depth) { + min_depth = depth; + nearest_texel = texel; + } + } + + vec4 vector = velocity_resolve(vector_tx, nearest_texel, min_depth); + + /* Transform to pixel space. */ + vector.xy *= vec2(film_buf.extent); + + return vector.xy; +} + +/* \a t is inter-pixel position. 0 means perfectly on a pixel center. + * Returns weights in both dimensions. + * Multiply each dimension weights to get final pixel weights. */ +void film_get_catmull_rom_weights(vec2 t, out vec2 weights[4]) +{ + vec2 t2 = t * t; + vec2 t3 = t2 * t; + float fc = 0.5; /* Catmull-Rom. 
*/ + + vec2 fct = t * fc; + vec2 fct2 = t2 * fc; + vec2 fct3 = t3 * fc; + weights[0] = (fct2 * 2.0 - fct3) - fct; + weights[1] = (t3 * 2.0 - fct3) + (-t2 * 3.0 + fct2) + 1.0; + weights[2] = (-t3 * 2.0 + fct3) + (t2 * 3.0 - (2.0 * fct2)) + fct; + weights[3] = fct3 - fct2; +} + +/* Load color using a special filter to avoid losing detail. + * \a texel is sample position with subpixel accuracy. */ +vec4 film_sample_catmull_rom(sampler2D color_tx, vec2 input_texel) +{ + vec2 center_texel; + vec2 inter_texel = modf(input_texel, center_texel); + vec2 weights[4]; + film_get_catmull_rom_weights(inter_texel, weights); + +#if 0 /* Reference. 16 Taps. */ + vec4 color = vec4(0.0); + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + ivec2 texel = ivec2(center_texel) + ivec2(x, y) - 1; + texel = clamp(texel, ivec2(0), textureSize(color_tx, 0).xy - 1); + color += texelFetch(color_tx, texel, 0) * weights[x].x * weights[y].y; + } + } + return color; + +#elif 1 /* Optimize version. 5 Bilinear Taps. */ + /** + * Use optimized version by leveraging bilinear filtering from hardware sampler and by removing + * corner taps. + * From "Filmic SMAA" by Jorge Jimenez at Siggraph 2016 + * http://advances.realtimerendering.com/s2016/Filmic%20SMAA%20v7.pptx + */ + center_texel += 0.5; + + /* Slide 92. 
*/ + vec2 weight_12 = weights[1] + weights[2]; + vec2 uv_12 = (center_texel + weights[2] / weight_12) * film_buf.extent_inv; + vec2 uv_0 = (center_texel - 1.0) * film_buf.extent_inv; + vec2 uv_3 = (center_texel + 2.0) * film_buf.extent_inv; + + vec4 color; + vec4 weight_cross = weight_12.xyyx * vec4(weights[0].yx, weights[3].xy); + float weight_center = weight_12.x * weight_12.y; + + color = textureLod(color_tx, uv_12, 0.0) * weight_center; + color += textureLod(color_tx, vec2(uv_12.x, uv_0.y), 0.0) * weight_cross.x; + color += textureLod(color_tx, vec2(uv_0.x, uv_12.y), 0.0) * weight_cross.y; + color += textureLod(color_tx, vec2(uv_3.x, uv_12.y), 0.0) * weight_cross.z; + color += textureLod(color_tx, vec2(uv_12.x, uv_3.y), 0.0) * weight_cross.w; + /* Re-normalize for the removed corners. */ + return color / (weight_center + sum(weight_cross)); + +#else /* Nearest interpolation for debugging. 1 Tap. */ + ivec2 texel = ivec2(center_texel) + ivec2(greaterThan(inter_texel, vec2(0.5))); + texel = clamp(texel, ivec2(0), textureSize(color_tx, 0).xy - 1); + return texelFetch(color_tx, texel, 0); +#endif +} + +/* Return history clipping bounding box in YCoCg color space. */ +void film_combined_neighbor_boundbox(ivec2 texel, out vec4 min_c, out vec4 max_c) +{ + /* Plus (+) shape offsets. */ + const ivec2 plus_offsets[5] = ivec2[5](ivec2(0, 0), /* Center */ + ivec2(-1, 0), + ivec2(0, -1), + ivec2(1, 0), + ivec2(0, 1)); +#if 0 + /** + * Compute Variance of neighborhood as described in: + * "An Excursion in Temporal Supersampling" by Marco Salvi at GDC 2016. + * and: + * "A Survey of Temporal Antialiasing Techniques" by Yang et al. + */ + + /* First 2 moments. */ + vec4 mu1 = vec4(0), mu2 = vec4(0); + for (int i = 0; i < 5; i++) { + vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + plus_offsets[i]); + mu1 += color; + mu2 += sqr(color); + } + mu1 *= (1.0 / 5.0); + mu2 *= (1.0 / 5.0); + + /* Extent scaling. Range [0.75..1.25]. 
+ * Balance between more flickering (0.75) or more ghosting (1.25). */ + const float gamma = 1.25; + /* Standard deviation. */ + vec4 sigma = sqrt(abs(mu2 - sqr(mu1))); + /* eq. 6 in "A Survey of Temporal Antialiasing Techniques". */ + min_c = mu1 - gamma * sigma; + max_c = mu1 + gamma * sigma; +#else + /** + * Simple bounding box calculation in YCoCg as described in: + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 + */ + min_c = vec4(1e16); + max_c = vec4(-1e16); + for (int i = 0; i < 5; i++) { + vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + plus_offsets[i]); + min_c = min(min_c, color); + max_c = max(max_c, color); + } + /* (Slide 32) Simple clamp to min/max of 8 neighbors results in 3x3 box artifacts. + * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. */ + vec4 min_c_3x3 = min_c; + vec4 max_c_3x3 = max_c; + const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1)); + for (int i = 0; i < 4; i++) { + vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + corners[i]); + min_c_3x3 = min(min_c_3x3, color); + max_c_3x3 = max(max_c_3x3, color); + } + min_c = (min_c + min_c_3x3) * 0.5; + max_c = (max_c + max_c_3x3) * 0.5; +#endif +} + +/* 1D equivalent of line_aabb_clipping_dist(). */ +float film_aabb_clipping_dist_alpha(float origin, float direction, float aabb_min, float aabb_max) +{ + if (abs(direction) < 1e-5) { + return 0.0; + } + float nearest_plane = (direction > 0.0) ? aabb_min : aabb_max; + return (nearest_plane - origin) / direction; +} + +/* Modulate the history color to avoid ghosting artifact. */ +vec4 film_amend_combined_history( + vec4 min_color, vec4 max_color, vec4 color_history, vec4 src_color, ivec2 src_texel) +{ + /* Clip instead of clamping to avoid color accumulating in the AABB corners. 
*/ + vec4 clip_dir = src_color - color_history; + + float t = line_aabb_clipping_dist(color_history.rgb, clip_dir.rgb, min_color.rgb, max_color.rgb); + color_history.rgb += clip_dir.rgb * saturate(t); + + /* Clip alpha on its own to avoid interference with other channels. */ + float t_a = film_aabb_clipping_dist_alpha(color_history.a, clip_dir.a, min_color.a, max_color.a); + color_history.a += clip_dir.a * saturate(t_a); + + return color_history; +} + +float film_history_blend_factor(float velocity, + vec2 texel, + float luma_min, + float luma_max, + float luma_incoming, + float luma_history) +{ + /* 5% of incoming color by default. */ + float blend = 0.05; + /* Blend less history if the pixel has substantial velocity. */ + blend = mix(blend, 0.20, saturate(velocity * 0.02)); + /** + * "High Quality Temporal Supersampling" by Brian Karis at Siggraph 2014 (Slide 43) + * Bias towards history if incoming pixel is near clamping. Reduces flicker. + */ + float distance_to_luma_clip = min_v2(vec2(luma_history - luma_min, luma_max - luma_history)); + /* Divide by bbox size to get a factor. 2 factor to compensate the line above. */ + distance_to_luma_clip *= 2.0 * safe_rcp(luma_max - luma_min); + /* Linearly blend when history gets below to 25% of the bbox size. */ + blend *= saturate(distance_to_luma_clip * 4.0 + 0.1); + /* Discard out of view history. */ + if (any(lessThan(texel, vec2(0))) || any(greaterThanEqual(texel, film_buf.extent))) { + blend = 1.0; + } + /* Discard history if invalid. */ + if (film_buf.use_history == false) { + blend = 1.0; + } + return blend; +} + +/* Returns resolved final color. */ +void film_store_combined( + FilmSample dst, ivec2 src_texel, vec4 color, float color_weight, inout vec4 display) +{ + if (film_buf.combined_id == -1) { + return; + } + + vec4 color_src, color_dst; + float weight_src, weight_dst; + + /* Undo the weighting to get final spatialy-filtered color. 
*/ + color_src = color / color_weight; + + if (film_buf.use_reprojection) { + /* Interactive accumulation. Do reprojection and Temporal Anti-Aliasing. */ + + /* Reproject by finding where this pixel was in the previous frame. */ + vec2 motion = film_pixel_history_motion_vector(src_texel); + vec2 history_texel = vec2(dst.texel) + motion; + + float velocity = length(motion); + + /* Load weight if it is not uniform across the whole buffer (i.e: upsampling, panoramic). */ + // dst.weight = film_weight_load(texel_combined); + + color_dst = film_sample_catmull_rom(in_combined_tx, history_texel); + color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb); + + /* Get local color bounding box of source neighborhood. */ + vec4 min_color, max_color; + film_combined_neighbor_boundbox(src_texel, min_color, max_color); + + float blend = film_history_blend_factor( + velocity, history_texel, min_color.x, max_color.x, color_src.x, color_dst.x); + + color_dst = film_amend_combined_history(min_color, max_color, color_dst, color_src, src_texel); + + /* Luma weighted blend to avoid flickering. */ + weight_dst = film_luma_weight(color_dst.x) * (1.0 - blend); + weight_src = film_luma_weight(color_src.x) * (blend); + } + else { + /* Everything is static. Use render accumulation. */ + color_dst = texelFetch(in_combined_tx, dst.texel, 0); + color_dst.rgb = colorspace_YCoCg_from_scene_linear(color_dst.rgb); + + /* Luma weighted blend to avoid flickering. */ + weight_dst = film_luma_weight(color_dst.x) * dst.weight; + weight_src = color_weight; + } + /* Weighted blend. */ + color = color_dst * weight_dst + color_src * weight_src; + color /= weight_src + weight_dst; + + color.rgb = colorspace_scene_linear_from_YCoCg(color.rgb); + + /* Fix alpha not accumulating to 1 because of float imprecision. */ + if (color.a > 0.995) { + color.a = 1.0; + } + + /* Filter NaNs. 
*/ + if (any(isnan(color))) { + color = vec4(0.0, 0.0, 0.0, 1.0); + } + + if (film_buf.display_id == -1) { + display = color; + } + imageStore(out_combined_img, dst.texel, color); +} + +void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 display) +{ + if (pass_id == -1) { + return; + } + + vec4 data_film = imageLoad(color_accum_img, ivec3(dst.texel, pass_id)); + + color = (data_film * dst.weight + color) * dst.weight_sum_inv; + + /* Filter NaNs. */ + if (any(isnan(color))) { + color = vec4(0.0, 0.0, 0.0, 1.0); + } + + if (film_buf.display_id == pass_id) { + display = color; + } + imageStore(color_accum_img, ivec3(dst.texel, pass_id), color); +} + +void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 display) +{ + if (pass_id == -1) { + return; + } + + float data_film = imageLoad(value_accum_img, ivec3(dst.texel, pass_id)).x; + + value = (data_film * dst.weight + value) * dst.weight_sum_inv; + + /* Filter NaNs. */ + if (isnan(value)) { + value = 0.0; + } + + if (film_buf.display_id == pass_id) { + display = vec4(value, value, value, 1.0); + } + imageStore(value_accum_img, ivec3(dst.texel, pass_id), vec4(value)); +} + +/* Nearest sample variant. Always stores the data. 
*/ +void film_store_data(ivec2 texel_film, int pass_id, vec4 data_sample, inout vec4 display) +{ + if (pass_id == -1) { + return; + } + + if (film_buf.display_id == pass_id) { + display = data_sample; + } + imageStore(color_accum_img, ivec3(texel_film, pass_id), data_sample); +} + +void film_store_depth(ivec2 texel_film, float value, out float out_depth) +{ + if (film_buf.depth_id == -1) { + return; + } + + out_depth = film_depth_convert_to_scene(value); + + imageStore(depth_img, texel_film, vec4(out_depth)); +} + +void film_store_distance(ivec2 texel, float value) +{ + imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE), vec4(value)); +} + +void film_store_weight(ivec2 texel, float value) +{ + imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION), vec4(value)); +} + +float film_display_depth_ammend(ivec2 texel, float depth) +{ + /* This effectively offsets the depth of the whole 2x2 region to the lowest value of the region + * twice. One for X and one for Y direction. */ + /* TODO(fclem): This could be improved as it gives flickering result at depth discontinuity. + * But this is the quickest stable result I could come with for now. */ +#ifdef GPU_FRAGMENT_SHADER + depth += fwidth(depth); +#endif + /* Small offset to avoid depth test lessEqual failing because of all the conversions loss. */ + depth += 2.4e-7 * 4.0; + return saturate(depth); +} + +/** \} */ + +/** NOTE: out_depth is scene linear depth from the camera origin. 
*/ +void film_process_data(ivec2 texel_film, out vec4 out_color, out float out_depth) +{ + out_color = vec4(0.0); + out_depth = 0.0; + + float weight_accum = film_weight_accumulation(texel_film); + float film_weight = film_weight_load(texel_film); + float weight_sum = film_weight + weight_accum; + film_store_weight(texel_film, weight_sum); + + FilmSample dst; + dst.texel = texel_film; + dst.weight = film_weight; + dst.weight_sum_inv = 1.0 / weight_sum; + + /* NOTE: We split the accumulations into separate loops to avoid using too much registers and + * maximize occupancy. */ + + if (film_buf.combined_id != -1) { + /* NOTE: Do weight accumulation again since we use custom weights. */ + float weight_accum = 0.0; + vec4 combined_accum = vec4(0.0); + + FilmSample src; + for (int i = film_buf.samples_len - 1; i >= 0; i--) { + src = film_sample_get(i, texel_film); + film_sample_accum_combined(src, combined_accum, weight_accum); + } + /* NOTE: src.texel is center texel in incoming data buffer. */ + film_store_combined(dst, src.texel, combined_accum, weight_accum, out_color); + } + + if (film_buf.has_data) { + float film_distance = film_distance_load(texel_film); + + /* Get sample closest to target texel. It is always sample 0. */ + FilmSample film_sample = film_sample_get(0, texel_film); + + if (film_buf.use_reprojection || film_sample.weight < film_distance) { + vec4 normal = texelFetch(normal_tx, film_sample.texel, 0); + float depth = texelFetch(depth_tx, film_sample.texel, 0).x; + vec4 vector = velocity_resolve(vector_tx, film_sample.texel, depth); + /* Transform to pixel space. 
*/ + vector *= vec4(film_buf.render_extent, -film_buf.render_extent); + + film_store_depth(texel_film, depth, out_depth); + film_store_data(texel_film, film_buf.normal_id, normal, out_color); + film_store_data(texel_film, film_buf.vector_id, vector, out_color); + film_store_distance(texel_film, film_sample.weight); + } + else { + out_depth = imageLoad(depth_img, texel_film).r; + } + } + + if (film_buf.any_render_pass_1) { + vec4 diffuse_light_accum = vec4(0.0); + vec4 specular_light_accum = vec4(0.0); + vec4 volume_light_accum = vec4(0.0); + vec4 emission_accum = vec4(0.0); + + for (int i = 0; i < film_buf.samples_len; i++) { + FilmSample src = film_sample_get(i, texel_film); + film_sample_accum(src, + film_buf.diffuse_light_id, + RENDER_PASS_LAYER_DIFFUSE_LIGHT, + light_tx, + diffuse_light_accum); + film_sample_accum(src, + film_buf.specular_light_id, + RENDER_PASS_LAYER_SPECULAR_LIGHT, + light_tx, + specular_light_accum); + film_sample_accum(src, film_buf.volume_light_id, volume_light_tx, volume_light_accum); + film_sample_accum(src, film_buf.emission_id, emission_tx, emission_accum); + } + film_store_color(dst, film_buf.diffuse_light_id, diffuse_light_accum, out_color); + film_store_color(dst, film_buf.specular_light_id, specular_light_accum, out_color); + film_store_color(dst, film_buf.volume_light_id, volume_light_accum, out_color); + film_store_color(dst, film_buf.emission_id, emission_accum, out_color); + } + + if (film_buf.any_render_pass_2) { + vec4 diffuse_color_accum = vec4(0.0); + vec4 specular_color_accum = vec4(0.0); + vec4 environment_accum = vec4(0.0); + float mist_accum = 0.0; + float shadow_accum = 0.0; + float ao_accum = 0.0; + + for (int i = 0; i < film_buf.samples_len; i++) { + FilmSample src = film_sample_get(i, texel_film); + film_sample_accum(src, film_buf.diffuse_color_id, diffuse_color_tx, diffuse_color_accum); + film_sample_accum(src, film_buf.specular_color_id, specular_color_tx, specular_color_accum); + film_sample_accum(src, 
film_buf.environment_id, environment_tx, environment_accum); + film_sample_accum(src, film_buf.shadow_id, shadow_tx, shadow_accum); + film_sample_accum(src, film_buf.ambient_occlusion_id, ambient_occlusion_tx, ao_accum); + film_sample_accum_mist(src, mist_accum); + } + film_store_color(dst, film_buf.diffuse_color_id, diffuse_color_accum, out_color); + film_store_color(dst, film_buf.specular_color_id, specular_color_accum, out_color); + film_store_color(dst, film_buf.environment_id, environment_accum, out_color); + film_store_value(dst, film_buf.shadow_id, shadow_accum, out_color); + film_store_value(dst, film_buf.ambient_occlusion_id, ao_accum, out_color); + film_store_value(dst, film_buf.mist_id, mist_accum, out_color); + } + + for (int aov = 0; aov < film_buf.aov_color_len; aov++) { + vec4 aov_accum = vec4(0.0); + + for (int i = 0; i < film_buf.samples_len; i++) { + FilmSample src = film_sample_get(i, texel_film); + film_sample_accum(src, aov, aov_color_tx, aov_accum); + } + film_store_color(dst, film_buf.aov_color_id + aov, aov_accum, out_color); + } + + for (int aov = 0; aov < film_buf.aov_value_len; aov++) { + float aov_accum = 0.0; + + for (int i = 0; i < film_buf.samples_len; i++) { + FilmSample src = film_sample_get(i, texel_film); + film_sample_accum(src, aov, aov_value_tx, aov_accum); + } + film_store_value(dst, film_buf.aov_value_id + aov, aov_accum, out_color); + } + + if (film_buf.cryptomatte_samples_len != 0) { + /* Cryptomatte passes cannot be cleared by a weighted store like other passes. 
*/ + if (!film_buf.use_history || film_buf.use_reprojection) { + cryptomatte_clear_samples(dst); + } + + film_cryptomatte_layer_accum_and_store( + dst, texel_film, film_buf.cryptomatte_object_id, 0, out_color); + film_cryptomatte_layer_accum_and_store( + dst, texel_film, film_buf.cryptomatte_asset_id, 1, out_color); + film_cryptomatte_layer_accum_and_store( + dst, texel_film, film_buf.cryptomatte_material_id, 2, out_color); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl new file mode 100644 index 00000000000..e93d0f472fa --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl @@ -0,0 +1,24 @@ + +/** + * Debug hiz down sampling pass. + * Output red if above any max pixels, blue otherwise. + */ + +void main() +{ + ivec2 texel = ivec2(gl_FragCoord.xy); + + float depth0 = texelFetch(hiz_tx, texel, 0).r; + + vec4 color = vec4(0.1, 0.1, 1.0, 1.0); + for (int i = 1; i < HIZ_MIP_COUNT; i++) { + ivec2 lvl_texel = texel / ivec2(uvec2(1) << uint(i)); + lvl_texel = min(lvl_texel, textureSize(hiz_tx, i) - 1); + if (texelFetch(hiz_tx, lvl_texel, i).r < depth0) { + color = vec4(1.0, 0.1, 0.1, 1.0); + break; + } + } + out_debug_color_add = vec4(color.rgb, 0.0) * 0.2; + out_debug_color_mul = color; +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl new file mode 100644 index 00000000000..597bc73e2ad --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl @@ -0,0 +1,121 @@ + +/** + * Shader that down-sample depth buffer, creating a Hierarchical-Z buffer. + * Saves max value of each 2x2 texel in the mipmap above the one we are + * rendering to. 
Adapted from + * http://rastergrid.com/blog/2010/10/hierarchical-z-map-based-occlusion-culling/ + * + * Major simplification has been made since we pad the buffer to always be + * bigger than input to avoid mipmapping misalignement. + * + * Start by copying the base level by quad loading the depth. + * Then each thread compute it's local depth for level 1. + * After that we use shared variables to do inter thread comunication and + * downsample to max level. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +shared float local_depths[gl_WorkGroupSize.y][gl_WorkGroupSize.x]; + +/* Load values from the previous lod level. */ +vec4 load_local_depths(ivec2 pixel) +{ + pixel *= 2; + return vec4(local_depths[pixel.y + 1][pixel.x + 0], + local_depths[pixel.y + 1][pixel.x + 1], + local_depths[pixel.y + 0][pixel.x + 1], + local_depths[pixel.y + 0][pixel.x + 0]); +} + +void store_local_depth(ivec2 pixel, float depth) +{ + local_depths[pixel.y][pixel.x] = depth; +} + +void main() +{ + ivec2 local_px = ivec2(gl_LocalInvocationID.xy); + /* Bottom left corner of the kernel. */ + ivec2 kernel_origin = ivec2(gl_WorkGroupSize.xy * gl_WorkGroupID.xy); + + /* Copy level 0. */ + ivec2 src_px = ivec2(kernel_origin + local_px) * 2; + vec2 samp_co = (vec2(src_px) + 0.5) / vec2(textureSize(depth_tx, 0)); + vec4 samp = textureGather(depth_tx, samp_co); + + if (update_mip_0) { + imageStore(out_mip_0, src_px + ivec2(0, 1), samp.xxxx); + imageStore(out_mip_0, src_px + ivec2(1, 1), samp.yyyy); + imageStore(out_mip_0, src_px + ivec2(1, 0), samp.zzzz); + imageStore(out_mip_0, src_px + ivec2(0, 0), samp.wwww); + } + + /* Level 1. (No load) */ + float max_depth = max_v4(samp); + ivec2 dst_px = ivec2(kernel_origin + local_px); + imageStore(out_mip_1, dst_px, vec4(max_depth)); + store_local_depth(local_px, max_depth); + + /* Level 2-5. 
*/ + bool active_thread; + int mask_shift = 1; + +#define downsample_level(out_mip__, lod_) \ + active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \ + barrier(); /* Wait for previous writes to finish. */ \ + if (active_thread) { \ + max_depth = max_v4(load_local_depths(local_px)); \ + dst_px = ivec2((kernel_origin >> mask_shift) + local_px); \ + imageStore(out_mip__, dst_px, vec4(max_depth)); \ + } \ + barrier(); /* Wait for previous reads to finish. */ \ + if (active_thread) { \ + store_local_depth(local_px, max_depth); \ + } \ + mask_shift++; + + downsample_level(out_mip_2, 2); + downsample_level(out_mip_3, 3); + downsample_level(out_mip_4, 4); + downsample_level(out_mip_5, 5); + + /* Since we pad the destination texture, the mip size is equal to the dispatch size. */ + uint tile_count = uint(imageSize(out_mip_5).x * imageSize(out_mip_5).y); + /* Let the last tile handle the remaining LOD. */ + bool last_tile = atomicAdd(finished_tile_counter, 1u) + 1u < tile_count; + if (last_tile == false) { + return; + } + finished_tile_counter = 0u; + + ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u)); + ivec2 image_border = imageSize(out_mip_5) - 1; + for (int y = 0; y < iter.y; y++) { + for (int x = 0; x < iter.x; x++) { + /* Load result of the other work groups. */ + kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y); + src_px = ivec2(kernel_origin + local_px) * 2; + vec4 samp; + samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x; + samp.y = imageLoad(out_mip_5, min(src_px + ivec2(1, 1), image_border)).x; + samp.z = imageLoad(out_mip_5, min(src_px + ivec2(1, 0), image_border)).x; + samp.w = imageLoad(out_mip_5, min(src_px + ivec2(0, 0), image_border)).x; + /* Level 6. */ + float max_depth = max_v4(samp); + ivec2 dst_px = ivec2(kernel_origin + local_px); + imageStore(out_mip_6, dst_px, vec4(max_depth)); + store_local_depth(local_px, max_depth); + + mask_shift = 1; + + /* Level 7. 
*/ + downsample_level(out_mip_7, 7); + + /* Limited by OpenGL maximum of 8 image slot. */ + // downsample_level(out_mip_8, 8); + // downsample_level(out_mip_9, 9); + // downsample_level(out_mip_10, 10); + } + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl new file mode 100644 index 00000000000..eefc024d0b8 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl @@ -0,0 +1,54 @@ + +/** + * Debug Shader outputing a gradient of orange - white - blue to mark culling hotspots. + * Green pixels are error pixels that are missing lights from the culling pass (i.e: when culling + * pass is not conservative enough). + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_FragCoord.xy); + + float depth = texelFetch(hiz_tx, texel, 0).r; + float vP_z = get_view_z_from_depth(depth); + vec3 P = get_world_space_from_depth(uvcoordsvar.xy, depth); + + float light_count = 0.0; + uint light_cull = 0u; + vec2 px = gl_FragCoord.xy; + LIGHT_FOREACH_BEGIN_LOCAL(light_cull_buf, light_zbin_buf, light_tile_buf, px, vP_z, l_idx) + { + LightData light = light_buf[l_idx]; + light_cull |= 1u << l_idx; + light_count += 1.0; + } + LIGHT_FOREACH_END + + uint light_nocull = 0u; + LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(light_cull_buf, l_idx) + { + LightData light = light_buf[l_idx]; + vec3 L; + float dist; + light_vector_get(light, P, L, dist); + if (light_attenuation(light, L, dist) > 0.0) { + light_nocull |= 1u << l_idx; + } + } + LIGHT_FOREACH_END + + vec4 color = vec4(heatmap_gradient(light_count / 4.0), 1.0); + + if ((light_cull & light_nocull) != light_nocull) { + /* ERROR. Some lights were culled incorrectly. 
*/ + color = vec4(0.0, 1.0, 0.0, 1.0); + } + + out_debug_color_add = vec4(color.rgb, 0.0) * 0.2; + out_debug_color_mul = color; +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl new file mode 100644 index 00000000000..9c12b0e50e6 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl @@ -0,0 +1,62 @@ + +/** + * Select the visible items inside the active view and put them inside the sorting buffer. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersect_lib.glsl) + +void main() +{ + uint l_idx = gl_GlobalInvocationID.x; + if (l_idx >= light_cull_buf.items_count) { + return; + } + + LightData light = in_light_buf[l_idx]; + + /* Do not select 0 power lights. */ + if (light.influence_radius_max < 1e-8) { + return; + } + + /* Sun lights are packed at the end of the array. Perform early copy. */ + if (light.type == LIGHT_SUN) { + /* NOTE: We know the index because sun lights are packed at the start of the input buffer. */ + out_light_buf[light_cull_buf.local_lights_len + l_idx] = light; + return; + } + + Sphere sphere; + switch (light.type) { + case LIGHT_SPOT: + /* Only for < ~170° Cone due to plane extraction precision. */ + if (light.spot_tan < 10.0) { + Pyramid pyramid = shape_pyramid_non_oblique( + light._position, + light._position - light._back * light.influence_radius_max, + light._right * light.influence_radius_max * light.spot_tan / light.spot_size_inv.x, + light._up * light.influence_radius_max * light.spot_tan / light.spot_size_inv.y); + if (!intersect_view(pyramid)) { + return; + } + } + case LIGHT_RECT: + case LIGHT_ELLIPSE: + case LIGHT_POINT: + sphere = Sphere(light._position, light.influence_radius_max); + break; + } + + /* TODO(fclem): HiZ culling? 
Could be quite beneficial given the nature of the 2.5D culling. */ + + /* TODO(fclem): Small light culling / fading? */ + + if (intersect_view(sphere)) { + uint index = atomicAdd(light_cull_buf.visible_count, 1u); + + out_zdist_buf[index] = dot(cameraForward, light._position) - dot(cameraForward, cameraPos); + out_key_buf[index] = l_idx; + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl new file mode 100644 index 00000000000..e98b170cd4c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl @@ -0,0 +1,57 @@ + +/** + * Sort the lights by their Z distance to the camera. + * Outputs ordered light buffer. + * One thread processes one Light entity. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +shared float zdists_cache[gl_WorkGroupSize.x]; + +void main() +{ + uint src_index = gl_GlobalInvocationID.x; + bool valid_thread = true; + + if (src_index >= light_cull_buf.visible_count) { + /* Do not return because we use barriers later on (which need uniform control flow). + * Just process the same last item but avoid insertion. */ + src_index = light_cull_buf.visible_count - 1; + valid_thread = false; + } + + float local_zdist = in_zdist_buf[src_index]; + + int prefix_sum = 0; + /* Iterate over the whole key buffer. */ + uint iter = divide_ceil(light_cull_buf.visible_count, gl_WorkGroupSize.x); + for (uint i = 0u; i < iter; i++) { + uint index = gl_WorkGroupSize.x * i + gl_LocalInvocationID.x; + /* NOTE: This will load duplicated values, but they will be discarded. */ + index = min(index, light_cull_buf.visible_count - 1); + zdists_cache[gl_LocalInvocationID.x] = in_zdist_buf[index]; + + barrier(); + + /* Iterate over the cache line. 
*/ + uint line_end = min(gl_WorkGroupSize.x, light_cull_buf.visible_count - gl_WorkGroupSize.x * i); + for (uint j = 0u; j < line_end; j++) { + if (zdists_cache[j] < local_zdist) { + prefix_sum++; + } + else if (zdists_cache[j] == local_zdist) { + /* Same depth, use index to order and avoid same prefix for 2 different lights. */ + if ((gl_WorkGroupSize.x * i + j) < src_index) { + prefix_sum++; + } + } + } + } + + if (valid_thread) { + /* Copy sorted light to render light buffer. */ + uint input_index = in_key_buf[src_index]; + out_light_buf[prefix_sum] = in_light_buf[input_index]; + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl new file mode 100644 index 00000000000..37705e22b22 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl @@ -0,0 +1,188 @@ + +/** + * 2D Culling pass for lights. + * We iterate over all items and check if they intersect with the tile frustum. + * Dispatch one thread per word. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersect_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Culling shapes extraction + * \{ */ + +struct CullingTile { + IsectFrustum frustum; + vec4 bounds; +}; + +/* Corners are expected to be in viewspace so that the cone is starting from the origin. + * Corner order does not matter. 
*/ +vec4 tile_bound_cone(vec3 v00, vec3 v01, vec3 v10, vec3 v11) +{ + v00 = normalize(v00); + v01 = normalize(v01); + v10 = normalize(v10); + v11 = normalize(v11); + vec3 center = normalize(v00 + v01 + v10 + v11); + float angle_cosine = dot(center, v00); + angle_cosine = max(angle_cosine, dot(center, v01)); + angle_cosine = max(angle_cosine, dot(center, v10)); + angle_cosine = max(angle_cosine, dot(center, v11)); + return vec4(center, angle_cosine); +} + +/* Corners are expected to be in viewspace. Returns Z-aligned bounding cylinder. + * Corner order does not matter. */ +vec4 tile_bound_cylinder(vec3 v00, vec3 v01, vec3 v10, vec3 v11) +{ + vec3 center = (v00 + v01 + v10 + v11) * 0.25; + vec4 corners_dist; + float dist_sqr = distance_squared(center, v00); + dist_sqr = max(dist_sqr, distance_squared(center, v01)); + dist_sqr = max(dist_sqr, distance_squared(center, v10)); + dist_sqr = max(dist_sqr, distance_squared(center, v11)); + /* Return a cone. Later converted to cylinder. */ + return vec4(center, sqrt(dist_sqr)); +} + +vec2 tile_to_ndc(vec2 tile_co, vec2 offset) +{ + /* Add a margin to prevent culling too much if the frustum becomes too much unstable. */ + const float margin = 0.02; + tile_co += margin * (offset * 2.0 - 1.0); + + tile_co += offset; + return tile_co * light_cull_buf.tile_to_uv_fac * 2.0 - 1.0; +} + +CullingTile tile_culling_get(uvec2 tile_co) +{ + vec2 ftile = vec2(tile_co); + /* Culling frustum corners for this tile. */ + vec3 corners[8]; + /* Follow same corners order as view frustum. 
*/ + corners[1].xy = corners[0].xy = tile_to_ndc(ftile, vec2(0, 0)); + corners[5].xy = corners[4].xy = tile_to_ndc(ftile, vec2(1, 0)); + corners[6].xy = corners[7].xy = tile_to_ndc(ftile, vec2(1, 1)); + corners[2].xy = corners[3].xy = tile_to_ndc(ftile, vec2(0, 1)); + corners[1].z = corners[5].z = corners[6].z = corners[2].z = -1.0; + corners[0].z = corners[4].z = corners[7].z = corners[3].z = 1.0; + + for (int i = 0; i < 8; i++) { + /* Culling in view space for precision. */ + corners[i] = project_point(ProjectionMatrixInverse, corners[i]); + } + + bool is_persp = ProjectionMatrix[3][3] == 0.0; + CullingTile tile; + tile.bounds = (is_persp) ? tile_bound_cone(corners[0], corners[4], corners[7], corners[3]) : + tile_bound_cylinder(corners[0], corners[4], corners[7], corners[3]); + + tile.frustum = isect_data_setup(shape_frustum(corners)); + return tile; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Intersection Tests + * \{ */ + +bool intersect(CullingTile tile, Sphere sphere) +{ + bool isect = true; + /* Test tile intersection using bounding cone or bounding cylinder. + * This has less false positive cases when the sphere is large. */ + if (ProjectionMatrix[3][3] == 0.0) { + isect = intersect(shape_cone(tile.bounds.xyz, tile.bounds.w), sphere); + } + else { + /* Simplify to a 2D circle test on the view Z axis plane. */ + isect = intersect(shape_circle(tile.bounds.xy, tile.bounds.w), + shape_circle(sphere.center.xy, sphere.radius)); + } + /* Refine using frustum test. If the sphere is small it avoids intersection + * with a neighbor tile. 
*/ + if (isect) { + isect = intersect(tile.frustum, sphere); + } + return isect; +} + +bool intersect(CullingTile tile, Box bbox) +{ + return intersect(tile.frustum, bbox); +} + +bool intersect(CullingTile tile, Pyramid pyramid) +{ + return intersect(tile.frustum, pyramid); +} + +/** \} */ + +void main() +{ + uint word_idx = gl_GlobalInvocationID.x % light_cull_buf.tile_word_len; + uint tile_idx = gl_GlobalInvocationID.x / light_cull_buf.tile_word_len; + uvec2 tile_co = uvec2(tile_idx % light_cull_buf.tile_x_len, + tile_idx / light_cull_buf.tile_x_len); + + if (tile_co.y >= light_cull_buf.tile_y_len) { + return; + } + + /* TODO(fclem): We could stop the tile at the HiZ depth. */ + CullingTile tile = tile_culling_get(tile_co); + + uint l_idx = word_idx * 32u; + uint l_end = min(l_idx + 32u, light_cull_buf.visible_count); + uint word = 0u; + for (; l_idx < l_end; l_idx++) { + LightData light = light_buf[l_idx]; + + /* Culling in view space for precision and simplicity. */ + vec3 vP = transform_point(ViewMatrix, light._position); + vec3 v_right = transform_direction(ViewMatrix, light._right); + vec3 v_up = transform_direction(ViewMatrix, light._up); + vec3 v_back = transform_direction(ViewMatrix, light._back); + float radius = light.influence_radius_max; + + Sphere sphere = shape_sphere(vP, radius); + bool intersect_tile = intersect(tile, sphere); + + switch (light.type) { + case LIGHT_SPOT: + /* Only for < ~170° Cone due to plane extraction precision. */ + if (light.spot_tan < 10.0) { + Pyramid pyramid = shape_pyramid_non_oblique( + vP, + vP - v_back * radius, + v_right * radius * light.spot_tan / light.spot_size_inv.x, + v_up * radius * light.spot_tan / light.spot_size_inv.y); + intersect_tile = intersect_tile && intersect(tile, pyramid); + break; + } + /* Fallthrough to the hemispheric case. 
*/ + case LIGHT_RECT: + case LIGHT_ELLIPSE: + vec3 v000 = vP - v_right * radius - v_up * radius; + vec3 v100 = v000 + v_right * (radius * 2.0); + vec3 v010 = v000 + v_up * (radius * 2.0); + vec3 v001 = v000 - v_back * radius; + Box bbox = shape_box(v000, v100, v010, v001); + intersect_tile = intersect_tile && intersect(tile, bbox); + default: + break; + } + + if (intersect_tile) { + word |= 1u << (l_idx % 32u); + } + } + + out_light_tile_buf[gl_GlobalInvocationID.x] = word; +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl new file mode 100644 index 00000000000..ae20153f26c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_zbin_comp.glsl @@ -0,0 +1,56 @@ + +/** + * Create the Zbins from Z-sorted lights. + * Perform min-max operation in LDS memory for speed. + * For this reason, we only dispatch 1 thread group. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +/* Fits the limit of 32KB. */ +shared uint zbin_max[CULLING_ZBIN_COUNT]; +shared uint zbin_min[CULLING_ZBIN_COUNT]; + +void main() +{ + const uint zbin_iter = CULLING_ZBIN_COUNT / gl_WorkGroupSize.x; + const uint zbin_local = gl_LocalInvocationID.x * zbin_iter; + + uint src_index = gl_GlobalInvocationID.x; + + for (uint i = 0u, l = zbin_local; i < zbin_iter; i++, l++) { + zbin_max[l] = 0x0u; + zbin_min[l] = ~0x0u; + } + barrier(); + + uint light_iter = divide_ceil(light_cull_buf.visible_count, gl_WorkGroupSize.x); + for (uint i = 0u; i < light_iter; i++) { + uint index = i * gl_WorkGroupSize.x + gl_LocalInvocationID.x; + if (index >= light_cull_buf.visible_count) { + continue; + } + vec3 P = light_buf[index]._position; + /* TODO(fclem): Could have better bounds for spot and area lights. 
*/ + float radius = light_buf[index].influence_radius_max; + float z_dist = dot(cameraForward, P) - dot(cameraForward, cameraPos); + int z_min = culling_z_to_zbin( + light_cull_buf.zbin_scale, light_cull_buf.zbin_bias, z_dist + radius); + int z_max = culling_z_to_zbin( + light_cull_buf.zbin_scale, light_cull_buf.zbin_bias, z_dist - radius); + z_min = clamp(z_min, 0, CULLING_ZBIN_COUNT - 1); + z_max = clamp(z_max, 0, CULLING_ZBIN_COUNT - 1); + /* Register to Z bins. */ + for (int z = z_min; z <= z_max; z++) { + atomicMin(zbin_min[z], index); + atomicMax(zbin_max[z], index); + } + } + barrier(); + + /* Write result to zbins buffer. Pack min & max into 1 uint. */ + for (uint i = 0u, l = zbin_local; i < zbin_iter; i++, l++) { + out_zbin_buf[l] = (zbin_max[l] << 16u) | (zbin_min[l] & 0xFFFFu); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl new file mode 100644 index 00000000000..d4abdd43aa4 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl @@ -0,0 +1,129 @@ + +/** + * The resources expected to be defined are: + * - light_buf + * - light_zbin_buf + * - light_cull_buf + * - light_tile_buf + * - shadow_atlas_tx + * - shadow_tilemaps_tx + * - sss_transmittance_tx + * - utility_tx + */ + +#pragma BLENDER_REQUIRE(eevee_light_lib.glsl) +#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl) + +/* TODO(fclem): We could reduce register pressure by only having static branches for sun lights. 
*/ +void light_eval_ex(ClosureDiffuse diffuse, + ClosureReflection reflection, + const bool is_directional, + vec3 P, + vec3 V, + float vP_z, + float thickness, + vec4 ltc_mat, + uint l_idx, + inout vec3 out_diffuse, + inout vec3 out_specular) +{ + LightData light = light_buf[l_idx]; + vec3 L; + float dist; + light_vector_get(light, P, L, dist); + + float visibility = light_attenuation(light, L, dist); + +#if 0 /* TODO(fclem): Shadows */ + if ((light.shadow_id != LIGHT_NO_SHADOW) && (visibility > 0.0)) { + vec3 lL = light_world_to_local(light, -L) * dist; + + float shadow_delta = shadow_delta_get( + shadow_atlas_tx, shadow_tilemaps_tx, light, light.shadow_data, lL, dist, P); + +# ifdef SSS_TRANSMITTANCE + /* Transmittance evaluation first to use initial visibility. */ + if (diffuse.sss_id != 0u && light.diffuse_power > 0.0) { + float delta = max(thickness, shadow_delta); + + vec3 intensity = visibility * light.transmit_power * + light_translucent(sss_transmittance_tx, + is_directional, + light, + diffuse.N, + L, + dist, + diffuse.sss_radius, + delta); + out_diffuse += light.color * intensity; + } +# endif + + visibility *= float(shadow_delta - light.shadow_data.bias <= 0.0); + } +#endif + + if (visibility < 1e-6) { + return; + } + + if (light.diffuse_power > 0.0) { + float intensity = visibility * light.diffuse_power * + light_diffuse(utility_tx, is_directional, light, diffuse.N, V, L, dist); + out_diffuse += light.color * intensity; + } + + if (light.specular_power > 0.0) { + float intensity = visibility * light.specular_power * + light_ltc( + utility_tx, is_directional, light, reflection.N, V, L, dist, ltc_mat); + out_specular += light.color * intensity; + } +} + +void light_eval(ClosureDiffuse diffuse, + ClosureReflection reflection, + vec3 P, + vec3 V, + float vP_z, + float thickness, + inout vec3 out_diffuse, + inout vec3 out_specular) +{ + vec2 uv = vec2(reflection.roughness, safe_sqrt(1.0 - dot(reflection.N, V))); + uv = uv * UTIL_TEX_UV_SCALE + 
UTIL_TEX_UV_BIAS; + vec4 ltc_mat = utility_tx_sample(utility_tx, uv, UTIL_LTC_MAT_LAYER); + + LIGHT_FOREACH_BEGIN_DIRECTIONAL(light_cull_buf, l_idx) + { + light_eval_ex(diffuse, + reflection, + true, + P, + V, + vP_z, + thickness, + ltc_mat, + l_idx, + out_diffuse, + out_specular); + } + LIGHT_FOREACH_END + + vec2 px = gl_FragCoord.xy; + LIGHT_FOREACH_BEGIN_LOCAL(light_cull_buf, light_zbin_buf, light_tile_buf, px, vP_z, l_idx) + { + light_eval_ex(diffuse, + reflection, + false, + P, + V, + vP_z, + thickness, + ltc_mat, + l_idx, + out_diffuse, + out_specular); + } + LIGHT_FOREACH_END +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl new file mode 100644 index 00000000000..22a5f98e6c3 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_iter_lib.glsl @@ -0,0 +1,72 @@ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +uint zbin_mask(uint word_index, uint zbin_min, uint zbin_max) +{ + uint word_start = word_index * 32u; + uint word_end = word_start + 31u; + uint local_min = max(zbin_min, word_start); + uint local_max = min(zbin_max, word_end); + uint mask_width = local_max - local_min + 1; + return bit_field_mask(mask_width, local_min); +} + +int culling_z_to_zbin(float scale, float bias, float z) +{ + return int(z * scale + bias); +} + +/* Waiting to implement extensions support. 
 We need: + * - GL_KHR_shader_subgroup_ballot + * - GL_KHR_shader_subgroup_arithmetic + * or + * - Vulkan 1.1 + */ +#if 1 +# define subgroupMin(a) a +# define subgroupMax(a) a +# define subgroupOr(a) a +# define subgroupBroadcastFirst(a) a +#endif + +#define LIGHT_FOREACH_BEGIN_DIRECTIONAL(_culling, _index) \ + { \ + { \ + for (uint _index = _culling.local_lights_len; _index < _culling.items_count; _index++) { + +#define LIGHT_FOREACH_BEGIN_LOCAL(_culling, _zbins, _words, _pixel, _linearz, _item_index) \ + { \ + uvec2 tile_co = uvec2(_pixel / _culling.tile_size); \ + uint tile_word_offset = (tile_co.x + tile_co.y * _culling.tile_x_len) * \ + _culling.tile_word_len; \ + int zbin_index = culling_z_to_zbin(_culling.zbin_scale, _culling.zbin_bias, _linearz); \ + zbin_index = clamp(zbin_index, 0, CULLING_ZBIN_COUNT - 1); \ + uint zbin_data = _zbins[zbin_index]; \ + uint min_index = zbin_data & 0xFFFFu; \ + uint max_index = zbin_data >> 16u; \ + /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \ + min_index = subgroupBroadcastFirst(subgroupMin(min_index)); \ + max_index = subgroupBroadcastFirst(subgroupMax(max_index)); \ + /* Same as divide by 32 but avoids integer division. */ \ + uint word_min = min_index >> 5u; \ + uint word_max = max_index >> 5u; \ + for (uint word_idx = word_min; word_idx <= word_max; word_idx++) { \ + uint word = _words[tile_word_offset + word_idx]; \ + word &= zbin_mask(word_idx, min_index, max_index); \ + /* Ensure all threads inside a subgroup get the same value to reduce VGPR usage. */ \ + word = subgroupBroadcastFirst(subgroupOr(word)); \ + int bit_index; \ + while ((bit_index = findLSB(word)) != -1) { \ + word &= ~1u << uint(bit_index); \ + uint _item_index = word_idx * 32u + bit_index; + +/* No culling. Iterate over all items. 
*/ +#define LIGHT_FOREACH_BEGIN_LOCAL_NO_CULL(_culling, _item_index) \ + { \ + { \ + for (uint _item_index = 0; _item_index < _culling.visible_count; _item_index++) { + +#define LIGHT_FOREACH_END \ + } \ + } \ + } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl new file mode 100644 index 00000000000..58608f6e1f0 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_lib.glsl @@ -0,0 +1,209 @@ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_ltc_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_iter_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Light Functions + * \{ */ + +void light_vector_get(LightData ld, vec3 P, out vec3 L, out float dist) +{ + if (ld.type == LIGHT_SUN) { + L = ld._back; + dist = 1.0; + } + else { + L = ld._position - P; + dist = inversesqrt(len_squared(L)); + L *= dist; + dist = 1.0 / dist; + } +} + +/* Rotate vector to light's local space. Does not translate. */ +vec3 light_world_to_local(LightData ld, vec3 L) +{ + /* Avoid relying on compiler to optimize this. 
 + * vec3 lL = transpose(mat3(ld.object_mat)) * L; */ + vec3 lL; + lL.x = dot(ld.object_mat[0].xyz, L); + lL.y = dot(ld.object_mat[1].xyz, L); + lL.z = dot(ld.object_mat[2].xyz, L); + return lL; +} + +/* From Frostbite PBR Course + * Distance based attenuation + * http://www.frostbite.com/wp-content/uploads/2014/11/course_notes_moving_frostbite_to_pbr.pdf */ +float light_influence_attenuation(float dist, float inv_sqr_influence) +{ + float factor = sqr(dist) * inv_sqr_influence; + float fac = saturate(1.0 - sqr(factor)); + return sqr(fac); +} + +float light_spot_attenuation(LightData ld, vec3 L) +{ + vec3 lL = light_world_to_local(ld, L); + float ellipse = inversesqrt(1.0 + len_squared(lL.xy * ld.spot_size_inv / lL.z)); + float spotmask = smoothstep(0.0, 1.0, ellipse * ld._spot_mul + ld._spot_bias); + return spotmask; +} + +float light_attenuation(LightData ld, vec3 L, float dist) +{ + float vis = 1.0; + if (ld.type == LIGHT_SPOT) { + vis *= light_spot_attenuation(ld, L); + } + if (ld.type >= LIGHT_SPOT) { + vis *= step(0.0, -dot(L, -ld._back)); + } + if (ld.type != LIGHT_SUN) { +#ifdef VOLUME_LIGHTING + vis *= light_influence_attenuation(dist, ld.influence_radius_invsqr_volume); +#else + vis *= light_influence_attenuation(dist, ld.influence_radius_invsqr_surface); +#endif + } + return vis; +} + +/* Cheaper alternative than evaluating the LTC. + * The result needs to be multiplied by BSDF or Phase Function. */ +float light_point_light(LightData ld, const bool is_directional, vec3 L, float dist) +{ + if (is_directional) { + return 1.0; + } + /** + * Using "Point Light Attenuation Without Singularity" from Cem Yuksel + * http://www.cemyuksel.com/research/pointlightattenuation/pointlightattenuation.pdf + * http://www.cemyuksel.com/research/pointlightattenuation/ + **/ + float d_sqr = sqr(dist); + float r_sqr = ld.radius_squared; + /* Using a reformulation that has better numerical precision. 
*/ + float power = 2.0 / (d_sqr + r_sqr + dist * sqrt(d_sqr + r_sqr)); + + if (is_area_light(ld.type)) { + /* Modulate by light plane orientation / solid angle. */ + power *= saturate(dot(ld._back, L)); + } + return power; +} + +float light_diffuse(sampler2DArray utility_tx, + const bool is_directional, + LightData ld, + vec3 N, + vec3 V, + vec3 L, + float dist) +{ + if (is_directional || !is_area_light(ld.type)) { + float radius = ld._radius / dist; + return ltc_evaluate_disk_simple(utility_tx, radius, dot(N, L)); + } + else if (ld.type == LIGHT_RECT) { + vec3 corners[4]; + corners[0] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y; + corners[1] = ld._right * ld._area_size_x + ld._up * ld._area_size_y; + corners[2] = -corners[0]; + corners[3] = -corners[1]; + + corners[0] = normalize(L * dist + corners[0]); + corners[1] = normalize(L * dist + corners[1]); + corners[2] = normalize(L * dist + corners[2]); + corners[3] = normalize(L * dist + corners[3]); + + return ltc_evaluate_quad(utility_tx, corners, N); + } + else /* (ld.type == LIGHT_ELLIPSE) */ { + vec3 points[3]; + points[0] = ld._right * -ld._area_size_x + ld._up * -ld._area_size_y; + points[1] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y; + points[2] = -points[0]; + + points[0] += L * dist; + points[1] += L * dist; + points[2] += L * dist; + + return ltc_evaluate_disk(utility_tx, N, V, mat3(1.0), points); + } +} + +float light_ltc(sampler2DArray utility_tx, + const bool is_directional, + LightData ld, + vec3 N, + vec3 V, + vec3 L, + float dist, + vec4 ltc_mat) +{ + if (is_directional || ld.type != LIGHT_RECT) { + vec3 Px = ld._right; + vec3 Py = ld._up; + + if (is_directional || !is_area_light(ld.type)) { + make_orthonormal_basis(L, Px, Py); + } + + vec3 points[3]; + points[0] = Px * -ld._area_size_x + Py * -ld._area_size_y; + points[1] = Px * ld._area_size_x + Py * -ld._area_size_y; + points[2] = -points[0]; + + points[0] += L * dist; + points[1] += L * dist; + points[2] += L * dist; 
+ + return ltc_evaluate_disk(utility_tx, N, V, ltc_matrix(ltc_mat), points); + } + else { + vec3 corners[4]; + corners[0] = ld._right * ld._area_size_x + ld._up * -ld._area_size_y; + corners[1] = ld._right * ld._area_size_x + ld._up * ld._area_size_y; + corners[2] = -corners[0]; + corners[3] = -corners[1]; + + corners[0] += L * dist; + corners[1] += L * dist; + corners[2] += L * dist; + corners[3] += L * dist; + + ltc_transform_quad(N, V, ltc_matrix(ltc_mat), corners); + + return ltc_evaluate_quad(utility_tx, corners, vec3(0.0, 0.0, 1.0)); + } +} + +vec3 light_translucent(sampler1D transmittance_tx, + const bool is_directional, + LightData ld, + vec3 N, + vec3 L, + float dist, + vec3 sss_radius, + float delta) +{ + /* TODO(fclem): We should compute the power at the entry point. */ + /* NOTE(fclem): we compute the light attenuation using the light vector but the transmittance + * using the shadow depth delta. */ + float power = light_point_light(ld, is_directional, L, dist); + /* Do not add more energy on front faces. Also apply lambertian BSDF. */ + power *= max(0.0, dot(-N, L)) * M_1_PI; + + sss_radius *= SSS_TRANSMIT_LUT_RADIUS; + vec3 channels_co = saturate(delta / sss_radius) * SSS_TRANSMIT_LUT_SCALE + SSS_TRANSMIT_LUT_BIAS; + + vec3 translucency; + translucency.x = (sss_radius.x > 0.0) ? texture(transmittance_tx, channels_co.x).r : 0.0; + translucency.y = (sss_radius.y > 0.0) ? texture(transmittance_tx, channels_co.y).r : 0.0; + translucency.z = (sss_radius.z > 0.0) ? texture(transmittance_tx, channels_co.z).r : 0.0; + return translucency * power; +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl new file mode 100644 index 00000000000..57e92b0b9b4 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_ltc_lib.glsl @@ -0,0 +1,299 @@ + +/** + * Adapted from : + * Real-Time Polygonal-Light Shading with Linearly Transformed Cosines. 
+ * Eric Heitz, Jonathan Dupuy, Stephen Hill and David Neubelt. + * ACM Transactions on Graphics (Proceedings of ACM SIGGRAPH 2016) 35(4), 2016. + * Project page: https://eheitzresearch.wordpress.com/415-2/ + */ + +/* Diffuse *clipped* sphere integral. */ +float ltc_diffuse_sphere_integral(sampler2DArray utility_tx, float avg_dir_z, float form_factor) +{ +#if 1 + /* use tabulated horizon-clipped sphere */ + vec2 uv = vec2(avg_dir_z * 0.5 + 0.5, form_factor); + uv = uv * UTIL_TEX_UV_SCALE + UTIL_TEX_UV_BIAS; + + return texture(utility_tx, vec3(uv, UTIL_DISK_INTEGRAL_LAYER))[UTIL_DISK_INTEGRAL_COMP]; +#else + /* Cheap approximation. Less smooth and have energy issues. */ + return max((form_factor * form_factor + avg_dir_z) / (form_factor + 1.0), 0.0); +#endif +} + +/** + * An extended version of the implementation from + * "How to solve a cubic equation, revisited" + * http://momentsingraphics.de/?p=105 + */ +vec3 ltc_solve_cubic(vec4 coefs) +{ + /* Normalize the polynomial */ + coefs.xyz /= coefs.w; + /* Divide middle coefficients by three */ + coefs.yz /= 3.0; + + float A = coefs.w; + float B = coefs.z; + float C = coefs.y; + float D = coefs.x; + + /* Compute the Hessian and the discriminant */ + vec3 delta = vec3(-coefs.zy * coefs.zz + coefs.yx, dot(vec2(coefs.z, -coefs.y), coefs.xy)); + + /* Discriminant */ + float discr = dot(vec2(4.0 * delta.x, -delta.y), delta.zy); + + /* Clamping avoid NaN output on some platform. 
(see T67060) */ + float sqrt_discr = sqrt(clamp(discr, 0.0, FLT_MAX)); + + vec2 xlc, xsc; + + /* Algorithm A */ + { + float A_a = 1.0; + float C_a = delta.x; + float D_a = -2.0 * B * delta.x + delta.y; + + /* Take the cubic root of a normalized complex number */ + float theta = atan(sqrt_discr, -D_a) / 3.0; + + float _2_sqrt_C_a = 2.0 * sqrt(-C_a); + float x_1a = _2_sqrt_C_a * cos(theta); + float x_3a = _2_sqrt_C_a * cos(theta + (2.0 / 3.0) * M_PI); + + float xl; + if ((x_1a + x_3a) > 2.0 * B) { + xl = x_1a; + } + else { + xl = x_3a; + } + + xlc = vec2(xl - B, A); + } + + /* Algorithm D */ + { + float A_d = D; + float C_d = delta.z; + float D_d = -D * delta.y + 2.0 * C * delta.z; + + /* Take the cubic root of a normalized complex number */ + float theta = atan(D * sqrt_discr, -D_d) / 3.0; + + float _2_sqrt_C_d = 2.0 * sqrt(-C_d); + float x_1d = _2_sqrt_C_d * cos(theta); + float x_3d = _2_sqrt_C_d * cos(theta + (2.0 / 3.0) * M_PI); + + float xs; + if (x_1d + x_3d < 2.0 * C) { + xs = x_1d; + } + else { + xs = x_3d; + } + + xsc = vec2(-D, xs + C); + } + + float E = xlc.y * xsc.y; + float F = -xlc.x * xsc.y - xlc.y * xsc.x; + float G = xlc.x * xsc.x; + + vec2 xmc = vec2(C * F - B * G, -B * F + C * E); + + vec3 root = vec3(xsc.x / xsc.y, xmc.x / xmc.y, xlc.x / xlc.y); + + if (root.x < root.y && root.x < root.z) { + root.xyz = root.yxz; + } + else if (root.z < root.x && root.z < root.y) { + root.xyz = root.xzy; + } + + return root; +} + +/* from Real-Time Area Lighting: a Journey from Research to Production + * Stephen Hill and Eric Heitz */ +vec3 ltc_edge_integral_vec(vec3 v1, vec3 v2) +{ + float x = dot(v1, v2); + float y = abs(x); + + float a = 0.8543985 + (0.4965155 + 0.0145206 * y) * y; + float b = 3.4175940 + (4.1616724 + y) * y; + float v = a / b; + + float theta_sintheta = (x > 0.0) ? v : 0.5 * inversesqrt(max(1.0 - x * x, 1e-7)) - v; + + return cross(v1, v2) * theta_sintheta; +} + +mat3 ltc_matrix(vec4 lut) +{ + /* Load inverse matrix. 
*/ + return mat3(vec3(lut.x, 0, lut.y), vec3(0, 1, 0), vec3(lut.z, 0, lut.w)); +} + +void ltc_transform_quad(vec3 N, vec3 V, mat3 Minv, inout vec3 corners[4]) +{ + /* Avoid dot(N, V) == 1 in ortho mode, leading T1 normalize to fail. */ + V = normalize(V + 1e-8); + + /* Construct orthonormal basis around N. */ + vec3 T1, T2; + T1 = normalize(V - N * dot(N, V)); + T2 = cross(N, T1); + + /* Rotate area light in (T1, T2, R) basis. */ + Minv = Minv * transpose(mat3(T1, T2, N)); + + /* Apply LTC inverse matrix. */ + corners[0] = normalize(Minv * corners[0]); + corners[1] = normalize(Minv * corners[1]); + corners[2] = normalize(Minv * corners[2]); + corners[3] = normalize(Minv * corners[3]); +} + +/* If corners have already pass through ltc_transform_quad(), + * then N **MUST** be vec3(0.0, 0.0, 1.0), corresponding to the Up axis of the shading basis. */ +float ltc_evaluate_quad(sampler2DArray utility_tx, vec3 corners[4], vec3 N) +{ + /* Approximation using a sphere of the same solid angle than the quad. + * Finding the clipped sphere diffuse integral is easier than clipping the quad. */ + vec3 avg_dir; + avg_dir = ltc_edge_integral_vec(corners[0], corners[1]); + avg_dir += ltc_edge_integral_vec(corners[1], corners[2]); + avg_dir += ltc_edge_integral_vec(corners[2], corners[3]); + avg_dir += ltc_edge_integral_vec(corners[3], corners[0]); + + float form_factor = length(avg_dir); + float avg_dir_z = dot(N, avg_dir / form_factor); + return form_factor * ltc_diffuse_sphere_integral(utility_tx, avg_dir_z, form_factor); +} + +/* If disk does not need to be transformed and is already front facing. 
*/ +float ltc_evaluate_disk_simple(sampler2DArray utility_tx, float disk_radius, float NL) +{ + float r_sqr = disk_radius * disk_radius; + float one_r_sqr = 1.0 + r_sqr; + float form_factor = r_sqr * inversesqrt(one_r_sqr * one_r_sqr); + return form_factor * ltc_diffuse_sphere_integral(utility_tx, NL, form_factor); +} + +/* disk_points are WS vectors from the shading point to the disk "bounding domain" */ +float ltc_evaluate_disk(sampler2DArray utility_tx, vec3 N, vec3 V, mat3 Minv, vec3 disk_points[3]) +{ + /* Avoid dot(N, V) == 1 in ortho mode, leading T1 normalize to fail. */ + V = normalize(V + 1e-8); + + /* construct orthonormal basis around N */ + vec3 T1, T2; + T1 = normalize(V - N * dot(V, N)); + T2 = cross(N, T1); + + /* rotate area light in (T1, T2, R) basis */ + mat3 R = transpose(mat3(T1, T2, N)); + + /* Intermediate step: init ellipse. */ + vec3 L_[3]; + L_[0] = mul(R, disk_points[0]); + L_[1] = mul(R, disk_points[1]); + L_[2] = mul(R, disk_points[2]); + + vec3 C = 0.5 * (L_[0] + L_[2]); + vec3 V1 = 0.5 * (L_[1] - L_[2]); + vec3 V2 = 0.5 * (L_[1] - L_[0]); + + /* Transform ellipse by Minv. */ + C = Minv * C; + V1 = Minv * V1; + V2 = Minv * V2; + + /* Compute eigenvectors of new ellipse. */ + + float d11 = dot(V1, V1); + float d22 = dot(V2, V2); + float d12 = dot(V1, V2); + float a, b; /* Eigenvalues */ + const float threshold = 0.0007; /* Can be adjusted. Fix artifacts. 
*/ + if (abs(d12) / sqrt(d11 * d22) > threshold) { + float tr = d11 + d22; + float det = -d12 * d12 + d11 * d22; + + /* use sqrt matrix to solve for eigenvalues */ + det = sqrt(det); + float u = 0.5 * sqrt(tr - 2.0 * det); + float v = 0.5 * sqrt(tr + 2.0 * det); + float e_max = (u + v); + float e_min = (u - v); + e_max *= e_max; + e_min *= e_min; + + vec3 V1_, V2_; + if (d11 > d22) { + V1_ = d12 * V1 + (e_max - d11) * V2; + V2_ = d12 * V1 + (e_min - d11) * V2; + } + else { + V1_ = d12 * V2 + (e_max - d22) * V1; + V2_ = d12 * V2 + (e_min - d22) * V1; + } + + a = 1.0 / e_max; + b = 1.0 / e_min; + V1 = normalize(V1_); + V2 = normalize(V2_); + } + else { + a = 1.0 / d11; + b = 1.0 / d22; + V1 *= sqrt(a); + V2 *= sqrt(b); + } + + /* Now find front facing ellipse with same solid angle. */ + + vec3 V3 = normalize(cross(V1, V2)); + if (dot(C, V3) < 0.0) { + V3 *= -1.0; + } + + float L = dot(V3, C); + float inv_L = 1.0 / L; + float x0 = dot(V1, C) * inv_L; + float y0 = dot(V2, C) * inv_L; + + float L_sqr = L * L; + a *= L_sqr; + b *= L_sqr; + + float t = 1.0 + x0 * x0; + float c0 = a * b; + float c1 = c0 * (t + y0 * y0) - a - b; + float c2 = (1.0 - a * t) - b * (1.0 + y0 * y0); + float c3 = 1.0; + + vec3 roots = ltc_solve_cubic(vec4(c0, c1, c2, c3)); + float e1 = roots.x; + float e2 = roots.y; + float e3 = roots.z; + + vec3 avg_dir = vec3(a * x0 / (a - e2), b * y0 / (b - e2), 1.0); + + mat3 rotate = mat3(V1, V2, V3); + + avg_dir = rotate * avg_dir; + avg_dir = normalize(avg_dir); + + /* L1, L2 are the extends of the front facing ellipse. */ + float L1 = sqrt(-e2 / e3); + float L2 = sqrt(-e2 / e1); + + /* Find the sphere and compute lighting. 
*/ + float form_factor = max(0.0, L1 * L2 * inversesqrt((1.0 + L1 * L1) * (1.0 + L2 * L2))); + return form_factor * ltc_diffuse_sphere_integral(utility_tx, avg_dir.z, form_factor); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl new file mode 100644 index 00000000000..07139ea6a09 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl @@ -0,0 +1,115 @@ + +/** + * Dilate motion vector tiles until we covered maximum velocity. + * Outputs the largest intersecting motion vector in the neighborhood. + */ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_motion_blur_lib.glsl) + +#define DEBUG_BYPASS_DILATION 0 + +struct MotionRect { + ivec2 bottom_left; + ivec2 extent; +}; + +MotionRect compute_motion_rect(ivec2 tile, vec2 motion) +{ +#if DEBUG_BYPASS_DILATION + return MotionRect(tile, ivec2(1)); +#endif + /* Ceil to number of tile touched. */ + ivec2 point1 = tile + ivec2(sign(motion) * ceil(abs(motion) / float(MOTION_BLUR_TILE_SIZE))); + ivec2 point2 = tile; + + ivec2 max_point = max(point1, point2); + ivec2 min_point = min(point1, point2); + /* Clamp to bounds. */ + max_point = min(max_point, imageSize(in_tiles_img) - 1); + min_point = max(min_point, ivec2(0)); + + MotionRect rect; + rect.bottom_left = min_point; + rect.extent = 1 + max_point - min_point; + return rect; +} + +struct MotionLine { + /** Origin of the line. */ + vec2 origin; + /** Normal to the line direction. */ + vec2 normal; +}; + +MotionLine compute_motion_line(ivec2 tile, vec2 motion) +{ + vec2 dir = safe_normalize(motion); + + MotionLine line; + line.origin = vec2(tile); + /* Rotate 90° Counter-Clockwise. 
*/ + line.normal = vec2(-dir.y, dir.x); + return line; +} + +bool is_inside_motion_line(ivec2 tile, MotionLine motion_line) +{ +#if DEBUG_BYPASS_DILATION + return true; +#endif + /* NOTE: Everything here is in tile units. */ + float dist = point_line_projection_dist(vec2(tile), motion_line.origin, motion_line.normal); + /* In order to be conservative and for simplicity, we use the tiles' bounding circles. + * Consider that both the tile and the line have bounding radius of M_SQRT1_2. */ + return abs(dist) < M_SQRT2; +} + +void main() +{ + ivec2 src_tile = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(src_tile, imageSize(in_tiles_img)))) { + return; + } + + vec4 max_motion = imageLoad(in_tiles_img, src_tile); + + MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, src_tile); + MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile); + if (true) { + /* Rectangular area (in tiles) where the motion vector spreads. */ + MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy); + MotionLine motion_line = compute_motion_line(src_tile, max_motion.xy); + /* Do a conservative rasterization of the motion vector line. */ + for (int x = 0; x < motion_rect.extent.x; x++) { + for (int y = 0; y < motion_rect.extent.y; y++) { + ivec2 tile = motion_rect.bottom_left + ivec2(x, y); + if (is_inside_motion_line(tile, motion_line)) { + motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv); + /* FIXME: This is a bit weird, but for some reason, we need to store the same vector in + * the motion next so that weighting in gather pass is better. */ + motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt); + } + } + } + } + + if (true) { + MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile); + /* Rectangular area (in tiles) where the motion vector spreads. 
*/ + MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw); + MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw); + /* Do a conservative rasterization of the motion vector line. */ + for (int x = 0; x < motion_rect.extent.x; x++) { + for (int y = 0; y < motion_rect.extent.y; y++) { + ivec2 tile = motion_rect.bottom_left + ivec2(x, y); + if (is_inside_motion_line(tile, motion_line)) { + motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt); + /* FIXME: This is a bit weird, but for some reason, we need to store the same vector in + * the motion next so that weighting in gather pass is better. */ + motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv); + } + } + } + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl new file mode 100644 index 00000000000..cbbeea25d20 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl @@ -0,0 +1,103 @@ + +/** + * Shader that down-samples the velocity buffer into squared tiles of MB_TILE_DIVISOR pixels wide. + * Outputs the largest motion vector in the tile area. + * Also performs velocity resolve to speed up the convolution pass. + * + * Based on: + * A Fast and Stable Feature-Aware Motion Blur Filter + * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai + * + * Adapted from G3D Innovation Engine implementation. + */ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) + +shared uint payload_prev; +shared uint payload_next; +shared vec2 max_motion_prev; +shared vec2 max_motion_next; + +/* Store velocity magnitude in the MSB and thread id in the LSB. */ +uint pack_payload(vec2 motion, uvec2 thread_id) +{ + /* NOTE: We clamp max velocity to 16k pixels. 
*/ + return (min(uint(ceil(length(motion))), 0xFFFFu) << 16u) | (thread_id.y << 8) | thread_id.x; +} + +/* Return thread index from the payload. */ +uvec2 unpack_payload(uint payload) +{ + return uvec2(payload & 0xFFu, (payload >> 8) & 0xFFu); +} + +void main() +{ + if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) { + payload_prev = 0u; + payload_next = 0u; + } + barrier(); + + uint local_payload_prev = 0u; + uint local_payload_next = 0u; + vec2 local_max_motion_prev; + vec2 local_max_motion_next; + + ivec2 texel = min(ivec2(gl_GlobalInvocationID.xy), imageSize(velocity_img) - 1); + + vec2 render_size = vec2(imageSize(velocity_img).xy); + vec2 uv = (vec2(texel) + 0.5) / render_size; + float depth = texelFetch(depth_tx, texel, 0).r; + vec4 motion = velocity_resolve(imageLoad(velocity_img, texel), uv, depth); +#ifdef FLATTEN_VIEWPORT + /* imageLoad does not perform the swizzling like sampler does. Do it manually. */ + motion = motion.xyxy; +#endif + + /* Store resolved velocity to speedup the gather pass. Out of bounds writes are ignored. + * Unfortunately, we cannot convert to pixel space here since it is also used by TAA and the + * motion blur needs to remain optional. */ + imageStore(velocity_img, ivec2(gl_GlobalInvocationID.xy), velocity_pack(motion)); + /* Clip velocity to viewport bounds (in NDC space). */ + vec2 line_clip; + line_clip.x = line_unit_square_intersect_dist_safe(uv * 2.0 - 1.0, motion.xy * 2.0); + line_clip.y = line_unit_square_intersect_dist_safe(uv * 2.0 - 1.0, -motion.zw * 2.0); + motion *= min(line_clip, vec2(1.0)).xxyy; + /* Convert to pixel space. Note this is only for velocity tiles. */ + motion *= render_size.xyxy; + /* Rescale to shutter relative motion for viewport. 
*/ + motion *= motion_blur_buf.motion_scale.xxyy; + + uint sample_payload_prev = pack_payload(motion.xy, gl_LocalInvocationID.xy); + if (local_payload_prev < sample_payload_prev) { + local_payload_prev = sample_payload_prev; + local_max_motion_prev = motion.xy; + } + + uint sample_payload_next = pack_payload(motion.zw, gl_LocalInvocationID.xy); + if (local_payload_next < sample_payload_next) { + local_payload_next = sample_payload_next; + local_max_motion_next = motion.zw; + } + + /* Compare the local payload with the other threads. */ + atomicMax(payload_prev, local_payload_prev); + atomicMax(payload_next, local_payload_next); + barrier(); + + /* Need to broadcast the result to another thread in order to issue a unique write. */ + if (all(equal(unpack_payload(payload_prev), gl_LocalInvocationID.xy))) { + max_motion_prev = local_max_motion_prev; + } + if (all(equal(unpack_payload(payload_next), gl_LocalInvocationID.xy))) { + max_motion_next = local_max_motion_next; + } + barrier(); + + if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) { + ivec2 tile_co = ivec2(gl_WorkGroupID.xy); + imageStore(out_tiles_img, tile_co, vec4(max_motion_prev, max_motion_next)); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl new file mode 100644 index 00000000000..5249e6637b6 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl @@ -0,0 +1,221 @@ + +/** + * Perform two gather blur in the 2 motion blur directions + * Based on: + * A Fast and Stable Feature-Aware Motion Blur Filter + * by Jean-Philippe Guertin, Morgan McGuire, Derek Nowrouzezahrai + * + * With modification from the presentation: + * Next Generation Post Processing in Call of Duty Advanced Warfare + * by Jorge Jimenez + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma 
BLENDER_REQUIRE(eevee_sampling_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_motion_blur_lib.glsl) + +const int gather_sample_count = 8; + +/* Converts uv velocity into pixel space. Assumes velocity_tx is the same resolution as the + * target post-fx framebuffer. */ +vec4 motion_blur_sample_velocity(sampler2D velocity_tx, vec2 uv) +{ + /* We can load velocity without velocity_resolve() since we resolved during the flatten pass. */ + vec4 velocity = velocity_unpack(texture(velocity_tx, uv)); + return velocity * vec2(textureSize(velocity_tx, 0)).xyxy * motion_blur_buf.motion_scale.xxyy; +} + +vec2 spread_compare(float center_motion_length, float sample_motion_length, float offset_length) +{ + return saturate(vec2(center_motion_length, sample_motion_length) - offset_length + 1.0); +} + +vec2 depth_compare(float center_depth, float sample_depth) +{ + vec2 depth_scale = vec2(-motion_blur_buf.depth_scale, motion_blur_buf.depth_scale); + return saturate(0.5 + depth_scale * (sample_depth - center_depth)); +} + +/* Kill contribution if not going the same direction. */ +float dir_compare(vec2 offset, vec2 sample_motion, float sample_motion_length) +{ + if (sample_motion_length < 0.5) { + return 1.0; + } + return (dot(offset, sample_motion) > 0.0) ? 1.0 : 0.0; +} + +/* Return background (x) and foreground (y) weights. */ +vec2 sample_weights(float center_depth, + float sample_depth, + float center_motion_length, + float sample_motion_length, + float offset_length) +{ + /* Classify foreground/background. */ + vec2 depth_weight = depth_compare(center_depth, sample_depth); + /* Weight if sample is overlapping or under the center pixel. */ + vec2 spread_weight = spread_compare(center_motion_length, sample_motion_length, offset_length); + return depth_weight * spread_weight; +} + +struct Accumulator { + vec4 fg; + vec4 bg; + /** x: Background, y: Foreground, z: dir. 
*/ + vec3 weight; +}; + +void gather_sample(vec2 screen_uv, + float center_depth, + float center_motion_len, + vec2 offset, + float offset_len, + const bool next, + inout Accumulator accum) +{ + vec2 sample_uv = screen_uv - offset * motion_blur_buf.target_size_inv; + vec4 sample_vectors = motion_blur_sample_velocity(velocity_tx, sample_uv); + vec2 sample_motion = (next) ? sample_vectors.zw : sample_vectors.xy; + float sample_motion_len = length(sample_motion); + float sample_depth = texture(depth_tx, sample_uv).r; + vec4 sample_color = textureLod(in_color_tx, sample_uv, 0.0); + + sample_depth = get_view_z_from_depth(sample_depth); + + vec3 weights; + weights.xy = sample_weights( + center_depth, sample_depth, center_motion_len, sample_motion_len, offset_len); + weights.z = dir_compare(offset, sample_motion, sample_motion_len); + weights.xy *= weights.z; + + accum.fg += sample_color * weights.y; + accum.bg += sample_color * weights.x; + accum.weight += weights; +} + +void gather_blur(vec2 screen_uv, + vec2 center_motion, + float center_depth, + vec2 max_motion, + float ofs, + const bool next, + inout Accumulator accum) +{ + float center_motion_len = length(center_motion); + float max_motion_len = length(max_motion); + + /* Tile boundaries randomization can fetch a tile where there is less motion than this pixel. + * Fix this by overriding the max_motion. */ + if (max_motion_len < center_motion_len) { + max_motion_len = center_motion_len; + max_motion = center_motion; + } + + if (max_motion_len < 0.5) { + return; + } + + int i; + float t, inc = 1.0 / float(gather_sample_count); + for (i = 0, t = ofs * inc; i < gather_sample_count; i++, t += inc) { + gather_sample(screen_uv, + center_depth, + center_motion_len, + max_motion * t, + max_motion_len * t, + next, + accum); + } + + if (center_motion_len < 0.5) { + return; + } + + for (i = 0, t = ofs * inc; i < gather_sample_count; i++, t += inc) { + /* Also sample in center motion direction. 
+ * Allow recovering motion where there is conflicting + * motion between foreground and background. */ + gather_sample(screen_uv, + center_depth, + center_motion_len, + center_motion * t, + center_motion_len * t, + next, + accum); + } +} + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + vec2 uv = (vec2(texel) + 0.5) / vec2(textureSize(depth_tx, 0).xy); + + if (!in_texture_range(texel, depth_tx)) { + return; + } + + /* Data of the center pixel of the gather (target). */ + float center_depth = get_view_z_from_depth(texelFetch(depth_tx, texel, 0).r); + vec4 center_motion = motion_blur_sample_velocity(velocity_tx, uv); + + vec4 center_color = textureLod(in_color_tx, uv, 0.0); + + float noise_offset = sampling_rng_1D_get(SAMPLING_TIME); + /** TODO(fclem) Blue noise. */ + vec2 rand = vec2(interlieved_gradient_noise(vec2(gl_GlobalInvocationID.xy), 0, noise_offset), + interlieved_gradient_noise(vec2(gl_GlobalInvocationID.xy), 1, noise_offset)); + + /* Randomize tile boundary to avoid ugly discontinuities. Randomize 1/4th of the tile. + * Note this randomize only in one direction but in practice it's enough. */ + rand.x = rand.x * 2.0 - 1.0; + ivec2 tile = (texel + ivec2(rand.x * float(MOTION_BLUR_TILE_SIZE) * 0.25)) / + MOTION_BLUR_TILE_SIZE; + tile = clamp(tile, ivec2(0), imageSize(in_tiles_img) - 1); + /* NOTE: Tile velocity is already in pixel space and with correct zw sign. */ + vec4 max_motion; + /* Load dilation result from the indirection table. */ + ivec2 tile_prev; + motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, tile, tile_prev); + max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy; + ivec2 tile_next; + motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, tile, tile_next); + max_motion.zw = imageLoad(in_tiles_img, tile_next).zw; + + Accumulator accum; + accum.weight = vec3(0.0, 0.0, 1.0); + accum.bg = vec4(0.0); + accum.fg = vec4(0.0); + /* First linear gather. 
time = [T - delta, T] */ + gather_blur(uv, center_motion.xy, center_depth, max_motion.xy, rand.y, false, accum); + /* Second linear gather. time = [T, T + delta] */ + gather_blur(uv, center_motion.zw, center_depth, max_motion.zw, rand.y, true, accum); + +#if 1 /* Own addition. Not present in reference implementation. */ + /* Avoid division by 0.0. */ + float w = 1.0 / (50.0 * float(gather_sample_count) * 4.0); + accum.bg += center_color * w; + accum.weight.x += w; + /* NOTE: In Jimenez's presentation, they used center sample. + * We use background color as it contains more information for foreground + * elements that have not enough weights. + * Yield better blur in complex motion. */ + center_color = accum.bg / accum.weight.x; +#endif + /* Merge background. */ + accum.fg += accum.bg; + accum.weight.y += accum.weight.x; + /* Balance accumulation for failed samples. + * We replace the missing foreground by the background. */ + float blend_fac = saturate(1.0 - accum.weight.y / accum.weight.z); + vec4 out_color = (accum.fg / accum.weight.z) + center_color * blend_fac; + +#if 0 /* For debugging. */ + out_color.rgb = out_color.ggg; + out_color.rg += max_motion.xy; +#endif + + imageStore(out_color_img, texel, out_color); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl new file mode 100644 index 00000000000..436fd01795a --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_lib.glsl @@ -0,0 +1,48 @@ + + +/* -------------------------------------------------------------------- */ +/** \name Tile indirection packing + * \{ */ + +#define MotionPayload uint + +/* Store velocity magnitude in the MSB to be able to use it with atomicMax operations. */ +MotionPayload motion_blur_tile_indirection_pack_payload(vec2 motion, uvec2 payload) +{ + /* NOTE: Clamp to 16383 pixel velocity. 
After that, it is tile position that determine the tile + * to dilate over. */ + uint velocity = min(uint(ceil(length(motion))), 0x3FFFu); + /* Designed for 512x512 tiles max. */ + return (velocity << 18u) | ((payload.x & 0x1FFu) << 9u) | (payload.y & 0x1FFu); +} + +/* Return thread index. */ +ivec2 motion_blur_tile_indirection_pack_payload(uint data) +{ + return ivec2((data >> 9u) & 0x1FFu, data & 0x1FFu); +} + +uint motion_blur_tile_indirection_index(uint motion_step, uvec2 tile) +{ + uint index = tile.x; + index += tile.y * MOTION_BLUR_MAX_TILE; + index += motion_step * MOTION_BLUR_MAX_TILE * MOTION_BLUR_MAX_TILE; + return index; +} + +#define MOTION_PREV 0u +#define MOTION_NEXT 1u + +#define motion_blur_tile_indirection_store(table_, step_, tile, payload_) \ + if (true) { \ + uint index = motion_blur_tile_indirection_index(step_, tile); \ + atomicMax(table_[index], payload_); \ + } + +#define motion_blur_tile_indirection_load(table_, step_, tile_, result_) \ + if (true) { \ + uint index = motion_blur_tile_indirection_index(step_, tile_); \ + result_ = motion_blur_tile_indirection_pack_payload(table_[index]); \ + } + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl index 0ccf06a9e14..dd047709afd 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl @@ -39,6 +39,8 @@ bool closure_select(float weight, inout float total_weight, inout float r) destination = candidate; \ } +float g_closure_rand; + void closure_weights_reset() { g_diffuse_data.weight = 0.0; @@ -58,18 +60,8 @@ void closure_weights_reset() g_refraction_data.roughness = 0.0; g_refraction_data.ior = 0.0; - /* TEMP */ -#define P(x) ((x + 0.5) / 16.0) - const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)), - vec4(P(12.0), P(4.0), P(14.0), P(6.0)), - vec4(P(3.0), 
P(11.0), P(1.0), P(9.0)), - vec4(P(15.0), P(7.0), P(13.0), P(5.0))); -#undef P #if defined(GPU_FRAGMENT_SHADER) - ivec2 pix = ivec2(gl_FragCoord.xy) % ivec2(4); - g_diffuse_rand = dither_mat4x4[pix.x][pix.y]; - g_reflection_rand = dither_mat4x4[pix.x][pix.y]; - g_refraction_rand = dither_mat4x4[pix.x][pix.y]; + g_diffuse_rand = g_reflection_rand = g_refraction_rand = g_closure_rand; #else g_diffuse_rand = 0.0; g_reflection_rand = 0.0; @@ -245,6 +237,20 @@ float F_eta(float a, float b) } void output_aov(vec4 color, float value, uint hash) { +#if defined(MAT_AOV_SUPPORT) && defined(GPU_FRAGMENT_SHADER) + for (int i = 0; i < AOV_MAX && i < aov_buf.color_len; i++) { + if (aov_buf.hash_color[i] == hash) { + imageStore(aov_color_img, ivec3(gl_FragCoord.xy, i), color); + return; + } + } + for (int i = 0; i < AOV_MAX && i < aov_buf.value_len; i++) { + if (aov_buf.hash_value[i] == hash) { + imageStore(aov_value_img, ivec3(gl_FragCoord.xy, i), vec4(value)); + return; + } + } +#endif } #ifdef EEVEE_MATERIAL_STUBS @@ -255,6 +261,10 @@ void output_aov(vec4 color, float value, uint hash) # define nodetree_thickness() 0.1 #endif +#ifdef GPU_VERTEX_SHADER +# define closure_to_rgba(a) vec4(0.0) +#endif + /* -------------------------------------------------------------------- */ /** \name Fragment Displacement * @@ -359,3 +369,71 @@ vec3 coordinate_incoming(vec3 P) } /** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Volume Attribute post + * + * TODO(@fclem): These implementation details should concern the DRWManager and not be a fix on + * the engine side. But as of now, the engines are responsible for loading the attributes. + * + * \{ */ + +#if defined(MAT_GEOM_VOLUME) + +float attr_load_temperature_post(float attr) +{ + /* Bring the into standard range without having to modify the grid values */ + attr = (attr > 0.01) ? 
(attr * drw_volume.temperature_mul + drw_volume.temperature_bias) : 0.0; + return attr; +} +vec4 attr_load_color_post(vec4 attr) +{ + /* Density is premultiplied for interpolation, divide it out here. */ + attr.rgb *= safe_rcp(attr.a); + attr.rgb *= drw_volume.color_mul.rgb; + attr.a = 1.0; + return attr; +} + +#else /* Noop for any other surface. */ + +float attr_load_temperature_post(float attr) +{ + return attr; +} +vec4 attr_load_color_post(vec4 attr) +{ + return attr; +} + +#endif + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Uniform Attributes + * + * TODO(@fclem): These implementation details should concern the DRWManager and not be a fix on + * the engine side. But as of now, the engines are responsible for loading the attributes. + * + * \{ */ + +vec4 attr_load_uniform(vec4 attr, const uint attr_hash) +{ +#if defined(OBATTR_LIB) + uint index = floatBitsToUint(ObjectAttributeStart); + for (uint i = 0; i < floatBitsToUint(ObjectAttributeLen); i++, index++) { + if (drw_attrs[index].hash_code == attr_hash) { + return vec4(drw_attrs[index].data_x, + drw_attrs[index].data_y, + drw_attrs[index].data_z, + drw_attrs[index].data_w); + } + } + return vec4(0.0); +#else + return attr; +#endif +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl new file mode 100644 index 00000000000..0eea4a5ff33 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_sampling_lib.glsl @@ -0,0 +1,104 @@ + +/** + * Sampling data accessors and random number generators. + * Also contains some sample mapping functions. + **/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +/* -------------------------------------------------------------------- */ +/** \name Sampling data. + * + * Return a random values from Low Discrepancy Sequence in [0..1) range. 
+ * This value is uniform (constant) for the whole scene sample. + * You might want to couple it with a noise function. + * \{ */ + +#ifdef EEVEE_SAMPLING_DATA + +float sampling_rng_1D_get(const eSamplingDimension dimension) +{ + return sampling_buf.dimensions[dimension]; +} + +vec2 sampling_rng_2D_get(const eSamplingDimension dimension) +{ + return vec2(sampling_buf.dimensions[dimension], sampling_buf.dimensions[dimension + 1u]); +} + +vec3 sampling_rng_3D_get(const eSamplingDimension dimension) +{ + return vec3(sampling_buf.dimensions[dimension], + sampling_buf.dimensions[dimension + 1u], + sampling_buf.dimensions[dimension + 2u]); +} + +#endif + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Random Number Generators. + * \{ */ + +/* Interlieved gradient noise by Jorge Jimenez + * http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare + * Seeding found by Epic Game. */ +float interlieved_gradient_noise(vec2 pixel, float seed, float offset) +{ + pixel += seed * (vec2(47, 17) * 0.695); + return fract(offset + 52.9829189 * fract(0.06711056 * pixel.x + 0.00583715 * pixel.y)); +} + +/* From: http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html */ +float van_der_corput_radical_inverse(uint bits) +{ +#if 0 /* Reference */ + bits = (bits << 16u) | (bits >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); +#else + bits = bitfieldReverse(bits); +#endif + /* Same as dividing by 0x100000000. 
*/ + return float(bits) * 2.3283064365386963e-10; +} + +vec2 hammersley_2d(float i, float sample_count) +{ + vec2 rand; + rand.x = i / sample_count; + rand.y = van_der_corput_radical_inverse(uint(i)); + return rand; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Distribution mapping. + * + * Functions mapping input random numbers to sampling shapes (i.e: hemisphere). + * \{ */ + +/* Given 2 random number in [0..1] range, return a random unit disk sample. */ +vec2 sample_disk(vec2 noise) +{ + float angle = noise.x * M_2PI; + return vec2(cos(angle), sin(angle)) * sqrt(noise.y); +} + +/* This transform a 2d random sample (in [0..1] range) to a sample located on a cylinder of the + * same range. This is because the sampling functions expect such a random sample which is + * normally precomputed. */ +vec3 sample_cylinder(vec2 rand) +{ + float theta = rand.x; + float phi = (rand.y - 0.5) * M_2PI; + float cos_phi = cos(phi); + float sin_phi = sqrt(1.0 - sqr(cos_phi)) * sign(phi); + return vec3(theta, cos_phi, sin_phi); +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl index 7ddf941df7c..183aac1e546 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_depth_frag.glsl @@ -6,10 +6,23 @@ #pragma BLENDER_REQUIRE(common_view_lib.glsl) #pragma BLENDER_REQUIRE(common_math_lib.glsl) #pragma BLENDER_REQUIRE(common_hair_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl) #pragma BLENDER_REQUIRE(eevee_nodetree_lib.glsl) #pragma BLENDER_REQUIRE(eevee_surf_lib.glsl) #pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) +vec4 closure_to_rgba(Closure cl) +{ + vec4 out_color; + out_color.rgb = g_emission; + out_color.a = saturate(1.0 - avg(g_transmittance)); + + /* Reset for the next closure tree. 
*/ + closure_weights_reset(); + + return out_color; +} + /* From the paper "Hashed Alpha Testing" by Chris Wyman and Morgan McGuire. */ float hash(vec2 a) { @@ -61,8 +74,7 @@ void main() nodetree_surface(); - // float noise_offset = sampling_rng_1D_get(sampling_buf, SAMPLING_TRANSPARENCY); - float noise_offset = 0.5; + float noise_offset = sampling_rng_1D_get(SAMPLING_TRANSPARENCY); float random_threshold = hashed_alpha_threshold(1.0, noise_offset, g_data.P); float transparency = avg(g_transmittance); @@ -72,14 +84,7 @@ void main() #endif #ifdef MAT_VELOCITY - vec4 out_velocity_camera; /* TODO(fclem): Panoramic cameras. */ - velocity_camera(interp.P + motion.prev, - interp.P, - interp.P - motion.next, - out_velocity_camera, - out_velocity_view); - - /* For testing in viewport. */ - out_velocity_view.zw = vec2(0.0); + out_velocity = velocity_surface(interp.P + motion.prev, interp.P, interp.P + motion.next); + out_velocity = velocity_pack(out_velocity); #endif } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl index 143e88dbe68..ab29067763d 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_forward_frag.glsl @@ -5,35 +5,36 @@ * This is used by alpha blended materials and materials using Shader to RGB nodes. 
**/ -#pragma BLENDER_REQUIRE(common_view_lib.glsl) -#pragma BLENDER_REQUIRE(common_math_lib.glsl) #pragma BLENDER_REQUIRE(common_hair_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_light_eval_lib.glsl) #pragma BLENDER_REQUIRE(eevee_nodetree_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl) #pragma BLENDER_REQUIRE(eevee_surf_lib.glsl) -float spec_light(ClosureReflection ref) -{ - float gloss = saturate(1.0 - ref.roughness); - float shininess = exp2(10.0 * gloss + 1.0); - vec3 N = ref.N; - vec3 L = vec3(0.0, 0.0, 1.0); - vec3 H = normalize(L + cameraVec(g_data.P)); - float spec_angle = saturate(dot(N, H)); - float normalization_factor = shininess * 0.125 + 1.0; - float spec_light = pow(spec_angle, shininess) * saturate(dot(N, L)) * normalization_factor; - return spec_light; -} - vec4 closure_to_rgba(Closure cl) { + vec3 diffuse_light = vec3(0.0); + vec3 reflection_light = vec3(0.0); + vec3 refraction_light = vec3(0.0); + + float vP_z = dot(cameraForward, g_data.P) - dot(cameraForward, cameraPos); + + light_eval(g_diffuse_data, + g_reflection_data, + g_data.P, + cameraVec(g_data.P), + vP_z, + 0.01 /* TODO(fclem) thickness. 
*/, + diffuse_light, + reflection_light); + vec4 out_color; out_color.rgb = g_emission; - out_color.rgb += g_diffuse_data.color * g_diffuse_data.weight * - saturate(g_diffuse_data.N.z * 0.5 + 0.5); - out_color.rgb += g_reflection_data.color * g_reflection_data.weight * - spec_light(g_reflection_data); - out_color.rgb += g_refraction_data.color * g_refraction_data.weight * - saturate(g_refraction_data.N.z * 0.5 + 0.5); + out_color.rgb += g_diffuse_data.color * g_diffuse_data.weight * diffuse_light; + out_color.rgb += g_reflection_data.color * g_reflection_data.weight * reflection_light; + out_color.rgb += g_refraction_data.color * g_refraction_data.weight * refraction_light; out_color.a = saturate(1.0 - avg(g_transmittance)); @@ -47,27 +48,72 @@ void main() { init_globals(); + float noise = utility_tx_fetch(utility_tx, gl_FragCoord.xy, UTIL_BLUE_NOISE_LAYER).r; + g_closure_rand = fract(noise + sampling_rng_1D_get(SAMPLING_CLOSURE)); + fragment_displacement(); nodetree_surface(); g_holdout = saturate(g_holdout); + vec3 diffuse_light = vec3(0.0); + vec3 reflection_light = vec3(0.0); + vec3 refraction_light = vec3(0.0); + + float vP_z = dot(cameraForward, g_data.P) - dot(cameraForward, cameraPos); + + light_eval(g_diffuse_data, + g_reflection_data, + g_data.P, + cameraVec(g_data.P), + vP_z, + 0.01 /* TODO(fclem) thickness. 
*/, + diffuse_light, + reflection_light); + + g_diffuse_data.color *= g_diffuse_data.weight; + g_reflection_data.color *= g_reflection_data.weight; + g_refraction_data.color *= g_refraction_data.weight; + diffuse_light *= step(1e-5, g_diffuse_data.weight); + reflection_light *= step(1e-5, g_reflection_data.weight); + refraction_light *= step(1e-5, g_refraction_data.weight); + out_radiance.rgb = g_emission; - out_radiance.rgb += g_diffuse_data.color * g_diffuse_data.weight * - saturate(g_diffuse_data.N.z * 0.5 + 0.5); - out_radiance.rgb += g_reflection_data.color * g_reflection_data.weight * - spec_light(g_reflection_data); - out_radiance.rgb += g_refraction_data.color * g_refraction_data.weight * - saturate(g_refraction_data.N.z * 0.5 + 0.5); + out_radiance.rgb += g_diffuse_data.color * diffuse_light; + out_radiance.rgb += g_reflection_data.color * reflection_light; + out_radiance.rgb += g_refraction_data.color * refraction_light; out_radiance.a = 0.0; + vec3 specular_light = reflection_light + refraction_light; + vec3 specular_color = g_reflection_data.color + g_refraction_data.color; + + /* TODO(fclem): This feels way too complex for what is it. */ + bool has_any_bsdf_weight = g_diffuse_data.weight != 0.0 || g_reflection_data.weight != 0.0 || + g_refraction_data.weight != 0.0; + vec3 out_normal = has_any_bsdf_weight ? 
vec3(0.0) : g_data.N; + out_normal += g_diffuse_data.N * g_diffuse_data.weight; + out_normal += g_reflection_data.N * g_reflection_data.weight; + out_normal += g_refraction_data.N * g_refraction_data.weight; + out_normal = safe_normalize(out_normal); + +#ifdef MAT_RENDER_PASS_SUPPORT + ivec2 out_texel = ivec2(gl_FragCoord.xy); + imageStore(rp_normal_img, out_texel, vec4(out_normal, 1.0)); + imageStore( + rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_DIFFUSE_LIGHT), vec4(diffuse_light, 1.0)); + imageStore( + rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_SPECULAR_LIGHT), vec4(specular_light, 1.0)); + imageStore(rp_diffuse_color_img, out_texel, vec4(g_diffuse_data.color, 1.0)); + imageStore(rp_specular_color_img, out_texel, vec4(specular_color, 1.0)); + imageStore(rp_emission_img, out_texel, vec4(g_emission, 1.0)); + imageStore(rp_cryptomatte_img, + out_texel, + vec4(cryptomatte_object_buf[resource_id], node_tree.crypto_hash, 0.0)); +#endif + out_radiance.rgb *= 1.0 - g_holdout; out_transmittance.rgb = g_transmittance; out_transmittance.a = saturate(avg(g_transmittance)); - - /* Test */ - out_transmittance.a = 1.0 - out_transmittance.a; - out_radiance.a = 1.0 - out_radiance.a; } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl index 30b48edaa78..6c1fc818f41 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_lib.glsl @@ -40,7 +40,7 @@ void init_globals_curves() /* Shade as a cylinder. */ float cos_theta = interp.curves_time_width / interp.curves_thickness; float sin_theta = sqrt(max(0.0, 1.0 - cos_theta * cos_theta)); - g_data.N = normalize(interp.N * sin_theta + interp.curves_binormal * cos_theta); + g_data.N = g_data.Ni = normalize(interp.N * sin_theta + interp.curves_binormal * cos_theta); /* Costly, but follows cycles per pixel tangent space (not following curve shape). 
*/ vec3 V = cameraVec(g_data.P); @@ -60,13 +60,14 @@ void init_globals_curves() void init_globals_gpencil() { /* Undo backface flip as the gpencil normal is already pointing towards the camera. */ - g_data.N = interp.N; + g_data.N = g_data.Ni = interp.N; } void init_globals() { /* Default values. */ g_data.P = interp.P; + g_data.Ni = interp.N; g_data.N = safe_normalize(interp.N); g_data.Ng = g_data.N; g_data.is_strand = false; @@ -81,6 +82,7 @@ void init_globals() #ifdef GPU_FRAGMENT_SHADER g_data.N = (FrontFacing) ? g_data.N : -g_data.N; + g_data.Ni = (FrontFacing) ? g_data.Ni : -g_data.Ni; g_data.Ng = safe_normalize(cross(dFdx(g_data.P), dFdy(g_data.P))); #endif diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl index ac657afc922..442c2579c84 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl @@ -24,6 +24,20 @@ void main() g_holdout = saturate(g_holdout); + ivec2 out_texel = ivec2(gl_FragCoord.xy); + imageStore(rp_normal_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0)); + imageStore( + rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_DIFFUSE_LIGHT), vec4(0.0, 0.0, 0.0, 1.0)); + imageStore( + rp_light_img, ivec3(out_texel, RENDER_PASS_LAYER_SPECULAR_LIGHT), vec4(0.0, 0.0, 0.0, 1.0)); + imageStore(rp_diffuse_color_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0)); + imageStore(rp_specular_color_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0)); + imageStore(rp_emission_img, out_texel, vec4(0.0, 0.0, 0.0, 1.0)); + imageStore(rp_cryptomatte_img, out_texel, vec4(0.0)); + out_background.rgb = safe_color(g_emission) * (1.0 - g_holdout); out_background.a = saturate(avg(g_transmittance)) * g_holdout; + + /* World opacity. 
*/ + out_background = mix(vec4(0.0, 0.0, 0.0, 1.0), out_background, world_opacity_fade); } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl index 435ae6658c9..8d02609fedc 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_lib.glsl @@ -2,23 +2,38 @@ #pragma BLENDER_REQUIRE(common_view_lib.glsl) #pragma BLENDER_REQUIRE(eevee_camera_lib.glsl) +vec4 velocity_pack(vec4 data) +{ + return data * 0.01; +} + +vec4 velocity_unpack(vec4 data) +{ + return data * 100.0; +} + #ifdef VELOCITY_CAMERA /** * Given a triple of position, compute the previous and next motion vectors. - * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy) + * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy). */ -vec4 velocity_view(vec3 P_prev, vec3 P, vec3 P_next) +vec4 velocity_surface(vec3 P_prv, vec3 P, vec3 P_nxt) { - vec2 prev_uv, curr_uv, next_uv; - - prev_uv = transform_point(ProjectionMatrix, transform_point(camera_prev.viewmat, P_prev)).xy; - curr_uv = transform_point(ViewProjectionMatrix, P).xy; - next_uv = transform_point(ProjectionMatrix, transform_point(camera_next.viewmat, P_next)).xy; - - vec4 motion; - motion.xy = prev_uv - curr_uv; - motion.zw = curr_uv - next_uv; + /* NOTE: We don't use the drw_view.persmat to avoid adding the TAA jitter to the velocity. */ + vec2 prev_uv = project_point(camera_prev.persmat, P_prv).xy; + vec2 curr_uv = project_point(camera_curr.persmat, P).xy; + vec2 next_uv = project_point(camera_next.persmat, P_nxt).xy; + /* Fix issue with perspective division. */ + if (any(isnan(prev_uv))) { + prev_uv = curr_uv; + } + if (any(isnan(next_uv))) { + next_uv = curr_uv; + } + /* NOTE: We output both vectors in the same direction so we can reuse the same vector + * with rgrg swizzle in viewport. 
*/ + vec4 motion = vec4(prev_uv - curr_uv, curr_uv - next_uv); /* Convert NDC velocity to UV velocity */ motion *= 0.5; @@ -26,37 +41,55 @@ vec4 velocity_view(vec3 P_prev, vec3 P, vec3 P_next) } /** - * Given a triple of position, compute the previous and next motion vectors. - * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy) - * \a velocity_camera is the motion in film UV space after camera projection. - * \a velocity_view is the motion in ShadingView UV space. It is different - * from velocity_camera for multi-view rendering. + * Given a view space view vector \a vV, compute the previous and next motion vectors for + * background pixels. + * Returns uv space motion vectors in pairs (motion_prev.xy, motion_next.xy). */ -void velocity_camera(vec3 P_prev, vec3 P, vec3 P_next, out vec4 vel_camera, out vec4 vel_view) +vec4 velocity_background(vec3 vV) { - vec2 prev_uv, curr_uv, next_uv; - prev_uv = camera_uv_from_world(camera_prev, P_prev); - curr_uv = camera_uv_from_world(camera_curr, P); - next_uv = camera_uv_from_world(camera_next, P_next); - - vel_camera.xy = prev_uv - curr_uv; - vel_camera.zw = curr_uv - next_uv; + /* Only transform direction to avoid losing precision. */ + vec3 V = transform_direction(camera_curr.viewinv, vV); + /* NOTE: We don't use the drw_view.winmat to avoid adding the TAA jitter to the velocity. */ + vec2 prev_uv = project_point(camera_prev.winmat, V).xy; + vec2 curr_uv = project_point(camera_curr.winmat, V).xy; + vec2 next_uv = project_point(camera_next.winmat, V).xy; + /* NOTE: We output both vectors in the same direction so we can reuse the same vector + * with rgrg swizzle in viewport. */ + vec4 motion = vec4(prev_uv - curr_uv, curr_uv - next_uv); + /* Convert NDC velocity to UV velocity */ + motion *= 0.5; - if (is_panoramic(camera_curr.type)) { - /* This path is only used if using using panoramic projections. 
Since the views always have - * the same 45° aperture angle, we can safely reuse the projection matrix. */ - prev_uv = transform_point(ProjectionMatrix, transform_point(camera_prev.viewmat, P_prev)).xy; - curr_uv = transform_point(ViewProjectionMatrix, P).xy; - next_uv = transform_point(ProjectionMatrix, transform_point(camera_next.viewmat, P_next)).xy; + return motion; +} - vel_view.xy = prev_uv - curr_uv; - vel_view.zw = curr_uv - next_uv; - /* Convert NDC velocity to UV velocity */ - vel_view *= 0.5; - } - else { - vel_view = vel_camera; +vec4 velocity_resolve(vec4 vector, vec2 uv, float depth) +{ + if (vector.x == VELOCITY_INVALID) { + bool is_background = (depth == 1.0); + if (is_background) { + /* NOTE: Use viewCameraVec to avoid imprecision if camera is far from origin. */ + vec3 vV = viewCameraVec(get_view_space_from_depth(uv, 1.0)); + return velocity_background(vV); + } + else { + /* Static geometry. No translation in world space. */ + vec3 P = get_world_space_from_depth(uv, depth); + return velocity_surface(P, P, P); + } } + return velocity_unpack(vector); +} + +/** + * Load and resolve correct velocity as some pixels might still not have correct + * motion data for performance reasons. + * Returns motion vector in render UV space. + */ +vec4 velocity_resolve(sampler2D vector_tx, ivec2 texel, float depth) +{ + vec2 uv = (vec2(texel) + 0.5) / vec2(textureSize(vector_tx, 0).xy); + vec4 vector = texelFetch(vector_tx, texel, 0); + return velocity_resolve(vector, uv, depth); } #endif diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl deleted file mode 100644 index b68b2eaf117..00000000000 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_velocity_resolve_comp.glsl +++ /dev/null @@ -1,58 +0,0 @@ - -/** - * Fullscreen pass that compute motion vector for static geometry. 
- * Animated geometry has already written correct motion vectors. - */ - -#pragma BLENDER_REQUIRE(common_view_lib.glsl) -#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl) - -#define is_valid_output(img_) (imageSize(img_).x > 1) - -void main() -{ - ivec2 texel = ivec2(gl_GlobalInvocationID.xy); - vec4 motion = imageLoad(velocity_view_img, texel); - - bool pixel_has_valid_motion = (motion.x != VELOCITY_INVALID); - float depth = texelFetch(depth_tx, texel, 0).r; - bool is_background = (depth == 1.0f); - - vec2 uv = vec2(texel) * drw_view.viewport_size_inverse; - vec3 P_next, P_prev, P_curr; - - if (pixel_has_valid_motion) { - /* Animated geometry. View motion already computed during prepass. Convert only to camera. */ - // P_prev = get_world_space_from_depth(uv + motion.xy, 0.5); - // P_curr = get_world_space_from_depth(uv, 0.5); - // P_next = get_world_space_from_depth(uv + motion.zw, 0.5); - return; - } - else if (is_background) { - /* NOTE: Use viewCameraVec to avoid imprecision if camera is far from origin. */ - vec3 vV = viewCameraVec(get_view_space_from_depth(uv, 1.0)); - vec3 V = transform_direction(ViewMatrixInverse, vV); - /* Background has no motion under camera translation. Translate view vector with the camera. */ - /* WATCH(fclem): Might create precision issues. */ - P_next = camera_next.viewinv[3].xyz + V; - P_curr = camera_curr.viewinv[3].xyz + V; - P_prev = camera_prev.viewinv[3].xyz + V; - } - else { - /* Static geometry. No translation in world space. 
*/ - P_curr = get_world_space_from_depth(uv, depth); - P_prev = P_curr; - P_next = P_curr; - } - - vec4 vel_camera, vel_view; - velocity_camera(P_prev, P_curr, P_next, vel_camera, vel_view); - - if (in_texture_range(texel, depth_tx)) { - imageStore(velocity_view_img, texel, vel_view); - - if (is_valid_output(velocity_camera_img)) { - imageStore(velocity_camera_img, texel, vel_camera); - } - } -} diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh new file mode 100644 index 00000000000..b689a7f53a2 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +/* -------------------------------------------------------------------- */ +/** \name Setup + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_bokeh_lut) + .do_static_compilation(true) + .local_group_size(DOF_BOKEH_LUT_SIZE, DOF_BOKEH_LUT_SIZE) + .additional_info("eevee_shared", "draw_view") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .image(0, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_gather_lut_img") + .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_scatter_lut_img") + .image(2, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_resolve_lut_img") + .compute_source("eevee_depth_of_field_bokeh_lut_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_setup) + .do_static_compilation(true) + .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::DEPTH_2D, "depth_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(1, GPU_R16F, Qualifier::WRITE, 
ImageType::FLOAT_2D, "out_coc_img") + .compute_source("eevee_depth_of_field_setup_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_stabilize) + .do_static_compilation(true) + .local_group_size(DOF_STABILIZE_GROUP_SIZE, DOF_STABILIZE_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view", "eevee_velocity_camera") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::FLOAT_2D, "coc_tx") + .sampler(1, ImageType::FLOAT_2D, "color_tx") + .sampler(2, ImageType::FLOAT_2D, "velocity_tx") + .sampler(3, ImageType::FLOAT_2D, "in_history_tx") + .sampler(4, ImageType::DEPTH_2D, "depth_tx") + .push_constant(Type::BOOL, "use_history") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_img") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_history_img") + .compute_source("eevee_depth_of_field_stabilize_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_downsample) + .do_static_compilation(true) + .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::FLOAT_2D, "coc_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .compute_source("eevee_depth_of_field_downsample_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_reduce) + .do_static_compilation(true) + .local_group_size(DOF_REDUCE_GROUP_SIZE, DOF_REDUCE_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::FLOAT_2D, "downsample_tx") + .storage_buf(0, Qualifier::WRITE, "ScatterRect", "scatter_fg_list_buf[]") + .storage_buf(1, Qualifier::WRITE, "ScatterRect", "scatter_bg_list_buf[]") + .storage_buf(2, Qualifier::READ_WRITE, "DrawCommand", "scatter_fg_indirect_buf") + .storage_buf(3, Qualifier::READ_WRITE, 
"DrawCommand", "scatter_bg_indirect_buf") + .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "inout_color_lod0_img") + .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod1_img") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod2_img") + .image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod3_img") + .image(4, GPU_R16F, Qualifier::READ, ImageType::FLOAT_2D, "in_coc_lod0_img") + .image(5, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod1_img") + .image(6, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod2_img") + .image(7, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod3_img") + .compute_source("eevee_depth_of_field_reduce_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Circle-Of-Confusion Tiles + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_flatten) + .do_static_compilation(true) + .local_group_size(DOF_TILES_FLATTEN_GROUP_SIZE, DOF_TILES_FLATTEN_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .sampler(0, ImageType::FLOAT_2D, "coc_tx") + .image(2, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_fg_img") + .image(3, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_bg_img") + .compute_source("eevee_depth_of_field_tiles_flatten_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate) + .additional_info("eevee_shared", "draw_view", "eevee_depth_of_field_tiles_common") + .local_group_size(DOF_TILES_DILATE_GROUP_SIZE, DOF_TILES_DILATE_GROUP_SIZE) + .image(2, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_fg_img") + .image(3, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_bg_img") + .push_constant(Type::INT, "ring_count") + .push_constant(Type::INT, "ring_width_multiplier") + .compute_source("eevee_depth_of_field_tiles_dilate_comp.glsl"); + 
+GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate_minabs) + .do_static_compilation(true) + .define("DILATE_MODE_MIN_MAX", "false") + .additional_info("eevee_depth_of_field_tiles_dilate"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate_minmax) + .do_static_compilation(true) + .define("DILATE_MODE_MIN_MAX", "true") + .additional_info("eevee_depth_of_field_tiles_dilate"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_common) + .image(0, GPU_R11F_G11F_B10F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_fg_img") + .image(1, GPU_R11F_G11F_B10F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_bg_img"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Variations + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_no_lut) + .define("DOF_BOKEH_TEXTURE", "false") + /** + * WORKAROUND(@fclem): This is to keep the code as is for now. The bokeh_lut_tx is referenced + * even if not used after optimization. But we don't want to include it in the create infos. + */ + .define("bokeh_lut_tx", "color_tx"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_lut) + .define("DOF_BOKEH_TEXTURE", "true") + .sampler(5, ImageType::FLOAT_2D, "bokeh_lut_tx"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_background).define("DOF_FOREGROUND_PASS", "false"); +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_foreground).define("DOF_FOREGROUND_PASS", "true"); + +#define EEVEE_DOF_FINAL_VARIATION(name, ...) \ + GPU_SHADER_CREATE_INFO(name).additional_info(__VA_ARGS__).do_static_compilation(true); + +#define EEVEE_DOF_LUT_VARIATIONS(prefix, ...) \ + EEVEE_DOF_FINAL_VARIATION(prefix##_lut, "eevee_depth_of_field_lut", __VA_ARGS__) \ + EEVEE_DOF_FINAL_VARIATION(prefix##_no_lut, "eevee_depth_of_field_no_lut", __VA_ARGS__) + +#define EEVEE_DOF_GROUND_VARIATIONS(name, ...) 
\ + EEVEE_DOF_LUT_VARIATIONS(name##_background, "eevee_depth_of_field_background", __VA_ARGS__) \ + EEVEE_DOF_LUT_VARIATIONS(name##_foreground, "eevee_depth_of_field_foreground", __VA_ARGS__) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gather + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_gather_common) + .additional_info("eevee_shared", + "draw_view", + "eevee_depth_of_field_tiles_common", + "eevee_sampling_data") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .local_group_size(DOF_GATHER_GROUP_SIZE, DOF_GATHER_GROUP_SIZE) + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::FLOAT_2D, "color_bilinear_tx") + .sampler(2, ImageType::FLOAT_2D, "coc_tx") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(3, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_weight_img"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_gather) + .image(4, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_occlusion_img") + .compute_source("eevee_depth_of_field_gather_comp.glsl") + .additional_info("eevee_depth_of_field_gather_common"); + +EEVEE_DOF_GROUND_VARIATIONS(eevee_depth_of_field_gather, "eevee_depth_of_field_gather") + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_hole_fill) + .do_static_compilation(true) + .compute_source("eevee_depth_of_field_hole_fill_comp.glsl") + .additional_info("eevee_depth_of_field_gather_common", "eevee_depth_of_field_no_lut"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_filter) + .do_static_compilation(true) + .local_group_size(DOF_FILTER_GROUP_SIZE, DOF_FILTER_GROUP_SIZE) + .additional_info("eevee_shared") + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::FLOAT_2D, "weight_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_weight_img") + .compute_source("eevee_depth_of_field_filter_comp.glsl"); 
+ +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Scatter + * \{ */ + +GPU_SHADER_INTERFACE_INFO(eevee_depth_of_field_scatter_iface, "interp") + /** Colors, weights, and Circle of confusion radii for the 4 pixels to scatter. */ + .flat(Type::VEC4, "color_and_coc1") + .flat(Type::VEC4, "color_and_coc2") + .flat(Type::VEC4, "color_and_coc3") + .flat(Type::VEC4, "color_and_coc4") + /** Sprite pixel position with origin at sprite center. In pixels. */ + .no_perspective(Type::VEC2, "rect_uv1") + .no_perspective(Type::VEC2, "rect_uv2") + .no_perspective(Type::VEC2, "rect_uv3") + .no_perspective(Type::VEC2, "rect_uv4") + /** Scaling factor for the bokeh distance. */ + .flat(Type::FLOAT, "distance_scale"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_scatter) + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .sampler(0, ImageType::FLOAT_2D, "occlusion_tx") + .sampler(1, ImageType::FLOAT_2D, "bokeh_lut_tx") + .storage_buf(0, Qualifier::READ, "ScatterRect", "scatter_list_buf[]") + .fragment_out(0, Type::VEC4, "out_color") + .push_constant(Type::BOOL, "use_bokeh_lut") + .vertex_out(eevee_depth_of_field_scatter_iface) + .vertex_source("eevee_depth_of_field_scatter_vert.glsl") + .fragment_source("eevee_depth_of_field_scatter_frag.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Resolve + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_resolve) + .define("DOF_RESOLVE_PASS", "true") + .local_group_size(DOF_RESOLVE_GROUP_SIZE, DOF_RESOLVE_GROUP_SIZE) + .additional_info("eevee_shared", + "draw_view", + "eevee_depth_of_field_tiles_common", + "eevee_sampling_data") + .uniform_buf(6, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .sampler(1, ImageType::FLOAT_2D, "color_tx") + .sampler(2, ImageType::FLOAT_2D, "color_bg_tx") + .sampler(3, ImageType::FLOAT_2D, "color_fg_tx") + .sampler(4, 
ImageType::FLOAT_2D, "color_hole_fill_tx") + .sampler(7, ImageType::FLOAT_2D, "weight_bg_tx") + .sampler(8, ImageType::FLOAT_2D, "weight_fg_tx") + .sampler(9, ImageType::FLOAT_2D, "weight_hole_fill_tx") + .sampler(10, ImageType::FLOAT_2D, "stable_color_tx") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .compute_source("eevee_depth_of_field_resolve_comp.glsl"); + +EEVEE_DOF_LUT_VARIATIONS(eevee_depth_of_field_resolve, "eevee_depth_of_field_resolve") + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_film_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_film_info.hh new file mode 100644 index 00000000000..4541f14d96c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_film_info.hh @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(eevee_film) + .uniform_buf(6, "FilmData", "film_buf") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .sampler(1, ImageType::FLOAT_2D, "combined_tx") + .sampler(2, ImageType::FLOAT_2D, "normal_tx") + .sampler(3, ImageType::FLOAT_2D, "vector_tx") + .sampler(4, ImageType::FLOAT_2D_ARRAY, "light_tx") + .sampler(5, ImageType::FLOAT_2D, "diffuse_color_tx") + .sampler(6, ImageType::FLOAT_2D, "specular_color_tx") + .sampler(7, ImageType::FLOAT_2D, "volume_light_tx") + .sampler(8, ImageType::FLOAT_2D, "emission_tx") + .sampler(9, ImageType::FLOAT_2D, "environment_tx") + .sampler(10, ImageType::FLOAT_2D, "shadow_tx") + .sampler(11, ImageType::FLOAT_2D, "ambient_occlusion_tx") + .sampler(12, ImageType::FLOAT_2D_ARRAY, "aov_color_tx") + .sampler(13, ImageType::FLOAT_2D_ARRAY, "aov_value_tx") + /* Color History for TAA needs to be sampler to leverage bilinear sampling. 
*/ + .sampler(14, ImageType::FLOAT_2D, "in_combined_tx") + .sampler(15, ImageType::FLOAT_2D, "cryptomatte_tx") + .image(0, GPU_R32F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "in_weight_img") + .image(1, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D_ARRAY, "out_weight_img") + /* Color History for TAA needs to be sampler to leverage bilinear sampling. */ + //.image(2, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_combined_img") + .image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_combined_img") + .image(4, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "depth_img") + .image(5, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "color_accum_img") + .image(6, GPU_R16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "value_accum_img") + .image(7, GPU_RGBA32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "cryptomatte_img") + .additional_info("eevee_shared") + .additional_info("eevee_velocity_camera") + .additional_info("draw_view"); + +GPU_SHADER_CREATE_INFO(eevee_film_frag) + .do_static_compilation(true) + .fragment_out(0, Type::VEC4, "out_color") + .fragment_source("eevee_film_frag.glsl") + .additional_info("draw_fullscreen", "eevee_film"); + +GPU_SHADER_CREATE_INFO(eevee_film_comp) + .do_static_compilation(true) + .local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE) + .compute_source("eevee_film_comp.glsl") + .additional_info("eevee_film"); + +GPU_SHADER_CREATE_INFO(eevee_film_cryptomatte_post) + .do_static_compilation(true) + .image(0, GPU_RGBA32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "cryptomatte_img") + .image(1, GPU_R32F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "weight_img") + .push_constant(Type::INT, "cryptomatte_layer_len") + .push_constant(Type::INT, "cryptomatte_samples_per_layer") + .local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE) + .compute_source("eevee_film_cryptomatte_post_comp.glsl") + .additional_info("eevee_shared"); diff --git 
a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh new file mode 100644 index 00000000000..5e32631a8f8 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(eevee_hiz_data) + .sampler(15, ImageType::FLOAT_2D, "hiz_tx") + .uniform_buf(5, "HiZData", "hiz_buf"); + +GPU_SHADER_CREATE_INFO(eevee_hiz_update) + .do_static_compilation(true) + .local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE) + .storage_buf(0, Qualifier::READ_WRITE, "uint", "finished_tile_counter") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_0") + .image(1, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_1") + .image(2, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_2") + .image(3, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_3") + .image(4, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_4") + .image(5, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "out_mip_5") + .image(6, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_6") + .image(7, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_7") + .push_constant(Type::BOOL, "update_mip_0") + .compute_source("eevee_hiz_update_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_hiz_debug) + .do_static_compilation(true) + .fragment_out(0, Type::VEC4, "out_debug_color_add", DualBlend::SRC_0) + .fragment_out(0, Type::VEC4, "out_debug_color_mul", DualBlend::SRC_1) + .fragment_source("eevee_hiz_debug_frag.glsl") + .additional_info("eevee_shared", "eevee_hiz_data", "draw_fullscreen"); diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh 
b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh new file mode 100644 index 00000000000..41602426a1d --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +/* -------------------------------------------------------------------- */ +/** \name Shared + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_light_data) + .storage_buf(LIGHT_CULL_BUF_SLOT, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(LIGHT_BUF_SLOT, Qualifier::READ, "LightData", "light_buf[]") + .storage_buf(LIGHT_ZBIN_BUF_SLOT, Qualifier::READ, "uint", "light_zbin_buf[]") + .storage_buf(LIGHT_TILE_BUF_SLOT, Qualifier::READ, "uint", "light_tile_buf[]"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Culling + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_light_culling_select) + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .local_group_size(CULLING_SELECT_GROUP_SIZE) + .storage_buf(0, Qualifier::READ_WRITE, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "in_light_buf[]") + .storage_buf(2, Qualifier::WRITE, "LightData", "out_light_buf[]") + .storage_buf(3, Qualifier::WRITE, "float", "out_zdist_buf[]") + .storage_buf(4, Qualifier::WRITE, "uint", "out_key_buf[]") + .compute_source("eevee_light_culling_select_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_light_culling_sort) + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "in_light_buf[]") + .storage_buf(2, Qualifier::WRITE, "LightData", "out_light_buf[]") + .storage_buf(3, Qualifier::READ, "float", "in_zdist_buf[]") + .storage_buf(4, Qualifier::READ, "uint", 
"in_key_buf[]") + .local_group_size(CULLING_SORT_GROUP_SIZE) + .compute_source("eevee_light_culling_sort_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_light_culling_zbin) + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .local_group_size(CULLING_ZBIN_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]") + .storage_buf(2, Qualifier::WRITE, "uint", "out_zbin_buf[]") + .compute_source("eevee_light_culling_zbin_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_light_culling_tile) + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .local_group_size(CULLING_TILE_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf") + .storage_buf(1, Qualifier::READ, "LightData", "light_buf[]") + .storage_buf(2, Qualifier::WRITE, "uint", "out_light_tile_buf[]") + .compute_source("eevee_light_culling_tile_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_light_culling_debug) + .do_static_compilation(true) + .fragment_out(0, Type::VEC4, "out_debug_color_add", DualBlend::SRC_0) + .fragment_out(0, Type::VEC4, "out_debug_color_mul", DualBlend::SRC_1) + .fragment_source("eevee_light_culling_debug_frag.glsl") + .additional_info( + "eevee_shared", "draw_view", "draw_fullscreen", "eevee_light_data", "eevee_hiz_data"); + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh index a944bea402e..78d52d4b90e 100644 --- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_material_info.hh @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "eevee_defines.hh" #include 
"gpu_shader_create_info.hh" /* -------------------------------------------------------------------- */ @@ -12,8 +13,12 @@ GPU_SHADER_CREATE_INFO(eevee_shared) .typedef_source("eevee_shader_shared.hh"); GPU_SHADER_CREATE_INFO(eevee_sampling_data) + .define("EEVEE_SAMPLING_DATA") .additional_info("eevee_shared") - .uniform_buf(14, "SamplingData", "sampling_buf"); + .storage_buf(6, Qualifier::READ, "SamplingData", "sampling_buf"); + +GPU_SHADER_CREATE_INFO(eevee_utility_texture) + .sampler(RBUFS_UTILITY_TEX_SLOT, ImageType::FLOAT_2D_ARRAY, "utility_tx"); /** \} */ @@ -27,7 +32,7 @@ GPU_SHADER_CREATE_INFO(eevee_geom_mesh) .vertex_in(0, Type::VEC3, "pos") .vertex_in(1, Type::VEC3, "nor") .vertex_source("eevee_geom_mesh_vert.glsl") - .additional_info("draw_mesh", "draw_resource_id_varying", "draw_resource_handle"); + .additional_info("draw_modelmat_new", "draw_resource_id_varying", "draw_view"); GPU_SHADER_CREATE_INFO(eevee_geom_gpencil) .additional_info("eevee_shared") @@ -49,7 +54,7 @@ GPU_SHADER_CREATE_INFO(eevee_geom_world) .define("MAT_GEOM_WORLD") .builtins(BuiltinBits::VERTEX_ID) .vertex_source("eevee_geom_world_vert.glsl") - .additional_info("draw_modelmat", "draw_resource_id_varying", "draw_resource_handle"); + .additional_info("draw_modelmat_new", "draw_resource_id_varying", "draw_view"); /** \} */ @@ -70,6 +75,26 @@ GPU_SHADER_INTERFACE_INFO(eevee_surf_iface, "interp") #define image_out(slot, qualifier, format, name) \ image(slot, format, qualifier, ImageType::FLOAT_2D, name, Frequency::PASS) +#define image_array_out(slot, qualifier, format, name) \ + image(slot, format, qualifier, ImageType::FLOAT_2D_ARRAY, name, Frequency::PASS) + +GPU_SHADER_CREATE_INFO(eevee_aov_out) + .define("MAT_AOV_SUPPORT") + .image_array_out(RBUFS_AOV_COLOR_SLOT, Qualifier::WRITE, GPU_RGBA16F, "aov_color_img") + .image_array_out(RBUFS_AOV_VALUE_SLOT, Qualifier::WRITE, GPU_R16F, "aov_value_img") + .storage_buf(RBUFS_AOV_BUF_SLOT, Qualifier::READ, "AOVsInfoData", "aov_buf"); + 
+GPU_SHADER_CREATE_INFO(eevee_render_pass_out) + .define("MAT_RENDER_PASS_SUPPORT") + .image_out(RBUFS_NORMAL_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_normal_img") + .image_array_out(RBUFS_LIGHT_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_light_img") + .image_out(RBUFS_DIFF_COLOR_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_diffuse_color_img") + .image_out(RBUFS_SPEC_COLOR_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_specular_color_img") + .image_out(RBUFS_EMISSION_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_emission_img"); + +GPU_SHADER_CREATE_INFO(eevee_cryptomatte_out) + .storage_buf(7, Qualifier::READ, "vec2", "cryptomatte_object_buf[]", Frequency::PASS) + .image_out(7, Qualifier::WRITE, GPU_RGBA32F, "rp_cryptomatte_img"); GPU_SHADER_CREATE_INFO(eevee_surf_deferred) .vertex_out(eevee_surf_iface) @@ -85,44 +110,51 @@ GPU_SHADER_CREATE_INFO(eevee_surf_deferred) // .image_out(3, Qualifier::WRITE, GPU_R11F_G11F_B10F, "gbuff_reflection_color") // .image_out(4, Qualifier::WRITE, GPU_RGBA16F, "gbuff_reflection_normal") // .image_out(5, Qualifier::WRITE, GPU_R11F_G11F_B10F, "gbuff_emission") - /* Renderpasses. */ + /* Render-passes. */ // .image_out(6, Qualifier::READ_WRITE, GPU_RGBA16F, "rpass_volume_light") /* TODO: AOVs maybe? */ .fragment_source("eevee_surf_deferred_frag.glsl") - // .additional_info("eevee_sampling_data", "eevee_utility_texture") + // .additional_info("eevee_aov_out", "eevee_sampling_data", "eevee_utility_texture") ; -#undef image_out - GPU_SHADER_CREATE_INFO(eevee_surf_forward) - .auto_resource_location(true) .vertex_out(eevee_surf_iface) + /* Early fragment test is needed for render passes support for forward surfaces. */ + /* NOTE: This removes the possibility of using gl_FragDepth. 
*/ + .early_fragment_test(true) .fragment_out(0, Type::VEC4, "out_radiance", DualBlend::SRC_0) .fragment_out(0, Type::VEC4, "out_transmittance", DualBlend::SRC_1) .fragment_source("eevee_surf_forward_frag.glsl") - // .additional_info("eevee_sampling_data", - // "eevee_lightprobe_data", - /* Optionally added depending on the material. */ - // "eevee_raytrace_data", - // "eevee_transmittance_data", - // "eevee_utility_texture", - // "eevee_light_data", - // "eevee_shadow_data" - // ) - ; + .additional_info("eevee_cryptomatte_out", + "eevee_light_data", + "eevee_utility_texture", + "eevee_sampling_data" + // "eevee_lightprobe_data", + // "eevee_shadow_data" + /* Optionally added depending on the material. */ + // "eevee_raytrace_data", + // "eevee_transmittance_data", + // "eevee_aov_out", + // "eevee_render_pass_out", + ); GPU_SHADER_CREATE_INFO(eevee_surf_depth) .vertex_out(eevee_surf_iface) .fragment_source("eevee_surf_depth_frag.glsl") - // .additional_info("eevee_sampling_data", "eevee_utility_texture") - ; + .additional_info("eevee_sampling_data", "eevee_utility_texture"); GPU_SHADER_CREATE_INFO(eevee_surf_world) .vertex_out(eevee_surf_iface) + .push_constant(Type::FLOAT, "world_opacity_fade") .fragment_out(0, Type::VEC4, "out_background") .fragment_source("eevee_surf_world_frag.glsl") - // .additional_info("eevee_utility_texture") - ; + .additional_info("eevee_aov_out", + "eevee_cryptomatte_out", + "eevee_render_pass_out", + "eevee_utility_texture"); + +#undef image_out +#undef image_array_out /** \} */ @@ -161,10 +193,7 @@ GPU_SHADER_CREATE_INFO(eevee_volume_deferred) GPU_SHADER_CREATE_INFO(eevee_material_stub).define("EEVEE_MATERIAL_STUBS"); # define EEVEE_MAT_FINAL_VARIATION(name, ...) \ - GPU_SHADER_CREATE_INFO(name) \ - .additional_info(__VA_ARGS__) \ - .auto_resource_location(true) \ - .do_static_compilation(true); + GPU_SHADER_CREATE_INFO(name).additional_info(__VA_ARGS__).do_static_compilation(true); # define EEVEE_MAT_GEOM_VARIATIONS(prefix, ...) 
\ EEVEE_MAT_FINAL_VARIATION(prefix##_world, "eevee_geom_world", __VA_ARGS__) \ diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh new file mode 100644 index 00000000000..ec302ec6770 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten) + .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view", "eevee_velocity_camera") + .uniform_buf(6, "MotionBlurData", "motion_blur_buf") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_img") + .compute_source("eevee_motion_blur_flatten_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten_viewport) + .do_static_compilation(true) + .define("FLATTEN_VIEWPORT") + .image(0, GPU_RG16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "velocity_img") + .additional_info("eevee_motion_blur_tiles_flatten"); + +GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten_render) + .do_static_compilation(true) + .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "velocity_img") + .additional_info("eevee_motion_blur_tiles_flatten"); + +GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_dilate) + .do_static_compilation(true) + .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE) + .additional_info("eevee_shared") + /* NOTE: See MotionBlurTileIndirection. 
*/ + .storage_buf(0, Qualifier::READ_WRITE, "uint", "tile_indirection_buf[]") + .image(1, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_img") + .compute_source("eevee_motion_blur_dilate_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_motion_blur_gather) + .do_static_compilation(true) + .local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view", "eevee_sampling_data") + .uniform_buf(6, "MotionBlurData", "motion_blur_buf") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .sampler(1, ImageType::FLOAT_2D, "velocity_tx") + .sampler(2, ImageType::FLOAT_2D, "in_color_tx") + /* NOTE: See MotionBlurTileIndirection. */ + .storage_buf(0, Qualifier::READ, "uint", "tile_indirection_buf[]") + .image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_img") + .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .compute_source("eevee_motion_blur_gather_comp.glsl"); diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh index a5f16363466..0a1c2721c61 100644 --- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh @@ -1,4 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "eevee_defines.hh" #include "gpu_shader_create_info.hh" /* -------------------------------------------------------------------- */ @@ -16,40 +18,22 @@ GPU_SHADER_INTERFACE_INFO(eevee_velocity_surface_iface, "motion") GPU_SHADER_CREATE_INFO(eevee_velocity_camera) .define("VELOCITY_CAMERA") - .uniform_buf(1, "CameraData", "camera_prev") - .uniform_buf(2, "CameraData", "camera_curr") - .uniform_buf(3, "CameraData", "camera_next"); + .uniform_buf(VELOCITY_CAMERA_PREV_BUF, "CameraData", "camera_prev") + .uniform_buf(VELOCITY_CAMERA_CURR_BUF, "CameraData", "camera_curr") + 
.uniform_buf(VELOCITY_CAMERA_NEXT_BUF, "CameraData", "camera_next"); GPU_SHADER_CREATE_INFO(eevee_velocity_geom) .define("MAT_VELOCITY") - .auto_resource_location(true) - .storage_buf(4, Qualifier::READ, "mat4", "velocity_obj_prev_buf[]", Frequency::PASS) - .storage_buf(5, Qualifier::READ, "mat4", "velocity_obj_next_buf[]", Frequency::PASS) - .storage_buf(6, Qualifier::READ, "vec4", "velocity_geo_prev_buf[]", Frequency::PASS) - .storage_buf(7, Qualifier::READ, "vec4", "velocity_geo_next_buf[]", Frequency::PASS) - .storage_buf( - 7, Qualifier::READ, "VelocityIndex", "velocity_indirection_buf[]", Frequency::PASS) + .storage_buf(VELOCITY_OBJ_PREV_BUF_SLOT, Qualifier::READ, "mat4", "velocity_obj_prev_buf[]") + .storage_buf(VELOCITY_OBJ_NEXT_BUF_SLOT, Qualifier::READ, "mat4", "velocity_obj_next_buf[]") + .storage_buf(VELOCITY_GEO_PREV_BUF_SLOT, Qualifier::READ, "vec4", "velocity_geo_prev_buf[]") + .storage_buf(VELOCITY_GEO_NEXT_BUF_SLOT, Qualifier::READ, "vec4", "velocity_geo_next_buf[]") + .storage_buf(VELOCITY_INDIRECTION_BUF_SLOT, + Qualifier::READ, + "VelocityIndex", + "velocity_indirection_buf[]") .vertex_out(eevee_velocity_surface_iface) - .fragment_out(0, Type::VEC4, "out_velocity_view") + .fragment_out(0, Type::VEC4, "out_velocity") .additional_info("eevee_velocity_camera"); /** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Velocity Resolve - * - * Computes velocity for static objects. - * Also converts motion to camera space (as opposed to view space) if needed. 
- * \{ */ - -GPU_SHADER_CREATE_INFO(eevee_velocity_resolve) - .do_static_compilation(true) - .local_group_size(8, 8) - .sampler(0, ImageType::DEPTH_2D, "depth_tx") - .image(0, GPU_RG16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "velocity_view_img") - .image(1, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "velocity_camera_img") - .additional_info("eevee_shared") - .compute_source("eevee_velocity_resolve_comp.glsl") - .additional_info("draw_view", "eevee_velocity_camera"); - -/** \} */ diff --git a/source/blender/draw/engines/external/external_engine.c b/source/blender/draw/engines/external/external_engine.c index b9c09e2bc4f..3f047d8de68 100644 --- a/source/blender/draw/engines/external/external_engine.c +++ b/source/blender/draw/engines/external/external_engine.c @@ -236,7 +236,11 @@ static void external_draw_scene_do_v3d(void *vedata) RegionView3D *rv3d = draw_ctx->rv3d; ARegion *region = draw_ctx->region; - DRW_state_reset_ex(DRW_STATE_DEFAULT & ~DRW_STATE_DEPTH_LESS_EQUAL); + DRW_state_reset_ex(DRW_STATE_WRITE_COLOR); + + /* The external engine can use the OpenGL rendering API directly, so make sure the state is + * already applied. */ + GPU_apply_state(); /* Create render engine. */ if (!rv3d->render_engine) { @@ -332,6 +336,12 @@ static void external_draw_scene_do_image(void *UNUSED(vedata)) BLI_assert(re != NULL); BLI_assert(engine != NULL); + DRW_state_reset_ex(DRW_STATE_WRITE_COLOR); + + /* The external engine can use the OpenGL rendering API directly, so make sure the state is + * already applied. 
*/ + GPU_apply_state(); + const DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get(); /* Clear the depth buffer to the value used by the background overlay so that the overlay is not diff --git a/source/blender/draw/engines/gpencil/gpencil_draw_data.c b/source/blender/draw/engines/gpencil/gpencil_draw_data.c index 65ddb80ad55..e54ac99a888 100644 --- a/source/blender/draw/engines/gpencil/gpencil_draw_data.c +++ b/source/blender/draw/engines/gpencil/gpencil_draw_data.c @@ -460,7 +460,7 @@ GPENCIL_ViewLayerData *GPENCIL_view_layer_data_ensure(void) GPENCIL_ViewLayerData **vldata = (GPENCIL_ViewLayerData **)DRW_view_layer_engine_data_ensure( &draw_engine_gpencil_type, gpencil_view_layer_data_free); - /* NOTE(&fclem): Putting this stuff in viewlayer means it is shared by all viewports. + /* NOTE(@fclem): Putting this stuff in view-layer means it is shared by all viewports. * For now it is ok, but in the future, it could become a problem if we implement * the caching system. */ if (*vldata == NULL) { diff --git a/source/blender/draw/engines/gpencil/gpencil_engine.c b/source/blender/draw/engines/gpencil/gpencil_engine.c index 4f520e61936..42c396a0d43 100644 --- a/source/blender/draw/engines/gpencil/gpencil_engine.c +++ b/source/blender/draw/engines/gpencil/gpencil_engine.c @@ -799,7 +799,7 @@ static void gpencil_draw_mask(GPENCIL_Data *vedata, GPENCIL_tObject *ob, GPENCIL } GPENCIL_tLayer *mask_layer = gpencil_layer_cache_get(ob, i); - /* When filtering by viewlayer, the mask could be null and must be ignored. */ + /* When filtering by view-layer, the mask could be null and must be ignored. 
*/ if (mask_layer == NULL) { continue; } diff --git a/source/blender/draw/engines/gpencil/gpencil_engine.h b/source/blender/draw/engines/gpencil/gpencil_engine.h index 332c7f67c64..2f9d20b3902 100644 --- a/source/blender/draw/engines/gpencil/gpencil_engine.h +++ b/source/blender/draw/engines/gpencil/gpencil_engine.h @@ -19,6 +19,8 @@ extern "C" { #endif +#define GP_LIGHT + #include "gpencil_defines.h" #include "gpencil_shader_shared.h" diff --git a/source/blender/draw/engines/gpencil/gpencil_shader_shared.h b/source/blender/draw/engines/gpencil/gpencil_shader_shared.h index 50ff7e7efc7..4c621e955b9 100644 --- a/source/blender/draw/engines/gpencil/gpencil_shader_shared.h +++ b/source/blender/draw/engines/gpencil/gpencil_shader_shared.h @@ -7,7 +7,9 @@ typedef struct gpMaterial gpMaterial; typedef struct gpLight gpLight; typedef enum gpMaterialFlag gpMaterialFlag; +# ifdef GP_LIGHT typedef enum gpLightType gpLightType; +# endif # endif #endif @@ -75,8 +77,9 @@ struct gpMaterial { }; BLI_STATIC_ASSERT_ALIGN(gpMaterial, 16) +#ifdef GP_LIGHT struct gpLight { -#ifndef GPU_SHADER +# ifndef GPU_SHADER float3 color; gpLightType type; float3 right; @@ -87,7 +90,7 @@ struct gpLight { float _pad0; float3 position; float _pad1; -#else +# else /* Some drivers are completely messing the alignment or the fetches here. * We are forced to pack these into vec4 otherwise we only get 0.0 as value. */ /* NOTE(@fclem): This was the case on MacOS OpenGL implementation. 
@@ -97,17 +100,18 @@ struct gpLight { float4 packed2; float4 packed3; float4 packed4; -# define _color packed0.xyz -# define _type packed0.w -# define _right packed1.xyz -# define _spot_size packed1.w -# define _up packed2.xyz -# define _spot_blend packed2.w -# define _forward packed3.xyz -# define _position packed4.xyz -#endif +# define _color packed0.xyz +# define _type packed0.w +# define _right packed1.xyz +# define _spot_size packed1.w +# define _up packed2.xyz +# define _spot_blend packed2.w +# define _forward packed3.xyz +# define _position packed4.xyz +# endif }; BLI_STATIC_ASSERT_ALIGN(gpLight, 16) +#endif #ifndef GPU_SHADER # undef gpMaterialFlag diff --git a/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl b/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl index 75bd3d30d68..6671c16aa0b 100644 --- a/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl +++ b/source/blender/draw/engines/gpencil/shaders/gpencil_common_lib.glsl @@ -344,7 +344,7 @@ float stroke_thickness_modulate(float thickness) } else { /* World space point size. 
*/ - thickness *= thicknessWorldScale * ProjectionMatrix[1][1] * sizeViewport.y; + thickness *= thicknessWorldScale * drw_view.winmat[1][1] * sizeViewport.y; } return thickness; } diff --git a/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl b/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl index e162c5bf45e..2fca8b69183 100644 --- a/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl +++ b/source/blender/draw/engines/gpencil/shaders/gpencil_depth_merge_vert.glsl @@ -5,5 +5,5 @@ void main() int v = gl_VertexID % 3; float x = -1.0 + float((v & 1) << 2); float y = -1.0 + float((v & 2) << 1); - gl_Position = ViewProjectionMatrix * (model_matrix * vec4(x, y, 0.0, 1.0)); + gl_Position = drw_view.persmat * (model_matrix * vec4(x, y, 0.0, 1.0)); } diff --git a/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl b/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl index af8aec85598..b0ee059cb9d 100644 --- a/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl +++ b/source/blender/draw/engines/gpencil/shaders/gpencil_vert.glsl @@ -32,7 +32,7 @@ void main() vec3 vert_N; gpMaterial gp_mat = materials[ma1.x + gpMaterialOffset]; - gpMaterialFlag gp_flag = floatBitsToInt(gp_mat._flag); + gpMaterialFlag gp_flag = floatBitsToUint(gp_mat._flag); gl_Position = gpencil_vertex(ma, ma1, @@ -125,7 +125,7 @@ void main() gpencil_color_output(fill_col, fcol_decode, 1.0, gp_mat._fill_texture_mix); gp_interp.mat_flag = gp_flag & GP_FILL_FLAGS; - gp_interp.mat_flag |= uint(ma1.x) << GPENCIl_MATID_SHIFT; + gp_interp.mat_flag |= uint(ma1.x + gpMaterialOffset) << GPENCIl_MATID_SHIFT; gp_interp.uv = mat2(gp_mat.fill_uv_rot_scale.xy, gp_mat.fill_uv_rot_scale.zw) * uv1.xy + gp_mat._fill_uv_offset; diff --git a/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh b/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh index 3b4de704c00..1db98d13c4a 100644 --- 
a/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh +++ b/source/blender/draw/engines/gpencil/shaders/infos/gpencil_info.hh @@ -20,8 +20,8 @@ GPU_SHADER_INTERFACE_INFO(gpencil_geometry_iface, "gp_interp") GPU_SHADER_CREATE_INFO(gpencil_geometry) .do_static_compilation(true) + .define("GP_LIGHT") .typedef_source("gpencil_defines.h") - .typedef_source("gpencil_shader_shared.h") .sampler(0, ImageType::FLOAT_2D, "gpFillTexture") .sampler(1, ImageType::FLOAT_2D, "gpStrokeTexture") .sampler(2, ImageType::DEPTH_2D, "gpSceneDepthTexture") diff --git a/source/blender/draw/engines/overlay/overlay_antialiasing.c b/source/blender/draw/engines/overlay/overlay_antialiasing.c index 27ee479cf36..780915b7fc4 100644 --- a/source/blender/draw/engines/overlay/overlay_antialiasing.c +++ b/source/blender/draw/engines/overlay/overlay_antialiasing.c @@ -52,7 +52,7 @@ void OVERLAY_antialiasing_init(OVERLAY_Data *vedata) OVERLAY_PrivateData *pd = vedata->stl->pd; DefaultTextureList *dtxl = DRW_viewport_texture_list_get(); - /* Small texture which will have very small impact on rendertime. */ + /* Small texture which will have very small impact on render-time. 
*/ if (txl->dummy_depth_tx == NULL) { const float pixel[1] = {1.0f}; txl->dummy_depth_tx = DRW_texture_create_2d(1, 1, GPU_DEPTH_COMPONENT24, 0, pixel); diff --git a/source/blender/draw/engines/overlay/overlay_armature.c b/source/blender/draw/engines/overlay/overlay_armature.c index ea0c2f287a6..df5ee6a18c0 100644 --- a/source/blender/draw/engines/overlay/overlay_armature.c +++ b/source/blender/draw/engines/overlay/overlay_armature.c @@ -2102,7 +2102,7 @@ static void pchan_culling_calc_bsphere(const Object *ob, { float min[3], max[3]; INIT_MINMAX(min, max); - BKE_pchan_minmax(ob, pchan, min, max); + BKE_pchan_minmax(ob, pchan, true, min, max); mid_v3_v3v3(r_bsphere->center, min, max); r_bsphere->radius = len_v3v3(min, r_bsphere->center); } @@ -2220,7 +2220,7 @@ static void draw_armature_edit(ArmatureDrawContext *ctx) const bool show_text = DRW_state_show_text(); const Object *ob_orig = DEG_get_original_object(ob); - /* FIXME(campbell): We should be able to use the CoW object, + /* FIXME(@campbellbarton): We should be able to use the CoW object, * however the active bone isn't updated. Long term solution is an 'EditArmature' struct. * for now we can draw from the original armature. See: T66773. */ // bArmature *arm = ob->data; diff --git a/source/blender/draw/engines/overlay/overlay_edit_text.c b/source/blender/draw/engines/overlay/overlay_edit_text.c index dfef5b3c241..bd8720042f1 100644 --- a/source/blender/draw/engines/overlay/overlay_edit_text.c +++ b/source/blender/draw/engines/overlay/overlay_edit_text.c @@ -7,6 +7,8 @@ #include "DRW_render.h" +#include "UI_resources.h" + #include "BKE_vfont.h" #include "DNA_curve_types.h" @@ -38,17 +40,24 @@ void OVERLAY_edit_text_cache_init(OVERLAY_Data *vedata) DRW_shgroup_uniform_vec4_copy(grp, "color", G_draw.block.color_wire); } { + /* Cursor (text caret). 
*/ state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ALPHA; - DRW_PASS_CREATE(psl->edit_text_overlay_ps, state | pd->clipping_state); - + DRW_PASS_CREATE(psl->edit_text_cursor_ps, state | pd->clipping_state); sh = OVERLAY_shader_uniform_color(); - pd->edit_text_overlay_grp = grp = DRW_shgroup_create(sh, psl->edit_text_overlay_ps); + pd->edit_text_cursor_grp = grp = DRW_shgroup_create(sh, psl->edit_text_cursor_ps); + DRW_shgroup_uniform_vec4(grp, "color", pd->edit_text.cursor_color, 1); - DRW_shgroup_uniform_vec4(grp, "color", pd->edit_text.overlay_color, 1); + /* Selection boxes. */ + state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ALPHA; + DRW_PASS_CREATE(psl->edit_text_selection_ps, state | pd->clipping_state); + sh = OVERLAY_shader_uniform_color(); + pd->edit_text_selection_grp = grp = DRW_shgroup_create(sh, psl->edit_text_selection_ps); + DRW_shgroup_uniform_vec4(grp, "color", pd->edit_text.selection_color, 1); - state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_MUL | DRW_STATE_DEPTH_GREATER_EQUAL | + /* Highlight text within selection boxes. */ + state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ALPHA | DRW_STATE_DEPTH_GREATER_EQUAL | pd->clipping_state; - DRW_PASS_INSTANCE_CREATE(psl->edit_text_darken_ps, psl->edit_text_overlay_ps, state); + DRW_PASS_INSTANCE_CREATE(psl->edit_text_highlight_ps, psl->edit_text_selection_ps, state); } { /* Create view which will render everything (hopefully) behind the text geometry. 
*/ @@ -112,7 +121,7 @@ static void edit_text_cache_populate_select(OVERLAY_Data *vedata, Object *ob) v2_quad_corners_to_mat4(box, final_mat); mul_m4_m4m4(final_mat, ob->obmat, final_mat); - DRW_shgroup_call_obmat(pd->edit_text_overlay_grp, geom, final_mat); + DRW_shgroup_call_obmat(pd->edit_text_selection_grp, geom, final_mat); } } @@ -128,7 +137,7 @@ static void edit_text_cache_populate_cursor(OVERLAY_Data *vedata, Object *ob) mul_m4_m4m4(mat, ob->obmat, mat); struct GPUBatch *geom = DRW_cache_quad_get(); - DRW_shgroup_call_obmat(pd->edit_text_overlay_grp, geom, mat); + DRW_shgroup_call_obmat(pd->edit_text_cursor_grp, geom, mat); } static void edit_text_cache_populate_boxes(OVERLAY_Data *vedata, Object *ob) @@ -193,11 +202,18 @@ void OVERLAY_edit_text_draw(OVERLAY_Data *vedata) DRW_view_set_active(pd->view_edit_text); - /* Alpha blended. */ - copy_v4_fl4(pd->edit_text.overlay_color, 0.8f, 0.8f, 0.8f, 0.5f); - DRW_draw_pass(psl->edit_text_overlay_ps); + /* Selection Boxes. */ + UI_GetThemeColor4fv(TH_WIDGET_TEXT_SELECTION, pd->edit_text.selection_color); + srgb_to_linearrgb_v4(pd->edit_text.selection_color, pd->edit_text.selection_color); + DRW_draw_pass(psl->edit_text_selection_ps); + + /* Highlight text within selection boxes. */ + UI_GetThemeColor4fv(TH_WIDGET_TEXT_HIGHLIGHT, pd->edit_text.selection_color); + srgb_to_linearrgb_v4(pd->edit_text.selection_color, pd->edit_text.selection_color); + DRW_draw_pass(psl->edit_text_highlight_ps); - /* Multiply previous result where depth test fail. */ - copy_v4_fl4(pd->edit_text.overlay_color, 0.0f, 0.0f, 0.0f, 1.0f); - DRW_draw_pass(psl->edit_text_darken_ps); + /* Cursor (text caret). 
*/ + UI_GetThemeColor4fv(TH_WIDGET_TEXT_CURSOR, pd->edit_text.cursor_color); + srgb_to_linearrgb_v4(pd->edit_text.cursor_color, pd->edit_text.cursor_color); + DRW_draw_pass(psl->edit_text_cursor_ps); } diff --git a/source/blender/draw/engines/overlay/overlay_edit_uv.c b/source/blender/draw/engines/overlay/overlay_edit_uv.c index 4cfe9fcea4e..d2737d73333 100644 --- a/source/blender/draw/engines/overlay/overlay_edit_uv.c +++ b/source/blender/draw/engines/overlay/overlay_edit_uv.c @@ -160,7 +160,6 @@ void OVERLAY_edit_uv_init(OVERLAY_Data *vedata) pd->edit_uv.draw_type = sima->dt_uvstretch; BLI_listbase_clear(&pd->edit_uv.totals); pd->edit_uv.total_area_ratio = 0.0f; - pd->edit_uv.total_area_ratio_inv = 0.0f; /* During engine initialization phase the `sima` isn't locked and * we are able to retrieve the needed data. @@ -280,8 +279,6 @@ void OVERLAY_edit_uv_cache_init(OVERLAY_Data *vedata) DRW_shgroup_uniform_block(pd->edit_uv_stretching_grp, "globalsBlock", G_draw.block_ubo); DRW_shgroup_uniform_float( pd->edit_uv_stretching_grp, "totalAreaRatio", &pd->edit_uv.total_area_ratio, 1); - DRW_shgroup_uniform_float( - pd->edit_uv_stretching_grp, "totalAreaRatioInv", &pd->edit_uv.total_area_ratio_inv, 1); } } @@ -411,7 +408,7 @@ void OVERLAY_edit_uv_cache_init(OVERLAY_Data *vedata) draw_ctx->obact->type == OB_MESH) { uint objects_len = 0; Object **objects = BKE_view_layer_array_from_objects_in_mode_unique_data( - draw_ctx->view_layer, NULL, &objects_len, draw_ctx->object_mode); + draw_ctx->scene, draw_ctx->view_layer, NULL, &objects_len, draw_ctx->object_mode); for (uint ob_index = 0; ob_index < objects_len; ob_index++) { Object *object_eval = DEG_get_evaluated_object(draw_ctx->depsgraph, objects[ob_index]); DRW_mesh_batch_cache_validate(object_eval, (Mesh *)object_eval->data); @@ -510,7 +507,6 @@ static void edit_uv_stretching_update_ratios(OVERLAY_Data *vedata) if (total_area > FLT_EPSILON && total_area_uv > FLT_EPSILON) { pd->edit_uv.total_area_ratio = total_area / 
total_area_uv; - pd->edit_uv.total_area_ratio_inv = total_area_uv / total_area; } } BLI_freelistN(&pd->edit_uv.totals); diff --git a/source/blender/draw/engines/overlay/overlay_engine.c b/source/blender/draw/engines/overlay/overlay_engine.c index f8c28394b16..6e2da95e405 100644 --- a/source/blender/draw/engines/overlay/overlay_engine.c +++ b/source/blender/draw/engines/overlay/overlay_engine.c @@ -192,6 +192,8 @@ static void OVERLAY_cache_init(void *vedata) OVERLAY_edit_curves_cache_init(vedata); break; case CTX_MODE_SCULPT_CURVES: + OVERLAY_sculpt_curves_cache_init(vedata); + break; case CTX_MODE_OBJECT: break; default: @@ -308,13 +310,16 @@ static void OVERLAY_cache_populate(void *vedata, Object *ob) (pd->ctx_mode == CTX_MODE_PARTICLE); const bool in_paint_mode = (ob == draw_ctx->obact) && (draw_ctx->object_mode & OB_MODE_ALL_PAINT); + const bool in_sculpt_curve_mode = (ob == draw_ctx->obact) && + (draw_ctx->object_mode & OB_MODE_SCULPT_CURVES); const bool in_sculpt_mode = (ob == draw_ctx->obact) && (ob->sculpt != NULL) && (ob->sculpt->mode_type == OB_MODE_SCULPT); + const bool in_curves_sculpt_mode = (ob == draw_ctx->obact) && + (ob->mode == OB_MODE_SCULPT_CURVES); const bool has_surface = ELEM(ob->type, OB_MESH, OB_CURVES_LEGACY, OB_SURF, - OB_MBALL, OB_FONT, OB_GPENCIL, OB_CURVES, @@ -329,8 +334,8 @@ static void OVERLAY_cache_populate(void *vedata, Object *ob) const bool draw_bones = (pd->overlay.flag & V3D_OVERLAY_HIDE_BONES) == 0; const bool draw_wires = draw_surface && has_surface && (pd->wireframe_mode || !pd->hide_overlays); - const bool draw_outlines = !in_edit_mode && !in_paint_mode && renderable && has_surface && - !instance_parent_in_edit_mode && + const bool draw_outlines = !in_edit_mode && !in_paint_mode && !in_sculpt_curve_mode && + renderable && has_surface && !instance_parent_in_edit_mode && (pd->v3d_flag & V3D_SELECT_OUTLINE) && (ob->base_flag & BASE_SELECTED); const bool draw_bone_selection = (ob->type == OB_MESH) && 
pd->armature.do_pose_fade_geom && @@ -428,6 +433,9 @@ static void OVERLAY_cache_populate(void *vedata, Object *ob) if (in_sculpt_mode) { OVERLAY_sculpt_cache_populate(vedata, ob); } + else if (in_curves_sculpt_mode) { + OVERLAY_sculpt_curves_cache_populate(vedata, ob); + } if (draw_motion_paths) { OVERLAY_motion_path_cache_populate(vedata, ob); @@ -591,6 +599,9 @@ static void OVERLAY_draw_scene(void *vedata) case CTX_MODE_SCULPT: OVERLAY_sculpt_draw(vedata); break; + case CTX_MODE_SCULPT_CURVES: + OVERLAY_sculpt_curves_draw(vedata); + break; case CTX_MODE_EDIT_MESH: case CTX_MODE_POSE: case CTX_MODE_PAINT_WEIGHT: diff --git a/source/blender/draw/engines/overlay/overlay_extra.c b/source/blender/draw/engines/overlay/overlay_extra.c index f875254a685..5d80ab3d0ea 100644 --- a/source/blender/draw/engines/overlay/overlay_extra.c +++ b/source/blender/draw/engines/overlay/overlay_extra.c @@ -1315,9 +1315,14 @@ static void OVERLAY_relationship_lines(OVERLAY_ExtraCallBuffers *cb, if ((curcon->ui_expand_flag & (1 << 0)) && BKE_constraint_targets_get(curcon, &targets)) { bConstraintTarget *ct; + BKE_constraint_custom_object_space_init(cob, curcon); + for (ct = targets.first; ct; ct = ct->next) { /* calculate target's matrix */ - if (cti->get_target_matrix) { + if (ct->flag & CONSTRAINT_TAR_CUSTOM_SPACE) { + copy_m4_m4(ct->matrix, cob->space_obj_world_matrix); + } + else if (cti->get_target_matrix) { cti->get_target_matrix(depsgraph, curcon, cob, ct, DEG_get_ctime(depsgraph)); } else { @@ -1353,7 +1358,7 @@ static void OVERLAY_volume_extra(OVERLAY_ExtraCallBuffers *cb, /* Don't show smoke before simulation starts, this could be made an option in the future. */ const bool draw_velocity = (fds->draw_velocity && fds->fluid && - CFRA >= fds->point_cache[0]->startframe); + scene->r.cfra >= fds->point_cache[0]->startframe); /* Show gridlines only for slices with no interpolation. 
*/ const bool show_gridlines = (fds->show_gridlines && fds->fluid && @@ -1480,11 +1485,12 @@ static void OVERLAY_volume_extra(OVERLAY_ExtraCallBuffers *cb, static void OVERLAY_object_center(OVERLAY_ExtraCallBuffers *cb, Object *ob, OVERLAY_PrivateData *pd, + const Scene *scene, ViewLayer *view_layer) { const bool is_library = ID_REAL_USERS(&ob->id) > 1 || ID_IS_LINKED(ob); - - if (ob == OBACT(view_layer)) { + BKE_view_layer_synced_ensure(scene, view_layer); + if (ob == BKE_view_layer_active_object_get(view_layer)) { DRW_buffer_add_entry(cb->center_active, ob->obmat[3]); } else if (ob->base_flag & BASE_SELECTED) { @@ -1546,8 +1552,9 @@ void OVERLAY_extra_cache_populate(OVERLAY_Data *vedata, Object *ob) (md = BKE_modifiers_findby_type(ob, eModifierType_Fluid)) && (BKE_modifier_is_enabled(scene, md, eModifierMode_Realtime)) && (((FluidModifierData *)md)->domain != NULL) && - (CFRA >= (((FluidModifierData *)md)->domain->cache_frame_start)) && - (CFRA <= (((FluidModifierData *)md)->domain->cache_frame_end)); + (scene->r.cfra >= + (((FluidModifierData *)md)->domain->cache_frame_start)) && + (scene->r.cfra <= (((FluidModifierData *)md)->domain->cache_frame_end)); float *color; int theme_id = DRW_object_wire_theme_get(ob, view_layer, &color); @@ -1567,7 +1574,7 @@ void OVERLAY_extra_cache_populate(OVERLAY_Data *vedata, Object *ob) /* don't show object extras in set's */ if (!from_dupli) { if (draw_obcenters) { - OVERLAY_object_center(cb, ob, pd, view_layer); + OVERLAY_object_center(cb, ob, pd, scene, view_layer); } if (draw_relations) { OVERLAY_relationship_lines(cb, draw_ctx->depsgraph, draw_ctx->scene, ob); diff --git a/source/blender/draw/engines/overlay/overlay_outline.c b/source/blender/draw/engines/overlay/overlay_outline.c index eea9a1a1bef..f2e2acc98a9 100644 --- a/source/blender/draw/engines/overlay/overlay_outline.c +++ b/source/blender/draw/engines/overlay/overlay_outline.c @@ -133,6 +133,10 @@ void OVERLAY_outline_cache_init(OVERLAY_Data *vedata) 
pd->outlines_gpencil_grp = grp = DRW_shgroup_create(sh_gpencil, psl->outlines_prepass_ps); DRW_shgroup_uniform_bool_copy(grp, "isTransform", (G.moving & G_TRANSFORM_OBJ) != 0); DRW_shgroup_uniform_float_copy(grp, "gpStrokeIndexOffset", 0.0); + + GPUShader *sh_curves = OVERLAY_shader_outline_prepass_curves(); + pd->outlines_curves_grp = grp = DRW_shgroup_create(sh_curves, psl->outlines_prepass_ps); + DRW_shgroup_uniform_bool_copy(grp, "isTransform", (G.moving & G_TRANSFORM_OBJ) != 0); } /* outlines_prepass_ps is still needed for selection of probes. */ @@ -267,6 +271,12 @@ static void OVERLAY_outline_volume(OVERLAY_PrivateData *pd, Object *ob) DRW_shgroup_call(shgroup, geom, ob); } +static void OVERLAY_outline_curves(OVERLAY_PrivateData *pd, Object *ob) +{ + DRWShadingGroup *shgroup = pd->outlines_curves_grp; + DRW_shgroup_curves_create_sub(ob, shgroup, NULL); +} + void OVERLAY_outline_cache_populate(OVERLAY_Data *vedata, Object *ob, OVERLAY_DupliData *dupli, @@ -293,6 +303,11 @@ void OVERLAY_outline_cache_populate(OVERLAY_Data *vedata, return; } + if (ob->type == OB_CURVES) { + OVERLAY_outline_curves(pd, ob); + return; + } + if (ob->type == OB_POINTCLOUD && pd->wireframe_mode) { /* Looks bad in this case. Could be relaxed if we draw a * wireframe of some sort in the future. 
*/ diff --git a/source/blender/draw/engines/overlay/overlay_private.h b/source/blender/draw/engines/overlay/overlay_private.h index 23c20a186a0..0a783c44029 100644 --- a/source/blender/draw/engines/overlay/overlay_private.h +++ b/source/blender/draw/engines/overlay/overlay_private.h @@ -78,8 +78,9 @@ typedef struct OVERLAY_PassList { DRWPass *edit_mesh_analysis_ps; DRWPass *edit_mesh_normals_ps; DRWPass *edit_particle_ps; - DRWPass *edit_text_overlay_ps; - DRWPass *edit_text_darken_ps; + DRWPass *edit_text_cursor_ps; + DRWPass *edit_text_selection_ps; + DRWPass *edit_text_highlight_ps; DRWPass *edit_text_wire_ps[2]; DRWPass *edit_uv_edges_ps; DRWPass *edit_uv_verts_ps; @@ -116,6 +117,7 @@ typedef struct OVERLAY_PassList { DRWPass *particle_ps; DRWPass *pointcloud_ps; DRWPass *sculpt_mask_ps; + DRWPass *sculpt_curves_selection_ps; DRWPass *volume_ps; DRWPass *wireframe_ps; DRWPass *wireframe_xray_ps; @@ -251,7 +253,8 @@ typedef struct OVERLAY_PrivateData { DRWShadingGroup *edit_mesh_analysis_grp; DRWShadingGroup *edit_particle_strand_grp; DRWShadingGroup *edit_particle_point_grp; - DRWShadingGroup *edit_text_overlay_grp; + DRWShadingGroup *edit_text_cursor_grp; + DRWShadingGroup *edit_text_selection_grp; DRWShadingGroup *edit_text_wire_grp[2]; DRWShadingGroup *edit_uv_verts_grp; DRWShadingGroup *edit_uv_edges_grp; @@ -267,6 +270,7 @@ typedef struct OVERLAY_PrivateData { DRWShadingGroup *motion_path_lines_grp; DRWShadingGroup *motion_path_points_grp; DRWShadingGroup *outlines_grp; + DRWShadingGroup *outlines_curves_grp; DRWShadingGroup *outlines_ptcloud_grp; DRWShadingGroup *outlines_gpencil_grp; DRWShadingGroup *paint_depth_grp; @@ -279,6 +283,7 @@ typedef struct OVERLAY_PrivateData { DRWShadingGroup *particle_shapes_grp; DRWShadingGroup *pointcloud_dots_grp; DRWShadingGroup *sculpt_mask_grp; + DRWShadingGroup *sculpt_curves_selection_grp; DRWShadingGroup *volume_selection_surface_grp; DRWShadingGroup *wires_grp[2][2]; /* With and without coloring. 
*/ DRWShadingGroup *wires_all_grp[2][2]; /* With and without coloring. */ @@ -335,7 +340,8 @@ typedef struct OVERLAY_PrivateData { int handle_display; } edit_curve; struct { - float overlay_color[4]; + float cursor_color[4]; + float selection_color[4]; } edit_text; struct { bool do_zbufclip; @@ -381,7 +387,6 @@ typedef struct OVERLAY_PrivateData { eSpaceImage_UVDT_Stretch draw_type; ListBase totals; float total_area_ratio; - float total_area_ratio_inv; /* stencil overlay */ struct Image *stencil_image; @@ -669,6 +674,10 @@ void OVERLAY_sculpt_cache_init(OVERLAY_Data *vedata); void OVERLAY_sculpt_cache_populate(OVERLAY_Data *vedata, Object *ob); void OVERLAY_sculpt_draw(OVERLAY_Data *vedata); +void OVERLAY_sculpt_curves_cache_init(OVERLAY_Data *vedata); +void OVERLAY_sculpt_curves_cache_populate(OVERLAY_Data *vedata, Object *ob); +void OVERLAY_sculpt_curves_draw(OVERLAY_Data *vedata); + void OVERLAY_wireframe_init(OVERLAY_Data *vedata); void OVERLAY_wireframe_cache_init(OVERLAY_Data *vedata); void OVERLAY_wireframe_cache_populate(OVERLAY_Data *vedata, @@ -737,6 +746,7 @@ GPUShader *OVERLAY_shader_motion_path_line(void); GPUShader *OVERLAY_shader_motion_path_vert(void); GPUShader *OVERLAY_shader_uniform_color(void); GPUShader *OVERLAY_shader_outline_prepass(bool use_wire); +GPUShader *OVERLAY_shader_outline_prepass_curves(void); GPUShader *OVERLAY_shader_outline_prepass_gpencil(void); GPUShader *OVERLAY_shader_outline_prepass_pointcloud(void); GPUShader *OVERLAY_shader_extra_grid(void); @@ -750,6 +760,7 @@ GPUShader *OVERLAY_shader_paint_wire(void); GPUShader *OVERLAY_shader_particle_dot(void); GPUShader *OVERLAY_shader_particle_shape(void); GPUShader *OVERLAY_shader_sculpt_mask(void); +GPUShader *OVERLAY_shader_sculpt_curves_selection(void); GPUShader *OVERLAY_shader_volume_velocity(bool use_needle, bool use_mac); GPUShader *OVERLAY_shader_volume_gridlines(bool color_with_flags, bool color_range); GPUShader *OVERLAY_shader_wireframe(bool custom_bias); diff --git 
a/source/blender/draw/engines/overlay/overlay_sculpt_curves.cc b/source/blender/draw/engines/overlay/overlay_sculpt_curves.cc new file mode 100644 index 00000000000..b8021124f27 --- /dev/null +++ b/source/blender/draw/engines/overlay/overlay_sculpt_curves.cc @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw_engine + */ + +#include "DRW_render.h" + +#include "draw_cache_impl.h" +#include "overlay_private.h" + +#include "BKE_curves.hh" + +void OVERLAY_sculpt_curves_cache_init(OVERLAY_Data *vedata) +{ + OVERLAY_PassList *psl = vedata->psl; + OVERLAY_PrivateData *pd = vedata->stl->pd; + + const DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL | DRW_STATE_BLEND_ALPHA; + DRW_PASS_CREATE(psl->sculpt_curves_selection_ps, state | pd->clipping_state); + + GPUShader *sh = OVERLAY_shader_sculpt_curves_selection(); + pd->sculpt_curves_selection_grp = DRW_shgroup_create(sh, psl->sculpt_curves_selection_ps); + DRWShadingGroup *grp = pd->sculpt_curves_selection_grp; + + /* Reuse the same mask opacity from sculpt mode, since it wasn't worth it to add a different + * property yet. */ + DRW_shgroup_uniform_float_copy(grp, "selection_opacity", pd->overlay.sculpt_mode_mask_opacity); +} + +static bool everything_selected(const Curves &curves_id) +{ + if (!(curves_id.flag & CV_SCULPT_SELECTION_ENABLED)) { + /* When the selection is disabled, conceptually everything is selected. 
*/ + return true; + } + const blender::bke::CurvesGeometry &curves = blender::bke::CurvesGeometry::wrap( + curves_id.geometry); + blender::VArray<float> selection; + switch (curves_id.selection_domain) { + case ATTR_DOMAIN_POINT: + selection = curves.selection_point_float(); + break; + case ATTR_DOMAIN_CURVE: + selection = curves.selection_curve_float(); + break; + } + return selection.is_single() && selection.get_internal_single() == 1.0f; +} + +void OVERLAY_sculpt_curves_cache_populate(OVERLAY_Data *vedata, Object *object) +{ + OVERLAY_PrivateData *pd = vedata->stl->pd; + Curves *curves = static_cast<Curves *>(object->data); + + /* As an optimization, return early if everything is selected. */ + if (everything_selected(*curves)) { + return; + } + + /* Retrieve the location of the texture. */ + const char *name = curves->selection_domain == ATTR_DOMAIN_POINT ? ".selection_point_float" : + ".selection_curve_float"; + + bool is_point_domain; + GPUTexture **texture = DRW_curves_texture_for_evaluated_attribute( + curves, name, &is_point_domain); + if (texture == nullptr) { + return; + } + + /* Evaluate curves and their attributes if necessary. */ + DRWShadingGroup *grp = DRW_shgroup_curves_create_sub( + object, pd->sculpt_curves_selection_grp, nullptr); + if (*texture == nullptr) { + return; + } + + DRW_shgroup_uniform_bool_copy(grp, "is_point_domain", is_point_domain); + DRW_shgroup_uniform_texture(grp, "selection_tx", *texture); +} + +void OVERLAY_sculpt_curves_draw(OVERLAY_Data *vedata) +{ + OVERLAY_PassList *psl = vedata->psl; + OVERLAY_PrivateData *pd = vedata->stl->pd; + OVERLAY_FramebufferList *fbl = vedata->fbl; + + if (DRW_state_is_fbo()) { + GPU_framebuffer_bind(pd->painting.in_front ? 
fbl->overlay_in_front_fb : + fbl->overlay_default_fb); + } + + DRW_draw_pass(psl->sculpt_curves_selection_ps); +} diff --git a/source/blender/draw/engines/overlay/overlay_shader.c b/source/blender/draw/engines/overlay/overlay_shader.c index 48146fbddfb..2373363ab9d 100644 --- a/source/blender/draw/engines/overlay/overlay_shader.c +++ b/source/blender/draw/engines/overlay/overlay_shader.c @@ -76,6 +76,7 @@ typedef struct OVERLAY_Shaders { GPUShader *motion_path_line; GPUShader *motion_path_vert; GPUShader *outline_prepass; + GPUShader *outline_prepass_curves; GPUShader *outline_prepass_gpencil; GPUShader *outline_prepass_pointcloud; GPUShader *outline_prepass_wire; @@ -90,6 +91,7 @@ typedef struct OVERLAY_Shaders { GPUShader *particle_shape; GPUShader *pointcloud_dot; GPUShader *sculpt_mask; + GPUShader *sculpt_curves_selection; GPUShader *uniform_color; GPUShader *volume_velocity_needle_sh; GPUShader *volume_velocity_mac_sh; @@ -650,6 +652,18 @@ GPUShader *OVERLAY_shader_outline_prepass(bool use_wire) return use_wire ? sh_data->outline_prepass_wire : sh_data->outline_prepass; } +GPUShader *OVERLAY_shader_outline_prepass_curves() +{ + const DRWContextState *draw_ctx = DRW_context_state_get(); + OVERLAY_Shaders *sh_data = &e_data.sh_data[draw_ctx->sh_cfg]; + if (!sh_data->outline_prepass_curves) { + sh_data->outline_prepass_curves = GPU_shader_create_from_info_name( + draw_ctx->sh_cfg ? 
"overlay_outline_prepass_curves_clipped" : + "overlay_outline_prepass_curves"); + } + return sh_data->outline_prepass_curves; +} + GPUShader *OVERLAY_shader_outline_prepass_gpencil(void) { const DRWContextState *draw_ctx = DRW_context_state_get(); @@ -792,6 +806,18 @@ GPUShader *OVERLAY_shader_sculpt_mask(void) return sh_data->sculpt_mask; } +GPUShader *OVERLAY_shader_sculpt_curves_selection(void) +{ + const DRWContextState *draw_ctx = DRW_context_state_get(); + OVERLAY_Shaders *sh_data = &e_data.sh_data[draw_ctx->sh_cfg]; + if (!sh_data->sculpt_curves_selection) { + sh_data->sculpt_curves_selection = GPU_shader_create_from_info_name( + draw_ctx->sh_cfg == GPU_SHADER_CFG_CLIPPED ? "overlay_sculpt_curves_selection_clipped" : + "overlay_sculpt_curves_selection"); + } + return sh_data->sculpt_curves_selection; +} + struct GPUShader *OVERLAY_shader_uniform_color(void) { const DRWContextState *draw_ctx = DRW_context_state_get(); diff --git a/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh b/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh index 58f96110887..9396a6d3f2f 100644 --- a/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh +++ b/source/blender/draw/engines/overlay/shaders/infos/overlay_edit_mode_info.hh @@ -293,7 +293,6 @@ GPU_SHADER_CREATE_INFO(overlay_edit_uv_stretching_area) .do_static_compilation(true) .vertex_in(1, Type::FLOAT, "ratio") .push_constant(Type::FLOAT, "totalAreaRatio") - .push_constant(Type::FLOAT, "totalAreaRatioInv") .additional_info("overlay_edit_uv_stretching"); GPU_SHADER_CREATE_INFO(overlay_edit_uv_stretching_angle) diff --git a/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh b/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh index 6f6a9c1622d..288fb3b3cbd 100644 --- a/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh +++ 
b/source/blender/draw/engines/overlay/shaders/infos/overlay_outline_info.hh @@ -29,6 +29,16 @@ GPU_SHADER_CREATE_INFO(overlay_outline_prepass_mesh_clipped) GPU_SHADER_INTERFACE_INFO(overlay_outline_prepass_wire_iface, "vert").flat(Type::VEC3, "pos"); +GPU_SHADER_CREATE_INFO(overlay_outline_prepass_curves) + .do_static_compilation(true) + .vertex_source("overlay_outline_prepass_curves_vert.glsl") + .additional_info("draw_hair", "overlay_outline_prepass") + .additional_info("draw_object_infos"); + +GPU_SHADER_CREATE_INFO(overlay_outline_prepass_curves_clipped) + .do_static_compilation(true) + .additional_info("overlay_outline_prepass_curves", "drw_clipped"); + GPU_SHADER_CREATE_INFO(overlay_outline_prepass_wire) .do_static_compilation(true) .define("USE_GEOM") diff --git a/source/blender/draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh b/source/blender/draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh new file mode 100644 index 00000000000..46e3943b293 --- /dev/null +++ b/source/blender/draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_INTERFACE_INFO(overlay_sculpt_curves_selection_iface, "") + .smooth(Type::FLOAT, "mask_weight"); + +GPU_SHADER_CREATE_INFO(overlay_sculpt_curves_selection) + .do_static_compilation(true) + .push_constant(Type::BOOL, "is_point_domain") + .push_constant(Type::FLOAT, "selection_opacity") + .sampler(0, ImageType::FLOAT_BUFFER, "selection_tx") + .vertex_out(overlay_sculpt_curves_selection_iface) + .vertex_source("overlay_sculpt_curves_selection_vert.glsl") + .fragment_source("overlay_sculpt_curves_selection_frag.glsl") + .fragment_out(0, Type::VEC4, "out_color") + .additional_info("draw_hair", "draw_globals"); + +GPU_SHADER_CREATE_INFO(overlay_sculpt_curves_selection_clipped) + .do_static_compilation(true) + .additional_info("overlay_sculpt_curves_selection", "drw_clipped"); 
diff --git a/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl index f28a809fdab..606292bbe83 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_antialiasing_frag.glsl @@ -96,7 +96,7 @@ void main() float dist_raw = texelFetch(lineTex, center_texel, 0).b; float dist = decode_line_dist(dist_raw); - /* TODO: Opti: use textureGather. */ + /* TODO: Optimization: use textureGather. */ vec4 neightbor_col0 = texelFetchOffset(colorTex, center_texel, 0, ivec2(1, 0)); vec4 neightbor_col1 = texelFetchOffset(colorTex, center_texel, 0, ivec2(-1, 0)); vec4 neightbor_col2 = texelFetchOffset(colorTex, center_texel, 0, ivec2(0, 1)); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl index 0a8e279e9b0..ca5a6aff2ca 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_outline_vert.glsl @@ -17,8 +17,8 @@ vec2 compute_dir(vec2 v0, vec2 v1, vec2 v2) mat3 compute_mat(vec4 sphere, vec3 bone_vec, out float z_ofs) { - bool is_persp = (ProjectionMatrix[3][3] == 0.0); - vec3 cam_ray = (is_persp) ? sphere.xyz - ViewMatrixInverse[3].xyz : -ViewMatrixInverse[2].xyz; + bool is_persp = (drw_view.winmat[3][3] == 0.0); + vec3 cam_ray = (is_persp) ? sphere.xyz - drw_view.viewinv[3].xyz : -drw_view.viewinv[2].xyz; /* Sphere center distance from the camera (persp) in world space. 
*/ float cam_dist = length(cam_ray); @@ -88,13 +88,13 @@ vec3 get_outline_point(vec2 pos, void main() { - float dst_head = distance(headSphere.xyz, ViewMatrixInverse[3].xyz); - float dst_tail = distance(tailSphere.xyz, ViewMatrixInverse[3].xyz); - // float dst_head = -dot(headSphere.xyz, ViewMatrix[2].xyz); - // float dst_tail = -dot(tailSphere.xyz, ViewMatrix[2].xyz); + float dst_head = distance(headSphere.xyz, drw_view.viewinv[3].xyz); + float dst_tail = distance(tailSphere.xyz, drw_view.viewinv[3].xyz); + // float dst_head = -dot(headSphere.xyz, drw_view.viewmat[2].xyz); + // float dst_tail = -dot(tailSphere.xyz, drw_view.viewmat[2].xyz); vec4 sph_near, sph_far; - if ((dst_head > dst_tail) && (ProjectionMatrix[3][3] == 0.0)) { + if ((dst_head > dst_tail) && (drw_view.winmat[3][3] == 0.0)) { sph_near = tailSphere; sph_far = headSphere; } @@ -130,7 +130,7 @@ void main() gl_Position = p1; /* compute position from 3 vertex because the change in direction - * can happen very quicky and lead to very thin edges. */ + * can happen very quickly and lead to very thin edges. 
*/ vec2 ss0 = proj(p0); vec2 ss1 = proj(p1); vec2 ss2 = proj(p2); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl index 2dd86a57dfd..4d21ffd96b5 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl @@ -30,7 +30,7 @@ void main() sp = bone_mat * sp.xzy + headSphere.xyz; nor = bone_mat * nor.xzy; - normalView = mat3(ViewMatrix) * nor; + normalView = mat3(drw_view.viewmat) * nor; finalStateColor = stateColor; finalBoneColor = boneColor; @@ -38,5 +38,5 @@ void main() view_clipping_distances(sp); vec4 pos_4d = vec4(sp, 1.0); - gl_Position = ViewProjectionMatrix * pos_4d; + gl_Position = drw_view.persmat * pos_4d; } diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl index 47c5dada708..b485b0a7807 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_geom.glsl @@ -5,7 +5,7 @@ void main(void) { finalColor = vec4(geom_in[0].vColSize.rgb, 1.0); - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); vec3 view_vec = (is_persp) ? 
normalize(geom_in[1].vPos) : vec3(0.0, 0.0, -1.0); vec3 v10 = geom_in[0].vPos - geom_in[1].vPos; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl index 29319b3f7ac..91eb6265192 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl @@ -14,10 +14,10 @@ void main() mat4 model_mat = extract_matrix_packed_data(inst_obmat, state_color, bone_color); vec4 world_pos = model_mat * vec4(pos, 1.0); - vec4 view_pos = ViewMatrix * world_pos; + vec4 view_pos = drw_view.viewmat * world_pos; geom_in.vPos = view_pos.xyz; - geom_in.pPos = ProjectionMatrix * view_pos; + geom_in.pPos = drw_view.winmat * view_pos; geom_in.inverted = int(dot(cross(model_mat[0].xyz, model_mat[1].xyz), model_mat[2].xyz) < 0.0); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl index cdbe8c3d7df..68f7e75673f 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl @@ -25,7 +25,7 @@ void main() finalColor.a = 1.0; vec4 world_pos = model_mat * vec4(pos, 1.0); - gl_Position = ViewProjectionMatrix * world_pos; + gl_Position = drw_view.persmat * world_pos; view_clipping_distances(world_pos.xyz); } diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl index 31369e0c3df..4d79fab718f 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl +++ 
b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl @@ -13,10 +13,10 @@ void main() vec4 bone_color, state_color; mat4 model_mat = extract_matrix_packed_data(inst_obmat, state_color, bone_color); - mat4 model_view_matrix = ViewMatrix * model_mat; + mat4 model_view_matrix = drw_view.viewmat * model_mat; mat4 sphereMatrix = inverse(model_view_matrix); - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); /* This is the local space camera ray (not normalize). * In perspective mode it's also the viewspace position @@ -58,8 +58,8 @@ void main() vec3 cam_pos0 = x_axis * pos.x + y_axis * pos.y + z_axis * z_ofs; vec4 V = model_view_matrix * vec4(cam_pos0, 1.0); - gl_Position = ProjectionMatrix * V; - vec4 center = ProjectionMatrix * vec4(model_view_matrix[3].xyz, 1.0); + gl_Position = drw_view.winmat * V; + vec4 center = drw_view.winmat * vec4(model_view_matrix[3].xyz, 1.0); /* Offset away from the center to avoid overlap with solid shape. */ vec2 ofs_dir = normalize(proj(gl_Position) - proj(center)); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl index e60b6e94492..150701b78df 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_frag.glsl @@ -5,7 +5,7 @@ void main() { const float sphere_radius = 0.05; - bool is_perp = (ProjectionMatrix[3][3] == 0.0); + bool is_perp = (drw_view.winmat[3][3] == 0.0); vec3 ray_ori_view = (is_perp) ? vec3(0.0) : viewPosition.xyz; vec3 ray_dir_view = (is_perp) ? 
viewPosition : vec3(0.0, 0.0, -1.0); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl index abbaad8cd10..3d2dfc018bb 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl @@ -10,10 +10,10 @@ void main() vec4 bone_color, state_color; mat4 model_mat = extract_matrix_packed_data(inst_obmat, state_color, bone_color); - mat4 model_view_matrix = ViewMatrix * model_mat; + mat4 model_view_matrix = drw_view.viewmat * model_mat; sphereMatrix = inverse(model_view_matrix); - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); /* This is the local space camera ray (not normalize). * In perspective mode it's also the viewspace position @@ -65,7 +65,7 @@ void main() vec4 pos_4d = vec4(cam_pos, 1.0); vec4 V = model_view_matrix * pos_4d; - gl_Position = ProjectionMatrix * V; + gl_Position = drw_view.winmat * V; viewPosition = V.xyz; finalStateColor = state_color.xyz; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl index b5edcd2858b..e7917a46312 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_stick_vert.glsl @@ -31,12 +31,12 @@ void main() vec4 boneStart_4d = vec4(boneStart, 1.0); vec4 boneEnd_4d = vec4(boneEnd, 1.0); - vec4 v0 = ViewMatrix * boneStart_4d; - vec4 v1 = ViewMatrix * boneEnd_4d; + vec4 v0 = drw_view.viewmat * boneStart_4d; + vec4 v1 = drw_view.viewmat * boneEnd_4d; /* Clip the bone to the camera origin plane (not the clip plane) * to avoid glitches if one end is behind the camera origin (in persp). 
*/ - float clip_dist = (ProjectionMatrix[3][3] == 0.0) ? + float clip_dist = (drw_view.winmat[3][3] == 0.0) ? -1e-7 : 1e20; /* hardcoded, -1e-8 is giving gliches. */ vec3 bvec = v1.xyz - v0.xyz; @@ -48,8 +48,8 @@ void main() v1.xyz = clip_pt; } - vec4 p0 = ProjectionMatrix * v0; - vec4 p1 = ProjectionMatrix * v1; + vec4 p0 = drw_view.winmat * v0; + vec4 p1 = drw_view.winmat * v1; float h = (is_head) ? p0.w : p1.w; @@ -58,7 +58,7 @@ void main() /* 2D screen aligned pos at the point */ vec2 vpos = pos.x * x_screen_vec + pos.y * y_screen_vec; - vpos *= (ProjectionMatrix[3][3] == 0.0) ? h : 1.0; + vpos *= (drw_view.winmat[3][3] == 0.0) ? h : 1.0; vpos *= (do_wire) ? 1.0 : 0.5; if (finalInnerColor.a > 0.0) { diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl index 6ff8d0665d1..dc5c43f417e 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_normal_vert.glsl @@ -45,7 +45,7 @@ void main() if (gl_VertexID == 0) { if (isConstantScreenSizeNormals) { - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); if (is_persp) { float dist_fac = length(cameraPos - world_pos); float cos_fac = dot(cameraForward, cameraVec(world_pos)); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl index f1fbdac7847..76a944c6987 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl @@ -9,7 +9,7 @@ void main() vec3 up = normalize(imat * screenVecs[1].xyz); vec3 screen_pos = (right * pos.x + up * pos.z) * size; vec4 pos_4d = ModelMatrix * vec4(local_pos + screen_pos, 1.0); - 
gl_Position = ViewProjectionMatrix * pos_4d; + gl_Position = drw_view.persmat * pos_4d; /* Manual stipple: one segment out of 2 is transparent. */ finalColor = ((gl_VertexID & 1) == 0) ? colorSkinRoot : vec4(0.0); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl index 374fb50af75..a50bc5e6e68 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_vert.glsl @@ -74,7 +74,7 @@ void main() finalColor = EDIT_MESH_facedot_color(norAndFlag.w); /* Bias Facedot Z position in clipspace. */ - gl_Position.z -= (ProjectionMatrix[3][3] == 0.0) ? 0.00035 : 1e-6; + gl_Position.z -= (drw_view.winmat[3][3] == 0.0) ? 0.00035 : 1e-6; gl_PointSize = sizeFaceDot; bool occluded = test_occlusion(); @@ -87,7 +87,7 @@ void main() /* Facing based color blend */ vec3 vpos = point_world_to_view(world_pos); vec3 view_normal = normalize(normal_object_to_view(vnor) + 1e-4); - vec3 view_vec = (ProjectionMatrix[3][3] == 0.0) ? normalize(vpos) : vec3(0.0, 0.0, 1.0); + vec3 view_vec = (drw_view.winmat[3][3] == 0.0) ? normalize(vpos) : vec3(0.0, 0.0, 1.0); float facing = dot(view_vec, view_normal); facing = 1.0 - abs(facing) * 0.2; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl index bb086e8d9f5..9a3036d5940 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_uv_stretching_vert.glsl @@ -55,9 +55,9 @@ float angle_normalized_v2v2(vec2 v1, vec2 v2) return (q) ? a : M_PI - a; } -float area_ratio_to_stretch(float ratio, float tot_ratio, float inv_tot_ratio) +float area_ratio_to_stretch(float ratio, float tot_ratio) { - ratio *= (ratio > 0.0f) ? 
tot_ratio : -inv_tot_ratio; + ratio *= tot_ratio; return (ratio > 1.0f) ? (1.0f / ratio) : ratio; } @@ -74,7 +74,7 @@ void main() stretch = stretch; stretch = 1.0 - stretch * stretch; #else - float stretch = 1.0 - area_ratio_to_stretch(ratio, totalAreaRatio, totalAreaRatioInv); + float stretch = 1.0 - area_ratio_to_stretch(ratio, totalAreaRatio); #endif diff --git a/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl index b2578970c9b..acaf04219c0 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_extra_vert.glsl @@ -198,8 +198,8 @@ void main() vec3 edge = obmat[3].xyz - world_pos; vec3 n0 = normalize(cross(edge, p0 - world_pos)); vec3 n1 = normalize(cross(edge, world_pos - p1)); - bool persp = (ProjectionMatrix[3][3] == 0.0); - vec3 V = (persp) ? normalize(ViewMatrixInverse[3].xyz - world_pos) : ViewMatrixInverse[2].xyz; + bool persp = (drw_view.winmat[3][3] == 0.0); + vec3 V = (persp) ? normalize(drw_view.viewinv[3].xyz - world_pos) : drw_view.viewinv[2].xyz; /* Discard non-silhouette edges. */ bool facing0 = dot(n0, V) > 0.0; bool facing1 = dot(n1, V) > 0.0; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl index 25f4984f119..54a4231590e 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_grid_frag.glsl @@ -53,7 +53,7 @@ void main() P += cameraPos * plane_axes; float dist, fade; - bool is_persp = ProjectionMatrix[3][3] == 0.0; + bool is_persp = drw_view.winmat[3][3] == 0.0; if (is_persp) { vec3 V = cameraPos - P; dist = length(V); @@ -83,7 +83,7 @@ void main() dist = 1.0; /* Avoid branch after. 
*/ if (flag_test(grid_flag, PLANE_XY)) { - float angle = 1.0 - abs(ViewMatrixInverse[2].z); + float angle = 1.0 - abs(drw_view.viewinv[2].z); dist = 1.0 + angle * 2.0; angle *= angle; fade *= 1.0 - angle * angle; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl index b81f1a24358..b43b1eb4a52 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_grid_vert.glsl @@ -39,5 +39,5 @@ void main() local_pos.z = clamp(local_pos.z, -1.0, 0.0); } - gl_Position = ViewProjectionMatrix * vec4(real_pos, 1.0); + gl_Position = drw_view.persmat * vec4(real_pos, 1.0); } diff --git a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl index 29346a44863..25e13e7c212 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_geom.glsl @@ -15,7 +15,7 @@ void main(void) vec2 edge_dir = compute_dir(interp_in[0].ss_pos, interp_in[1].ss_pos) * drw_view.viewport_size_inverse; - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); float line_size = float(lineThickness) * sizePixel; view_clipping_distances_set(gl_in[0]); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl index bc74a436f5e..e6281f75b8f 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_line_vert.glsl @@ -18,7 +18,7 @@ vec2 proj(vec4 pos) void main() { - gl_Position = ViewProjectionMatrix * vec4(pos, 1.0); + gl_Position = drw_view.persmat * vec4(pos, 1.0); 
interp.ss_pos = proj(gl_Position); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl index 5027525b9b3..70892954cd8 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_motion_path_point_vert.glsl @@ -9,7 +9,7 @@ void main() { - gl_Position = ViewProjectionMatrix * vec4(pos, 1.0); + gl_Position = drw_view.persmat * vec4(pos, 1.0); gl_PointSize = float(pointSize + 2); int frame = gl_VertexID + cacheStart; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl new file mode 100644 index 00000000000..f9ec475d21f --- /dev/null +++ b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl @@ -0,0 +1,81 @@ + +#pragma BLENDER_REQUIRE(common_view_clipping_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_hair_lib.glsl) + +uint outline_colorid_get(void) +{ + int flag = int(abs(ObjectInfo.w)); + bool is_active = (flag & DRW_BASE_ACTIVE) != 0; + + if (isTransform) { + return 0u; /* colorTransform */ + } + else if (is_active) { + return 3u; /* colorActive */ + } + else { + return 1u; /* colorSelect */ + } + + return 0u; +} + +/* Replace top 2 bits (of the 16bit output) by outlineId. + * This leaves 16K different IDs to create outlines between objects. 
+ vec3 world_pos = point_object_to_world(pos); + * SHIFT = (32 - (16 - 2)) */ +#define SHIFT 18u + +void main() +{ + bool is_persp = (drw_view.winmat[3][3] == 0.0); + float time, thickness; + vec3 center_wpos, tan, binor; + + hair_get_center_pos_tan_binor_time(is_persp, + ModelMatrixInverse, + drw_view.viewinv[3].xyz, + drw_view.viewinv[2].xyz, + center_wpos, + tan, + binor, + time, + thickness); + vec3 world_pos; + if (hairThicknessRes > 1) { + /* Calculate the thickness, thicktime, worldpos taken into account the outline. */ + float outline_width = point_world_to_ndc(center_wpos).w * 1.25 * + drw_view.viewport_size_inverse.y * drw_view.wininv[1][1]; + thickness += outline_width; + float thick_time = float(gl_VertexID % hairThicknessRes) / float(hairThicknessRes - 1); + thick_time = thickness * (thick_time * 2.0 - 1.0); + /* Take object scale into account. + * NOTE: This only works fine with uniform scaling. */ + float scale = 1.0 / length(mat3(ModelMatrixInverse) * binor); + world_pos = center_wpos + binor * thick_time * scale; + } + else { + world_pos = center_wpos; + } + + gl_Position = point_world_to_ndc(world_pos); + +#ifdef USE_GEOM + vert.pos = point_world_to_view(world_pos); +#endif + + /* Small bias to always be on top of the geom. */ + gl_Position.z -= 1e-3; + + /* ID 0 is nothing (background) */ + interp.ob_id = uint(resource_handle + 1); + + /* Should be 2 bits only [0..3]. */ + uint outline_id = outline_colorid_get(); + + /* Combine for 16bit uint target. 
*/ + interp.ob_id = (outline_id << 14u) | ((interp.ob_id << SHIFT) >> SHIFT); + + view_clipping_distances(world_pos); +} diff --git a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl index 8a196620af9..5e0074e9f0b 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_geom.glsl @@ -11,7 +11,7 @@ void vert_from_gl_in(int v) void main() { - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); vec3 view_vec = (is_persp) ? normalize(vert[1].pos) : vec3(0.0, 0.0, -1.0); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl index b6d5cd96c12..92be9ec3bcb 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_gpencil_frag.glsl @@ -22,14 +22,14 @@ void main() if (!gpStrokeOrder3d) { /* Stroke order 2D. Project to gpDepthPlane. */ - bool is_persp = ProjectionMatrix[3][3] == 0.0; + bool is_persp = drw_view.winmat[3][3] == 0.0; vec2 uvs = vec2(gl_FragCoord.xy) * drw_view.viewport_size_inverse; vec3 pos_ndc = vec3(uvs, gl_FragCoord.z) * 2.0 - 1.0; - vec4 pos_world = ViewProjectionMatrixInverse * vec4(pos_ndc, 1.0); + vec4 pos_world = drw_view.persinv * vec4(pos_ndc, 1.0); vec3 pos = pos_world.xyz / pos_world.w; vec3 ray_ori = pos; - vec3 ray_dir = (is_persp) ? (ViewMatrixInverse[3].xyz - pos) : ViewMatrixInverse[2].xyz; + vec3 ray_dir = (is_persp) ? 
(drw_view.viewinv[3].xyz - pos) : drw_view.viewinv[2].xyz; vec3 isect = ray_plane_intersection(ray_ori, ray_dir, gpDepthPlane); vec4 ndc = point_world_to_ndc(isect); gl_FragDepth = (ndc.z / ndc.w) * 0.5 + 0.5; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl index fb981a8167a..c48e7cce550 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_particle_vert.glsl @@ -23,7 +23,7 @@ void main() #ifdef USE_DOTS gl_Position = point_world_to_ndc(world_pos); /* World sized points. */ - gl_PointSize = sizePixel * draw_size * ProjectionMatrix[1][1] * sizeViewport.y / gl_Position.w; + gl_PointSize = sizePixel * draw_size * drw_view.winmat[1][1] * sizeViewport.y / gl_Position.w; #else if ((vclass & VCLASS_SCREENALIGNED) != 0) { diff --git a/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl new file mode 100644 index 00000000000..7af6bdb9fdb --- /dev/null +++ b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_frag.glsl @@ -0,0 +1,5 @@ + +void main() +{ + out_color = vec4(vec3(0.0), 1.0 - mask_weight); +} diff --git a/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl new file mode 100644 index 00000000000..7be3c8e6dfb --- /dev/null +++ b/source/blender/draw/engines/overlay/shaders/overlay_sculpt_curves_selection_vert.glsl @@ -0,0 +1,34 @@ +#pragma BLENDER_REQUIRE(common_hair_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_clipping_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) + +float retrieve_selection() +{ + if (is_point_domain) { + return texelFetch(selection_tx, hair_get_base_id()).r; + } + return 
texelFetch(selection_tx, hair_get_strand_id()).r; +} + +void main() +{ + bool is_persp = (ProjectionMatrix[3][3] == 0.0); + float time, thick_time, thickness; + vec3 world_pos, tan, binor; + hair_get_pos_tan_binor_time(is_persp, + ModelMatrixInverse, + ViewMatrixInverse[3].xyz, + ViewMatrixInverse[2].xyz, + world_pos, + tan, + binor, + time, + thickness, + thick_time); + + gl_Position = point_world_to_ndc(world_pos); + + mask_weight = 1.0 - (selection_opacity - retrieve_selection() * selection_opacity); + + view_clipping_distances(world_pos); +} diff --git a/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl index 0a498471b46..e1a4a3602e3 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_uniform_color_frag.glsl @@ -1,4 +1,4 @@ void main() { fragColor = color; -}
\ No newline at end of file +} diff --git a/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl index 41bd7791dd7..d189ab1b72c 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl @@ -72,7 +72,7 @@ void wire_object_color_get(out vec3 rim_col, out vec3 wire_col) void main() { bool no_attr = all(equal(nor, vec3(0))); - vec3 wnor = no_attr ? ViewMatrixInverse[2].xyz : normalize(normal_object_to_world(nor)); + vec3 wnor = no_attr ? drw_view.viewinv[2].xyz : normalize(normal_object_to_world(nor)); vec3 wpos = point_object_to_world(pos); if (isHair) { @@ -81,8 +81,8 @@ void main() wnor = -normalize(mat3(obmat) * nor); } - bool is_persp = (ProjectionMatrix[3][3] == 0.0); - vec3 V = (is_persp) ? normalize(ViewMatrixInverse[3].xyz - wpos) : ViewMatrixInverse[2].xyz; + bool is_persp = (drw_view.winmat[3][3] == 0.0); + vec3 V = (is_persp) ? normalize(drw_view.viewinv[3].xyz - wpos) : drw_view.viewinv[2].xyz; float facing = dot(wnor, V); diff --git a/source/blender/draw/engines/select/select_engine.c b/source/blender/draw/engines/select/select_engine.c index 88ae5ac707e..026a1f52ac1 100644 --- a/source/blender/draw/engines/select/select_engine.c +++ b/source/blender/draw/engines/select/select_engine.c @@ -201,7 +201,7 @@ static void select_cache_populate(void *vedata, Object *ob) if (!e_data.context.is_dirty && sel_data && sel_data->is_drawn) { /* The object indices have already been drawn. Fill depth pass. - * Opti: Most of the time this depth pass is not used. */ + * Optimization: Most of the time this depth pass is not used. 
*/ struct Mesh *me = ob->data; if (e_data.context.select_mode & SCE_SELECT_FACE) { struct GPUBatch *geom_faces = DRW_mesh_batch_cache_get_triangles_with_select_id(me); diff --git a/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl b/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl index 880f17b0c9d..e7ca868a4ff 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_cavity_lib.glsl @@ -31,9 +31,9 @@ void cavity_compute(vec2 screenco, /* find the offset in screen space by multiplying a point * in camera space at the depth of the point by the projection matrix. */ vec2 offset; - float homcoord = ProjectionMatrix[2][3] * position.z + ProjectionMatrix[3][3]; - offset.x = ProjectionMatrix[0][0] * world_data.cavity_distance / homcoord; - offset.y = ProjectionMatrix[1][1] * world_data.cavity_distance / homcoord; + float homcoord = drw_view.winmat[2][3] * position.z + drw_view.winmat[3][3]; + offset.x = drw_view.winmat[0][0] * world_data.cavity_distance / homcoord; + offset.y = drw_view.winmat[1][1] * world_data.cavity_distance / homcoord; /* convert from -1.0...1.0 range to 0.0..1.0 for easy use with texture coordinates */ offset *= 0.5; diff --git a/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl index d8f8a1cc03f..11d7c85d43a 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_effect_dof_frag.glsl @@ -16,7 +16,7 @@ (dof_aperturesize * (dof_distance / zdepth - 1.0) * dof_invsensorsize) #define linear_depth(z) \ - ((ProjectionMatrix[3][3] == 0.0) ? \ + ((drw_view.winmat[3][3] == 0.0) ? 
\ (nearFar.x * nearFar.y) / (z * (nearFar.x - nearFar.y) + nearFar.y) : \ (z * 2.0 - 1.0) * nearFar.y) diff --git a/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl b/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl index cfc94ef7c9a..04fef8d8b32 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_prepass_hair_vert.glsl @@ -43,13 +43,13 @@ void workbench_hair_random_material(float rand, void main() { - bool is_persp = (ProjectionMatrix[3][3] == 0.0); + bool is_persp = (drw_view.winmat[3][3] == 0.0); float time, thick_time, thickness; vec3 world_pos, tan, binor; hair_get_pos_tan_binor_time(is_persp, ModelMatrixInverse, - ViewMatrixInverse[3].xyz, - ViewMatrixInverse[2].xyz, + drw_view.viewinv[3].xyz, + drw_view.viewinv[2].xyz, world_pos, tan, binor, diff --git a/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl index d8f1b83d747..213279b1913 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_transparent_accum_frag.glsl @@ -24,7 +24,7 @@ float linear_zdepth(float depth, vec4 viewvecs[2], mat4 proj_mat) */ float calculate_transparent_weight(void) { - float z = linear_zdepth(gl_FragCoord.z, ViewVecs, ProjectionMatrix); + float z = linear_zdepth(gl_FragCoord.z, drw_view.viewvecs, drw_view.winmat); #if 0 /* Eq 10 : Good for surfaces with varying opacity (like particles) */ float a = min(1.0, alpha * 10.0) + 0.01; diff --git a/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl index 49e26cd3e0c..afba3a0d784 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl 
+++ b/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl @@ -237,7 +237,7 @@ void main() fragColor = vec4(Lscat, Tr); #else vec2 screen_uv = gl_FragCoord.xy / vec2(textureSize(depthBuffer, 0).xy); - bool is_persp = ProjectionMatrix[3][3] == 0.0; + bool is_persp = drw_view.winmat[3][3] == 0.0; vec3 volume_center = ModelMatrix[3].xyz; diff --git a/source/blender/draw/engines/workbench/workbench_engine.c b/source/blender/draw/engines/workbench/workbench_engine.c index 9eb35c25bf4..a0459a967f3 100644 --- a/source/blender/draw/engines/workbench/workbench_engine.c +++ b/source/blender/draw/engines/workbench/workbench_engine.c @@ -409,7 +409,7 @@ void workbench_cache_populate(void *ved, Object *ob) return; } - if (ELEM(ob->type, OB_MESH, OB_SURF, OB_MBALL, OB_POINTCLOUD)) { + if (ELEM(ob->type, OB_MESH, OB_POINTCLOUD)) { bool use_sculpt_pbvh, use_texpaint_mode, draw_shadow, has_transp_mat = false; eV3DShadingColorType color_type = workbench_color_type_get( wpd, ob, &use_sculpt_pbvh, &use_texpaint_mode, &draw_shadow); diff --git a/source/blender/draw/engines/workbench/workbench_render.c b/source/blender/draw/engines/workbench/workbench_render.c index e5dcf6c5624..931f6a2dc92 100644 --- a/source/blender/draw/engines/workbench/workbench_render.c +++ b/source/blender/draw/engines/workbench/workbench_render.c @@ -17,6 +17,7 @@ #include "ED_view3d.h" +#include "GPU_context.h" #include "GPU_shader.h" #include "DEG_depsgraph.h" @@ -188,6 +189,10 @@ void workbench_render(void *ved, RenderEngine *engine, RenderLayer *render_layer workbench_draw_finish(data); + /* Perform render step between samples to allow + * flushing of freed GPUBackend resources. */ + GPU_render_step(); + /* Write render output. 
*/ const char *viewname = RE_GetActiveRenderView(engine->re); RenderPass *rp = RE_pass_find_by_name(render_layer, RE_PASSNAME_COMBINED, viewname); diff --git a/source/blender/draw/intern/DRW_gpu_wrapper.hh b/source/blender/draw/intern/DRW_gpu_wrapper.hh index 257f01a5562..890cd588527 100644 --- a/source/blender/draw/intern/DRW_gpu_wrapper.hh +++ b/source/blender/draw/intern/DRW_gpu_wrapper.hh @@ -50,13 +50,13 @@ * * `draw::Framebuffer` * Simple wrapper to #GPUFramebuffer that can be moved. - * */ #include "DRW_render.h" #include "MEM_guardedalloc.h" +#include "draw_manager.h" #include "draw_texture_pool.h" #include "BLI_math_vec_types.hh" @@ -182,7 +182,7 @@ class UniformCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable GPU_uniformbuf_free(ubo_); } - void push_update(void) + void push_update() { GPU_uniformbuf_update(ubo_, this->data_); } @@ -227,12 +227,22 @@ class StorageCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable GPU_storagebuf_free(ssbo_); } - void push_update(void) + void push_update() { BLI_assert(device_only == false); GPU_storagebuf_update(ssbo_, this->data_); } + void clear_to_zero() + { + GPU_storagebuf_clear_to_zero(ssbo_); + } + + void read() + { + GPU_storagebuf_read(ssbo_, this->data_); + } + operator GPUStorageBuf *() const { return ssbo_; @@ -319,6 +329,7 @@ class StorageArrayBuffer : public detail::StorageCommon<T, len, device_only> { MEM_freeN(this->data_); } + /* Resize to \a new_size elements. 
*/ void resize(int64_t new_size) { BLI_assert(new_size > 0); @@ -392,10 +403,10 @@ class Texture : NonCopyable { int extent, float *data = nullptr, bool cubemap = false, - int mips = 1) + int mip_len = 1) : name_(name) { - tx_ = create(extent, 0, 0, mips, format, data, false, cubemap); + tx_ = create(extent, 0, 0, mip_len, format, data, false, cubemap); } Texture(const char *name, @@ -404,17 +415,20 @@ class Texture : NonCopyable { int layers, float *data = nullptr, bool cubemap = false, - int mips = 1) + int mip_len = 1) : name_(name) { - tx_ = create(extent, layers, 0, mips, format, data, true, cubemap); + tx_ = create(extent, layers, 0, mip_len, format, data, true, cubemap); } - Texture( - const char *name, eGPUTextureFormat format, int2 extent, float *data = nullptr, int mips = 1) + Texture(const char *name, + eGPUTextureFormat format, + int2 extent, + float *data = nullptr, + int mip_len = 1) : name_(name) { - tx_ = create(UNPACK2(extent), 0, mips, format, data, false, false); + tx_ = create(UNPACK2(extent), 0, mip_len, format, data, false, false); } Texture(const char *name, @@ -422,17 +436,20 @@ class Texture : NonCopyable { int2 extent, int layers, float *data = nullptr, - int mips = 1) + int mip_len = 1) : name_(name) { - tx_ = create(UNPACK2(extent), layers, mips, format, data, true, false); + tx_ = create(UNPACK2(extent), layers, mip_len, format, data, true, false); } - Texture( - const char *name, eGPUTextureFormat format, int3 extent, float *data = nullptr, int mips = 1) + Texture(const char *name, + eGPUTextureFormat format, + int3 extent, + float *data = nullptr, + int mip_len = 1) : name_(name) { - tx_ = create(UNPACK3(extent), mips, format, data, false, false); + tx_ = create(UNPACK3(extent), mip_len, format, data, false, false); } ~Texture() @@ -467,9 +484,9 @@ class Texture : NonCopyable { * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. 
*/ - bool ensure_1d(eGPUTextureFormat format, int extent, float *data = nullptr, int mips = 1) + bool ensure_1d(eGPUTextureFormat format, int extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, 0, 0, mips, format, data, false, false); + return ensure_impl(extent, 0, 0, mip_len, format, data, false, false); } /** @@ -477,18 +494,18 @@ class Texture : NonCopyable { * Return true if a texture has been created. */ bool ensure_1d_array( - eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mips = 1) + eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, layers, 0, mips, format, data, true, false); + return ensure_impl(extent, layers, 0, mip_len, format, data, true, false); } /** * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. */ - bool ensure_2d(eGPUTextureFormat format, int2 extent, float *data = nullptr, int mips = 1) + bool ensure_2d(eGPUTextureFormat format, int2 extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(UNPACK2(extent), 0, mips, format, data, false, false); + return ensure_impl(UNPACK2(extent), 0, mip_len, format, data, false, false); } /** @@ -496,27 +513,27 @@ class Texture : NonCopyable { * Return true if a texture has been created. */ bool ensure_2d_array( - eGPUTextureFormat format, int2 extent, int layers, float *data = nullptr, int mips = 1) + eGPUTextureFormat format, int2 extent, int layers, float *data = nullptr, int mip_len = 1) { - return ensure_impl(UNPACK2(extent), layers, mips, format, data, true, false); + return ensure_impl(UNPACK2(extent), layers, mip_len, format, data, true, false); } /** * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. 
*/ - bool ensure_3d(eGPUTextureFormat format, int3 extent, float *data = nullptr, int mips = 1) + bool ensure_3d(eGPUTextureFormat format, int3 extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(UNPACK3(extent), mips, format, data, false, false); + return ensure_impl(UNPACK3(extent), mip_len, format, data, false, false); } /** * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. */ - bool ensure_cube(eGPUTextureFormat format, int extent, float *data = nullptr, int mips = 1) + bool ensure_cube(eGPUTextureFormat format, int extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, extent, 0, mips, format, data, false, true); + return ensure_impl(extent, extent, 0, mip_len, format, data, false, true); } /** @@ -524,9 +541,9 @@ class Texture : NonCopyable { * Return true if a texture has been created. */ bool ensure_cube_array( - eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mips = 1) + eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, extent, layers, mips, format, data, false, true); + return ensure_impl(extent, extent, layers, mip_len, format, data, false, true); } /** @@ -555,9 +572,15 @@ class Texture : NonCopyable { return mip_views_[miplvl]; } + int mip_count() const + { + return GPU_texture_mip_count(tx_); + } + /** * Ensure the availability of mipmap views. * Layer views covers all layers of array textures. + * Returns true if the views were (re)created. */ bool ensure_layer_views(bool cube_as_array = false) { @@ -594,42 +617,47 @@ class Texture : NonCopyable { /** * Returns true if the texture has been allocated or acquired from the pool. 
*/ - bool is_valid(void) const + bool is_valid() const { return tx_ != nullptr; } - int width(void) const + int width() const { return GPU_texture_width(tx_); } - int height(void) const + int height() const { return GPU_texture_height(tx_); } - bool depth(void) const + int pixel_count() const + { + return GPU_texture_width(tx_) * GPU_texture_height(tx_); + } + + bool depth() const { return GPU_texture_depth(tx_); } - bool is_stencil(void) const + bool is_stencil() const { return GPU_texture_stencil(tx_); } - bool is_integer(void) const + bool is_integer() const { return GPU_texture_integer(tx_); } - bool is_cube(void) const + bool is_cube() const { return GPU_texture_cube(tx_); } - bool is_array(void) const + bool is_array() const { return GPU_texture_array(tx_); } @@ -708,7 +736,7 @@ class Texture : NonCopyable { bool ensure_impl(int w, int h = 0, int d = 0, - int mips = 1, + int mip_len = 1, eGPUTextureFormat format = GPU_RGBA8, float *data = nullptr, bool layered = false, @@ -721,11 +749,11 @@ class Texture : NonCopyable { int3 size = this->size(); if (size != int3(w, h, d) || GPU_texture_format(tx_) != format || GPU_texture_cube(tx_) != cubemap || GPU_texture_array(tx_) != layered) { - GPU_TEXTURE_FREE_SAFE(tx_); + free(); } } if (tx_ == nullptr) { - tx_ = create(w, h, d, mips, format, data, layered, cubemap); + tx_ = create(w, h, d, mip_len, format, data, layered, cubemap); return true; } return false; @@ -734,87 +762,82 @@ class Texture : NonCopyable { GPUTexture *create(int w, int h, int d, - int mips, + int mip_len, eGPUTextureFormat format, float *data, bool layered, bool cubemap) { if (h == 0) { - return GPU_texture_create_1d(name_, w, mips, format, data); + return GPU_texture_create_1d(name_, w, mip_len, format, data); } else if (cubemap) { if (layered) { - return GPU_texture_create_cube_array(name_, w, d, mips, format, data); + return GPU_texture_create_cube_array(name_, w, d, mip_len, format, data); } else { - return GPU_texture_create_cube(name_, w, 
mips, format, data); + return GPU_texture_create_cube(name_, w, mip_len, format, data); } } else if (d == 0) { if (layered) { - return GPU_texture_create_1d_array(name_, w, h, mips, format, data); + return GPU_texture_create_1d_array(name_, w, h, mip_len, format, data); } else { - return GPU_texture_create_2d(name_, w, h, mips, format, data); + return GPU_texture_create_2d(name_, w, h, mip_len, format, data); } } else { if (layered) { - return GPU_texture_create_2d_array(name_, w, h, d, mips, format, data); + return GPU_texture_create_2d_array(name_, w, h, d, mip_len, format, data); } else { - return GPU_texture_create_3d(name_, w, h, d, mips, format, GPU_DATA_FLOAT, data); + return GPU_texture_create_3d(name_, w, h, d, mip_len, format, GPU_DATA_FLOAT, data); } } } }; class TextureFromPool : public Texture, NonMovable { - private: - GPUTexture *tx_tmp_saved_ = nullptr; - public: TextureFromPool(const char *name = "gpu::Texture") : Texture(name){}; - /* Always use `release()` after rendering and `sync()` in sync phase. */ - void acquire(int2 extent, eGPUTextureFormat format, void *owner_) + /* Always use `release()` after rendering. */ + void acquire(int2 extent, eGPUTextureFormat format) { BLI_assert(this->tx_ == nullptr); - if (this->tx_ != nullptr) { - return; - } - if (tx_tmp_saved_ != nullptr) { - if (GPU_texture_width(tx_tmp_saved_) != extent.x || - GPU_texture_height(tx_tmp_saved_) != extent.y || - GPU_texture_format(tx_tmp_saved_) != format) { - this->tx_tmp_saved_ = nullptr; - } - else { - this->tx_ = tx_tmp_saved_; - return; - } - } - DrawEngineType *owner = (DrawEngineType *)owner_; - this->tx_ = DRW_texture_pool_query_2d(UNPACK2(extent), format, owner); + + this->tx_ = DRW_texture_pool_texture_acquire( + DST.vmempool->texture_pool, UNPACK2(extent), format); } - void release(void) + void release() { /* Allows multiple release. 
*/ - if (this->tx_ != nullptr) { - tx_tmp_saved_ = this->tx_; - this->tx_ = nullptr; + if (this->tx_ == nullptr) { + return; } + DRW_texture_pool_texture_release(DST.vmempool->texture_pool, this->tx_); + this->tx_ = nullptr; } /** - * Clears any reference. Workaround for pool texture not being able to release on demand. - * Needs to be called at during the sync phase. + * Swap the content of the two textures. + * Also change ownership accordingly if needed. */ - void sync(void) + static void swap(TextureFromPool &a, Texture &b) + { + Texture::swap(a, b); + DRW_texture_pool_give_texture_ownership(DST.vmempool->texture_pool, a); + DRW_texture_pool_take_texture_ownership(DST.vmempool->texture_pool, b); + } + static void swap(Texture &a, TextureFromPool &b) { - tx_tmp_saved_ = nullptr; + swap(b, a); + } + static void swap(TextureFromPool &a, TextureFromPool &b) + { + Texture::swap(a, b); } /** Remove methods that are forbidden with this type of textures. */ @@ -832,6 +855,33 @@ class TextureFromPool : public Texture, NonMovable { GPUTexture *stencil_view() = delete; }; +/** + * Dummy type to bind texture as image. + * It is just a GPUTexture in disguise. 
+ */ +class Image { +}; + +static inline Image *as_image(GPUTexture *tex) +{ + return reinterpret_cast<Image *>(tex); +} + +static inline Image **as_image(GPUTexture **tex) +{ + return reinterpret_cast<Image **>(tex); +} + +static inline GPUTexture *as_texture(Image *img) +{ + return reinterpret_cast<GPUTexture *>(img); +} + +static inline GPUTexture **as_texture(Image **img) +{ + return reinterpret_cast<GPUTexture **>(img); +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -901,45 +951,47 @@ class Framebuffer : NonCopyable { template<typename T, int64_t len> class SwapChain { private: + BLI_STATIC_ASSERT(len > 1, "A swap-chain needs more than 1 unit in length."); std::array<T, len> chain_; - int64_t index_ = 0; public: void swap() { - index_ = (index_ + 1) % len; + for (auto i : IndexRange(len - 1)) { + T::swap(chain_[i], chain_[(i + 1) % len]); + } } T ¤t() { - return chain_[index_]; + return chain_[0]; } T &previous() { /* Avoid modulo operation with negative numbers. */ - return chain_[(index_ + len - 1) % len]; + return chain_[(0 + len - 1) % len]; } T &next() { - return chain_[(index_ + 1) % len]; + return chain_[(0 + 1) % len]; } const T ¤t() const { - return chain_[index_]; + return chain_[0]; } const T &previous() const { /* Avoid modulo operation with negative numbers. 
*/ - return chain_[(index_ + len - 1) % len]; + return chain_[(0 + len - 1) % len]; } const T &next() const { - return chain_[(index_ + 1) % len]; + return chain_[(0 + 1) % len]; } }; diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h index fa4a1d93d3e..b49203d85f6 100644 --- a/source/blender/draw/intern/DRW_render.h +++ b/source/blender/draw/intern/DRW_render.h @@ -41,6 +41,7 @@ #include "draw_debug.h" #include "draw_manager_profiling.h" +#include "draw_state.h" #include "draw_view_data.h" #include "MEM_guardedalloc.h" @@ -206,6 +207,10 @@ struct GPUShader *DRW_shader_create_with_lib_ex(const char *vert, const char *lib, const char *defines, const char *name); +struct GPUShader *DRW_shader_create_compute_with_shaderlib(const char *comp, + const DRWShaderLibrary *lib, + const char *defines, + const char *name); struct GPUShader *DRW_shader_create_with_shaderlib_ex(const char *vert, const char *geom, const char *frag, @@ -288,83 +293,6 @@ void DRW_shader_library_free(DRWShaderLibrary *lib); /* Batches */ -/** - * DRWState is a bit-mask that stores the current render state and the desired render state. Based - * on the differences the minimum state changes can be invoked to setup the desired render state. - * - * The Write Stencil, Stencil test, Depth test and Blend state options are mutual exclusive - * therefore they aren't ordered as a bit mask. - */ -typedef enum { - /** To be used for compute passes. */ - DRW_STATE_NO_DRAW = 0, - /** Write mask */ - DRW_STATE_WRITE_DEPTH = (1 << 0), - DRW_STATE_WRITE_COLOR = (1 << 1), - /* Write Stencil. These options are mutual exclusive and packed into 2 bits */ - DRW_STATE_WRITE_STENCIL = (1 << 2), - DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2), - DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2), - /** Depth test. 
These options are mutual exclusive and packed into 3 bits */ - DRW_STATE_DEPTH_ALWAYS = (1 << 4), - DRW_STATE_DEPTH_LESS = (2 << 4), - DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4), - DRW_STATE_DEPTH_EQUAL = (4 << 4), - DRW_STATE_DEPTH_GREATER = (5 << 4), - DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4), - /** Culling test */ - DRW_STATE_CULL_BACK = (1 << 7), - DRW_STATE_CULL_FRONT = (1 << 8), - /** Stencil test. These options are mutually exclusive and packed into 2 bits. */ - DRW_STATE_STENCIL_ALWAYS = (1 << 9), - DRW_STATE_STENCIL_EQUAL = (2 << 9), - DRW_STATE_STENCIL_NEQUAL = (3 << 9), - - /** Blend state. These options are mutual exclusive and packed into 4 bits */ - DRW_STATE_BLEND_ADD = (1 << 11), - /** Same as additive but let alpha accumulate without pre-multiply. */ - DRW_STATE_BLEND_ADD_FULL = (2 << 11), - /** Standard alpha blending. */ - DRW_STATE_BLEND_ALPHA = (3 << 11), - /** Use that if color is already pre-multiply by alpha. */ - DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11), - DRW_STATE_BLEND_BACKGROUND = (5 << 11), - DRW_STATE_BLEND_OIT = (6 << 11), - DRW_STATE_BLEND_MUL = (7 << 11), - DRW_STATE_BLEND_SUB = (8 << 11), - /** Use dual source blending. WARNING: Only one color buffer allowed. */ - DRW_STATE_BLEND_CUSTOM = (9 << 11), - DRW_STATE_LOGIC_INVERT = (10 << 11), - DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11), - - DRW_STATE_IN_FRONT_SELECT = (1 << 27), - DRW_STATE_SHADOW_OFFSET = (1 << 28), - DRW_STATE_CLIP_PLANES = (1 << 29), - DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30), - /** DO NOT USE. Assumed always enabled. Only used internally. 
*/ - DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31), -} DRWState; - -ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE); - -#define DRW_STATE_DEFAULT \ - (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL) -#define DRW_STATE_BLEND_ENABLED \ - (DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \ - DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \ - DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT) -#define DRW_STATE_RASTERIZER_ENABLED \ - (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \ - DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) -#define DRW_STATE_DEPTH_TEST_ENABLED \ - (DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \ - DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL) -#define DRW_STATE_STENCIL_TEST_ENABLED \ - (DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL) -#define DRW_STATE_WRITE_STENCIL_ENABLED \ - (DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \ - DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) - typedef enum { DRW_ATTR_INT, DRW_ATTR_FLOAT, @@ -409,7 +337,7 @@ void DRW_shgroup_call_ex(DRWShadingGroup *shgroup, void *user_data); /** - * If ob is NULL, unit modelmatrix is assumed and culling is bypassed. + * If ob is NULL, unit model-matrix is assumed and culling is bypassed. */ #define DRW_shgroup_call(shgroup, geom, ob) \ DRW_shgroup_call_ex(shgroup, ob, NULL, geom, false, NULL) @@ -420,8 +348,8 @@ void DRW_shgroup_call_ex(DRWShadingGroup *shgroup, #define DRW_shgroup_call_obmat(shgroup, geom, obmat) \ DRW_shgroup_call_ex(shgroup, NULL, obmat, geom, false, NULL) -/* TODO(fclem): remove this when we have DRWView */ -/* user_data is used by DRWCallVisibilityFn defined in DRWView. 
*/ +/* TODO(fclem): remove this when we have #DRWView */ +/* user_data is used by #DRWCallVisibilityFn defined in #DRWView. */ #define DRW_shgroup_call_with_callback(shgroup, geom, ob, user_data) \ DRW_shgroup_call_ex(shgroup, ob, NULL, geom, false, user_data) @@ -454,6 +382,10 @@ void DRW_shgroup_call_compute_indirect(DRWShadingGroup *shgroup, GPUStorageBuf * void DRW_shgroup_call_procedural_points(DRWShadingGroup *sh, Object *ob, uint point_count); void DRW_shgroup_call_procedural_lines(DRWShadingGroup *sh, Object *ob, uint line_count); void DRW_shgroup_call_procedural_triangles(DRWShadingGroup *sh, Object *ob, uint tri_count); +void DRW_shgroup_call_procedural_indirect(DRWShadingGroup *shgroup, + GPUPrimType primitive_type, + Object *ob, + GPUStorageBuf *indirect_buf); /** * \warning Only use with Shaders that have `IN_PLACE_INSTANCES` defined. * TODO: Should be removed. @@ -639,10 +571,10 @@ void DRW_shgroup_buffer_texture_ref(DRWShadingGroup *shgroup, DRW_shgroup_uniform_block_ex(shgroup, name, ubo, __FILE__, __LINE__) # define DRW_shgroup_uniform_block_ref(shgroup, name, ubo) \ DRW_shgroup_uniform_block_ref_ex(shgroup, name, ubo, __FILE__, __LINE__) -# define DRW_shgroup_storage_block(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ex(shgroup, name, ubo, __FILE__, __LINE__) -# define DRW_shgroup_storage_block_ref(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ref_ex(shgroup, name, ubo, __FILE__, __LINE__) +# define DRW_shgroup_storage_block(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ex(shgroup, name, ssbo, __FILE__, __LINE__) +# define DRW_shgroup_storage_block_ref(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ref_ex(shgroup, name, ssbo, __FILE__, __LINE__) #else # define DRW_shgroup_vertex_buffer(shgroup, name, vert) \ DRW_shgroup_vertex_buffer_ex(shgroup, name, vert) @@ -652,10 +584,10 @@ void DRW_shgroup_buffer_texture_ref(DRWShadingGroup *shgroup, DRW_shgroup_uniform_block_ex(shgroup, name, ubo) # define 
DRW_shgroup_uniform_block_ref(shgroup, name, ubo) \ DRW_shgroup_uniform_block_ref_ex(shgroup, name, ubo) -# define DRW_shgroup_storage_block(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ex(shgroup, name, ubo) -# define DRW_shgroup_storage_block_ref(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ref_ex(shgroup, name, ubo) +# define DRW_shgroup_storage_block(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ex(shgroup, name, ssbo) +# define DRW_shgroup_storage_block_ref(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ref_ex(shgroup, name, ssbo) #endif bool DRW_shgroup_is_empty(DRWShadingGroup *shgroup); @@ -791,7 +723,7 @@ bool DRW_culling_box_test(const DRWView *view, const BoundBox *bbox); bool DRW_culling_plane_test(const DRWView *view, const float plane[4]); /** * Return True if the given box intersect the current view frustum. - * This function will have to be replaced when world space bb per objects is implemented. + * This function will have to be replaced when world space bounding-box per objects is implemented. */ bool DRW_culling_min_max_test(const DRWView *view, float obmat[4][4], float min[3], float max[3]); @@ -887,7 +819,6 @@ bool DRW_object_is_in_edit_mode(const struct Object *ob); * we are rendering or drawing in the viewport. 
*/ int DRW_object_visibility_in_active_context(const struct Object *ob); -bool DRW_object_is_flat_normal(const struct Object *ob); bool DRW_object_use_hide_faces(const struct Object *ob); bool DRW_object_is_visible_psys_in_active_context(const struct Object *object, @@ -981,7 +912,7 @@ typedef struct DRWContextState { struct ViewLayer *view_layer; /* 'CTX_data_view_layer(C)' */ /* Use 'object_edit' for edit-mode */ - struct Object *obact; /* 'OBACT' */ + struct Object *obact; struct RenderEngineType *engine_type; diff --git a/source/blender/draw/intern/draw_attributes.cc b/source/blender/draw/intern/draw_attributes.cc index 8fb4210901f..011d72e9e8f 100644 --- a/source/blender/draw/intern/draw_attributes.cc +++ b/source/blender/draw/intern/draw_attributes.cc @@ -65,9 +65,10 @@ bool drw_attributes_overlap(const DRW_Attributes *a, const DRW_Attributes *b) } DRW_AttributeRequest *drw_attributes_add_request(DRW_Attributes *attrs, - eCustomDataType type, - int layer, - eAttrDomain domain) + const char *name, + const eCustomDataType type, + const int layer_index, + const eAttrDomain domain) { if (attrs->num_requests >= GPU_MAX_ATTR) { return nullptr; @@ -75,7 +76,8 @@ DRW_AttributeRequest *drw_attributes_add_request(DRW_Attributes *attrs, DRW_AttributeRequest *req = &attrs->requests[attrs->num_requests]; req->cd_type = type; - req->layer_index = layer; + BLI_strncpy(req->attribute_name, name, sizeof(req->attribute_name)); + req->layer_index = layer_index; req->domain = domain; attrs->num_requests += 1; return req; @@ -86,7 +88,7 @@ bool drw_custom_data_match_attribute(const CustomData *custom_data, int *r_layer_index, eCustomDataType *r_type) { - const eCustomDataType possible_attribute_types[7] = { + const eCustomDataType possible_attribute_types[8] = { CD_PROP_BOOL, CD_PROP_INT8, CD_PROP_INT32, @@ -94,6 +96,7 @@ bool drw_custom_data_match_attribute(const CustomData *custom_data, CD_PROP_FLOAT2, CD_PROP_FLOAT3, CD_PROP_COLOR, + CD_PROP_BYTE_COLOR, }; for (int i = 0; i < 
ARRAY_SIZE(possible_attribute_types); i++) { diff --git a/source/blender/draw/intern/draw_attributes.h b/source/blender/draw/intern/draw_attributes.h index 4f82f3b94e9..b577c6c4162 100644 --- a/source/blender/draw/intern/draw_attributes.h +++ b/source/blender/draw/intern/draw_attributes.h @@ -46,8 +46,9 @@ void drw_attributes_merge(DRW_Attributes *dst, bool drw_attributes_overlap(const DRW_Attributes *a, const DRW_Attributes *b); DRW_AttributeRequest *drw_attributes_add_request(DRW_Attributes *attrs, - eCustomDataType type, - int layer, + const char *name, + eCustomDataType data_type, + int layer_index, eAttrDomain domain); bool drw_custom_data_match_attribute(const CustomData *custom_data, diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c index f846251c66b..6537490c06c 100644 --- a/source/blender/draw/intern/draw_cache.c +++ b/source/blender/draw/intern/draw_cache.c @@ -90,6 +90,7 @@ static struct DRWShapeCache { GPUBatch *drw_procedural_verts; GPUBatch *drw_procedural_lines; GPUBatch *drw_procedural_tris; + GPUBatch *drw_procedural_tri_strips; GPUBatch *drw_cursor; GPUBatch *drw_cursor_only_circle; GPUBatch *drw_fullscreen_quad; @@ -208,6 +209,21 @@ GPUBatch *drw_cache_procedural_triangles_get(void) return SHC.drw_procedural_tris; } +GPUBatch *drw_cache_procedural_triangle_strips_get() +{ + if (!SHC.drw_procedural_tri_strips) { + /* TODO(fclem): get rid of this dummy VBO. 
*/ + GPUVertFormat format = {0}; + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(vbo, 1); + + SHC.drw_procedural_tri_strips = GPU_batch_create_ex( + GPU_PRIM_TRI_STRIP, vbo, NULL, GPU_BATCH_OWNS_VBO); + } + return SHC.drw_procedural_tri_strips; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -764,6 +780,39 @@ GPUBatch *DRW_cache_normal_arrow_get(void) return SHC.drw_normal_arrow; } +void DRW_vertbuf_create_wiredata(GPUVertBuf *vbo, const int vert_len) +{ + static GPUVertFormat format = {0}; + static struct { + uint wd; + } attr_id; + if (format.attr_len == 0) { + /* initialize vertex format */ + if (!GPU_crappy_amd_driver()) { + /* Some AMD drivers strangely crash with a vbo with this format. */ + attr_id.wd = GPU_vertformat_attr_add( + &format, "wd", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + } + else { + attr_id.wd = GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + } + } + + GPU_vertbuf_init_with_format(vbo, &format); + GPU_vertbuf_data_alloc(vbo, vert_len); + + if (GPU_vertbuf_get_format(vbo)->stride == 1) { + memset(GPU_vertbuf_get_data(vbo), 0xFF, (size_t)vert_len); + } + else { + GPUVertBufRaw wd_step; + GPU_vertbuf_attr_get_raw_data(vbo, attr_id.wd, &wd_step); + for (int i = 0; i < vert_len; i++) { + *((float *)GPU_vertbuf_raw_step(&wd_step)) = 1.0f; + } + } +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -777,7 +826,8 @@ GPUBatch *DRW_gpencil_dummy_buffer_get(void) { if (SHC.drw_gpencil_dummy_quad == NULL) { GPUVertFormat format = {0}; - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT); + /* NOTE: Use GPU_COMP_U32 to satisfy minimum 4-byte vertex stride for Metal backend. 
*/ + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT); GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); GPU_vertbuf_data_alloc(vbo, 4); @@ -802,7 +852,6 @@ GPUBatch *DRW_cache_object_all_edges_get(Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_all_edges_get(ob); - /* TODO: should match #DRW_cache_object_surface_get. */ default: return NULL; @@ -814,20 +863,6 @@ GPUBatch *DRW_cache_object_edge_detection_get(Object *ob, bool *r_is_manifold) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_edge_detection_get(ob, r_is_manifold); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_edge_detection_get(ob, r_is_manifold); - case OB_CURVES: - return NULL; - case OB_POINTCLOUD: - return NULL; - case OB_VOLUME: - return NULL; default: return NULL; } @@ -838,23 +873,12 @@ GPUBatch *DRW_cache_object_face_wireframe_get(Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_face_wireframe_get(ob); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_face_wireframe_get(ob); - case OB_CURVES: - return NULL; case OB_POINTCLOUD: return DRW_pointcloud_batch_cache_get_dots(ob); case OB_VOLUME: return DRW_cache_volume_face_wireframe_get(ob); - case OB_GPENCIL: { + case OB_GPENCIL: return DRW_cache_gpencil_face_wireframe_get(ob); - } default: return NULL; } @@ -865,20 +889,6 @@ GPUBatch *DRW_cache_object_loose_edges_get(struct Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_loose_edges_get(ob); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return NULL; - case OB_CURVES: - return NULL; - case OB_POINTCLOUD: - return NULL; - case OB_VOLUME: - return NULL; default: return NULL; } @@ -889,20 +899,8 @@ GPUBatch 
*DRW_cache_object_surface_get(Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_surface_get(ob); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_surface_get(ob); - case OB_CURVES: - return NULL; case OB_POINTCLOUD: return DRW_cache_pointcloud_surface_get(ob); - case OB_VOLUME: - return NULL; default: return NULL; } @@ -916,18 +914,6 @@ GPUVertBuf *DRW_cache_object_pos_vertbuf_get(Object *ob) switch (type) { case OB_MESH: return DRW_mesh_batch_cache_pos_vertbuf_get((me != NULL) ? me : ob->data); - case OB_CURVES_LEGACY: - case OB_SURF: - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_mball_batch_cache_pos_vertbuf_get(ob); - case OB_CURVES: - return NULL; - case OB_POINTCLOUD: - return NULL; - case OB_VOLUME: - return NULL; default: return NULL; } @@ -952,8 +938,6 @@ int DRW_cache_object_material_count_get(struct Object *ob) case OB_SURF: case OB_FONT: return DRW_curve_material_count_get(ob->data); - case OB_MBALL: - return DRW_metaball_material_count_get(ob->data); case OB_CURVES: return DRW_curves_material_count_get(ob->data); case OB_POINTCLOUD: @@ -975,20 +959,8 @@ GPUBatch **DRW_cache_object_surface_material_get(struct Object *ob, switch (ob->type) { case OB_MESH: return DRW_cache_mesh_surface_shaded_get(ob, gpumat_array, gpumat_array_len); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_surface_shaded_get(ob, gpumat_array, gpumat_array_len); - case OB_CURVES: - return NULL; case OB_POINTCLOUD: return DRW_cache_pointcloud_surface_shaded_get(ob, gpumat_array, gpumat_array_len); - case OB_VOLUME: - return NULL; default: return NULL; } @@ -2956,39 +2928,6 @@ GPUBatch *DRW_cache_curve_vert_overlay_get(Object *ob) /** \} */ /* -------------------------------------------------------------------- */ -/** \name MetaBall - * \{ */ - -GPUBatch 
*DRW_cache_mball_surface_get(Object *ob) -{ - BLI_assert(ob->type == OB_MBALL); - return DRW_metaball_batch_cache_get_triangles_with_normals(ob); -} - -GPUBatch *DRW_cache_mball_edge_detection_get(Object *ob, bool *r_is_manifold) -{ - BLI_assert(ob->type == OB_MBALL); - return DRW_metaball_batch_cache_get_edge_detection(ob, r_is_manifold); -} - -GPUBatch *DRW_cache_mball_face_wireframe_get(Object *ob) -{ - BLI_assert(ob->type == OB_MBALL); - return DRW_metaball_batch_cache_get_wireframes_face(ob); -} - -GPUBatch **DRW_cache_mball_surface_shaded_get(Object *ob, - struct GPUMaterial **gpumat_array, - uint gpumat_array_len) -{ - BLI_assert(ob->type == OB_MBALL); - MetaBall *mb = ob->data; - return DRW_metaball_batch_cache_get_surface_shaded(ob, mb, gpumat_array, gpumat_array_len); -} - -/** \} */ - -/* -------------------------------------------------------------------- */ /** \name Font * \{ */ @@ -3306,9 +3245,6 @@ void drw_batch_cache_validate(Object *ob) case OB_SURF: DRW_curve_batch_cache_validate((Curve *)ob->data); break; - case OB_MBALL: - DRW_mball_batch_cache_validate((MetaBall *)ob->data); - break; case OB_LATTICE: DRW_lattice_batch_cache_validate((Lattice *)ob->data); break; diff --git a/source/blender/draw/intern/draw_cache.h b/source/blender/draw/intern/draw_cache.h index a107eb7c75c..4e8788ada08 100644 --- a/source/blender/draw/intern/draw_cache.h +++ b/source/blender/draw/intern/draw_cache.h @@ -213,15 +213,6 @@ struct GPUBatch *DRW_cache_particles_get_edit_tip_points(struct Object *object, struct PTCacheEdit *edit); struct GPUBatch *DRW_cache_particles_get_prim(int type); -/* Metaball */ - -struct GPUBatch *DRW_cache_mball_surface_get(struct Object *ob); -struct GPUBatch **DRW_cache_mball_surface_shaded_get(struct Object *ob, - struct GPUMaterial **gpumat_array, - uint gpumat_array_len); -struct GPUBatch *DRW_cache_mball_face_wireframe_get(struct Object *ob); -struct GPUBatch *DRW_cache_mball_edge_detection_get(struct Object *ob, bool *r_is_manifold); 
- /* Curves */ struct GPUBatch *DRW_cache_curves_surface_get(struct Object *ob); diff --git a/source/blender/draw/intern/draw_cache_extract.hh b/source/blender/draw/intern/draw_cache_extract.hh index c7127d169e1..203da22406c 100644 --- a/source/blender/draw/intern/draw_cache_extract.hh +++ b/source/blender/draw/intern/draw_cache_extract.hh @@ -55,7 +55,6 @@ enum { struct DRW_MeshCDMask { uint32_t uv : 8; uint32_t tan : 8; - uint32_t vcol : 8; uint32_t orco : 1; uint32_t tan_orco : 1; uint32_t sculpt_overlays : 1; @@ -111,7 +110,6 @@ struct MeshBufferList { GPUVertBuf *weights; /* extend */ GPUVertBuf *uv; GPUVertBuf *tan; - GPUVertBuf *vcol; GPUVertBuf *sculpt_data; GPUVertBuf *orco; /* Only for edit mode. */ diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.cc b/source/blender/draw/intern/draw_cache_extract_mesh.cc index 00005fd7b4c..b1d1631cb6d 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh.cc +++ b/source/blender/draw/intern/draw_cache_extract_mesh.cc @@ -155,7 +155,7 @@ struct ExtractTaskData { bool use_threading = false; ExtractTaskData(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, ExtractorRunDatas *extractors, MeshBufferList *mbuflist, const bool use_threading) @@ -193,7 +193,7 @@ static void extract_task_data_free(void *data) * \{ */ BLI_INLINE void extract_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, ExtractorRunDatas &extractors, MeshBufferList *mbuflist, void *data_stack) @@ -209,7 +209,7 @@ BLI_INLINE void extract_init(const MeshRenderData *mr, } BLI_INLINE void extract_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, const ExtractorRunDatas &extractors, void *data_stack) { @@ -619,7 +619,6 @@ void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, EXTRACT_ADD_REQUESTED(vbo, lnor); EXTRACT_ADD_REQUESTED(vbo, uv); EXTRACT_ADD_REQUESTED(vbo, tan); - EXTRACT_ADD_REQUESTED(vbo, vcol); 
EXTRACT_ADD_REQUESTED(vbo, sculpt_data); EXTRACT_ADD_REQUESTED(vbo, orco); EXTRACT_ADD_REQUESTED(vbo, edge_fac); @@ -848,7 +847,6 @@ void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache, EXTRACT_ADD_REQUESTED(vbo, edituv_stretch_angle); EXTRACT_ADD_REQUESTED(ibo, lines_paint_mask); EXTRACT_ADD_REQUESTED(ibo, lines_adjacency); - EXTRACT_ADD_REQUESTED(vbo, vcol); EXTRACT_ADD_REQUESTED(vbo, weights); EXTRACT_ADD_REQUESTED(vbo, sculpt_data); diff --git a/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc b/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc index baea0c7b646..eea19cbebf3 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc +++ b/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc @@ -14,6 +14,7 @@ #include "BLI_math.h" #include "BLI_task.h" +#include "BKE_attribute.hh" #include "BKE_editmesh.h" #include "BKE_editmesh_cache.h" #include "BKE_mesh.h" @@ -228,10 +229,10 @@ static void mesh_render_data_polys_sorted_build(MeshRenderData *mr, MeshBufferCa } } else { - const MPoly *mp = &mr->mpoly[0]; - for (int i = 0; i < mr->poly_len; i++, mp++) { - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - const int mat = min_ii(mp->mat_nr, mat_last); + for (int i = 0; i < mr->poly_len; i++) { + if (!(mr->use_hide && mr->hide_poly && mr->hide_poly[i])) { + const MPoly *mp = &mr->mpoly[i]; + const int mat = min_ii(mr->material_indices ? mr->material_indices[i] : 0, mat_last); tri_first_index[i] = mat_tri_offs[mat]; mat_tri_offs[mat] += mp->totloop - 2; } @@ -269,8 +270,8 @@ static void mesh_render_data_mat_tri_len_mesh_range_fn(void *__restrict userdata int *mat_tri_len = static_cast<int *>(tls->userdata_chunk); const MPoly *mp = &mr->mpoly[iter]; - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - int mat = min_ii(mp->mat_nr, mr->mat_len - 1); + if (!(mr->use_hide && mr->hide_poly && mr->hide_poly[iter])) { + int mat = min_ii(mr->material_indices ? 
mr->material_indices[iter] : 0, mr->mat_len - 1); mat_tri_len[mat] += mp->totloop - 2; } } @@ -332,15 +333,15 @@ void mesh_render_data_update_looptris(MeshRenderData *mr, if (mr->extract_type != MR_EXTRACT_BMESH) { /* Mesh */ if ((iter_type & MR_ITER_LOOPTRI) || (data_flag & MR_DATA_LOOPTRI)) { - /* NOTE(campbell): It's possible to skip allocating tessellation, + /* NOTE(@campbellbarton): It's possible to skip allocating tessellation, * the tessellation can be calculated as part of the iterator, see: P2188. * The overall advantage is small (around 1%), so keep this as-is. */ mr->mlooptri = static_cast<MLoopTri *>( MEM_mallocN(sizeof(*mr->mlooptri) * mr->tri_len, "MR_DATATYPE_LOOPTRI")); if (mr->poly_normals != nullptr) { - BKE_mesh_recalc_looptri_with_normals(me->mloop, - me->mpoly, - me->mvert, + BKE_mesh_recalc_looptri_with_normals(mr->mloop, + mr->mpoly, + mr->mvert, me->totloop, me->totpoly, mr->mlooptri, @@ -348,7 +349,7 @@ void mesh_render_data_update_looptris(MeshRenderData *mr, } else { BKE_mesh_recalc_looptri( - me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, mr->mlooptri); + mr->mloop, mr->mpoly, mr->mvert, me->totloop, me->totpoly, mr->mlooptri); } } } @@ -378,15 +379,15 @@ void mesh_render_data_update_normals(MeshRenderData *mr, const eMRDataType data_ MEM_mallocN(sizeof(*mr->loop_normals) * mr->loop_len, __func__)); short(*clnors)[2] = static_cast<short(*)[2]>( CustomData_get_layer(&mr->me->ldata, CD_CUSTOMLOOPNORMAL)); - BKE_mesh_normals_loop_split(mr->me->mvert, + BKE_mesh_normals_loop_split(mr->mvert, mr->vert_normals, mr->vert_len, - mr->me->medge, + mr->medge, mr->edge_len, - mr->me->mloop, + mr->mloop, mr->loop_normals, mr->loop_len, - mr->me->mpoly, + mr->mpoly, mr->poly_normals, mr->poly_len, is_auto_smooth, @@ -431,6 +432,30 @@ void mesh_render_data_update_normals(MeshRenderData *mr, const eMRDataType data_ } } +static void retrieve_active_attribute_names(MeshRenderData &mr, + const Object &object, + const Mesh &mesh) +{ + const 
Mesh *mesh_final = editmesh_final_or_this(&object, &mesh); + const CustomData *cd_vdata = mesh_cd_vdata_get_from_mesh(mesh_final); + const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(mesh_final); + + /* Necessary because which attributes are active/default is stored in #CustomData. */ + Mesh me_query = blender::dna::shallow_zero_initialize(); + BKE_id_attribute_copy_domains_temp( + ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); + + mr.active_color_name = nullptr; + mr.default_color_name = nullptr; + + if (const CustomDataLayer *active = BKE_id_attributes_active_color_get(&me_query.id)) { + mr.active_color_name = active->name; + } + if (const CustomDataLayer *render = BKE_id_attributes_render_color_get(&me_query.id)) { + mr.default_color_name = render->name; + } +} + MeshRenderData *mesh_render_data_create(Object *object, Mesh *me, const bool is_editmode, @@ -470,17 +495,6 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->bm_poly_centers = mr->edit_data->polyCos; } - /* A subdivision wrapper may be created in edit mode when X-ray is turned on to ensure that the - * topology seen by the user matches the one used for the selection routines. This wrapper - * seemingly takes precedence over the MDATA one, however the mesh we use for rendering is not - * the subdivided one, but the one where the MDATA wrapper would have been added. So consider - * the subdivision wrapper as well for the `has_mdata` case. 
*/ - bool has_mdata = is_mode_active && ELEM(mr->me->runtime.wrapper_type, - ME_WRAPPER_TYPE_MDATA, - ME_WRAPPER_TYPE_SUBD); - bool use_mapped = is_mode_active && - (has_mdata && !do_uvedit && mr->me && !mr->me->runtime.is_original); - int bm_ensure_types = BM_VERT | BM_EDGE | BM_LOOP | BM_FACE; BM_mesh_elem_index_ensure(mr->bm, bm_ensure_types); @@ -499,43 +513,51 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->freestyle_face_ofs = CustomData_get_offset(&mr->bm->pdata, CD_FREESTYLE_FACE); #endif - if (use_mapped) { - mr->v_origindex = static_cast<const int *>( - CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); - mr->e_origindex = static_cast<const int *>( - CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); - mr->p_origindex = static_cast<const int *>( - CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); - - use_mapped = (mr->v_origindex || mr->e_origindex || mr->p_origindex); + /* Use bmesh directly when the object is in edit mode unchanged by any modifiers. + * For non-final UVs, always use original bmesh since the UV editor does not support + * using the cage mesh with deformed coordinates. */ + if ((is_mode_active && mr->me->runtime.is_original_bmesh && + mr->me->runtime.wrapper_type == ME_WRAPPER_TYPE_BMESH) || + (do_uvedit && !do_final)) { + mr->extract_type = MR_EXTRACT_BMESH; } - - mr->extract_type = use_mapped ? MR_EXTRACT_MAPPED : MR_EXTRACT_BMESH; - - /* Seems like the mesh_eval_final do not have the right origin indices. - * Force not mapped in this case. */ - if (has_mdata && do_final && editmesh_eval_final != editmesh_eval_cage) { - // mr->edit_bmesh = nullptr; + else { mr->extract_type = MR_EXTRACT_MESH; + + /* Use mapping from final to original mesh when the object is in edit mode. 
*/ + if (is_mode_active && do_final) { + mr->v_origindex = static_cast<const int *>( + CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); + mr->e_origindex = static_cast<const int *>( + CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); + mr->p_origindex = static_cast<const int *>( + CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); + } + else { + mr->v_origindex = nullptr; + mr->e_origindex = nullptr; + mr->p_origindex = nullptr; + } } } else { mr->me = me; mr->edit_bmesh = nullptr; + mr->extract_type = MR_EXTRACT_MESH; - bool use_mapped = is_paint_mode && mr->me && !mr->me->runtime.is_original; - if (use_mapped) { + if (is_paint_mode && mr->me) { mr->v_origindex = static_cast<const int *>( CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); mr->e_origindex = static_cast<const int *>( CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); mr->p_origindex = static_cast<const int *>( CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); - - use_mapped = (mr->v_origindex || mr->e_origindex || mr->p_origindex); } - - mr->extract_type = use_mapped ? 
MR_EXTRACT_MAPPED : MR_EXTRACT_MESH; + else { + mr->v_origindex = nullptr; + mr->e_origindex = nullptr; + mr->p_origindex = nullptr; + } } if (mr->extract_type != MR_EXTRACT_BMESH) { @@ -546,14 +568,24 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->poly_len = mr->me->totpoly; mr->tri_len = poly_to_tri_count(mr->poly_len, mr->loop_len); - mr->mvert = static_cast<MVert *>(CustomData_get_layer(&mr->me->vdata, CD_MVERT)); - mr->medge = static_cast<MEdge *>(CustomData_get_layer(&mr->me->edata, CD_MEDGE)); - mr->mloop = static_cast<MLoop *>(CustomData_get_layer(&mr->me->ldata, CD_MLOOP)); - mr->mpoly = static_cast<MPoly *>(CustomData_get_layer(&mr->me->pdata, CD_MPOLY)); + mr->mvert = BKE_mesh_verts(mr->me); + mr->medge = BKE_mesh_edges(mr->me); + mr->mpoly = BKE_mesh_polys(mr->me); + mr->mloop = BKE_mesh_loops(mr->me); mr->v_origindex = static_cast<const int *>(CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); mr->e_origindex = static_cast<const int *>(CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); mr->p_origindex = static_cast<const int *>(CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); + + mr->material_indices = static_cast<const int *>( + CustomData_get_layer_named(&me->pdata, CD_PROP_INT32, "material_index")); + + mr->hide_vert = static_cast<const bool *>( + CustomData_get_layer_named(&me->vdata, CD_PROP_BOOL, ".hide_vert")); + mr->hide_edge = static_cast<const bool *>( + CustomData_get_layer_named(&me->edata, CD_PROP_BOOL, ".hide_edge")); + mr->hide_poly = static_cast<const bool *>( + CustomData_get_layer_named(&me->pdata, CD_PROP_BOOL, ".hide_poly")); } else { /* #BMesh */ @@ -566,6 +598,8 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->tri_len = poly_to_tri_count(mr->poly_len, mr->loop_len); } + retrieve_active_attribute_names(*mr, *object, *me); + return mr; } diff --git a/source/blender/draw/intern/draw_cache_impl.h b/source/blender/draw/intern/draw_cache_impl.h index 4fa5813d476..7f7d0a7613f 100644 --- 
a/source/blender/draw/intern/draw_cache_impl.h +++ b/source/blender/draw/intern/draw_cache_impl.h @@ -36,10 +36,6 @@ extern "C" { /** \name Expose via BKE callbacks * \{ */ -void DRW_mball_batch_cache_dirty_tag(struct MetaBall *mb, int mode); -void DRW_mball_batch_cache_validate(struct MetaBall *mb); -void DRW_mball_batch_cache_free(struct MetaBall *mb); - void DRW_curve_batch_cache_dirty_tag(struct Curve *cu, int mode); void DRW_curve_batch_cache_validate(struct Curve *cu); void DRW_curve_batch_cache_free(struct Curve *cu); @@ -111,39 +107,6 @@ struct GPUBatch *DRW_curve_batch_cache_get_edit_verts(struct Curve *cu); /** \} */ /* -------------------------------------------------------------------- */ -/** \name Metaball - * \{ */ - -int DRW_metaball_material_count_get(struct MetaBall *mb); - -struct GPUBatch *DRW_metaball_batch_cache_get_triangles_with_normals(struct Object *ob); -struct GPUBatch **DRW_metaball_batch_cache_get_surface_shaded(struct Object *ob, - struct MetaBall *mb, - struct GPUMaterial **gpumat_array, - uint gpumat_array_len); -struct GPUBatch *DRW_metaball_batch_cache_get_wireframes_face(struct Object *ob); -struct GPUBatch *DRW_metaball_batch_cache_get_edge_detection(struct Object *ob, - bool *r_is_manifold); - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name DispList - * \{ */ - -void DRW_displist_vertbuf_create_pos_and_nor(struct ListBase *lb, - struct GPUVertBuf *vbo, - const struct Scene *scene); -void DRW_displist_vertbuf_create_wiredata(struct ListBase *lb, struct GPUVertBuf *vbo); -void DRW_displist_indexbuf_create_lines_in_order(struct ListBase *lb, struct GPUIndexBuf *ibo); -void DRW_displist_indexbuf_create_triangles_in_order(struct ListBase *lb, struct GPUIndexBuf *ibo); -void DRW_displist_indexbuf_create_edges_adjacency_lines(struct ListBase *lb, - struct GPUIndexBuf *ibo, - bool *r_is_manifold); - -/** \} */ - -/* -------------------------------------------------------------------- 
*/ /** \name Lattice * \{ */ @@ -161,6 +124,16 @@ struct GPUBatch *DRW_lattice_batch_cache_get_edit_verts(struct Lattice *lt); int DRW_curves_material_count_get(struct Curves *curves); +/** + * Provide GPU access to a specific evaluated attribute on curves. + * + * \return A pointer to location where the texture will be + * stored, which will be filled by #DRW_shgroup_curves_create_sub. + */ +struct GPUTexture **DRW_curves_texture_for_evaluated_attribute(struct Curves *curves, + const char *name, + bool *r_is_point_domain); + struct GPUBatch *DRW_curves_batch_cache_get_edit_points(struct Curves *curves); void DRW_curves_batch_cache_create_requested(struct Object *ob); @@ -299,7 +272,6 @@ struct GPUBatch *DRW_mesh_batch_cache_get_edit_mesh_analysis(struct Mesh *me); * \{ */ struct GPUVertBuf *DRW_mesh_batch_cache_pos_vertbuf_get(struct Mesh *me); -struct GPUVertBuf *DRW_mball_batch_cache_pos_vertbuf_get(struct Object *ob); int DRW_mesh_material_count_get(const struct Object *object, const struct Mesh *me); diff --git a/source/blender/draw/intern/draw_cache_impl_curve.cc b/source/blender/draw/intern/draw_cache_impl_curve.cc index ebcdabe4942..695c348d8e2 100644 --- a/source/blender/draw/intern/draw_cache_impl_curve.cc +++ b/source/blender/draw/intern/draw_cache_impl_curve.cc @@ -108,7 +108,7 @@ static void curve_eval_render_wire_verts_edges_len_get(const blender::bke::Curve const blender::VArray<bool> cyclic = curves.cyclic(); for (const int i : curves.curves_range()) { const IndexRange points = curves.evaluated_points_for_curve(i); - *r_edge_len += blender::bke::curves::curve_segment_num(points.size(), cyclic[i]); + *r_edge_len += blender::bke::curves::segments_num(points.size(), cyclic[i]); } } diff --git a/source/blender/draw/intern/draw_cache_impl_curves.cc b/source/blender/draw/intern/draw_cache_impl_curves.cc index 68ca1153c96..3bca17d9c56 100644 --- a/source/blender/draw/intern/draw_cache_impl_curves.cc +++ 
b/source/blender/draw/intern/draw_cache_impl_curves.cc @@ -75,13 +75,14 @@ static void curves_batch_cache_init(Curves &curves) if (!cache) { cache = MEM_cnew<CurvesBatchCache>(__func__); - BLI_mutex_init(&cache->render_mutex); curves.batch_cache = cache; } else { memset(cache, 0, sizeof(*cache)); } + BLI_mutex_init(&cache->render_mutex); + cache->is_dirty = false; } @@ -258,7 +259,7 @@ static void curves_batch_cache_fill_segments_proc_pos( } } -static void curves_batch_cache_ensure_procedural_pos(Curves &curves, +static void curves_batch_cache_ensure_procedural_pos(const Curves &curves, CurvesEvalCache &cache, GPUMaterial *gpu_material) { @@ -268,7 +269,8 @@ static void curves_batch_cache_ensure_procedural_pos(Curves &curves, GPU_vertformat_attr_add(&format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); GPU_vertformat_alias_add(&format, "pos"); - cache.proc_point_buf = GPU_vertbuf_create_with_format(&format); + cache.proc_point_buf = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_point_buf, cache.point_len); MutableSpan posTime_data{ @@ -278,7 +280,8 @@ static void curves_batch_cache_ensure_procedural_pos(Curves &curves, GPUVertFormat length_format = {0}; GPU_vertformat_attr_add(&length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - cache.proc_length_buf = GPU_vertbuf_create_with_format(&length_format); + cache.proc_length_buf = GPU_vertbuf_create_with_format_ex( + &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_length_buf, cache.strands_len); MutableSpan hairLength_data{ @@ -311,12 +314,15 @@ void drw_curves_get_attribute_sampler_name(const char *layer_name, char r_sample BLI_snprintf(r_sampler_name, 32, "a%s", attr_safe_name); } -static void curves_batch_cache_ensure_procedural_final_attr( - CurvesEvalCache &cache, GPUVertFormat *format, int subdiv, int index, const char *name) +static void 
curves_batch_cache_ensure_procedural_final_attr(CurvesEvalCache &cache, + const GPUVertFormat *format, + const int subdiv, + const int index, + const char *name) { CurvesEvalFinalCache &final_cache = cache.final[subdiv]; - final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(format, - GPU_USAGE_DEVICE_ONLY); + final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex( + format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. */ @@ -333,8 +339,8 @@ static void curves_batch_cache_ensure_procedural_final_attr( static void curves_batch_ensure_attribute(const Curves &curves, CurvesEvalCache &cache, const DRW_AttributeRequest &request, - int subdiv, - int index) + const int subdiv, + const int index) { GPU_VERTBUF_DISCARD_SAFE(cache.proc_attributes_buf[index]); DRW_TEXTURE_FREE_SAFE(cache.proc_attributes_tex[index]); @@ -347,27 +353,28 @@ static void curves_batch_ensure_attribute(const Curves &curves, /* All attributes use vec4, see comment below. */ GPU_vertformat_attr_add(&format, sampler_name, GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format(&format); + cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPUVertBuf *attr_vbo = cache.proc_attributes_buf[index]; GPU_vertbuf_data_alloc(attr_vbo, request.domain == ATTR_DOMAIN_POINT ? 
curves.geometry.point_num : curves.geometry.curve_num); - CurveComponent component; - component.replace(const_cast<Curves *>(&curves), GeometryOwnershipType::ReadOnly); + const blender::bke::AttributeAccessor attributes = + blender::bke::CurvesGeometry::wrap(curves.geometry).attributes(); /* TODO(@kevindietrich): float4 is used for scalar attributes as the implicit conversion done * by OpenGL to vec4 for a scalar `s` will produce a `vec4(s, 0, 0, 1)`. However, following * the Blender convention, it should be `vec4(s, s, s, 1)`. This could be resolved using a * similar texture state swizzle to map the attribute correctly as for volume attributes, so we * can control the conversion ourselves. */ - blender::VArray<ColorGeometry4f> attribute = component.attribute_get_for_read<ColorGeometry4f>( + blender::VArray<ColorGeometry4f> attribute = attributes.lookup_or_default<ColorGeometry4f>( request.attribute_name, request.domain, {0.0f, 0.0f, 0.0f, 1.0f}); MutableSpan<ColorGeometry4f> vbo_span{ static_cast<ColorGeometry4f *>(GPU_vertbuf_get_data(attr_vbo)), - component.attribute_domain_num(request.domain)}; + attributes.domain_size(request.domain)}; attribute.materialize(vbo_span); @@ -393,10 +400,10 @@ static void curves_batch_cache_fill_strands_data(const Curves &curves_id, curves_id.geometry); for (const int i : IndexRange(curves.curves_num())) { - const IndexRange curve_range = curves.points_for_curve(i); + const IndexRange points = curves.points_for_curve(i); - *(uint *)GPU_vertbuf_raw_step(&data_step) = curve_range.start(); - *(ushort *)GPU_vertbuf_raw_step(&seg_step) = curve_range.size() - 1; + *(uint *)GPU_vertbuf_raw_step(&data_step) = points.start(); + *(ushort *)GPU_vertbuf_raw_step(&seg_step) = points.size() - 1; } } @@ -412,11 +419,13 @@ static void curves_batch_cache_ensure_procedural_strand_data(Curves &curves, uint seg_id = GPU_vertformat_attr_add(&format_seg, "data", GPU_COMP_U16, 1, GPU_FETCH_INT); /* Curve Data. 
*/ - cache.proc_strand_buf = GPU_vertbuf_create_with_format(&format_data); + cache.proc_strand_buf = GPU_vertbuf_create_with_format_ex( + &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_strand_buf, cache.strands_len); GPU_vertbuf_attr_get_raw_data(cache.proc_strand_buf, data_id, &data_step); - cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg); + cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex( + &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_strand_seg_buf, cache.strands_len); GPU_vertbuf_attr_get_raw_data(cache.proc_strand_seg_buf, seg_id, &seg_step); @@ -437,7 +446,8 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c GPUVertFormat format = {0}; GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DEVICE_ONLY); + cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. 
*/ @@ -509,58 +519,41 @@ static bool curves_ensure_attributes(const Curves &curves, ThreadMutex *render_mutex = &cache.render_mutex; const CustomData *cd_curve = &curves.geometry.curve_data; const CustomData *cd_point = &curves.geometry.point_data; + CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; - DRW_Attributes attrs_needed; - drw_attributes_clear(&attrs_needed); - ListBase gpu_attrs = GPU_material_attributes(gpu_material); - LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { - const char *name = gpu_attr->name; - - int layer_index; - eCustomDataType type; - eAttrDomain domain; - if (drw_custom_data_match_attribute(cd_curve, name, &layer_index, &type)) { - domain = ATTR_DOMAIN_CURVE; - } - else if (drw_custom_data_match_attribute(cd_point, name, &layer_index, &type)) { - domain = ATTR_DOMAIN_POINT; - } - else { - continue; - } - - switch (type) { - case CD_PROP_BOOL: - case CD_PROP_INT8: - case CD_PROP_INT32: - case CD_PROP_FLOAT: - case CD_PROP_FLOAT2: - case CD_PROP_FLOAT3: - case CD_PROP_COLOR: { - DRW_AttributeRequest *request = drw_attributes_add_request( - &attrs_needed, type, layer_index, domain); - if (request) { - BLI_strncpy(request->attribute_name, name, sizeof(request->attribute_name)); - } - - break; + if (gpu_material) { + DRW_Attributes attrs_needed; + drw_attributes_clear(&attrs_needed); + ListBase gpu_attrs = GPU_material_attributes(gpu_material); + LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { + const char *name = gpu_attr->name; + + int layer_index; + eCustomDataType type; + eAttrDomain domain; + if (drw_custom_data_match_attribute(cd_curve, name, &layer_index, &type)) { + domain = ATTR_DOMAIN_CURVE; + } + else if (drw_custom_data_match_attribute(cd_point, name, &layer_index, &type)) { + domain = ATTR_DOMAIN_POINT; + } + else { + continue; } - default: - break; - } - } - CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; + drw_attributes_add_request(&attrs_needed, 
name, type, layer_index, domain); + } - if (!drw_attributes_overlap(&final_cache.attr_used, &attrs_needed)) { - /* Some new attributes have been added, free all and start over. */ - for (const int i : IndexRange(GPU_MAX_ATTR)) { - GPU_VERTBUF_DISCARD_SAFE(cache.curves_cache.proc_attributes_buf[i]); - DRW_TEXTURE_FREE_SAFE(cache.curves_cache.proc_attributes_tex[i]); + if (!drw_attributes_overlap(&final_cache.attr_used, &attrs_needed)) { + /* Some new attributes have been added, free all and start over. */ + for (const int i : IndexRange(GPU_MAX_ATTR)) { + GPU_VERTBUF_DISCARD_SAFE(cache.curves_cache.proc_attributes_buf[i]); + DRW_TEXTURE_FREE_SAFE(cache.curves_cache.proc_attributes_tex[i]); + } + drw_attributes_merge(&final_cache.attr_used, &attrs_needed, render_mutex); } - drw_attributes_merge(&final_cache.attr_used, &attrs_needed, render_mutex); + drw_attributes_merge(&final_cache.attr_used_over_time, &attrs_needed, render_mutex); } - drw_attributes_merge(&final_cache.attr_used_over_time, &attrs_needed, render_mutex); bool need_tf_update = false; @@ -581,16 +574,15 @@ static bool curves_ensure_attributes(const Curves &curves, return need_tf_update; } -bool curves_ensure_procedural_data(Object *object, +bool curves_ensure_procedural_data(Curves *curves, CurvesEvalCache **r_hair_cache, GPUMaterial *gpu_material, const int subdiv, const int thickness_res) { bool need_ft_update = false; - Curves &curves = *static_cast<Curves *>(object->data); - CurvesBatchCache &cache = curves_batch_cache_get(curves); + CurvesBatchCache &cache = curves_batch_cache_get(*curves); *r_hair_cache = &cache.curves_cache; const int steps = 3; /* TODO: don't hard-code? */ @@ -598,14 +590,14 @@ bool curves_ensure_procedural_data(Object *object, /* Refreshed on combing and simulation. 
*/ if ((*r_hair_cache)->proc_point_buf == nullptr) { - ensure_seg_pt_count(curves, cache.curves_cache); - curves_batch_cache_ensure_procedural_pos(curves, cache.curves_cache, gpu_material); + ensure_seg_pt_count(*curves, cache.curves_cache); + curves_batch_cache_ensure_procedural_pos(*curves, cache.curves_cache, gpu_material); need_ft_update = true; } /* Refreshed if active layer or custom data changes. */ if ((*r_hair_cache)->strand_tex == nullptr) { - curves_batch_cache_ensure_procedural_strand_data(curves, cache.curves_cache); + curves_batch_cache_ensure_procedural_strand_data(*curves, cache.curves_cache); } /* Refreshed only on subdiv count change. */ @@ -615,12 +607,10 @@ bool curves_ensure_procedural_data(Object *object, } if ((*r_hair_cache)->final[subdiv].proc_hairs[thickness_res - 1] == nullptr) { curves_batch_cache_ensure_procedural_indices( - curves, cache.curves_cache, thickness_res, subdiv); + *curves, cache.curves_cache, thickness_res, subdiv); } - if (gpu_material) { - need_ft_update |= curves_ensure_attributes(curves, cache, gpu_material, subdiv); - } + need_ft_update |= curves_ensure_attributes(*curves, cache, gpu_material, subdiv); return need_ft_update; } @@ -636,6 +626,70 @@ GPUBatch *DRW_curves_batch_cache_get_edit_points(Curves *curves) return DRW_batch_request(&cache.edit_points); } +static void request_attribute(Curves &curves, const char *name) +{ + CurvesBatchCache &cache = curves_batch_cache_get(curves); + const DRWContextState *draw_ctx = DRW_context_state_get(); + const Scene *scene = draw_ctx->scene; + const int subdiv = scene->r.hair_subdiv; + CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; + + DRW_Attributes attributes{}; + + blender::bke::CurvesGeometry &curves_geometry = blender::bke::CurvesGeometry::wrap( + curves.geometry); + std::optional<blender::bke::AttributeMetaData> meta_data = + curves_geometry.attributes().lookup_meta_data(name); + if (!meta_data) { + return; + } + const eAttrDomain domain = 
meta_data->domain; + const eCustomDataType type = meta_data->data_type; + const CustomData &custom_data = domain == ATTR_DOMAIN_POINT ? curves.geometry.point_data : + curves.geometry.curve_data; + + drw_attributes_add_request( + &attributes, name, type, CustomData_get_named_layer(&custom_data, type, name), domain); + + drw_attributes_merge(&final_cache.attr_used, &attributes, &cache.render_mutex); +} + +GPUTexture **DRW_curves_texture_for_evaluated_attribute(Curves *curves, + const char *name, + bool *r_is_point_domain) +{ + CurvesBatchCache &cache = curves_batch_cache_get(*curves); + const DRWContextState *draw_ctx = DRW_context_state_get(); + const Scene *scene = draw_ctx->scene; + const int subdiv = scene->r.hair_subdiv; + CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; + + request_attribute(*curves, name); + + int request_i = -1; + for (const int i : IndexRange(final_cache.attr_used.num_requests)) { + if (STREQ(final_cache.attr_used.requests[i].attribute_name, name)) { + request_i = i; + break; + } + } + if (request_i == -1) { + *r_is_point_domain = false; + return nullptr; + } + switch (final_cache.attr_used.requests[request_i].domain) { + case ATTR_DOMAIN_POINT: + *r_is_point_domain = true; + return &final_cache.attributes_tex[request_i]; + case ATTR_DOMAIN_CURVE: + *r_is_point_domain = false; + return &cache.curves_cache.proc_attributes_tex[request_i]; + default: + BLI_assert_unreachable(); + return nullptr; + } +} + void DRW_curves_batch_cache_create_requested(Object *ob) { Curves *curves = static_cast<Curves *>(ob->data); diff --git a/source/blender/draw/intern/draw_cache_impl_displist.c b/source/blender/draw/intern/draw_cache_impl_displist.c deleted file mode 100644 index 96c088c3ee9..00000000000 --- a/source/blender/draw/intern/draw_cache_impl_displist.c +++ /dev/null @@ -1,354 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2017 Blender Foundation. All rights reserved. 
*/ - -/** \file - * \ingroup draw - * - * \brief DispList API for render engines - * - * \note DispList may be removed soon! This is a utility for object types that use render. - */ - -#include "BLI_edgehash.h" -#include "BLI_listbase.h" -#include "BLI_math_vector.h" -#include "BLI_utildefines.h" - -#include "DNA_curve_types.h" -#include "DNA_scene_types.h" - -#include "BKE_displist.h" - -#include "GPU_batch.h" -#include "GPU_capabilities.h" - -#include "draw_cache_inline.h" - -#include "draw_cache_impl.h" /* own include */ - -static int dl_vert_len(const DispList *dl) -{ - switch (dl->type) { - case DL_INDEX3: - case DL_INDEX4: - return dl->nr; - case DL_SURF: - return dl->parts * dl->nr; - } - return 0; -} - -static int dl_tri_len(const DispList *dl) -{ - switch (dl->type) { - case DL_INDEX3: - return dl->parts; - case DL_INDEX4: - return dl->parts * 2; - case DL_SURF: - return dl->totindex * 2; - } - return 0; -} - -/* see: displist_vert_coords_alloc */ -static int curve_render_surface_vert_len_get(const ListBase *lb) -{ - int vert_len = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - vert_len += dl_vert_len(dl); - } - return vert_len; -} - -static int curve_render_surface_tri_len_get(const ListBase *lb) -{ - int tri_len = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - tri_len += dl_tri_len(dl); - } - return tri_len; -} - -typedef void(SetTriIndicesFn)(void *thunk, uint v1, uint v2, uint v3); - -static void displist_indexbufbuilder_set( - SetTriIndicesFn *set_tri_indices, - SetTriIndicesFn *set_quad_tri_indices, /* meh, find a better solution. 
*/ - void *thunk, - const DispList *dl, - const int ofs) -{ - if (ELEM(dl->type, DL_INDEX3, DL_INDEX4, DL_SURF)) { - const int *idx = dl->index; - if (dl->type == DL_INDEX3) { - const int i_end = dl->parts; - for (int i = 0; i < i_end; i++, idx += 3) { - set_tri_indices(thunk, idx[0] + ofs, idx[2] + ofs, idx[1] + ofs); - } - } - else if (dl->type == DL_SURF) { - const int i_end = dl->totindex; - for (int i = 0; i < i_end; i++, idx += 4) { - set_quad_tri_indices(thunk, idx[0] + ofs, idx[2] + ofs, idx[1] + ofs); - set_quad_tri_indices(thunk, idx[2] + ofs, idx[0] + ofs, idx[3] + ofs); - } - } - else { - BLI_assert(dl->type == DL_INDEX4); - const int i_end = dl->parts; - for (int i = 0; i < i_end; i++, idx += 4) { - if (idx[2] != idx[3]) { - set_quad_tri_indices(thunk, idx[2] + ofs, idx[0] + ofs, idx[1] + ofs); - set_quad_tri_indices(thunk, idx[0] + ofs, idx[2] + ofs, idx[3] + ofs); - } - else { - set_tri_indices(thunk, idx[2] + ofs, idx[0] + ofs, idx[1] + ofs); - } - } - } - } -} - -void DRW_displist_vertbuf_create_pos_and_nor(ListBase *lb, GPUVertBuf *vbo, const Scene *scene) -{ - const bool do_hq_normals = (scene->r.perf_flag & SCE_PERF_HQ_NORMALS) != 0 || - GPU_use_hq_normals_workaround(); - - static GPUVertFormat format = {0}; - static GPUVertFormat format_hq = {0}; - static struct { - uint pos, nor; - uint pos_hq, nor_hq; - } attr_id; - if (format.attr_len == 0) { - /* initialize vertex format */ - attr_id.pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - attr_id.nor = GPU_vertformat_attr_add( - &format, "nor", GPU_COMP_I10, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); - /* initialize vertex format */ - attr_id.pos_hq = GPU_vertformat_attr_add(&format_hq, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - attr_id.nor_hq = GPU_vertformat_attr_add( - &format_hq, "nor", GPU_COMP_I16, 3, GPU_FETCH_INT_TO_FLOAT_UNIT); - } - - uint pos_id = do_hq_normals ? attr_id.pos_hq : attr_id.pos; - uint nor_id = do_hq_normals ? 
attr_id.nor_hq : attr_id.nor; - - GPU_vertbuf_init_with_format(vbo, do_hq_normals ? &format_hq : &format); - GPU_vertbuf_data_alloc(vbo, curve_render_surface_vert_len_get(lb)); - - BKE_displist_normals_add(lb); - - int vbo_len_used = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - const bool ndata_is_single = dl->type == DL_INDEX3; - if (ELEM(dl->type, DL_INDEX3, DL_INDEX4, DL_SURF)) { - const float *fp_co = dl->verts; - const float *fp_no = dl->nors; - const int vbo_end = vbo_len_used + dl_vert_len(dl); - while (vbo_len_used < vbo_end) { - GPU_vertbuf_attr_set(vbo, pos_id, vbo_len_used, fp_co); - if (fp_no) { - GPUNormal vnor_pack; - GPU_normal_convert_v3(&vnor_pack, fp_no, do_hq_normals); - GPU_vertbuf_attr_set(vbo, nor_id, vbo_len_used, &vnor_pack); - if (ndata_is_single == false) { - fp_no += 3; - } - } - fp_co += 3; - vbo_len_used += 1; - } - } - } -} - -void DRW_vertbuf_create_wiredata(GPUVertBuf *vbo, const int vert_len) -{ - static GPUVertFormat format = {0}; - static struct { - uint wd; - } attr_id; - if (format.attr_len == 0) { - /* initialize vertex format */ - if (!GPU_crappy_amd_driver()) { - /* Some AMD drivers strangely crash with a vbo with this format. 
*/ - attr_id.wd = GPU_vertformat_attr_add( - &format, "wd", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); - } - else { - attr_id.wd = GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - } - } - - GPU_vertbuf_init_with_format(vbo, &format); - GPU_vertbuf_data_alloc(vbo, vert_len); - - if (GPU_vertbuf_get_format(vbo)->stride == 1) { - memset(GPU_vertbuf_get_data(vbo), 0xFF, (size_t)vert_len); - } - else { - GPUVertBufRaw wd_step; - GPU_vertbuf_attr_get_raw_data(vbo, attr_id.wd, &wd_step); - for (int i = 0; i < vert_len; i++) { - *((float *)GPU_vertbuf_raw_step(&wd_step)) = 1.0f; - } - } -} - -void DRW_displist_vertbuf_create_wiredata(ListBase *lb, GPUVertBuf *vbo) -{ - const int vert_len = curve_render_surface_vert_len_get(lb); - DRW_vertbuf_create_wiredata(vbo, vert_len); -} - -void DRW_displist_indexbuf_create_triangles_in_order(ListBase *lb, GPUIndexBuf *ibo) -{ - const int tri_len = curve_render_surface_tri_len_get(lb); - const int vert_len = curve_render_surface_vert_len_get(lb); - - GPUIndexBufBuilder elb; - GPU_indexbuf_init(&elb, GPU_PRIM_TRIS, tri_len, vert_len); - - int ofs = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - displist_indexbufbuilder_set((SetTriIndicesFn *)GPU_indexbuf_add_tri_verts, - (SetTriIndicesFn *)GPU_indexbuf_add_tri_verts, - &elb, - dl, - ofs); - ofs += dl_vert_len(dl); - } - - GPU_indexbuf_build_in_place(&elb, ibo); -} - -static void set_overlay_wires_tri_indices(void *thunk, uint v1, uint v2, uint v3) -{ - GPUIndexBufBuilder *eld = (GPUIndexBufBuilder *)thunk; - GPU_indexbuf_add_line_verts(eld, v1, v2); - GPU_indexbuf_add_line_verts(eld, v2, v3); - GPU_indexbuf_add_line_verts(eld, v3, v1); -} - -static void set_overlay_wires_quad_tri_indices(void *thunk, uint v1, uint v2, uint v3) -{ - GPUIndexBufBuilder *eld = (GPUIndexBufBuilder *)thunk; - GPU_indexbuf_add_line_verts(eld, v1, v3); - GPU_indexbuf_add_line_verts(eld, v3, v2); -} - -void DRW_displist_indexbuf_create_lines_in_order(ListBase *lb, 
GPUIndexBuf *ibo) -{ - const int tri_len = curve_render_surface_tri_len_get(lb); - const int vert_len = curve_render_surface_vert_len_get(lb); - - GPUIndexBufBuilder elb; - GPU_indexbuf_init(&elb, GPU_PRIM_LINES, tri_len * 3, vert_len); - - int ofs = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - displist_indexbufbuilder_set( - set_overlay_wires_tri_indices, set_overlay_wires_quad_tri_indices, &elb, dl, ofs); - ofs += dl_vert_len(dl); - } - - GPU_indexbuf_build_in_place(&elb, ibo); -} - -/* Edge detection/adjacency. */ -#define NO_EDGE INT_MAX -static void set_edge_adjacency_lines_indices( - EdgeHash *eh, GPUIndexBufBuilder *elb, bool *r_is_manifold, uint v1, uint v2, uint v3) -{ - bool inv_indices = (v2 > v3); - void **pval; - bool value_is_init = BLI_edgehash_ensure_p(eh, v2, v3, &pval); - int v_data = POINTER_AS_INT(*pval); - if (!value_is_init || v_data == NO_EDGE) { - /* Save the winding order inside the sign bit. Because the - * edgehash sort the keys and we need to compare winding later. */ - int value = (int)v1 + 1; /* Int 0 bm_looptricannot be signed */ - *pval = POINTER_FROM_INT((inv_indices) ? -value : value); - } - else { - /* HACK Tag as not used. Prevent overhead of BLI_edgehash_remove. */ - *pval = POINTER_FROM_INT(NO_EDGE); - bool inv_opposite = (v_data < 0); - uint v_opposite = (uint)abs(v_data) - 1; - - if (inv_opposite == inv_indices) { - /* Don't share edge if triangles have non matching winding. 
*/ - GPU_indexbuf_add_line_adj_verts(elb, v1, v2, v3, v1); - GPU_indexbuf_add_line_adj_verts(elb, v_opposite, v2, v3, v_opposite); - *r_is_manifold = false; - } - else { - GPU_indexbuf_add_line_adj_verts(elb, v1, v2, v3, v_opposite); - } - } -} - -static void set_edges_adjacency_lines_indices(void *thunk, uint v1, uint v2, uint v3) -{ - void **packed = (void **)thunk; - GPUIndexBufBuilder *elb = (GPUIndexBufBuilder *)packed[0]; - EdgeHash *eh = (EdgeHash *)packed[1]; - bool *r_is_manifold = (bool *)packed[2]; - - set_edge_adjacency_lines_indices(eh, elb, r_is_manifold, v1, v2, v3); - set_edge_adjacency_lines_indices(eh, elb, r_is_manifold, v2, v3, v1); - set_edge_adjacency_lines_indices(eh, elb, r_is_manifold, v3, v1, v2); -} - -void DRW_displist_indexbuf_create_edges_adjacency_lines(struct ListBase *lb, - struct GPUIndexBuf *ibo, - bool *r_is_manifold) -{ - const int tri_len = curve_render_surface_tri_len_get(lb); - const int vert_len = curve_render_surface_vert_len_get(lb); - - *r_is_manifold = true; - - /* Allocate max but only used indices are sent to GPU. */ - GPUIndexBufBuilder elb; - GPU_indexbuf_init(&elb, GPU_PRIM_LINES_ADJ, tri_len * 3, vert_len); - - EdgeHash *eh = BLI_edgehash_new_ex(__func__, tri_len * 3); - - /* pack values to pass to `set_edges_adjacency_lines_indices` function. */ - void *thunk[3] = {&elb, eh, r_is_manifold}; - int v_idx = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - displist_indexbufbuilder_set((SetTriIndicesFn *)set_edges_adjacency_lines_indices, - (SetTriIndicesFn *)set_edges_adjacency_lines_indices, - thunk, - dl, - v_idx); - v_idx += dl_vert_len(dl); - } - - /* Create edges for remaining non manifold edges. 
*/ - EdgeHashIterator *ehi; - for (ehi = BLI_edgehashIterator_new(eh); BLI_edgehashIterator_isDone(ehi) == false; - BLI_edgehashIterator_step(ehi)) { - uint v1, v2; - int v_data = POINTER_AS_INT(BLI_edgehashIterator_getValue(ehi)); - if (v_data == NO_EDGE) { - continue; - } - BLI_edgehashIterator_getKey(ehi, &v1, &v2); - uint v0 = (uint)abs(v_data) - 1; - if (v_data < 0) { /* inv_opposite */ - SWAP(uint, v1, v2); - } - GPU_indexbuf_add_line_adj_verts(&elb, v0, v1, v2, v0); - *r_is_manifold = false; - } - BLI_edgehashIterator_free(ehi); - BLI_edgehash_free(eh, NULL); - - GPU_indexbuf_build_in_place(&elb, ibo); -} -#undef NO_EDGE diff --git a/source/blender/draw/intern/draw_cache_impl_lattice.c b/source/blender/draw/intern/draw_cache_impl_lattice.c index cb621c6ceb9..0f12e78d60e 100644 --- a/source/blender/draw/intern/draw_cache_impl_lattice.c +++ b/source/blender/draw/intern/draw_cache_impl_lattice.c @@ -27,12 +27,6 @@ #define SELECT 1 -/** - * TODO - * - 'DispList' is currently not used - * (we could avoid using since it will be removed) - */ - static void lattice_batch_cache_clear(Lattice *lt); /* ---------------------------------------------------------------------- */ diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.cc b/source/blender/draw/intern/draw_cache_impl_mesh.cc index 7c02ee2c033..c22382b3e09 100644 --- a/source/blender/draw/intern/draw_cache_impl_mesh.cc +++ b/source/blender/draw/intern/draw_cache_impl_mesh.cc @@ -21,6 +21,7 @@ #include "BLI_math_vector.h" #include "BLI_span.hh" #include "BLI_string.h" +#include "BLI_string_ref.hh" #include "BLI_task.h" #include "BLI_utildefines.h" @@ -67,6 +68,7 @@ using blender::IndexRange; using blender::Map; using blender::Span; +using blender::StringRefNull; /* ---------------------------------------------------------------------- */ /** \name Dependencies between buffer and batch @@ -115,8 +117,6 @@ static constexpr DRWBatchFlag batches_that_use_buffer(const int buffer_index) MBC_SURFACE_PER_MAT; case 
BUFFER_INDEX(vbo.tan): return MBC_SURFACE_PER_MAT; - case BUFFER_INDEX(vbo.vcol): - return MBC_SURFACE | MBC_SURFACE_PER_MAT; case BUFFER_INDEX(vbo.sculpt_data): return MBC_SCULPT_OVERLAYS; case BUFFER_INDEX(vbo.orco): @@ -236,87 +236,11 @@ BLI_INLINE void mesh_cd_layers_type_clear(DRW_MeshCDMask *a) *((uint32_t *)a) = 0; } -BLI_INLINE const Mesh *editmesh_final_or_this(const Object *object, const Mesh *me) -{ - if (me->edit_mesh != nullptr) { - Mesh *editmesh_eval_final = BKE_object_get_editmesh_eval_final(object); - if (editmesh_eval_final != nullptr) { - return editmesh_eval_final; - } - } - - return me; -} - static void mesh_cd_calc_edit_uv_layer(const Mesh *UNUSED(me), DRW_MeshCDMask *cd_used) { cd_used->edit_uv = 1; } -BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->ldata; - break; - case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->ldata; - break; - } - - BLI_assert(0); - return &me->ldata; -} - -BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->pdata; - break; - case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->pdata; - break; - } - - BLI_assert(0); - return &me->pdata; -} - -BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->edata; - break; - case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->edata; - break; - } - - BLI_assert(0); - return &me->edata; -} - -BLI_INLINE const CustomData *mesh_cd_vdata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->vdata; - break; - 
case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->vdata; - break; - } - - BLI_assert(0); - return &me->vdata; -} - static void mesh_cd_calc_active_uv_layer(const Object *object, const Mesh *me, DRW_MeshCDMask *cd_used) @@ -341,75 +265,6 @@ static void mesh_cd_calc_active_mask_uv_layer(const Object *object, } } -static void mesh_cd_calc_active_mloopcol_layer(const Object *object, - const Mesh *me, - DRW_MeshCDMask *cd_used) -{ - const Mesh *me_final = editmesh_final_or_this(object, me); - Mesh me_query = blender::dna::shallow_zero_initialize(); - - const CustomData *cd_vdata = mesh_cd_vdata_get_from_mesh(me_final); - const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(me_final); - - BKE_id_attribute_copy_domains_temp( - ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - - const CustomDataLayer *layer = BKE_id_attributes_active_color_get(&me_query.id); - int layer_i = BKE_id_attribute_to_index( - &me_query.id, layer, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); - - if (layer_i != -1) { - cd_used->vcol |= (1UL << (uint)layer_i); - } -} - -static uint mesh_cd_calc_gpu_layers_vcol_used(const Mesh *me_query, - const CustomData *cd_vdata, - const CustomData *cd_ldata, - const char name[]) -{ - const CustomDataLayer *layer = nullptr; - eAttrDomain domain; - - if (name[0]) { - int layer_i = 0; - - domain = ATTR_DOMAIN_POINT; - layer_i = CustomData_get_named_layer_index(cd_vdata, CD_PROP_COLOR, name); - layer_i = layer_i == -1 ? - CustomData_get_named_layer_index(cd_vdata, CD_PROP_BYTE_COLOR, name) : - layer_i; - - if (layer_i == -1) { - domain = ATTR_DOMAIN_CORNER; - layer_i = layer_i == -1 ? CustomData_get_named_layer_index(cd_ldata, CD_PROP_COLOR, name) : - layer_i; - layer_i = layer_i == -1 ? - CustomData_get_named_layer_index(cd_ldata, CD_PROP_BYTE_COLOR, name) : - layer_i; - } - - /* NOTE: this is not the same as the layer_i below. */ - if (layer_i != -1) { - layer = (domain == ATTR_DOMAIN_POINT ? 
cd_vdata : cd_ldata)->layers + layer_i; - } - } - else { - layer = BKE_id_attributes_render_color_get(&me_query->id); - } - - if (!layer) { - return -1; - } - - /* NOTE: this is the logical index into the color attribute list, - * not the customdata index. */ - int vcol_i = BKE_id_attribute_to_index( - (ID *)me_query, layer, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); - - return vcol_i; -} - static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(const Object *object, const Mesh *me, struct GPUMaterial **gpumat_array, @@ -433,56 +288,33 @@ static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(const Object *object, DRW_MeshCDMask cd_used; mesh_cd_layers_type_clear(&cd_used); + const CustomDataLayer *default_color = BKE_id_attributes_render_color_get(&me_query.id); + const StringRefNull default_color_name = default_color ? default_color->name : ""; + for (int i = 0; i < gpumat_array_len; i++) { GPUMaterial *gpumat = gpumat_array[i]; - if (gpumat) { - ListBase gpu_attrs = GPU_material_attributes(gpumat); - LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { - const char *name = gpu_attr->name; - eCustomDataType type = static_cast<eCustomDataType>(gpu_attr->type); - int layer = -1; - std::optional<eAttrDomain> domain; - - if (type == CD_AUTO_FROM_NAME) { - /* We need to deduce what exact layer is used. - * - * We do it based on the specified name. 
- */ - if (name[0] != '\0') { - layer = CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name); - type = CD_MTFACE; - - if (layer == -1) { - layer = CustomData_get_named_layer(cd_vdata, CD_PROP_COLOR, name); - if (layer != -1) { - type = CD_PROP_COLOR; - domain = ATTR_DOMAIN_POINT; - } - } - - if (layer == -1) { - layer = CustomData_get_named_layer(cd_ldata, CD_PROP_COLOR, name); - if (layer != -1) { - type = CD_PROP_COLOR; - domain = ATTR_DOMAIN_CORNER; - } - } - - if (layer == -1) { - layer = CustomData_get_named_layer(cd_vdata, CD_PROP_BYTE_COLOR, name); - if (layer != -1) { - type = CD_PROP_BYTE_COLOR; - domain = ATTR_DOMAIN_POINT; - } - } + if (gpumat == nullptr) { + continue; + } + ListBase gpu_attrs = GPU_material_attributes(gpumat); + LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { + const char *name = gpu_attr->name; + eCustomDataType type = static_cast<eCustomDataType>(gpu_attr->type); + int layer = -1; + std::optional<eAttrDomain> domain; + + if (gpu_attr->is_default_color) { + name = default_color_name.c_str(); + } - if (layer == -1) { - layer = CustomData_get_named_layer(cd_ldata, CD_PROP_BYTE_COLOR, name); - if (layer != -1) { - type = CD_PROP_BYTE_COLOR; - domain = ATTR_DOMAIN_CORNER; - } - } + if (type == CD_AUTO_FROM_NAME) { + /* We need to deduce what exact layer is used. + * + * We do it based on the specified name. + */ + if (name[0] != '\0') { + layer = CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name); + type = CD_MTFACE; #if 0 /* Tangents are always from UV's - this will never happen. */ if (layer == -1) { @@ -490,108 +322,87 @@ static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(const Object *object, type = CD_TANGENT; } #endif - if (layer == -1) { - /* Try to match a generic attribute, we use the first attribute domain with a - * matching name. 
*/ - if (drw_custom_data_match_attribute(cd_vdata, name, &layer, &type)) { - domain = ATTR_DOMAIN_POINT; - } - else if (drw_custom_data_match_attribute(cd_ldata, name, &layer, &type)) { - domain = ATTR_DOMAIN_CORNER; - } - else if (drw_custom_data_match_attribute(cd_pdata, name, &layer, &type)) { - domain = ATTR_DOMAIN_FACE; - } - else if (drw_custom_data_match_attribute(cd_edata, name, &layer, &type)) { - domain = ATTR_DOMAIN_EDGE; - } - else { - layer = -1; - } + if (layer == -1) { + /* Try to match a generic attribute, we use the first attribute domain with a + * matching name. */ + if (drw_custom_data_match_attribute(cd_vdata, name, &layer, &type)) { + domain = ATTR_DOMAIN_POINT; } - - if (layer == -1) { - continue; + else if (drw_custom_data_match_attribute(cd_ldata, name, &layer, &type)) { + domain = ATTR_DOMAIN_CORNER; } - } - else { - /* Fall back to the UV layer, which matches old behavior. */ - type = CD_MTFACE; - } - } - - switch (type) { - case CD_MTFACE: { - if (layer == -1) { - layer = (name[0] != '\0') ? CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : - CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); + else if (drw_custom_data_match_attribute(cd_pdata, name, &layer, &type)) { + domain = ATTR_DOMAIN_FACE; } - if (layer != -1) { - cd_used.uv |= (1 << layer); - } - break; - } - case CD_TANGENT: { - if (layer == -1) { - layer = (name[0] != '\0') ? 
CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : - CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); - - /* Only fallback to orco (below) when we have no UV layers, see: T56545 */ - if (layer == -1 && name[0] != '\0') { - layer = CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); - } - } - if (layer != -1) { - cd_used.tan |= (1 << layer); + else if (drw_custom_data_match_attribute(cd_edata, name, &layer, &type)) { + domain = ATTR_DOMAIN_EDGE; } else { - /* no UV layers at all => requesting orco */ - cd_used.tan_orco = 1; - cd_used.orco = 1; + layer = -1; } - break; } - case CD_ORCO: { - cd_used.orco = 1; - break; + if (layer == -1) { + continue; } + } + else { + /* Fall back to the UV layer, which matches old behavior. */ + type = CD_MTFACE; + } + } - /* NOTE: attr->type will always be CD_PROP_COLOR even for - * CD_PROP_BYTE_COLOR layers, see node_shader_gpu_vertex_color in - * node_shader_vertex_color.cc. - */ - case CD_MCOL: - case CD_PROP_BYTE_COLOR: - case CD_PROP_COLOR: { - /* First check Color attributes, when not found check mesh attributes. Geometry nodes - * can generate those layers. */ - int vcol_bit = mesh_cd_calc_gpu_layers_vcol_used(&me_query, cd_vdata, cd_ldata, name); - - if (vcol_bit != -1) { - cd_used.vcol |= 1UL << (uint)vcol_bit; - break; - } - - if (layer != -1 && domain.has_value()) { - drw_attributes_add_request(attributes, type, layer, *domain); - } - break; + switch (type) { + case CD_MTFACE: { + if (layer == -1) { + layer = (name[0] != '\0') ? CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : + CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); + } + if (layer != -1) { + cd_used.uv |= (1 << layer); } - case CD_PROP_FLOAT3: - case CD_PROP_BOOL: - case CD_PROP_INT8: - case CD_PROP_INT32: - case CD_PROP_FLOAT: - case CD_PROP_FLOAT2: { - if (layer != -1 && domain.has_value()) { - drw_attributes_add_request(attributes, type, layer, *domain); + break; + } + case CD_TANGENT: { + if (layer == -1) { + layer = (name[0] != '\0') ? 
CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : + CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); + + /* Only fallback to orco (below) when we have no UV layers, see: T56545 */ + if (layer == -1 && name[0] != '\0') { + layer = CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); } - break; } - default: - break; + if (layer != -1) { + cd_used.tan |= (1 << layer); + } + else { + /* no UV layers at all => requesting orco */ + cd_used.tan_orco = 1; + cd_used.orco = 1; + } + break; } + + case CD_ORCO: { + cd_used.orco = 1; + break; + } + case CD_PROP_BYTE_COLOR: + case CD_PROP_COLOR: + case CD_PROP_FLOAT3: + case CD_PROP_BOOL: + case CD_PROP_INT8: + case CD_PROP_INT32: + case CD_PROP_FLOAT: + case CD_PROP_FLOAT2: { + if (layer != -1 && domain.has_value()) { + drw_attributes_add_request(attributes, name, type, layer, *domain); + } + break; + } + default: + break; } } } @@ -745,8 +556,7 @@ static bool mesh_batch_cache_valid(Object *object, Mesh *me) } if (object->sculpt && object->sculpt->pbvh) { - if (cache->pbvh_is_drawing != BKE_pbvh_is_drawing(object->sculpt->pbvh) || - BKE_pbvh_draw_cache_invalid(object->sculpt->pbvh)) { + if (cache->pbvh_is_drawing != BKE_pbvh_is_drawing(object->sculpt->pbvh)) { return false; } @@ -863,10 +673,9 @@ static void mesh_batch_cache_discard_shaded_tri(MeshBatchCache *cache) FOREACH_MESH_BUFFER_CACHE (cache, mbc) { GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.uv); GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.tan); - GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.vcol); GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.orco); } - DRWBatchFlag batch_map = BATCH_MAP(vbo.uv, vbo.tan, vbo.vcol, vbo.orco); + DRWBatchFlag batch_map = BATCH_MAP(vbo.uv, vbo.tan, vbo.orco); mesh_batch_cache_discard_batch(cache, batch_map); mesh_cd_layers_type_clear(&cache->cd_used); } @@ -1070,42 +879,35 @@ static void texpaint_request_active_uv(MeshBatchCache *cache, Object *object, Me mesh_cd_layers_type_merge(&cache->cd_needed, cd_needed); } -static void 
texpaint_request_active_vcol(MeshBatchCache *cache, Object *object, Mesh *me) +static void request_active_and_default_color_attributes(const Object &object, + const Mesh &mesh, + DRW_Attributes &attributes) { - DRW_MeshCDMask cd_needed; - mesh_cd_layers_type_clear(&cd_needed); - mesh_cd_calc_active_mloopcol_layer(object, me, &cd_needed); - - BLI_assert(cd_needed.vcol != 0 && - "No MLOOPCOL layer available in vertpaint, but batches requested anyway!"); - - mesh_cd_layers_type_merge(&cache->cd_needed, cd_needed); -} - -static void sculpt_request_active_vcol(MeshBatchCache *cache, Object *object, Mesh *me) -{ - const Mesh *me_final = editmesh_final_or_this(object, me); + const Mesh *me_final = editmesh_final_or_this(&object, &mesh); const CustomData *cd_vdata = mesh_cd_vdata_get_from_mesh(me_final); const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(me_final); + /* Necessary because which attributes are active/default is stored in #CustomData. */ Mesh me_query = blender::dna::shallow_zero_initialize(); BKE_id_attribute_copy_domains_temp( ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - const CustomDataLayer *active = BKE_id_attributes_active_color_get(&me_query.id); - const CustomDataLayer *render = BKE_id_attributes_render_color_get(&me_query.id); - - int active_i = BKE_id_attribute_to_index( - &me_query.id, active, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); - int render_i = BKE_id_attribute_to_index( - &me_query.id, render, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); + auto request_color_attribute = [&](const char *name) { + int layer_index; + eCustomDataType type; + if (drw_custom_data_match_attribute(cd_vdata, name, &layer_index, &type)) { + drw_attributes_add_request(&attributes, name, type, layer_index, ATTR_DOMAIN_POINT); + } + else if (drw_custom_data_match_attribute(cd_ldata, name, &layer_index, &type)) { + drw_attributes_add_request(&attributes, name, type, layer_index, ATTR_DOMAIN_CORNER); + } + }; - if (active_i >= 0) { - 
cache->cd_needed.vcol |= 1UL << (uint)active_i; + if (const CustomDataLayer *active = BKE_id_attributes_active_color_get(&me_query.id)) { + request_color_attribute(active->name); } - - if (render_i >= 0) { - cache->cd_needed.vcol |= 1UL << (uint)render_i; + if (const CustomDataLayer *render = BKE_id_attributes_render_color_get(&me_query.id)) { + request_color_attribute(render->name); } } @@ -1214,7 +1016,13 @@ GPUBatch *DRW_mesh_batch_cache_get_surface_texpaint_single(Object *object, Mesh GPUBatch *DRW_mesh_batch_cache_get_surface_vertpaint(Object *object, Mesh *me) { MeshBatchCache *cache = mesh_batch_cache_get(me); - texpaint_request_active_vcol(cache, object, me); + + DRW_Attributes attrs_needed{}; + request_active_and_default_color_attributes(*object, *me, attrs_needed); + + ThreadMutex *mesh_render_mutex = (ThreadMutex *)me->runtime.render_mutex; + drw_attributes_merge(&cache->attr_needed, &attrs_needed, mesh_render_mutex); + mesh_batch_cache_request_surface_batches(cache); return cache->batch.surface; } @@ -1222,7 +1030,13 @@ GPUBatch *DRW_mesh_batch_cache_get_surface_vertpaint(Object *object, Mesh *me) GPUBatch *DRW_mesh_batch_cache_get_surface_sculpt(Object *object, Mesh *me) { MeshBatchCache *cache = mesh_batch_cache_get(me); - sculpt_request_active_vcol(cache, object, me); + + DRW_Attributes attrs_needed{}; + request_active_and_default_color_attributes(*object, *me, attrs_needed); + + ThreadMutex *mesh_render_mutex = (ThreadMutex *)me->runtime.render_mutex; + drw_attributes_merge(&cache->attr_needed, &attrs_needed, mesh_render_mutex); + mesh_batch_cache_request_surface_batches(cache); return cache->batch.surface; } @@ -1621,9 +1435,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, if (cache->cd_used.sculpt_overlays != cache->cd_needed.sculpt_overlays) { GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.sculpt_data); } - if ((cache->cd_used.vcol & cache->cd_needed.vcol) != cache->cd_needed.vcol) { - 
GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.vcol); - } if (!drw_attributes_overlap(&cache->attr_used, &cache->attr_needed)) { for (int i = 0; i < GPU_MAX_ATTR; i++) { GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.attr[i]); @@ -1697,12 +1508,13 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, cache->batch_ready |= batch_requested; bool do_cage = false, do_uvcage = false; - if (is_editmode) { + if (is_editmode && is_mode_active) { Mesh *editmesh_eval_final = BKE_object_get_editmesh_eval_final(ob); Mesh *editmesh_eval_cage = BKE_object_get_editmesh_eval_cage(ob); do_cage = editmesh_eval_final != editmesh_eval_cage; - do_uvcage = !editmesh_eval_final->runtime.is_original; + do_uvcage = !(editmesh_eval_final->runtime.is_original_bmesh && + editmesh_eval_final->runtime.wrapper_type == ME_WRAPPER_TYPE_BMESH); } const bool do_subdivision = BKE_subsurf_modifier_has_gpu_subdiv(me); @@ -1710,15 +1522,26 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, MeshBufferList *mbuflist = &cache->final.buff; /* Initialize batches and request VBO's & IBO's. 
*/ - assert_deps_valid( - MBC_SURFACE, - {BUFFER_INDEX(ibo.tris), BUFFER_INDEX(vbo.lnor), BUFFER_INDEX(vbo.pos_nor), - BUFFER_INDEX(vbo.uv), BUFFER_INDEX(vbo.vcol), BUFFER_INDEX(vbo.attr[0]), - BUFFER_INDEX(vbo.attr[1]), BUFFER_INDEX(vbo.attr[2]), BUFFER_INDEX(vbo.attr[3]), - BUFFER_INDEX(vbo.attr[4]), BUFFER_INDEX(vbo.attr[5]), BUFFER_INDEX(vbo.attr[6]), - BUFFER_INDEX(vbo.attr[7]), BUFFER_INDEX(vbo.attr[8]), BUFFER_INDEX(vbo.attr[9]), - BUFFER_INDEX(vbo.attr[10]), BUFFER_INDEX(vbo.attr[11]), BUFFER_INDEX(vbo.attr[12]), - BUFFER_INDEX(vbo.attr[13]), BUFFER_INDEX(vbo.attr[14])}); + assert_deps_valid(MBC_SURFACE, + {BUFFER_INDEX(ibo.tris), + BUFFER_INDEX(vbo.lnor), + BUFFER_INDEX(vbo.pos_nor), + BUFFER_INDEX(vbo.uv), + BUFFER_INDEX(vbo.attr[0]), + BUFFER_INDEX(vbo.attr[1]), + BUFFER_INDEX(vbo.attr[2]), + BUFFER_INDEX(vbo.attr[3]), + BUFFER_INDEX(vbo.attr[4]), + BUFFER_INDEX(vbo.attr[5]), + BUFFER_INDEX(vbo.attr[6]), + BUFFER_INDEX(vbo.attr[7]), + BUFFER_INDEX(vbo.attr[8]), + BUFFER_INDEX(vbo.attr[9]), + BUFFER_INDEX(vbo.attr[10]), + BUFFER_INDEX(vbo.attr[11]), + BUFFER_INDEX(vbo.attr[12]), + BUFFER_INDEX(vbo.attr[13]), + BUFFER_INDEX(vbo.attr[14])}); if (DRW_batch_requested(cache->batch.surface, GPU_PRIM_TRIS)) { DRW_ibo_request(cache->batch.surface, &mbuflist->ibo.tris); /* Order matters. First ones override latest VBO's attributes. 
*/ @@ -1727,9 +1550,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, if (cache->cd_used.uv != 0) { DRW_vbo_request(cache->batch.surface, &mbuflist->vbo.uv); } - if (cache->cd_used.vcol != 0) { - DRW_vbo_request(cache->batch.surface, &mbuflist->vbo.vcol); - } drw_add_attributes_vbo(cache->batch.surface, mbuflist, &cache->attr_used); } assert_deps_valid(MBC_ALL_VERTS, {BUFFER_INDEX(vbo.pos_nor)}); @@ -1807,12 +1627,12 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, assert_deps_valid( MBC_SURFACE_PER_MAT, {BUFFER_INDEX(vbo.lnor), BUFFER_INDEX(vbo.pos_nor), BUFFER_INDEX(vbo.uv), - BUFFER_INDEX(vbo.tan), BUFFER_INDEX(vbo.vcol), BUFFER_INDEX(vbo.orco), - BUFFER_INDEX(vbo.attr[0]), BUFFER_INDEX(vbo.attr[1]), BUFFER_INDEX(vbo.attr[2]), - BUFFER_INDEX(vbo.attr[3]), BUFFER_INDEX(vbo.attr[4]), BUFFER_INDEX(vbo.attr[5]), - BUFFER_INDEX(vbo.attr[6]), BUFFER_INDEX(vbo.attr[7]), BUFFER_INDEX(vbo.attr[8]), - BUFFER_INDEX(vbo.attr[9]), BUFFER_INDEX(vbo.attr[10]), BUFFER_INDEX(vbo.attr[11]), - BUFFER_INDEX(vbo.attr[12]), BUFFER_INDEX(vbo.attr[13]), BUFFER_INDEX(vbo.attr[14])}); + BUFFER_INDEX(vbo.tan), BUFFER_INDEX(vbo.orco), BUFFER_INDEX(vbo.attr[0]), + BUFFER_INDEX(vbo.attr[1]), BUFFER_INDEX(vbo.attr[2]), BUFFER_INDEX(vbo.attr[3]), + BUFFER_INDEX(vbo.attr[4]), BUFFER_INDEX(vbo.attr[5]), BUFFER_INDEX(vbo.attr[6]), + BUFFER_INDEX(vbo.attr[7]), BUFFER_INDEX(vbo.attr[8]), BUFFER_INDEX(vbo.attr[9]), + BUFFER_INDEX(vbo.attr[10]), BUFFER_INDEX(vbo.attr[11]), BUFFER_INDEX(vbo.attr[12]), + BUFFER_INDEX(vbo.attr[13]), BUFFER_INDEX(vbo.attr[14])}); assert_deps_valid(MBC_SURFACE_PER_MAT, {TRIS_PER_MAT_INDEX}); for (int i = 0; i < cache->mat_len; i++) { if (DRW_batch_requested(cache->surface_per_mat[i], GPU_PRIM_TRIS)) { @@ -1826,9 +1646,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, if ((cache->cd_used.tan != 0) || (cache->cd_used.tan_orco != 0)) { DRW_vbo_request(cache->surface_per_mat[i], 
&mbuflist->vbo.tan); } - if (cache->cd_used.vcol != 0) { - DRW_vbo_request(cache->surface_per_mat[i], &mbuflist->vbo.vcol); - } if (cache->cd_used.orco != 0) { DRW_vbo_request(cache->surface_per_mat[i], &mbuflist->vbo.orco); } @@ -1994,7 +1811,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, assert_final_deps_valid(BUFFER_INDEX(vbo.lnor)); assert_final_deps_valid(BUFFER_INDEX(vbo.pos_nor)); assert_final_deps_valid(BUFFER_INDEX(vbo.uv)); - assert_final_deps_valid(BUFFER_INDEX(vbo.vcol)); assert_final_deps_valid(BUFFER_INDEX(vbo.sculpt_data)); assert_final_deps_valid(BUFFER_INDEX(vbo.weights)); assert_final_deps_valid(BUFFER_INDEX(vbo.edge_fac)); @@ -2078,6 +1894,7 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, ob->obmat, true, false, + do_cage, ts, use_hide); } diff --git a/source/blender/draw/intern/draw_cache_impl_metaball.c b/source/blender/draw/intern/draw_cache_impl_metaball.c deleted file mode 100644 index 1408dc91069..00000000000 --- a/source/blender/draw/intern/draw_cache_impl_metaball.c +++ /dev/null @@ -1,294 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2017 Blender Foundation. All rights reserved. 
*/ - -/** \file - * \ingroup draw - * - * \brief MetaBall API for render engines - */ - -#include "MEM_guardedalloc.h" - -#include "BLI_math_base.h" -#include "BLI_utildefines.h" - -#include "DNA_meta_types.h" -#include "DNA_object_types.h" - -#include "BKE_curve.h" -#include "BKE_mball.h" - -#include "GPU_batch.h" - -#include "DRW_render.h" -#include "draw_cache_impl.h" /* own include */ - -static void metaball_batch_cache_clear(MetaBall *mb); - -/* -------------------------------------------------------------------- */ -/** \name MetaBall GPUBatch Cache - * \{ */ - -typedef struct MetaBallBatchCache { - GPUBatch *batch; - GPUBatch **shaded_triangles; - - int mat_len; - - /* Shared */ - GPUVertBuf *pos_nor_in_order; - - /* Wireframe */ - struct { - GPUBatch *batch; - } face_wire; - - /* Edge detection */ - GPUBatch *edge_detection; - GPUIndexBuf *edges_adj_lines; - - /* settings to determine if cache is invalid */ - bool is_dirty; - - /* Valid only if edge_detection is up to date. */ - bool is_manifold; -} MetaBallBatchCache; - -/* GPUBatch cache management. 
*/ - -static bool metaball_batch_cache_valid(MetaBall *mb) -{ - MetaBallBatchCache *cache = mb->batch_cache; - - if (cache == NULL) { - return false; - } - - return cache->is_dirty == false; -} - -static void metaball_batch_cache_init(MetaBall *mb) -{ - MetaBallBatchCache *cache = mb->batch_cache; - - if (!cache) { - cache = mb->batch_cache = MEM_mallocN(sizeof(*cache), __func__); - } - cache->batch = NULL; - cache->mat_len = 0; - cache->shaded_triangles = NULL; - cache->is_dirty = false; - cache->pos_nor_in_order = NULL; - cache->face_wire.batch = NULL; - cache->edge_detection = NULL; - cache->edges_adj_lines = NULL; - cache->is_manifold = false; -} - -void DRW_mball_batch_cache_validate(MetaBall *mb) -{ - if (!metaball_batch_cache_valid(mb)) { - metaball_batch_cache_clear(mb); - metaball_batch_cache_init(mb); - } -} - -static MetaBallBatchCache *metaball_batch_cache_get(MetaBall *mb) -{ - return mb->batch_cache; -} - -void DRW_mball_batch_cache_dirty_tag(MetaBall *mb, int mode) -{ - MetaBallBatchCache *cache = mb->batch_cache; - if (cache == NULL) { - return; - } - switch (mode) { - case BKE_MBALL_BATCH_DIRTY_ALL: - cache->is_dirty = true; - break; - default: - BLI_assert(0); - } -} - -static void metaball_batch_cache_clear(MetaBall *mb) -{ - MetaBallBatchCache *cache = mb->batch_cache; - if (!cache) { - return; - } - - GPU_BATCH_DISCARD_SAFE(cache->face_wire.batch); - GPU_BATCH_DISCARD_SAFE(cache->batch); - GPU_BATCH_DISCARD_SAFE(cache->edge_detection); - GPU_VERTBUF_DISCARD_SAFE(cache->pos_nor_in_order); - GPU_INDEXBUF_DISCARD_SAFE(cache->edges_adj_lines); - /* NOTE: shaded_triangles[0] is already freed by `cache->batch`. 
*/ - MEM_SAFE_FREE(cache->shaded_triangles); - cache->mat_len = 0; - cache->is_manifold = false; -} - -void DRW_mball_batch_cache_free(MetaBall *mb) -{ - metaball_batch_cache_clear(mb); - MEM_SAFE_FREE(mb->batch_cache); -} - -static GPUVertBuf *mball_batch_cache_get_pos_and_normals(Object *ob, - MetaBallBatchCache *cache, - const struct Scene *scene) -{ - if (cache->pos_nor_in_order == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - cache->pos_nor_in_order = GPU_vertbuf_calloc(); - DRW_displist_vertbuf_create_pos_and_nor(lb, cache->pos_nor_in_order, scene); - } - return cache->pos_nor_in_order; -} - -static GPUIndexBuf *mball_batch_cache_get_edges_adj_lines(Object *ob, MetaBallBatchCache *cache) -{ - if (cache->edges_adj_lines == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - cache->edges_adj_lines = GPU_indexbuf_calloc(); - DRW_displist_indexbuf_create_edges_adjacency_lines( - lb, cache->edges_adj_lines, &cache->is_manifold); - } - return cache->edges_adj_lines; -} - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Public Object/MetaBall API - * \{ */ - -GPUBatch *DRW_metaball_batch_cache_get_triangles_with_normals(Object *ob) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - if (cache->batch == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - GPUIndexBuf *ibo = GPU_indexbuf_calloc(); - DRW_displist_indexbuf_create_triangles_in_order(lb, ibo); - cache->batch = GPU_batch_create_ex(GPU_PRIM_TRIS, - mball_batch_cache_get_pos_and_normals(ob, cache, scene), - ibo, - GPU_BATCH_OWNS_INDEX); - } - - return cache->batch; -} - -GPUBatch **DRW_metaball_batch_cache_get_surface_shaded(Object *ob, - MetaBall *mb, - struct GPUMaterial **UNUSED(gpumat_array), - uint gpumat_array_len) -{ - if 
(!BKE_mball_is_basis(ob)) { - return NULL; - } - - BLI_assert(gpumat_array_len == DRW_metaball_material_count_get(mb)); - - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - if (cache->shaded_triangles == NULL) { - cache->mat_len = gpumat_array_len; - cache->shaded_triangles = MEM_callocN(sizeof(*cache->shaded_triangles) * cache->mat_len, - __func__); - cache->shaded_triangles[0] = DRW_metaball_batch_cache_get_triangles_with_normals(ob); - for (int i = 1; i < cache->mat_len; i++) { - cache->shaded_triangles[i] = NULL; - } - } - return cache->shaded_triangles; -} - -GPUBatch *DRW_metaball_batch_cache_get_wireframes_face(Object *ob) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - if (cache->face_wire.batch == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - - GPUVertBuf *vbo_wiredata = GPU_vertbuf_calloc(); - DRW_displist_vertbuf_create_wiredata(lb, vbo_wiredata); - - GPUIndexBuf *ibo = GPU_indexbuf_calloc(); - DRW_displist_indexbuf_create_lines_in_order(lb, ibo); - - cache->face_wire.batch = GPU_batch_create_ex( - GPU_PRIM_LINES, - mball_batch_cache_get_pos_and_normals(ob, cache, scene), - ibo, - GPU_BATCH_OWNS_INDEX); - - GPU_batch_vertbuf_add_ex(cache->face_wire.batch, vbo_wiredata, true); - } - - return cache->face_wire.batch; -} - -struct GPUBatch *DRW_metaball_batch_cache_get_edge_detection(struct Object *ob, - bool *r_is_manifold) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - if (cache->edge_detection == NULL) { - cache->edge_detection = GPU_batch_create( - GPU_PRIM_LINES_ADJ, - mball_batch_cache_get_pos_and_normals(ob, cache, 
scene), - mball_batch_cache_get_edges_adj_lines(ob, cache)); - } - - if (r_is_manifold) { - *r_is_manifold = cache->is_manifold; - } - - return cache->edge_detection; -} - -struct GPUVertBuf *DRW_mball_batch_cache_pos_vertbuf_get(Object *ob) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - return mball_batch_cache_get_pos_and_normals(ob, cache, scene); -} - -int DRW_metaball_material_count_get(MetaBall *mb) -{ - return max_ii(1, mb->totcol); -} - -/** \} */ diff --git a/source/blender/draw/intern/draw_cache_impl_particles.c b/source/blender/draw/intern/draw_cache_impl_particles.c index c1d609bf648..9c1784b1de2 100644 --- a/source/blender/draw/intern/draw_cache_impl_particles.c +++ b/source/blender/draw/intern/draw_cache_impl_particles.c @@ -11,6 +11,7 @@ #include "MEM_guardedalloc.h" +#include "BLI_alloca.h" #include "BLI_ghash.h" #include "BLI_math_vector.h" #include "BLI_string.h" @@ -24,12 +25,15 @@ #include "BKE_customdata.h" #include "BKE_mesh.h" +#include "BKE_mesh_legacy_convert.h" #include "BKE_particle.h" #include "BKE_pointcache.h" #include "ED_particle.h" #include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_context.h" #include "GPU_material.h" #include "DEG_depsgraph_query.h" @@ -181,10 +185,11 @@ static void particle_batch_cache_clear_hair(ParticleHairCache *hair_cache) GPU_VERTBUF_DISCARD_SAFE(hair_cache->proc_uv_buf[i]); DRW_TEXTURE_FREE_SAFE(hair_cache->uv_tex[i]); } - for (int i = 0; i < MAX_MCOL; i++) { + for (int i = 0; i < hair_cache->num_col_layers; i++) { GPU_VERTBUF_DISCARD_SAFE(hair_cache->proc_col_buf[i]); DRW_TEXTURE_FREE_SAFE(hair_cache->col_tex[i]); } + for (int i = 0; i < MAX_HAIR_SUBDIV; i++) { GPU_VERTBUF_DISCARD_SAFE(hair_cache->final[i].proc_buf); DRW_TEXTURE_FREE_SAFE(hair_cache->final[i].proc_tex); @@ -217,9 
+222,24 @@ static void particle_batch_cache_clear(ParticleSystem *psys) GPU_VERTBUF_DISCARD_SAFE(cache->edit_tip_pos); } +static void particle_batch_cache_free_hair(ParticleHairCache *hair) +{ + MEM_SAFE_FREE(hair->proc_col_buf); + MEM_SAFE_FREE(hair->col_tex); + MEM_SAFE_FREE(hair->col_layer_names); +} + void DRW_particle_batch_cache_free(ParticleSystem *psys) { particle_batch_cache_clear(psys); + + ParticleBatchCache *cache = psys->batch_cache; + + if (cache) { + particle_batch_cache_free_hair(&cache->hair); + particle_batch_cache_free_hair(&cache->edit_hair); + } + MEM_SAFE_FREE(psys->batch_cache); } @@ -295,7 +315,8 @@ static void particle_calculate_parent_uvs(ParticleSystem *psys, } } if (!ELEM(num, DMCACHE_NOTFOUND, DMCACHE_ISCHILD)) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_uv_layers; j++) { psys_interpolate_uvs(mtfaces[j] + num, mface->v4, particle->fuv, r_uv[j]); } @@ -324,7 +345,8 @@ static void particle_calculate_parent_mcol(ParticleSystem *psys, } } if (!ELEM(num, DMCACHE_NOTFOUND, DMCACHE_ISCHILD)) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_col_layers; j++) { /* CustomDataLayer CD_MCOL has 4 structs per face. 
*/ psys_interpolate_mcol(mcols[j] + num * 4, mface->v4, particle->fuv, &r_mcol[j]); @@ -350,7 +372,8 @@ static void particle_interpolate_children_uvs(ParticleSystem *psys, ChildParticle *particle = &psys->child[child_index]; int num = particle->num; if (num != DMCACHE_NOTFOUND) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_uv_layers; j++) { psys_interpolate_uvs(mtfaces[j] + num, mface->v4, particle->fuv, r_uv[j]); } @@ -374,7 +397,8 @@ static void particle_interpolate_children_mcol(ParticleSystem *psys, ChildParticle *particle = &psys->child[child_index]; int num = particle->num; if (num != DMCACHE_NOTFOUND) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_col_layers; j++) { /* CustomDataLayer CD_MCOL has 4 structs per face. */ psys_interpolate_mcol(mcols[j] + num * 4, mface->v4, particle->fuv, &r_mcol[j]); @@ -790,7 +814,10 @@ static void particle_batch_cache_ensure_procedural_final_points(ParticleHairCach GPUVertFormat format = {0}; GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format(&format); + /* Transform feedback buffer only needs to be resident in device memory. */ + GPUUsageType type = GPU_transform_feedback_support() ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_STATIC; + cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex( + &format, type | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. 
*/ @@ -832,10 +859,10 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit GPUVertBufRaw data_step, seg_step; GPUVertBufRaw uv_step[MAX_MTFACE]; - GPUVertBufRaw col_step[MAX_MCOL]; + GPUVertBufRaw *col_step = BLI_array_alloca(col_step, cache->num_col_layers); const MTFace *mtfaces[MAX_MTFACE] = {NULL}; - const MCol *mcols[MAX_MCOL] = {NULL}; + const MCol **mcols = BLI_array_alloca(mcols, cache->num_col_layers); float(**parent_uvs)[2] = NULL; MCol **parent_mcol = NULL; @@ -853,20 +880,22 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit &format_col, "col", GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); memset(cache->uv_layer_names, 0, sizeof(cache->uv_layer_names)); - memset(cache->col_layer_names, 0, sizeof(cache->col_layer_names)); /* Strand Data */ - cache->proc_strand_buf = GPU_vertbuf_create_with_format(&format_data); + cache->proc_strand_buf = GPU_vertbuf_create_with_format_ex( + &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_strand_buf, cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_strand_buf, data_id, &data_step); - cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg); + cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex( + &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_strand_seg_buf, cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_strand_seg_buf, seg_id, &seg_step); /* UV layers */ for (int i = 0; i < cache->num_uv_layers; i++) { - cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format(&format_uv); + cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format_ex( + &format_uv, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_uv_buf[i], cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_uv_buf[i], uv_id, &uv_step[i]); @@ -884,9 +913,20 @@ static void 
particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit BLI_strncpy(cache->uv_layer_names[i][n++], "a", MAX_LAYER_NAME_LEN); } } + + MEM_SAFE_FREE(cache->proc_col_buf); + MEM_SAFE_FREE(cache->col_tex); + MEM_SAFE_FREE(cache->col_layer_names); + + cache->proc_col_buf = MEM_calloc_arrayN(cache->num_col_layers, sizeof(void *), "proc_col_buf"); + cache->col_tex = MEM_calloc_arrayN(cache->num_col_layers, sizeof(void *), "col_tex"); + cache->col_layer_names = MEM_calloc_arrayN( + cache->num_col_layers, sizeof(*cache->col_layer_names), "col_layer_names"); + /* Vertex colors */ for (int i = 0; i < cache->num_col_layers; i++) { - cache->proc_col_buf[i] = GPU_vertbuf_create_with_format(&format_col); + cache->proc_col_buf[i] = GPU_vertbuf_create_with_format_ex( + &format_col, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_col_buf[i], cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_col_buf[i], col_id, &col_step[i]); @@ -1032,8 +1072,9 @@ static void particle_batch_cache_ensure_procedural_indices(PTCacheEdit *edit, static GPUVertFormat format = {0}; GPU_vertformat_clear(&format); - /* initialize vertex format */ - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + /* NOTE: initialize vertex format. Using GPU_COMP_U32 to satisfy Metal's 4-byte minimum + * stride requirement. 
*/ + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); GPU_vertbuf_data_alloc(vbo, 1); @@ -1074,7 +1115,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit, uint pos_id = GPU_vertformat_attr_add( &pos_format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache->proc_point_buf = GPU_vertbuf_create_with_format(&pos_format); + cache->proc_point_buf = GPU_vertbuf_create_with_format_ex( + &pos_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_point_buf, cache->point_len); GPUVertBufRaw pos_step; @@ -1084,7 +1126,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit, uint length_id = GPU_vertformat_attr_add( &length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - cache->proc_length_buf = GPU_vertbuf_create_with_format(&length_format); + cache->proc_length_buf = GPU_vertbuf_create_with_format_ex( + &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_length_buf, cache->strands_len); GPUVertBufRaw length_step; diff --git a/source/blender/draw/intern/draw_cache_impl_pointcloud.c b/source/blender/draw/intern/draw_cache_impl_pointcloud.cc index d715899e291..a43b23c8969 100644 --- a/source/blender/draw/intern/draw_cache_impl_pointcloud.c +++ b/source/blender/draw/intern/draw_cache_impl_pointcloud.cc @@ -13,23 +13,23 @@ #include "BLI_math_base.h" #include "BLI_math_vector.h" +#include "BLI_task.hh" #include "BLI_utildefines.h" #include "DNA_object_types.h" #include "DNA_pointcloud_types.h" +#include "BKE_attribute.hh" #include "BKE_pointcloud.h" #include "GPU_batch.h" #include "draw_cache_impl.h" /* own include */ -static void pointcloud_batch_cache_clear(PointCloud *pointcloud); - /* ---------------------------------------------------------------------- */ /* PointCloud GPUBatch Cache */ -typedef struct 
PointCloudBatchCache { +struct PointCloudBatchCache { GPUVertBuf *pos; /* Position and radius. */ GPUVertBuf *geom; /* Instanced geometry for each point in the cloud (small sphere). */ GPUIndexBuf *geom_indices; @@ -42,58 +42,51 @@ typedef struct PointCloudBatchCache { bool is_dirty; int mat_len; -} PointCloudBatchCache; +}; /* GPUBatch cache management. */ -static bool pointcloud_batch_cache_valid(PointCloud *pointcloud) +static PointCloudBatchCache *pointcloud_batch_cache_get(PointCloud &pointcloud) +{ + return static_cast<PointCloudBatchCache *>(pointcloud.batch_cache); +} + +static bool pointcloud_batch_cache_valid(PointCloud &pointcloud) { - PointCloudBatchCache *cache = pointcloud->batch_cache; + PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); - if (cache == NULL) { + if (cache == nullptr) { return false; } - if (cache->mat_len != DRW_pointcloud_material_count_get(pointcloud)) { + if (cache->mat_len != DRW_pointcloud_material_count_get(&pointcloud)) { return false; } return cache->is_dirty == false; } -static void pointcloud_batch_cache_init(PointCloud *pointcloud) +static void pointcloud_batch_cache_init(PointCloud &pointcloud) { - PointCloudBatchCache *cache = pointcloud->batch_cache; + PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); if (!cache) { - cache = pointcloud->batch_cache = MEM_callocN(sizeof(*cache), __func__); + cache = MEM_cnew<PointCloudBatchCache>(__func__); + pointcloud.batch_cache = cache; } else { memset(cache, 0, sizeof(*cache)); } - cache->mat_len = DRW_pointcloud_material_count_get(pointcloud); - cache->surface_per_mat = MEM_callocN(sizeof(GPUBatch *) * cache->mat_len, - "pointcloud suface_per_mat"); + cache->mat_len = DRW_pointcloud_material_count_get(&pointcloud); + cache->surface_per_mat = static_cast<GPUBatch **>( + MEM_callocN(sizeof(GPUBatch *) * cache->mat_len, __func__)); cache->is_dirty = false; } -void DRW_pointcloud_batch_cache_validate(PointCloud *pointcloud) -{ - if 
(!pointcloud_batch_cache_valid(pointcloud)) { - pointcloud_batch_cache_clear(pointcloud); - pointcloud_batch_cache_init(pointcloud); - } -} - -static PointCloudBatchCache *pointcloud_batch_cache_get(PointCloud *pointcloud) -{ - return pointcloud->batch_cache; -} - void DRW_pointcloud_batch_cache_dirty_tag(PointCloud *pointcloud, int mode) { - PointCloudBatchCache *cache = pointcloud->batch_cache; - if (cache == NULL) { + PointCloudBatchCache *cache = pointcloud_batch_cache_get(*pointcloud); + if (cache == nullptr) { return; } switch (mode) { @@ -105,9 +98,9 @@ void DRW_pointcloud_batch_cache_dirty_tag(PointCloud *pointcloud, int mode) } } -static void pointcloud_batch_cache_clear(PointCloud *pointcloud) +static void pointcloud_batch_cache_clear(PointCloud &pointcloud) { - PointCloudBatchCache *cache = pointcloud->batch_cache; + PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); if (!cache) { return; } @@ -126,50 +119,65 @@ static void pointcloud_batch_cache_clear(PointCloud *pointcloud) MEM_SAFE_FREE(cache->surface_per_mat); } +void DRW_pointcloud_batch_cache_validate(PointCloud *pointcloud) +{ + if (!pointcloud_batch_cache_valid(*pointcloud)) { + pointcloud_batch_cache_clear(*pointcloud); + pointcloud_batch_cache_init(*pointcloud); + } +} + void DRW_pointcloud_batch_cache_free(PointCloud *pointcloud) { - pointcloud_batch_cache_clear(pointcloud); + pointcloud_batch_cache_clear(*pointcloud); MEM_SAFE_FREE(pointcloud->batch_cache); } -static void pointcloud_batch_cache_ensure_pos(Object *ob, PointCloudBatchCache *cache) +static void pointcloud_batch_cache_ensure_pos(const PointCloud &pointcloud, + PointCloudBatchCache &cache) { - if (cache->pos != NULL) { + using namespace blender; + if (cache.pos != nullptr) { return; } - PointCloud *pointcloud = ob->data; - const bool has_radius = pointcloud->radius != NULL; - - static GPUVertFormat format = {0}; - static GPUVertFormat format_no_radius = {0}; - static uint pos; - if (format.attr_len == 0) { - /* 
initialize vertex format */ - /* From the opengl wiki: - * Note that size does not have to exactly match the size used by the vertex shader. If the - * vertex shader has fewer components than the attribute provides, then the extras are ignored. - * If the vertex shader has more components than the array provides, the extras are given - * values from the vector (0, 0, 0, 1) for the missing XYZW components. - */ - pos = GPU_vertformat_attr_add(&format_no_radius, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - } - - cache->pos = GPU_vertbuf_create_with_format(has_radius ? &format : &format_no_radius); - GPU_vertbuf_data_alloc(cache->pos, pointcloud->totpoint); - - if (has_radius) { - float(*vbo_data)[4] = (float(*)[4])GPU_vertbuf_get_data(cache->pos); - for (int i = 0; i < pointcloud->totpoint; i++) { - copy_v3_v3(vbo_data[i], pointcloud->co[i]); - /* TODO(fclem): remove multiplication here. - * Here only for keeping the size correct for now. */ - vbo_data[i][3] = pointcloud->radius[i] * 100.0f; + const bke::AttributeAccessor attributes = pointcloud.attributes(); + const VArraySpan<float3> positions = attributes.lookup<float3>("position", ATTR_DOMAIN_POINT); + const VArray<float> radii = attributes.lookup<float>("radius", ATTR_DOMAIN_POINT); + /* From the opengl wiki: + * Note that size does not have to exactly match the size used by the vertex shader. If the + * vertex shader has fewer components than the attribute provides, then the extras are ignored. + * If the vertex shader has more components than the array provides, the extras are given + * values from the vector (0, 0, 0, 1) for the missing XYZW components. 
*/ + if (radii) { + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); } + cache.pos = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(cache.pos, positions.size()); + const VArraySpan<float> radii_span(radii); + MutableSpan<float4> vbo_data{static_cast<float4 *>(GPU_vertbuf_get_data(cache.pos)), + pointcloud.totpoint}; + threading::parallel_for(vbo_data.index_range(), 4096, [&](IndexRange range) { + for (const int i : range) { + vbo_data[i].x = positions[i].x; + vbo_data[i].y = positions[i].y; + vbo_data[i].z = positions[i].z; + /* TODO(fclem): remove multiplication. Here only for keeping the size correct for now. */ + vbo_data[i].w = radii_span[i] * 100.0f; + } + }); } else { - GPU_vertbuf_attr_fill(cache->pos, pos, pointcloud->co); + static GPUVertFormat format = {0}; + static uint pos; + if (format.attr_len == 0) { + pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); + } + cache.pos = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(cache.pos, positions.size()); + GPU_vertbuf_attr_fill(cache.pos, pos, positions.data()); } } @@ -188,24 +196,23 @@ static const uint half_octahedron_tris[4][3] = { {0, 4, 1}, }; -static void pointcloud_batch_cache_ensure_geom(Object *UNUSED(ob), PointCloudBatchCache *cache) +static void pointcloud_batch_cache_ensure_geom(PointCloudBatchCache &cache) { - if (cache->geom != NULL) { + if (cache.geom != nullptr) { return; } static GPUVertFormat format = {0}; static uint pos; if (format.attr_len == 0) { - /* initialize vertex format */ pos = GPU_vertformat_attr_add(&format, "pos_inst", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); GPU_vertformat_alias_add(&format, "nor"); } - cache->geom = GPU_vertbuf_create_with_format(&format); - GPU_vertbuf_data_alloc(cache->geom, ARRAY_SIZE(half_octahedron_normals)); + cache.geom = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(cache.geom, 
ARRAY_SIZE(half_octahedron_normals)); - GPU_vertbuf_attr_fill(cache->geom, pos, half_octahedron_normals); + GPU_vertbuf_attr_fill(cache.geom, pos, half_octahedron_normals); GPUIndexBufBuilder builder; GPU_indexbuf_init(&builder, @@ -217,17 +224,17 @@ static void pointcloud_batch_cache_ensure_geom(Object *UNUSED(ob), PointCloudBat GPU_indexbuf_add_tri_verts(&builder, UNPACK3(half_octahedron_tris[i])); } - cache->geom_indices = GPU_indexbuf_build(&builder); + cache.geom_indices = GPU_indexbuf_build(&builder); } GPUBatch *DRW_pointcloud_batch_cache_get_dots(Object *ob) { - PointCloud *pointcloud = ob->data; + PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data); PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); - if (cache->dots == NULL) { - pointcloud_batch_cache_ensure_pos(ob, cache); - cache->dots = GPU_batch_create(GPU_PRIM_POINTS, cache->pos, NULL); + if (cache->dots == nullptr) { + pointcloud_batch_cache_ensure_pos(pointcloud, *cache); + cache->dots = GPU_batch_create(GPU_PRIM_POINTS, cache->pos, nullptr); } return cache->dots; @@ -235,12 +242,12 @@ GPUBatch *DRW_pointcloud_batch_cache_get_dots(Object *ob) GPUBatch *DRW_pointcloud_batch_cache_get_surface(Object *ob) { - PointCloud *pointcloud = ob->data; + PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data); PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); - if (cache->surface == NULL) { - pointcloud_batch_cache_ensure_pos(ob, cache); - pointcloud_batch_cache_ensure_geom(ob, cache); + if (cache->surface == nullptr) { + pointcloud_batch_cache_ensure_pos(pointcloud, *cache); + pointcloud_batch_cache_ensure_geom(*cache); cache->surface = GPU_batch_create(GPU_PRIM_TRIS, cache->geom, cache->geom_indices); GPU_batch_instbuf_add_ex(cache->surface, cache->pos, false); @@ -253,14 +260,14 @@ GPUBatch **DRW_cache_pointcloud_surface_shaded_get(Object *ob, struct GPUMaterial **UNUSED(gpumat_array), uint gpumat_array_len) { - PointCloud *pointcloud = ob->data; + 
PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data); PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); BLI_assert(cache->mat_len == gpumat_array_len); UNUSED_VARS(gpumat_array_len); - if (cache->surface_per_mat[0] == NULL) { - pointcloud_batch_cache_ensure_pos(ob, cache); - pointcloud_batch_cache_ensure_geom(ob, cache); + if (cache->surface_per_mat[0] == nullptr) { + pointcloud_batch_cache_ensure_pos(pointcloud, *cache); + pointcloud_batch_cache_ensure_geom(*cache); cache->surface_per_mat[0] = GPU_batch_create(GPU_PRIM_TRIS, cache->geom, cache->geom_indices); GPU_batch_instbuf_add_ex(cache->surface_per_mat[0], cache->pos, false); diff --git a/source/blender/draw/intern/draw_cache_impl_subdivision.cc b/source/blender/draw/intern/draw_cache_impl_subdivision.cc index b37a420b555..ab935809f96 100644 --- a/source/blender/draw/intern/draw_cache_impl_subdivision.cc +++ b/source/blender/draw/intern/draw_cache_impl_subdivision.cc @@ -7,8 +7,10 @@ #include "DNA_object_types.h" #include "DNA_scene_types.h" +#include "BKE_attribute.hh" #include "BKE_editmesh.h" #include "BKE_mesh.h" +#include "BKE_mesh_mapping.h" #include "BKE_modifier.h" #include "BKE_object.h" #include "BKE_scene.h" @@ -19,8 +21,8 @@ #include "BKE_subdiv_modifier.h" #include "BLI_linklist.h" - #include "BLI_string.h" +#include "BLI_virtual_array.hh" #include "PIL_time.h" @@ -44,6 +46,8 @@ #include "draw_cache_inline.h" #include "mesh_extractors/extract_mesh.hh" +using blender::Span; + extern "C" char datatoc_common_subdiv_custom_data_interp_comp_glsl[]; extern "C" char datatoc_common_subdiv_ibo_lines_comp_glsl[]; extern "C" char datatoc_common_subdiv_ibo_tris_comp_glsl[]; @@ -668,20 +672,23 @@ static void draw_subdiv_cache_extra_coarse_face_data_bm(BMesh *bm, } } -static void draw_subdiv_cache_extra_coarse_face_data_mesh(Mesh *mesh, uint32_t *flags_data) +static void draw_subdiv_cache_extra_coarse_face_data_mesh(const MeshRenderData *mr, + Mesh *mesh, + uint32_t *flags_data) { - 
for (int i = 0; i < mesh->totpoly; i++) { + const Span<MPoly> polys = mesh->polys(); + for (const int i : polys.index_range()) { uint32_t flag = 0; - if ((mesh->mpoly[i].flag & ME_SMOOTH) != 0) { + if ((polys[i].flag & ME_SMOOTH) != 0) { flag |= SUBDIV_COARSE_FACE_FLAG_SMOOTH; } - if ((mesh->mpoly[i].flag & ME_FACE_SEL) != 0) { + if ((polys[i].flag & ME_FACE_SEL) != 0) { flag |= SUBDIV_COARSE_FACE_FLAG_SELECT; } - if ((mesh->mpoly[i].flag & ME_HIDE) != 0) { + if (mr->hide_poly && mr->hide_poly[i]) { flag |= SUBDIV_COARSE_FACE_FLAG_HIDDEN; } - flags_data[i] = (uint)(mesh->mpoly[i].loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET); + flags_data[i] = (uint)(polys[i].loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET); } } @@ -691,7 +698,7 @@ static void draw_subdiv_cache_extra_coarse_face_data_mapped(Mesh *mesh, uint32_t *flags_data) { if (bm == nullptr) { - draw_subdiv_cache_extra_coarse_face_data_mesh(mesh, flags_data); + draw_subdiv_cache_extra_coarse_face_data_mesh(mr, mesh, flags_data); return; } @@ -722,11 +729,11 @@ static void draw_subdiv_cache_update_extra_coarse_face_data(DRWSubdivCache *cach if (mr->extract_type == MR_EXTRACT_BMESH) { draw_subdiv_cache_extra_coarse_face_data_bm(cache->bm, mr->efa_act, flags_data); } - else if (mr->extract_type == MR_EXTRACT_MAPPED) { + else if (mr->p_origindex != nullptr) { draw_subdiv_cache_extra_coarse_face_data_mapped(mesh, cache->bm, mr, flags_data); } else { - draw_subdiv_cache_extra_coarse_face_data_mesh(mesh, flags_data); + draw_subdiv_cache_extra_coarse_face_data_mesh(mr, mesh, flags_data); } /* Make sure updated data is re-uploaded. 
*/ @@ -801,15 +808,15 @@ struct DRWCacheBuildingContext { }; static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_context, - const int num_vertices, + const int num_verts, const int num_edges, const int num_loops, - const int num_polygons, + const int num_polys, const int *subdiv_polygon_offset) { /* num_loops does not take into account meshes with only loose geometry, which might be meshes - * used as custom bone shapes, so let's check the num_vertices also. */ - if (num_vertices == 0 && num_loops == 0) { + * used as custom bone shapes, so let's check the num_verts also. */ + if (num_verts == 0 && num_loops == 0) { return false; } @@ -820,12 +827,12 @@ static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_con if (num_loops != 0) { cache->num_subdiv_edges = (uint)num_edges; cache->num_subdiv_loops = (uint)num_loops; - cache->num_subdiv_verts = (uint)num_vertices; - cache->num_subdiv_quads = (uint)num_polygons; + cache->num_subdiv_verts = (uint)num_verts; + cache->num_subdiv_quads = (uint)num_polys; cache->subdiv_polygon_offset = static_cast<int *>(MEM_dupallocN(subdiv_polygon_offset)); } - cache->may_have_loose_geom = num_vertices != 0 || num_edges != 0; + cache->may_have_loose_geom = num_verts != 0 || num_edges != 0; /* Initialize cache buffers, prefer dynamic usage so we can reuse memory on the host even after * it was sent to the device, since we may use the data while building other buffers on the CPU @@ -876,7 +883,7 @@ static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_con if (cache->num_subdiv_verts) { ctx->vert_origindex_map = static_cast<int *>( MEM_mallocN(cache->num_subdiv_verts * sizeof(int), "subdiv_vert_origindex_map")); - for (int i = 0; i < num_vertices; i++) { + for (int i = 0; i < num_verts; i++) { ctx->vert_origindex_map[i] = -1; } } @@ -1089,6 +1096,7 @@ static bool draw_subdiv_build_cache(DRWSubdivCache *cache, } /* Only build polygon related data if we have polygons. 
*/ + const Span<MPoly> polys = mesh_eval->polys(); if (cache->num_subdiv_loops != 0) { /* Build buffers for the PatchMap. */ draw_patch_map_build(&cache->gpu_patch_map, subdiv); @@ -1102,7 +1110,7 @@ static bool draw_subdiv_build_cache(DRWSubdivCache *cache, GPU_vertbuf_get_data(cache->fdots_patch_coords); for (int i = 0; i < mesh_eval->totpoly; i++) { const int ptex_face_index = cache->face_ptex_offset[i]; - if (mesh_eval->mpoly[i].totloop == 4) { + if (polys[i].totloop == 4) { /* For quads, the center coordinate of the coarse face has `u = v = 0.5`. */ blender_fdots_patch_coords[i] = make_patch_coord(ptex_face_index, 0.5f, 0.5f); } @@ -1115,16 +1123,16 @@ static bool draw_subdiv_build_cache(DRWSubdivCache *cache, } cache->subdiv_polygon_offset_buffer = draw_subdiv_build_origindex_buffer( - cache->subdiv_polygon_offset, mesh_eval->totpoly); + cache->subdiv_polygon_offset, polys.size()); cache->face_ptex_offset_buffer = draw_subdiv_build_origindex_buffer(cache->face_ptex_offset, - mesh_eval->totpoly + 1); + polys.size() + 1); build_vertex_face_adjacency_maps(cache); } cache->resolution = to_mesh_settings.resolution; - cache->num_coarse_poly = mesh_eval->totpoly; + cache->num_coarse_poly = polys.size(); /* To avoid floating point precision issues when evaluating patches at patch boundaries, * ensure that all loops sharing a vertex use the same patch coordinate. This could cause @@ -1204,8 +1212,8 @@ struct DRWSubdivUboStorage { * of out of bond accesses as compute dispatch are of fixed size. 
*/ uint total_dispatch_size; - int _pad0; - int _pad2; + int is_edit_mode; + int use_hide; int _pad3; }; @@ -1236,6 +1244,8 @@ static void draw_subdiv_init_ubo_storage(const DRWSubdivCache *cache, ubo->coarse_face_hidden_mask = SUBDIV_COARSE_FACE_FLAG_HIDDEN_MASK; ubo->coarse_face_loopstart_mask = SUBDIV_COARSE_FACE_LOOP_START_MASK; ubo->total_dispatch_size = total_dispatch_size; + ubo->is_edit_mode = cache->is_edit_mode; + ubo->use_hide = cache->use_hide; } static void draw_subdiv_ubo_update_and_bind(const DRWSubdivCache *cache, @@ -1468,6 +1478,11 @@ void draw_subdiv_interp_custom_data(const DRWSubdivCache *cache, { GPUShader *shader = nullptr; + if (!draw_subdiv_cache_need_polygon_data(cache)) { + /* Happens on meshes with only loose geometry. */ + return; + } + if (dimensions == 1) { shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_1D, "#define SUBDIV_POLYGON_OFFSET\n" @@ -1953,17 +1968,19 @@ static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache, return; } + const blender::VArraySpan<int> material_indices = mesh_eval->attributes().lookup_or_default<int>( + "material_index", ATTR_DOMAIN_FACE, 0); + /* Count number of subdivided polygons for each material. */ int *mat_start = static_cast<int *>(MEM_callocN(sizeof(int) * mat_len, "subdiv mat_start")); int *subdiv_polygon_offset = cache->subdiv_polygon_offset; /* TODO: parallel_reduce? */ for (int i = 0; i < mesh_eval->totpoly; i++) { - const MPoly *mpoly = &mesh_eval->mpoly[i]; const int next_offset = (i == mesh_eval->totpoly - 1) ? 
number_of_quads : subdiv_polygon_offset[i + 1]; const int quad_count = next_offset - subdiv_polygon_offset[i]; - const int mat_index = mpoly->mat_nr; + const int mat_index = material_indices[i]; mat_start[mat_index] += quad_count; } @@ -1982,8 +1999,7 @@ static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache, MEM_mallocN(sizeof(int) * mesh_eval->totpoly, "per_polygon_mat_offset")); for (int i = 0; i < mesh_eval->totpoly; i++) { - const MPoly *mpoly = &mesh_eval->mpoly[i]; - const int mat_index = mpoly->mat_nr; + const int mat_index = material_indices[i]; const int single_material_index = subdiv_polygon_offset[i]; const int material_offset = mat_end[mat_index]; const int next_offset = (i == mesh_eval->totpoly - 1) ? number_of_quads : @@ -2004,7 +2020,7 @@ static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache, static bool draw_subdiv_create_requested_buffers(Object *ob, Mesh *mesh, - struct MeshBatchCache *batch_cache, + MeshBatchCache *batch_cache, MeshBufferCache *mbc, const bool is_editmode, const bool is_paint_mode, @@ -2012,6 +2028,7 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, const float obmat[4][4], const bool do_final, const bool do_uvedit, + const bool do_cage, const ToolSettings *ts, const bool use_hide, OpenSubdiv_EvaluatorCache *evaluator_cache) @@ -2038,7 +2055,7 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, draw_subdiv_invalidate_evaluator_for_orco(subdiv, mesh_eval); if (!BKE_subdiv_eval_begin_from_mesh( - subdiv, mesh_eval, nullptr, SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE, evaluator_cache)) { + subdiv, mesh_eval, nullptr, SUBDIV_EVALUATOR_TYPE_GPU, evaluator_cache)) { /* This could happen in two situations: * - OpenSubdiv is disabled. 
* - Something totally bad happened, and OpenSubdiv rejected our @@ -2055,9 +2072,8 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, return false; } - /* Edges which do not come from coarse edges should not be drawn in edit mode, only in object - * mode when optimal display in turned off. */ - const bool optimal_display = runtime_data->use_optimal_display || is_editmode; + /* Edges which do not come from coarse edges should not be drawn in edit cage mode. */ + const bool optimal_display = runtime_data->use_optimal_display || (is_editmode && !do_cage); draw_cache->bm = bm; draw_cache->mesh = mesh_eval; @@ -2083,6 +2099,12 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, MeshRenderData *mr = mesh_render_data_create( ob, mesh, is_editmode, is_paint_mode, is_mode_active, obmat, do_final, do_uvedit, ts); mr->use_hide = use_hide; + draw_cache->use_hide = use_hide; + + /* Used for setting loop normals flags. Mapped extraction is only used during edit mode. + * See comments in #extract_lnor_iter_poly_mesh. + */ + draw_cache->is_edit_mode = mr->edit_bmesh != nullptr; draw_subdiv_cache_update_extra_coarse_face_data(draw_cache, mesh_eval, mr); @@ -2134,9 +2156,20 @@ void DRW_subdivide_loose_geom(DRWSubdivCache *subdiv_cache, MeshBufferCache *cac int subd_vert_offset = 0; /* Subdivide each loose coarse edge. */ + const Span<MVert> coarse_verts = coarse_mesh->verts(); + const Span<MEdge> coarse_edges = coarse_mesh->edges(); + + int *vert_to_edge_buffer; + MeshElemMap *vert_to_edge_map; + BKE_mesh_vert_edge_map_create(&vert_to_edge_map, + &vert_to_edge_buffer, + coarse_edges.data(), + coarse_mesh->totvert, + coarse_edges.size()); + for (int i = 0; i < coarse_loose_edge_len; i++) { const int coarse_edge_index = cache->loose_geom.edges[i]; - const MEdge *coarse_edge = &coarse_mesh->medge[cache->loose_geom.edges[i]]; + const MEdge *coarse_edge = &coarse_edges[cache->loose_geom.edges[i]]; /* Perform interpolation of each vertex. 
*/ for (int i = 0; i < resolution - 1; i++, subd_edge_offset++) { @@ -2147,8 +2180,13 @@ void DRW_subdivide_loose_geom(DRWSubdivCache *subdiv_cache, MeshBufferCache *cac DRWSubdivLooseVertex &subd_v1 = loose_subd_verts[subd_vert_offset]; subd_v1.coarse_vertex_index = (i == 0) ? coarse_edge->v1 : -1u; const float u1 = i * inv_resolution_1; - BKE_subdiv_mesh_interpolate_position_on_edge( - coarse_mesh, coarse_edge, is_simple, u1, subd_v1.co); + BKE_subdiv_mesh_interpolate_position_on_edge(coarse_verts.data(), + coarse_edges.data(), + vert_to_edge_map, + coarse_edge_index, + is_simple, + u1, + subd_v1.co); subd_edge.loose_subdiv_v1_index = subd_vert_offset++; @@ -2156,17 +2194,25 @@ void DRW_subdivide_loose_geom(DRWSubdivCache *subdiv_cache, MeshBufferCache *cac DRWSubdivLooseVertex &subd_v2 = loose_subd_verts[subd_vert_offset]; subd_v2.coarse_vertex_index = ((i + 1) == resolution - 1) ? coarse_edge->v2 : -1u; const float u2 = (i + 1) * inv_resolution_1; - BKE_subdiv_mesh_interpolate_position_on_edge( - coarse_mesh, coarse_edge, is_simple, u2, subd_v2.co); + BKE_subdiv_mesh_interpolate_position_on_edge(coarse_verts.data(), + coarse_edges.data(), + vert_to_edge_map, + coarse_edge_index, + is_simple, + u2, + subd_v2.co); subd_edge.loose_subdiv_v2_index = subd_vert_offset++; } } + MEM_freeN(vert_to_edge_buffer); + MEM_freeN(vert_to_edge_map); + /* Copy the remaining loose_verts. 
*/ for (int i = 0; i < coarse_loose_vert_len; i++) { const int coarse_vertex_index = cache->loose_geom.verts[i]; - const MVert &coarse_vertex = coarse_mesh->mvert[coarse_vertex_index]; + const MVert &coarse_vertex = coarse_verts[coarse_vertex_index]; DRWSubdivLooseVertex &subd_v = loose_subd_verts[subd_vert_offset++]; subd_v.coarse_vertex_index = cache->loose_geom.verts[i]; @@ -2195,7 +2241,7 @@ static OpenSubdiv_EvaluatorCache *g_evaluator_cache = nullptr; void DRW_create_subdivision(Object *ob, Mesh *mesh, - struct MeshBatchCache *batch_cache, + MeshBatchCache *batch_cache, MeshBufferCache *mbc, const bool is_editmode, const bool is_paint_mode, @@ -2203,11 +2249,12 @@ void DRW_create_subdivision(Object *ob, const float obmat[4][4], const bool do_final, const bool do_uvedit, + const bool do_cage, const ToolSettings *ts, const bool use_hide) { if (g_evaluator_cache == nullptr) { - g_evaluator_cache = openSubdiv_createEvaluatorCache(OPENSUBDIV_EVALUATOR_GLSL_COMPUTE); + g_evaluator_cache = openSubdiv_createEvaluatorCache(OPENSUBDIV_EVALUATOR_GPU); } #undef TIME_SUBDIV @@ -2226,6 +2273,7 @@ void DRW_create_subdivision(Object *ob, obmat, do_final, do_uvedit, + do_cage, ts, use_hide, g_evaluator_cache)) { diff --git a/source/blender/draw/intern/draw_color_management.cc b/source/blender/draw/intern/draw_color_management.cc index bb11f1ab3ad..eab86226be5 100644 --- a/source/blender/draw/intern/draw_color_management.cc +++ b/source/blender/draw/intern/draw_color_management.cc @@ -169,7 +169,7 @@ void DRW_transform_none(GPUTexture *tex) /* Draw as texture for final render (without immediate mode). 
*/ GPUBatch *geom = DRW_cache_fullscreen_quad_get(); - GPU_batch_program_set_builtin(geom, GPU_SHADER_2D_IMAGE_COLOR); + GPU_batch_program_set_builtin(geom, GPU_SHADER_3D_IMAGE_COLOR); GPU_batch_uniform_4f(geom, "color", 1.0f, 1.0f, 1.0f, 1.0f); GPU_batch_texture_bind(geom, "image", tex); diff --git a/source/blender/draw/intern/draw_command.cc b/source/blender/draw/intern/draw_command.cc new file mode 100644 index 00000000000..ff69885b3b6 --- /dev/null +++ b/source/blender/draw/intern/draw_command.cc @@ -0,0 +1,600 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_compute.h" +#include "GPU_debug.h" + +#include "draw_command.hh" +#include "draw_shader.h" +#include "draw_view.hh" + +#include <bitset> +#include <sstream> + +namespace blender::draw::command { + +/* -------------------------------------------------------------------- */ +/** \name Commands Execution + * \{ */ + +void ShaderBind::execute(RecordingState &state) const +{ + if (assign_if_different(state.shader, shader)) { + GPU_shader_bind(shader); + } +} + +void ResourceBind::execute() const +{ + if (slot == -1) { + return; + } + switch (type) { + case ResourceBind::Type::Sampler: + GPU_texture_bind_ex(is_reference ? *texture_ref : texture, sampler, slot, false); + break; + case ResourceBind::Type::Image: + GPU_texture_image_bind(is_reference ? *texture_ref : texture, slot); + break; + case ResourceBind::Type::UniformBuf: + GPU_uniformbuf_bind(is_reference ? *uniform_buf_ref : uniform_buf, slot); + break; + case ResourceBind::Type::StorageBuf: + GPU_storagebuf_bind(is_reference ? 
*storage_buf_ref : storage_buf, slot); + break; + } +} + +void PushConstant::execute(RecordingState &state) const +{ + if (location == -1) { + return; + } + switch (type) { + case PushConstant::Type::IntValue: + GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int4_value); + break; + case PushConstant::Type::IntReference: + GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int_ref); + break; + case PushConstant::Type::FloatValue: + GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float4_value); + break; + case PushConstant::Type::FloatReference: + GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float_ref); + break; + } +} + +void Draw::execute(RecordingState &state) const +{ + state.front_facing_set(handle.has_inverted_handedness()); + + if (GPU_shader_draw_parameters_support() == false) { + GPU_batch_resource_id_buf_set(batch, state.resource_id_buf); + } + + GPU_batch_set_shader(batch, state.shader); + GPU_batch_draw_advanced(batch, vertex_first, vertex_len, 0, instance_len); +} + +void DrawMulti::execute(RecordingState &state) const +{ + DrawMultiBuf::DrawCommandBuf &indirect_buf = multi_draw_buf->command_buf_; + DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_; + + uint group_index = this->group_first; + while (group_index != (uint)-1) { + const DrawGroup &group = groups[group_index]; + + if (group.vertex_len > 0) { + if (GPU_shader_draw_parameters_support() == false) { + GPU_batch_resource_id_buf_set(group.gpu_batch, state.resource_id_buf); + } + + GPU_batch_set_shader(group.gpu_batch, state.shader); + + constexpr intptr_t stride = sizeof(DrawCommand); + /* We have 2 indirect command reserved per draw group. */ + intptr_t offset = stride * group_index * 2; + + /* Draw negatively scaled geometry first. 
*/ + if (group.len - group.front_facing_len > 0) { + state.front_facing_set(true); + GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset); + } + + if (group.front_facing_len > 0) { + state.front_facing_set(false); + GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset + stride); + } + } + + group_index = group.next; + } +} + +void DrawIndirect::execute(RecordingState &state) const +{ + state.front_facing_set(handle.has_inverted_handedness()); + + GPU_batch_draw_indirect(batch, *indirect_buf, 0); +} + +void Dispatch::execute(RecordingState &state) const +{ + if (is_reference) { + GPU_compute_dispatch(state.shader, size_ref->x, size_ref->y, size_ref->z); + } + else { + GPU_compute_dispatch(state.shader, size.x, size.y, size.z); + } +} + +void DispatchIndirect::execute(RecordingState &state) const +{ + GPU_compute_dispatch_indirect(state.shader, *indirect_buf); +} + +void Barrier::execute() const +{ + GPU_memory_barrier(type); +} + +void Clear::execute() const +{ + GPUFrameBuffer *fb = GPU_framebuffer_active_get(); + GPU_framebuffer_clear(fb, (eGPUFrameBufferBits)clear_channels, color, depth, stencil); +} + +void StateSet::execute(RecordingState &recording_state) const +{ + /** + * Does not support locked state for the moment and never should. + * Better implement a less hacky selection! + */ + BLI_assert(DST.state_lock == 0); + + if (!assign_if_different(recording_state.pipeline_state, new_state)) { + return; + } + + /* Keep old API working. Keep the state tracking in sync. */ + /* TODO(fclem): Move at the end of a pass. */ + DST.state = new_state; + + GPU_state_set(to_write_mask(new_state), + to_blend(new_state), + to_face_cull_test(new_state), + to_depth_test(new_state), + to_stencil_test(new_state), + to_stencil_op(new_state), + to_provoking_vertex(new_state)); + + if (new_state & DRW_STATE_SHADOW_OFFSET) { + GPU_shadow_offset(true); + } + else { + GPU_shadow_offset(false); + } + + /* TODO: this should be part of shader state. 
*/ + if (new_state & DRW_STATE_CLIP_PLANES) { + GPU_clip_distances(recording_state.view_clip_plane_count); + } + else { + GPU_clip_distances(0); + } + + if (new_state & DRW_STATE_IN_FRONT_SELECT) { + /* XXX `GPU_depth_range` is not a perfect solution + * since very distant geometries can still be occluded. + * Also the depth test precision of these geometries is impaired. + * However, it solves the selection for the vast majority of cases. */ + GPU_depth_range(0.0f, 0.01f); + } + else { + GPU_depth_range(0.0f, 1.0f); + } + + if (new_state & DRW_STATE_PROGRAM_POINT_SIZE) { + GPU_program_point_size(true); + } + else { + GPU_program_point_size(false); + } +} + +void StencilSet::execute() const +{ + GPU_stencil_write_mask_set(write_mask); + GPU_stencil_compare_mask_set(compare_mask); + GPU_stencil_reference_set(reference); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Commands Serialization for debugging + * \{ */ + +std::string ShaderBind::serialize() const +{ + return std::string(".shader_bind(") + GPU_shader_get_name(shader) + ")"; +} + +std::string ResourceBind::serialize() const +{ + switch (type) { + case Type::Sampler: + return std::string(".bind_texture") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + + (sampler != GPU_SAMPLER_MAX ? ", sampler=" + std::to_string(sampler) : "") + ")"; + case Type::Image: + return std::string(".bind_image") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + ")"; + case Type::UniformBuf: + return std::string(".bind_uniform_buf") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + ")"; + case Type::StorageBuf: + return std::string(".bind_storage_buf") + (is_reference ? 
"_ref" : "") + "(" + + std::to_string(slot) + ")"; + default: + BLI_assert_unreachable(); + return ""; + } +} + +std::string PushConstant::serialize() const +{ + std::stringstream ss; + for (int i = 0; i < array_len; i++) { + switch (comp_len) { + case 1: + switch (type) { + case Type::IntValue: + ss << int1_value; + break; + case Type::IntReference: + ss << int_ref[i]; + break; + case Type::FloatValue: + ss << float1_value; + break; + case Type::FloatReference: + ss << float_ref[i]; + break; + } + break; + case 2: + switch (type) { + case Type::IntValue: + ss << int2_value; + break; + case Type::IntReference: + ss << int2_ref[i]; + break; + case Type::FloatValue: + ss << float2_value; + break; + case Type::FloatReference: + ss << float2_ref[i]; + break; + } + break; + case 3: + switch (type) { + case Type::IntValue: + ss << int3_value; + break; + case Type::IntReference: + ss << int3_ref[i]; + break; + case Type::FloatValue: + ss << float3_value; + break; + case Type::FloatReference: + ss << float3_ref[i]; + break; + } + break; + case 4: + switch (type) { + case Type::IntValue: + ss << int4_value; + break; + case Type::IntReference: + ss << int4_ref[i]; + break; + case Type::FloatValue: + ss << float4_value; + break; + case Type::FloatReference: + ss << float4_ref[i]; + break; + } + break; + case 16: + switch (type) { + case Type::IntValue: + case Type::IntReference: + BLI_assert_unreachable(); + break; + case Type::FloatValue: + ss << *reinterpret_cast<const float4x4 *>(&float4_value); + break; + case Type::FloatReference: + ss << *float4x4_ref; + break; + } + break; + } + if (i < array_len - 1) { + ss << ", "; + } + } + + return std::string(".push_constant(") + std::to_string(location) + ", data=" + ss.str() + ")"; +} + +std::string Draw::serialize() const +{ + std::string inst_len = (instance_len == (uint)-1) ? "from_batch" : std::to_string(instance_len); + std::string vert_len = (vertex_len == (uint)-1) ? 
"from_batch" : std::to_string(vertex_len); + std::string vert_first = (vertex_first == (uint)-1) ? "from_batch" : + std::to_string(vertex_first); + return std::string(".draw(inst_len=") + inst_len + ", vert_len=" + vert_len + + ", vert_first=" + vert_first + ", res_id=" + std::to_string(handle.resource_index()) + + ")"; +} + +std::string DrawMulti::serialize(std::string line_prefix) const +{ + DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_; + + MutableSpan<DrawPrototype> prototypes(multi_draw_buf->prototype_buf_.data(), + multi_draw_buf->prototype_count_); + + /* This emulates the GPU sorting but without the unstable draw order. */ + std::sort( + prototypes.begin(), prototypes.end(), [](const DrawPrototype &a, const DrawPrototype &b) { + return (a.group_id < b.group_id) || + (a.group_id == b.group_id && a.resource_handle > b.resource_handle); + }); + + /* Compute prefix sum to have correct offsets. */ + uint prefix_sum = 0u; + for (DrawGroup &group : groups) { + group.start = prefix_sum; + prefix_sum += group.front_proto_len + group.back_proto_len; + } + + std::stringstream ss; + + uint group_len = 0; + uint group_index = this->group_first; + while (group_index != (uint)-1) { + const DrawGroup &grp = groups[group_index]; + + ss << std::endl << line_prefix << " .group(id=" << group_index << ", len=" << grp.len << ")"; + + intptr_t offset = grp.start; + + if (grp.back_proto_len > 0) { + for (DrawPrototype &proto : prototypes.slice({offset, grp.back_proto_len})) { + BLI_assert(proto.group_id == group_index); + ResourceHandle handle(proto.resource_handle); + BLI_assert(handle.has_inverted_handedness()); + ss << std::endl + << line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len) + << ", resource_id=" << std::to_string(handle.resource_index()) << ", back_face)"; + } + offset += grp.back_proto_len; + } + + if (grp.front_proto_len > 0) { + for (DrawPrototype &proto : prototypes.slice({offset, grp.front_proto_len})) { + 
BLI_assert(proto.group_id == group_index); + ResourceHandle handle(proto.resource_handle); + BLI_assert(!handle.has_inverted_handedness()); + ss << std::endl + << line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len) + << ", resource_id=" << std::to_string(handle.resource_index()) << ", front_face)"; + } + } + + group_index = grp.next; + group_len++; + } + + ss << std::endl; + + return line_prefix + ".draw_multi(" + std::to_string(group_len) + ")" + ss.str(); +} + +std::string DrawIndirect::serialize() const +{ + return std::string(".draw_indirect()"); +} + +std::string Dispatch::serialize() const +{ + int3 sz = is_reference ? *size_ref : size; + return std::string(".dispatch") + (is_reference ? "_ref" : "") + "(" + std::to_string(sz.x) + + ", " + std::to_string(sz.y) + ", " + std::to_string(sz.z) + ")"; +} + +std::string DispatchIndirect::serialize() const +{ + return std::string(".dispatch_indirect()"); +} + +std::string Barrier::serialize() const +{ + /* TODO(@fclem): Better serialization... */ + return std::string(".barrier(") + std::to_string(type) + ")"; +} + +std::string Clear::serialize() const +{ + std::stringstream ss; + if (eGPUFrameBufferBits(clear_channels) & GPU_COLOR_BIT) { + ss << "color=" << color; + if (eGPUFrameBufferBits(clear_channels) & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) { + ss << ", "; + } + } + if (eGPUFrameBufferBits(clear_channels) & GPU_DEPTH_BIT) { + ss << "depth=" << depth; + if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) { + ss << ", "; + } + } + if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) { + ss << "stencil=0b" << std::bitset<8>(stencil) << ")"; + } + return std::string(".clear(") + ss.str() + ")"; +} + +std::string StateSet::serialize() const +{ + /* TODO(@fclem): Better serialization... 
*/ + return std::string(".state_set(") + std::to_string(new_state) + ")"; +} + +std::string StencilSet::serialize() const +{ + std::stringstream ss; + ss << ".stencil_set(write_mask=0b" << std::bitset<8>(write_mask) << ", compare_mask=0b" + << std::bitset<8>(compare_mask) << ", reference=0b" << std::bitset<8>(reference); + return ss.str(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Commands buffers binding / command / resource ID generation + * \{ */ + +void DrawCommandBuf::bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands) +{ + UNUSED_VARS(headers, commands); + + resource_id_count_ = 0; + + for (const Header &header : headers) { + if (header.type != Type::Draw) { + continue; + } + + Draw &cmd = commands[header.index].draw; + + int batch_vert_len, batch_vert_first, batch_base_index, batch_inst_len; + /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ + GPU_batch_draw_parameter_get( + cmd.batch, &batch_vert_len, &batch_vert_first, &batch_base_index, &batch_inst_len); + /* Instancing attributes are not supported using the new pipeline since we use the base + * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ + BLI_assert(batch_inst_len == 1); + + if (cmd.vertex_len == (uint)-1) { + cmd.vertex_len = batch_vert_len; + } + + if (cmd.handle.raw > 0) { + /* Save correct offset to start of resource_id buffer region for this draw. */ + uint instance_first = resource_id_count_; + resource_id_count_ += cmd.instance_len; + /* Ensure the buffer is big enough. */ + resource_id_buf_.get_or_resize(resource_id_count_ - 1); + + /* Copy the resource id for all instances. 
*/ + uint index = cmd.handle.resource_index(); + for (int i = instance_first; i < (instance_first + cmd.instance_len); i++) { + resource_id_buf_[i] = index; + } + } + } + + resource_id_buf_.push_update(); + + if (GPU_shader_draw_parameters_support() == false) { + state.resource_id_buf = resource_id_buf_; + } + else { + GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT); + } +} + +void DrawMultiBuf::bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + VisibilityBuf &visibility_buf) +{ + UNUSED_VARS(headers, commands); + + GPU_debug_group_begin("DrawMultiBuf.bind"); + + resource_id_count_ = 0u; + for (DrawGroup &group : MutableSpan<DrawGroup>(group_buf_.data(), group_count_)) { + /* Compute prefix sum of all instance of previous group. */ + group.start = resource_id_count_; + resource_id_count_ += group.len; + + int batch_inst_len; + /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ + GPU_batch_draw_parameter_get(group.gpu_batch, + &group.vertex_len, + &group.vertex_first, + &group.base_index, + &batch_inst_len); + + /* Instancing attributes are not supported using the new pipeline since we use the base + * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ + BLI_assert(batch_inst_len == 1); + UNUSED_VARS_NDEBUG(batch_inst_len); + + /* Now that we got the batch information, we can set the counters to 0. */ + group.total_counter = group.front_facing_counter = group.back_facing_counter = 0; + } + + group_buf_.push_update(); + prototype_buf_.push_update(); + /* Allocate enough for the expansion pass. */ + resource_id_buf_.get_or_resize(resource_id_count_); + /* Two command per group. 
*/ + command_buf_.get_or_resize(group_count_ * 2); + + if (prototype_count_ > 0) { + GPUShader *shader = DRW_shader_draw_command_generate_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "prototype_len", prototype_count_); + GPU_storagebuf_bind(group_buf_, GPU_shader_get_ssbo(shader, "group_buf")); + GPU_storagebuf_bind(visibility_buf, GPU_shader_get_ssbo(shader, "visibility_buf")); + GPU_storagebuf_bind(prototype_buf_, GPU_shader_get_ssbo(shader, "prototype_buf")); + GPU_storagebuf_bind(command_buf_, GPU_shader_get_ssbo(shader, "command_buf")); + GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT); + GPU_compute_dispatch(shader, divide_ceil_u(prototype_count_, DRW_COMMAND_GROUP_SIZE), 1, 1); + if (GPU_shader_draw_parameters_support() == false) { + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + state.resource_id_buf = resource_id_buf_; + } + else { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + } + + GPU_debug_group_end(); +} + +/** \} */ + +}; // namespace blender::draw::command diff --git a/source/blender/draw/intern/draw_command.hh b/source/blender/draw/intern/draw_command.hh new file mode 100644 index 00000000000..46a9199a267 --- /dev/null +++ b/source/blender/draw/intern/draw_command.hh @@ -0,0 +1,534 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Commands stored inside draw passes. Converted into GPU commands upon pass submission. + * + * Draw calls (primitive rendering commands) are managed by either `DrawCommandBuf` or + * `DrawMultiBuf`. See implementation details at their definition. 
+ */ + +#include "BKE_global.h" +#include "BLI_map.hh" +#include "DRW_gpu_wrapper.hh" + +#include "draw_command_shared.hh" +#include "draw_handle.hh" +#include "draw_state.h" +#include "draw_view.hh" + +namespace blender::draw::command { + +class DrawCommandBuf; +class DrawMultiBuf; + +/* -------------------------------------------------------------------- */ +/** \name Recording State + * \{ */ + +/** + * Command recording state. + * Keep track of several states and avoid redundant state changes. + */ +struct RecordingState { + GPUShader *shader = nullptr; + bool front_facing = true; + bool inverted_view = false; + DRWState pipeline_state = DRW_STATE_NO_DRAW; + int view_clip_plane_count = 0; + /** Used for gl_BaseInstance workaround. */ + GPUStorageBuf *resource_id_buf = nullptr; + + void front_facing_set(bool facing) + { + /* Facing is inverted if view is not in expected handedness. */ + facing = this->inverted_view == facing; + /* Remove redundant changes. */ + if (assign_if_different(this->front_facing, facing)) { + GPU_front_facing(!facing); + } + } + + void cleanup() + { + if (front_facing == false) { + GPU_front_facing(false); + } + + if (G.debug & G_DEBUG_GPU) { + GPU_storagebuf_unbind_all(); + GPU_texture_image_unbind_all(); + GPU_texture_unbind_all(); + GPU_uniformbuf_unbind_all(); + } + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Regular Commands + * \{ */ + +enum class Type : uint8_t { + /** + * None Type commands are either uninitialized or are repurposed as data storage. + * They are skipped during submission. + */ + None = 0, + + /** Commands stored as Undetermined in regular command buffer. */ + Barrier, + Clear, + Dispatch, + DispatchIndirect, + Draw, + DrawIndirect, + PushConstant, + ResourceBind, + ShaderBind, + StateSet, + StencilSet, + + /** Special commands stored in separate buffers. 
*/ + SubPass, + DrawMulti, +}; + +/** + * The index of the group is implicit since it is known by the one who want to + * access it. This also allows to have an indexed object to split the command + * stream. + */ +struct Header { + /** Command type. */ + Type type; + /** Command index in command heap of this type. */ + uint index; +}; + +struct ShaderBind { + GPUShader *shader; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct ResourceBind { + eGPUSamplerState sampler; + int slot; + bool is_reference; + + enum class Type : uint8_t { + Sampler = 0, + Image, + UniformBuf, + StorageBuf, + } type; + + union { + /** TODO: Use draw::Texture|StorageBuffer|UniformBuffer as resources as they will give more + * debug info. */ + GPUUniformBuf *uniform_buf; + GPUUniformBuf **uniform_buf_ref; + GPUStorageBuf *storage_buf; + GPUStorageBuf **storage_buf_ref; + /** NOTE: Texture is used for both Sampler and Image binds. */ + GPUTexture *texture; + GPUTexture **texture_ref; + }; + + ResourceBind() = default; + + ResourceBind(int slot_, GPUUniformBuf *res) + : slot(slot_), is_reference(false), type(Type::UniformBuf), uniform_buf(res){}; + ResourceBind(int slot_, GPUUniformBuf **res) + : slot(slot_), is_reference(true), type(Type::UniformBuf), uniform_buf_ref(res){}; + ResourceBind(int slot_, GPUStorageBuf *res) + : slot(slot_), is_reference(false), type(Type::StorageBuf), storage_buf(res){}; + ResourceBind(int slot_, GPUStorageBuf **res) + : slot(slot_), is_reference(true), type(Type::StorageBuf), storage_buf_ref(res){}; + ResourceBind(int slot_, draw::Image *res) + : slot(slot_), is_reference(false), type(Type::Image), texture(draw::as_texture(res)){}; + ResourceBind(int slot_, draw::Image **res) + : slot(slot_), is_reference(true), type(Type::Image), texture_ref(draw::as_texture(res)){}; + ResourceBind(int slot_, GPUTexture *res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(false), type(Type::Sampler), 
texture(res){}; + ResourceBind(int slot_, GPUTexture **res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(true), type(Type::Sampler), texture_ref(res){}; + + void execute() const; + std::string serialize() const; +}; + +struct PushConstant { + int location; + uint8_t array_len; + uint8_t comp_len; + enum class Type : uint8_t { + IntValue = 0, + FloatValue, + IntReference, + FloatReference, + } type; + /** + * IMPORTANT: Data is at the end of the struct as it can span over the next commands. + * These next commands are not real commands but just memory to hold the data and are not + * referenced by any Command::Header. + * This is a hack to support float4x4 copy. + */ + union { + int int1_value; + int2 int2_value; + int3 int3_value; + int4 int4_value; + float float1_value; + float2 float2_value; + float3 float3_value; + float4 float4_value; + const int *int_ref; + const int2 *int2_ref; + const int3 *int3_ref; + const int4 *int4_ref; + const float *float_ref; + const float2 *float2_ref; + const float3 *float3_ref; + const float4 *float4_ref; + const float4x4 *float4x4_ref; + }; + + PushConstant() = default; + + PushConstant(int loc, const float &val) + : location(loc), array_len(1), comp_len(1), type(Type::FloatValue), float1_value(val){}; + PushConstant(int loc, const float2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::FloatValue), float2_value(val){}; + PushConstant(int loc, const float3 &val) + : location(loc), array_len(1), comp_len(3), type(Type::FloatValue), float3_value(val){}; + PushConstant(int loc, const float4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::FloatValue), float4_value(val){}; + + PushConstant(int loc, const int &val) + : location(loc), array_len(1), comp_len(1), type(Type::IntValue), int1_value(val){}; + PushConstant(int loc, const int2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::IntValue), int2_value(val){}; + PushConstant(int loc, const int3 &val) + : 
location(loc), array_len(1), comp_len(3), type(Type::IntValue), int3_value(val){}; + PushConstant(int loc, const int4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::IntValue), int4_value(val){}; + + PushConstant(int loc, const float *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::FloatReference), float_ref(val){}; + PushConstant(int loc, const float2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::FloatReference), float2_ref(val){}; + PushConstant(int loc, const float3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::FloatReference), float3_ref(val){}; + PushConstant(int loc, const float4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::FloatReference), float4_ref(val){}; + PushConstant(int loc, const float4x4 *val) + : location(loc), array_len(1), comp_len(16), type(Type::FloatReference), float4x4_ref(val){}; + + PushConstant(int loc, const int *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::IntReference), int_ref(val){}; + PushConstant(int loc, const int2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::IntReference), int2_ref(val){}; + PushConstant(int loc, const int3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::IntReference), int3_ref(val){}; + PushConstant(int loc, const int4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::IntReference), int4_ref(val){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Draw { + GPUBatch *batch; + uint instance_len; + uint vertex_len; + uint vertex_first; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DrawMulti { + GPUBatch *batch; + DrawMultiBuf *multi_draw_buf; + uint group_first; + uint uuid; + + void execute(RecordingState &state) const; + std::string serialize(std::string line_prefix) 
const; +}; + +struct DrawIndirect { + GPUBatch *batch; + GPUStorageBuf **indirect_buf; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Dispatch { + bool is_reference; + union { + int3 size; + int3 *size_ref; + }; + + Dispatch() = default; + + Dispatch(int3 group_len) : is_reference(false), size(group_len){}; + Dispatch(int3 *group_len) : is_reference(true), size_ref(group_len){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DispatchIndirect { + GPUStorageBuf **indirect_buf; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Barrier { + eGPUBarrier type; + + void execute() const; + std::string serialize() const; +}; + +struct Clear { + uint8_t clear_channels; /* #eGPUFrameBufferBits. But want to save some bits. */ + uint8_t stencil; + float depth; + float4 color; + + void execute() const; + std::string serialize() const; +}; + +struct StateSet { + DRWState new_state; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct StencilSet { + uint write_mask; + uint compare_mask; + uint reference; + + void execute() const; + std::string serialize() const; +}; + +union Undetermined { + ShaderBind shader_bind; + ResourceBind resource_bind; + PushConstant push_constant; + Draw draw; + DrawMulti draw_multi; + DrawIndirect draw_indirect; + Dispatch dispatch; + DispatchIndirect dispatch_indirect; + Barrier barrier; + Clear clear; + StateSet state_set; + StencilSet stencil_set; +}; + +/** Try to keep the command size as low as possible for performance. */ +BLI_STATIC_ASSERT(sizeof(Undetermined) <= 24, "One of the command type is too large.") + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Commands + * + * A draw command buffer used to issue single draw commands without instance merging or any + * other optimizations. 
+ * + * It still uses a ResourceIdBuf to keep the same shader interface as multi draw commands. + * + * \{ */ + +class DrawCommandBuf { + friend Manager; + + private: + using ResourceIdBuf = StorageArrayBuffer<uint, 128, false>; + + /** Array of resource id. One per instance. Generated on GPU and send to GPU. */ + ResourceIdBuf resource_id_buf_; + /** Used items in the resource_id_buf_. Not it's allocated length. */ + uint resource_id_count_ = 0; + + public: + void clear(){}; + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + vertex_first = vertex_first != -1 ? vertex_first : 0; + instance_len = instance_len != -1 ? instance_len : 1; + + int64_t index = commands.append_and_get_index({}); + headers.append({Type::Draw, static_cast<uint>(index)}); + commands[index].draw = {batch, instance_len, vertex_len, vertex_first, handle}; + } + + void bind(RecordingState &state, Vector<Header, 0> &headers, Vector<Undetermined, 0> &commands); +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Multi Draw Commands + * + * For efficient rendering of large scene we strive to minimize the number of draw call and state + * changes. To this end, we group many rendering commands and sort them per render state using + * `DrawGroup` as a container. This is done automatically for any successive commands with the + * same state. + * + * A `DrawGroup` is the combination of a `GPUBatch` (VBO state) and a `command::DrawMulti` + * (Pipeline State). + * + * Inside each `DrawGroup` all instances of a same `GPUBatch` is merged into a single indirect + * command. + * + * To support this arbitrary reordering, we only need to know the offset of all the commands for a + * specific `DrawGroup`. This is done on CPU by doing a simple prefix sum. 
The result is pushed to + * GPU and used on CPU to issue the right command indirect. + * + * Each draw command is stored in an unsorted array of `DrawPrototype` and sent directly to the + * GPU. + * + * A command generation compute shader then go over each `DrawPrototype`. For each it adds it (or + * not depending on visibility) to the correct draw command using the offset of the `DrawGroup` + * computed on CPU. After that, it also outputs one resource ID for each instance inside a + * `DrawPrototype`. + * + * \{ */ + +class DrawMultiBuf { + friend Manager; + friend DrawMulti; + + private: + using DrawGroupBuf = StorageArrayBuffer<DrawGroup, 16>; + using DrawPrototypeBuf = StorageArrayBuffer<DrawPrototype, 16>; + using DrawCommandBuf = StorageArrayBuffer<DrawCommand, 16, true>; + using ResourceIdBuf = StorageArrayBuffer<uint, 128, true>; + + using DrawGroupKey = std::pair<uint, GPUBatch *>; + using DrawGroupMap = Map<DrawGroupKey, uint>; + /** Maps a DrawMulti command and a gpu batch to their unique DrawGroup command. */ + DrawGroupMap group_ids_; + + /** DrawGroup Command heap. Uploaded to GPU for sorting. */ + DrawGroupBuf group_buf_ = {"DrawGroupBuf"}; + /** Command Prototypes. Unsorted */ + DrawPrototypeBuf prototype_buf_ = {"DrawPrototypeBuf"}; + /** Command list generated by the sorting / compaction steps. Lives on GPU. */ + DrawCommandBuf command_buf_ = {"DrawCommandBuf"}; + /** Array of resource id. One per instance. Lives on GPU. */ + ResourceIdBuf resource_id_buf_ = {"ResourceIdBuf"}; + /** Give unique ID to each header so we can use that as hash key. */ + uint header_id_counter_ = 0; + /** Number of groups inside group_buf_. */ + uint group_count_ = 0; + /** Number of prototype command inside prototype_buf_. */ + uint prototype_count_ = 0; + /** Used items in the resource_id_buf_. Not it's allocated length. 
*/ + uint resource_id_count_ = 0; + + public: + void clear() + { + header_id_counter_ = 0; + group_count_ = 0; + prototype_count_ = 0; + group_ids_.clear(); + } + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + /* Unsupported for now. Use PassSimple. */ + BLI_assert(vertex_first == 0 || vertex_first == -1); + BLI_assert(vertex_len == -1); + UNUSED_VARS_NDEBUG(vertex_len, vertex_first); + + instance_len = instance_len != -1 ? instance_len : 1; + + /* If there was some state changes since previous call, we have to create another command. */ + if (headers.is_empty() || headers.last().type != Type::DrawMulti) { + uint index = commands.append_and_get_index({}); + headers.append({Type::DrawMulti, index}); + commands[index].draw_multi = {batch, this, (uint)-1, header_id_counter_++}; + } + + DrawMulti &cmd = commands.last().draw_multi; + + uint &group_id = group_ids_.lookup_or_add(DrawGroupKey(cmd.uuid, batch), (uint)-1); + + bool inverted = handle.has_inverted_handedness(); + + if (group_id == (uint)-1) { + uint new_group_id = group_count_++; + + DrawGroup &group = group_buf_.get_or_resize(new_group_id); + group.next = cmd.group_first; + group.len = instance_len; + group.front_facing_len = inverted ? 0 : instance_len; + group.gpu_batch = batch; + group.front_proto_len = 0; + group.back_proto_len = 0; + /* For serialization only. */ + (inverted ? group.back_proto_len : group.front_proto_len)++; + /* Append to list. */ + cmd.group_first = new_group_id; + group_id = new_group_id; + } + else { + DrawGroup &group = group_buf_[group_id]; + group.len += instance_len; + group.front_facing_len += inverted ? 0 : instance_len; + /* For serialization only. */ + (inverted ? 
group.back_proto_len : group.front_proto_len)++; + } + + DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++); + draw.group_id = group_id; + draw.resource_handle = handle.raw; + draw.instance_len = instance_len; + } + + void bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + VisibilityBuf &visibility_buf); +}; + +/** \} */ + +}; // namespace blender::draw::command diff --git a/source/blender/draw/intern/draw_command_shared.hh b/source/blender/draw/intern/draw_command_shared.hh new file mode 100644 index 00000000000..9fbbe23f0ce --- /dev/null +++ b/source/blender/draw/intern/draw_command_shared.hh @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#ifndef GPU_SHADER +# include "BLI_span.hh" +# include "GPU_shader_shared_utils.h" + +namespace blender::draw::command { + +struct RecordingState; + +#endif + +/* -------------------------------------------------------------------- */ +/** \name Multi Draw + * \{ */ + +/** + * A DrawGroup allow to split the command stream into batch-able chunks of commands with + * the same render state. + */ +struct DrawGroup { + /** Index of next #DrawGroup from the same header. */ + uint next; + + /** Index of the first instances after sorting. */ + uint start; + /** Total number of instances (including inverted facing). Needed to issue the draw call. */ + uint len; + /** Number of non inverted scaling instances in this Group. */ + uint front_facing_len; + + /** #GPUBatch values to be copied to #DrawCommand after sorting (if not overridden). */ + int vertex_len; + int vertex_first; + int base_index; + + /** Atomic counters used during command sorting. */ + uint total_counter; + +#ifndef GPU_SHADER + /* NOTE: Union just to make sure the struct has always the same size on all platform. */ + union { + struct { + /** For debug printing only. 
*/ + uint front_proto_len; + uint back_proto_len; + /** Needed to create the correct draw call. */ + GPUBatch *gpu_batch; + }; + struct { +#endif + uint front_facing_counter; + uint back_facing_counter; + uint _pad0, _pad1; +#ifndef GPU_SHADER + }; + }; +#endif +}; +BLI_STATIC_ASSERT_ALIGN(DrawGroup, 16) + +/** + * Representation of a future draw call inside a DrawGroup. This #DrawPrototype is then + * converted into #DrawCommand on GPU after visibility and compaction. Multiple + * #DrawPrototype might get merged into the same final #DrawCommand. + */ +struct DrawPrototype { + /* Reference to parent DrawGroup to get the GPUBatch vertex / instance count. */ + uint group_id; + /* Resource handle associated with this call. Also reference visibility. */ + uint resource_handle; + /* Number of instances. */ + uint instance_len; + uint _pad0; +}; +BLI_STATIC_ASSERT_ALIGN(DrawPrototype, 16) + +/** \} */ + +#ifndef GPU_SHADER +}; // namespace blender::draw::command +#endif diff --git a/source/blender/draw/intern/draw_common.c b/source/blender/draw/intern/draw_common.c index 0f330dbb519..c1b4c3c1f81 100644 --- a/source/blender/draw/intern/draw_common.c +++ b/source/blender/draw/intern/draw_common.c @@ -280,10 +280,11 @@ int DRW_object_wire_theme_get(Object *ob, ViewLayer *view_layer, float **r_color { const DRWContextState *draw_ctx = DRW_context_state_get(); const bool is_edit = (draw_ctx->object_mode & OB_MODE_EDIT) && (ob->mode & OB_MODE_EDIT); - const bool active = view_layer->basact && - ((ob->base_flag & BASE_FROM_DUPLI) ? - (DRW_object_get_dupli_parent(ob) == view_layer->basact->object) : - (view_layer->basact->object == ob)); + BKE_view_layer_synced_ensure(draw_ctx->scene, view_layer); + const Base *base = BKE_view_layer_active_base_get(view_layer); + const bool active = base && ((ob->base_flag & BASE_FROM_DUPLI) ? 
+ (DRW_object_get_dupli_parent(ob) == base->object) : + (base->object == ob)); /* confusing logic here, there are 2 methods of setting the color * 'colortab[colindex]' and 'theme_id', colindex overrides theme_id. @@ -417,7 +418,6 @@ bool DRW_object_is_flat(Object *ob, int *r_axis) OB_CURVES_LEGACY, OB_SURF, OB_FONT, - OB_MBALL, OB_CURVES, OB_POINTCLOUD, OB_VOLUME)) { diff --git a/source/blender/draw/intern/draw_common_shader_shared.h b/source/blender/draw/intern/draw_common_shader_shared.h index c9819d9da87..57cb7880ce6 100644 --- a/source/blender/draw/intern/draw_common_shader_shared.h +++ b/source/blender/draw/intern/draw_common_shader_shared.h @@ -19,7 +19,7 @@ typedef struct GlobalsUboStorage GlobalsUboStorage; #define UBO_LAST_COLOR color_uv_shadow /* Used as ubo but colors can be directly referenced as well */ -/* NOTE: Also keep all color as vec4 and between #UBO_FIRST_COLOR and #UBO_LAST_COLOR. */ +/* \note Also keep all color as vec4 and between #UBO_FIRST_COLOR and #UBO_LAST_COLOR. 
*/ struct GlobalsUboStorage { /* UBOs data needs to be 16 byte aligned (size of vec4) */ float4 color_wire; diff --git a/source/blender/draw/intern/draw_curves.cc b/source/blender/draw/intern/draw_curves.cc index c40f2275968..a61769e7a63 100644 --- a/source/blender/draw/intern/draw_curves.cc +++ b/source/blender/draw/intern/draw_curves.cc @@ -33,25 +33,17 @@ #include "draw_manager.h" #include "draw_shader.h" -#ifndef __APPLE__ -# define USE_TRANSFORM_FEEDBACK -# define USE_COMPUTE_SHADERS -#endif - BLI_INLINE eParticleRefineShaderType drw_curves_shader_type_get() { -#ifdef USE_COMPUTE_SHADERS if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) { return PART_REFINE_SHADER_COMPUTE; } -#endif -#ifdef USE_TRANSFORM_FEEDBACK - return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; -#endif + if (GPU_transform_feedback_support()) { + return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; + } return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND; } -#ifndef USE_TRANSFORM_FEEDBACK struct CurvesEvalCall { struct CurvesEvalCall *next; GPUVertBuf *vbo; @@ -63,7 +55,6 @@ static CurvesEvalCall *g_tf_calls = nullptr; static int g_tf_id_offset; static int g_tf_target_width; static int g_tf_target_height; -#endif static GPUVertBuf *g_dummy_vbo = nullptr; static GPUTexture *g_dummy_texture = nullptr; @@ -106,18 +97,20 @@ void DRW_curves_init(DRWData *drw_data) CurvesUniformBufPool *pool = drw_data->curves_ubos; pool->reset(); -#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS) - g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0); -#else - g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR); -#endif + if (GPU_transform_feedback_support() || GPU_compute_shader_support()) { + g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0); + } + else { + g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR); + } if (g_dummy_vbo == nullptr) { /* initialize vertex format */ GPUVertFormat format = {0}; uint 
dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - g_dummy_vbo = GPU_vertbuf_create_with_format(&format); + g_dummy_vbo = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f}; GPU_vertbuf_data_alloc(g_dummy_vbo, 1); @@ -201,21 +194,24 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache, { GPUShader *tf_shader = curves_eval_shader_get(CURVES_EVAL_CATMULL_ROM); -#ifdef USE_TRANSFORM_FEEDBACK - DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo); -#else - DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); - - CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__); - pr_call->next = g_tf_calls; - pr_call->vbo = vbo; - pr_call->shgrp = tf_shgrp; - pr_call->vert_len = final_points_len; - g_tf_calls = pr_call; - DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); - DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); - DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); -#endif + DRWShadingGroup *tf_shgrp = nullptr; + if (GPU_transform_feedback_support()) { + tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo); + } + else { + tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); + + CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__); + pr_call->next = g_tf_calls; + pr_call->vbo = vbo; + pr_call->shgrp = tf_shgrp; + pr_call->vert_len = final_points_len; + g_tf_calls = pr_call; + DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); + DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); + DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); + } + BLI_assert(tf_shgrp != nullptr); drw_curves_cache_shgrp_attach_resources(tf_shgrp, cache, tex, subdiv); DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, 
final_points_len); @@ -246,13 +242,14 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache, c } } -static CurvesEvalCache *drw_curves_cache_get(Object *object, +static CurvesEvalCache *drw_curves_cache_get(Curves &curves, GPUMaterial *gpu_material, int subdiv, int thickness_res) { CurvesEvalCache *cache; - bool update = curves_ensure_procedural_data(object, &cache, gpu_material, subdiv, thickness_res); + const bool update = curves_ensure_procedural_data( + &curves, &cache, gpu_material, subdiv, thickness_res); if (update) { if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { @@ -268,12 +265,13 @@ static CurvesEvalCache *drw_curves_cache_get(Object *object, GPUVertBuf *DRW_curves_pos_buffer_get(Object *object) { const DRWContextState *draw_ctx = DRW_context_state_get(); - Scene *scene = draw_ctx->scene; + const Scene *scene = draw_ctx->scene; - int subdiv = scene->r.hair_subdiv; - int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2; + const int subdiv = scene->r.hair_subdiv; + const int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2; - CurvesEvalCache *cache = drw_curves_cache_get(object, nullptr, subdiv, thickness_res); + Curves &curves = *static_cast<Curves *>(object->data); + CurvesEvalCache *cache = drw_curves_cache_get(curves, nullptr, subdiv, thickness_res); return cache->final[subdiv].proc_buf; } @@ -303,15 +301,16 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object, GPUMaterial *gpu_material) { const DRWContextState *draw_ctx = DRW_context_state_get(); - Scene *scene = draw_ctx->scene; + const Scene *scene = draw_ctx->scene; CurvesUniformBufPool *pool = DST.vmempool->curves_ubos; CurvesInfosBuf &curves_infos = pool->alloc(); + Curves &curves_id = *static_cast<Curves *>(object->data); - int subdiv = scene->r.hair_subdiv; - int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 
1 : 2; + const int subdiv = scene->r.hair_subdiv; + const int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2; CurvesEvalCache *curves_cache = drw_curves_cache_get( - object, gpu_material, subdiv, thickness_res); + curves_id, gpu_material, subdiv, thickness_res); DRWShadingGroup *shgrp = DRW_shgroup_create_sub(shgrp_parent); @@ -330,13 +329,10 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object, /* Use the radius of the root and tip of the first curve for now. This is a workaround that we * use for now because we can't use a per-point radius yet. */ - Curves &curves_id = *static_cast<Curves *>(object->data); const blender::bke::CurvesGeometry &curves = blender::bke::CurvesGeometry::wrap( curves_id.geometry); if (curves.curves_num() >= 1) { - CurveComponent curves_component; - curves_component.replace(&curves_id, GeometryOwnershipType::ReadOnly); - blender::VArray<float> radii = curves_component.attribute_get_for_read( + blender::VArray<float> radii = curves.attributes().lookup_or_default( "radius", ATTR_DOMAIN_POINT, 0.005f); const blender::IndexRange first_curve_points = curves.points_for_curve(0); const float first_radius = radii[first_curve_points.first()]; @@ -383,7 +379,7 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object, * attributes. */ const int index = attribute_index_in_material(gpu_material, request.attribute_name); if (index != -1) { - curves_infos.is_point_attribute[index] = request.domain == ATTR_DOMAIN_POINT; + curves_infos.is_point_attribute[index][0] = request.domain == ATTR_DOMAIN_POINT; } } @@ -411,82 +407,118 @@ void DRW_curves_update() /* Update legacy hair too, to avoid verbosity in callers. */ DRW_hair_update(); -#ifndef USE_TRANSFORM_FEEDBACK - /** - * Workaround to transform feedback not working on mac. - * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). 
- * - * So instead of using transform feedback we render to a texture, - * read back the result to system memory and re-upload as VBO data. - * It is really not ideal performance wise, but it is the simplest - * and the most local workaround that still uses the power of the GPU. - */ - - if (g_tf_calls == nullptr) { - return; - } + if (!GPU_transform_feedback_support()) { + /** + * Workaround to transform feedback not working on mac. + * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). + * + * So instead of using transform feedback we render to a texture, + * read back the result to system memory and re-upload as VBO data. + * It is really not ideal performance wise, but it is the simplest + * and the most local workaround that still uses the power of the GPU. + */ + + if (g_tf_calls == nullptr) { + return; + } - /* Search ideal buffer size. */ - uint max_size = 0; - for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { - max_size = max_ii(max_size, pr_call->vert_len); - } + /* Search ideal buffer size. */ + uint max_size = 0; + for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { + max_size = max_ii(max_size, pr_call->vert_len); + } + + /* Create target Texture / Frame-buffer */ + /* Don't use max size as it can be really heavy and fail. + * Do chunks of maximum 2048 * 2048 hair points. 
*/ + int width = 2048; + int height = min_ii(width, 1 + max_size / width); + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPUFrameBuffer *fb = nullptr; + GPU_framebuffer_ensure_config(&fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(tex), + }); + + float *data = static_cast<float *>( + MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer")); + + GPU_framebuffer_bind(fb); + while (g_tf_calls != nullptr) { + CurvesEvalCall *pr_call = g_tf_calls; + g_tf_calls = g_tf_calls->next; + + g_tf_id_offset = 0; + while (pr_call->vert_len > 0) { + int max_read_px_len = min_ii(width * height, pr_call->vert_len); + + DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); + /* Read back result to main memory. */ + GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); + /* Upload back to VBO. */ + GPU_vertbuf_use(pr_call->vbo); + GPU_vertbuf_update_sub(pr_call->vbo, + sizeof(float[4]) * g_tf_id_offset, + sizeof(float[4]) * max_read_px_len, + data); + + g_tf_id_offset += max_read_px_len; + pr_call->vert_len -= max_read_px_len; + } - /* Create target Texture / Frame-buffer */ - /* Don't use max size as it can be really heavy and fail. - * Do chunks of maximum 2048 * 2048 hair points. 
*/ - int width = 2048; - int height = min_ii(width, 1 + max_size / width); - GPUTexture *tex = DRW_texture_pool_query_2d( - width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update); - g_tf_target_height = height; - g_tf_target_width = width; - - GPUFrameBuffer *fb = nullptr; - GPU_framebuffer_ensure_config(&fb, - { - GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(tex), - }); - - float *data = static_cast<float *>( - MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer")); - - GPU_framebuffer_bind(fb); - while (g_tf_calls != nullptr) { - CurvesEvalCall *pr_call = g_tf_calls; - g_tf_calls = g_tf_calls->next; - - g_tf_id_offset = 0; - while (pr_call->vert_len > 0) { - int max_read_px_len = min_ii(width * height, pr_call->vert_len); - - DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); - /* Read back result to main memory. */ - GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); - /* Upload back to VBO. */ - GPU_vertbuf_use(pr_call->vbo); - GPU_vertbuf_update_sub(pr_call->vbo, - sizeof(float[4]) * g_tf_id_offset, - sizeof(float[4]) * max_read_px_len, - data); - - g_tf_id_offset += max_read_px_len; - pr_call->vert_len -= max_read_px_len; + MEM_freeN(pr_call); } - MEM_freeN(pr_call); + MEM_freeN(data); + GPU_framebuffer_free(fb); } + else { + /* NOTE(Metal): If compute is not supported, bind a temporary frame-buffer to avoid + * side-effects from rendering in the active buffer. + * We also need to guarantee that a Frame-buffer is active to perform any rendering work, + * even if there is no output */ + GPUFrameBuffer *temp_fb = nullptr; + GPUFrameBuffer *prev_fb = nullptr; + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) { + if (!GPU_compute_shader_support()) { + prev_fb = GPU_framebuffer_active_get(); + char errorOut[256]; + /* if the frame-buffer is invalid we need a dummy frame-buffer to be bound. 
*/ + if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) { + int width = 64; + int height = 64; + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)}); + + GPU_framebuffer_bind(temp_fb); + } + } + } - MEM_freeN(data); - GPU_framebuffer_free(fb); -#else - /* Just render the pass when using compute shaders or transform feedback. */ - DRW_draw_pass(g_tf_pass); - if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { - GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + /* Just render the pass when using compute shaders or transform feedback. */ + DRW_draw_pass(g_tf_pass); + if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + /* Release temporary frame-buffer. */ + if (temp_fb != nullptr) { + GPU_framebuffer_free(temp_fb); + } + /* Rebind existing frame-buffer */ + if (prev_fb != nullptr) { + GPU_framebuffer_bind(prev_fb); + } } -#endif } void DRW_curves_free() diff --git a/source/blender/draw/intern/draw_curves_private.h b/source/blender/draw/intern/draw_curves_private.h index 7d54e1089d6..31122ed5248 100644 --- a/source/blender/draw/intern/draw_curves_private.h +++ b/source/blender/draw/intern/draw_curves_private.h @@ -16,6 +16,12 @@ extern "C" { #endif +struct Curves; +struct GPUVertBuf; +struct GPUIndexBuf; +struct GPUBatch; +struct GPUTexture; + #define MAX_THICKRES 2 /* see eHairType */ #define MAX_HAIR_SUBDIV 4 /* see hair_subdiv rna */ @@ -25,11 +31,6 @@ typedef enum CurvesEvalShader { } CurvesEvalShader; #define CURVES_EVAL_SHADER_NUM 3 -struct GPUVertBuf; -struct GPUIndexBuf; -struct GPUBatch; -struct GPUTexture; - typedef struct CurvesEvalFinalCache { /* Output of the subdivision stage: vertex buffer sized to subdiv level. 
*/ GPUVertBuf *proc_buf; @@ -95,7 +96,7 @@ typedef struct CurvesEvalCache { /** * Ensure all necessary textures and buffers exist for GPU accelerated drawing. */ -bool curves_ensure_procedural_data(struct Object *object, +bool curves_ensure_procedural_data(struct Curves *curves, struct CurvesEvalCache **r_hair_cache, struct GPUMaterial *gpu_material, int subdiv, diff --git a/source/blender/draw/intern/draw_debug.c b/source/blender/draw/intern/draw_debug.c deleted file mode 100644 index b568119627e..00000000000 --- a/source/blender/draw/intern/draw_debug.c +++ /dev/null @@ -1,196 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2018 Blender Foundation. */ - -/** \file - * \ingroup draw - * - * \brief Simple API to draw debug shapes in the viewport. - */ - -#include "MEM_guardedalloc.h" - -#include "DNA_object_types.h" - -#include "BKE_object.h" - -#include "BLI_link_utils.h" - -#include "GPU_immediate.h" -#include "GPU_matrix.h" - -#include "draw_debug.h" -#include "draw_manager.h" - -/* --------- Register --------- */ - -/* Matrix applied to all points before drawing. Could be a stack if needed. 
*/ -static float g_modelmat[4][4]; - -void DRW_debug_modelmat_reset(void) -{ - unit_m4(g_modelmat); -} - -void DRW_debug_modelmat(const float modelmat[4][4]) -{ - copy_m4_m4(g_modelmat, modelmat); -} - -void DRW_debug_line_v3v3(const float v1[3], const float v2[3], const float color[4]) -{ - DRWDebugLine *line = MEM_mallocN(sizeof(DRWDebugLine), "DRWDebugLine"); - mul_v3_m4v3(line->pos[0], g_modelmat, v1); - mul_v3_m4v3(line->pos[1], g_modelmat, v2); - copy_v4_v4(line->color, color); - BLI_LINKS_PREPEND(DST.debug.lines, line); -} - -void DRW_debug_polygon_v3(const float (*v)[3], const int vert_len, const float color[4]) -{ - BLI_assert(vert_len > 1); - - for (int i = 0; i < vert_len; i++) { - DRW_debug_line_v3v3(v[i], v[(i + 1) % vert_len], color); - } -} - -void DRW_debug_m4(const float m[4][4]) -{ - float v0[3] = {0.0f, 0.0f, 0.0f}; - float v1[3] = {1.0f, 0.0f, 0.0f}; - float v2[3] = {0.0f, 1.0f, 0.0f}; - float v3[3] = {0.0f, 0.0f, 1.0f}; - - mul_m4_v3(m, v0); - mul_m4_v3(m, v1); - mul_m4_v3(m, v2); - mul_m4_v3(m, v3); - - DRW_debug_line_v3v3(v0, v1, (float[4]){1.0f, 0.0f, 0.0f, 1.0f}); - DRW_debug_line_v3v3(v0, v2, (float[4]){0.0f, 1.0f, 0.0f, 1.0f}); - DRW_debug_line_v3v3(v0, v3, (float[4]){0.0f, 0.0f, 1.0f, 1.0f}); -} - -void DRW_debug_bbox(const BoundBox *bbox, const float color[4]) -{ - DRW_debug_line_v3v3(bbox->vec[0], bbox->vec[1], color); - DRW_debug_line_v3v3(bbox->vec[1], bbox->vec[2], color); - DRW_debug_line_v3v3(bbox->vec[2], bbox->vec[3], color); - DRW_debug_line_v3v3(bbox->vec[3], bbox->vec[0], color); - - DRW_debug_line_v3v3(bbox->vec[4], bbox->vec[5], color); - DRW_debug_line_v3v3(bbox->vec[5], bbox->vec[6], color); - DRW_debug_line_v3v3(bbox->vec[6], bbox->vec[7], color); - DRW_debug_line_v3v3(bbox->vec[7], bbox->vec[4], color); - - DRW_debug_line_v3v3(bbox->vec[0], bbox->vec[4], color); - DRW_debug_line_v3v3(bbox->vec[1], bbox->vec[5], color); - DRW_debug_line_v3v3(bbox->vec[2], bbox->vec[6], color); - DRW_debug_line_v3v3(bbox->vec[3], 
bbox->vec[7], color); -} - -void DRW_debug_m4_as_bbox(const float m[4][4], const float color[4], const bool invert) -{ - BoundBox bb; - const float min[3] = {-1.0f, -1.0f, -1.0f}, max[3] = {1.0f, 1.0f, 1.0f}; - float project_matrix[4][4]; - if (invert) { - invert_m4_m4(project_matrix, m); - } - else { - copy_m4_m4(project_matrix, m); - } - - BKE_boundbox_init_from_minmax(&bb, min, max); - for (int i = 0; i < 8; i++) { - mul_project_m4_v3(project_matrix, bb.vec[i]); - } - DRW_debug_bbox(&bb, color); -} - -void DRW_debug_sphere(const float center[3], const float radius, const float color[4]) -{ - float size_mat[4][4]; - DRWDebugSphere *sphere = MEM_mallocN(sizeof(DRWDebugSphere), "DRWDebugSphere"); - /* Bake all transform into a Matrix4 */ - scale_m4_fl(size_mat, radius); - copy_m4_m4(sphere->mat, g_modelmat); - translate_m4(sphere->mat, center[0], center[1], center[2]); - mul_m4_m4m4(sphere->mat, sphere->mat, size_mat); - - copy_v4_v4(sphere->color, color); - BLI_LINKS_PREPEND(DST.debug.spheres, sphere); -} - -/* --------- Render --------- */ - -static void drw_debug_draw_lines(void) -{ - int count = BLI_linklist_count((LinkNode *)DST.debug.lines); - - if (count == 0) { - return; - } - - GPUVertFormat *vert_format = immVertexFormat(); - uint pos = GPU_vertformat_attr_add(vert_format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - uint col = GPU_vertformat_attr_add(vert_format, "color", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - - immBindBuiltinProgram(GPU_SHADER_3D_FLAT_COLOR); - - immBegin(GPU_PRIM_LINES, count * 2); - - while (DST.debug.lines) { - void *next = DST.debug.lines->next; - - immAttr4fv(col, DST.debug.lines->color); - immVertex3fv(pos, DST.debug.lines->pos[0]); - - immAttr4fv(col, DST.debug.lines->color); - immVertex3fv(pos, DST.debug.lines->pos[1]); - - MEM_freeN(DST.debug.lines); - DST.debug.lines = next; - } - immEnd(); - - immUnbindProgram(); -} - -static void drw_debug_draw_spheres(void) -{ - int count = BLI_linklist_count((LinkNode *)DST.debug.spheres); - 
- if (count == 0) { - return; - } - - float persmat[4][4]; - DRW_view_persmat_get(NULL, persmat, false); - - GPUBatch *empty_sphere = DRW_cache_empty_sphere_get(); - GPU_batch_program_set_builtin(empty_sphere, GPU_SHADER_3D_UNIFORM_COLOR); - while (DST.debug.spheres) { - void *next = DST.debug.spheres->next; - float MVP[4][4]; - - mul_m4_m4m4(MVP, persmat, DST.debug.spheres->mat); - GPU_batch_uniform_mat4(empty_sphere, "ModelViewProjectionMatrix", MVP); - GPU_batch_uniform_4fv(empty_sphere, "color", DST.debug.spheres->color); - GPU_batch_draw(empty_sphere); - - MEM_freeN(DST.debug.spheres); - DST.debug.spheres = next; - } -} - -void drw_debug_draw(void) -{ - drw_debug_draw_lines(); - drw_debug_draw_spheres(); -} - -void drw_debug_init(void) -{ - DRW_debug_modelmat_reset(); -} diff --git a/source/blender/draw/intern/draw_debug.cc b/source/blender/draw/intern/draw_debug.cc new file mode 100644 index 00000000000..b0662a42ea0 --- /dev/null +++ b/source/blender/draw/intern/draw_debug.cc @@ -0,0 +1,736 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2018 Blender Foundation. */ + +/** \file + * \ingroup draw + * + * \brief Simple API to draw debug shapes in the viewport. + */ + +#include "BKE_object.h" +#include "BLI_link_utils.h" +#include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_debug.h" + +#include "draw_debug.h" +#include "draw_debug.hh" +#include "draw_manager.h" +#include "draw_shader.h" +#include "draw_shader_shared.h" + +#include <iomanip> + +#ifdef DEBUG +# define DRAW_DEBUG +#else +/* Uncomment to forcibly enable debug draw in release mode. 
*/ +//#define DRAW_DEBUG +#endif + +namespace blender::draw { + +/* -------------------------------------------------------------------- */ +/** \name Init and state + * \{ */ + +DebugDraw::DebugDraw() +{ + constexpr int circle_resolution = 16; + for (auto axis : IndexRange(3)) { + for (auto edge : IndexRange(circle_resolution)) { + for (auto vert : IndexRange(2)) { + const float angle = (2 * M_PI) * (edge + vert) / float(circle_resolution); + float point[3] = {cosf(angle), sinf(angle), 0.0f}; + sphere_verts_.append( + float3(point[(0 + axis) % 3], point[(1 + axis) % 3], point[(2 + axis) % 3])); + } + } + } + + constexpr int point_resolution = 4; + for (auto axis : IndexRange(3)) { + for (auto edge : IndexRange(point_resolution)) { + for (auto vert : IndexRange(2)) { + const float angle = (2 * M_PI) * (edge + vert) / float(point_resolution); + float point[3] = {cosf(angle), sinf(angle), 0.0f}; + point_verts_.append( + float3(point[(0 + axis) % 3], point[(1 + axis) % 3], point[(2 + axis) % 3])); + } + } + } +}; + +void DebugDraw::init() +{ + cpu_print_buf_.command.vertex_len = 0; + cpu_print_buf_.command.vertex_first = 0; + cpu_print_buf_.command.instance_len = 1; + cpu_print_buf_.command.instance_first_array = 0; + + cpu_draw_buf_.command.vertex_len = 0; + cpu_draw_buf_.command.vertex_first = 0; + cpu_draw_buf_.command.instance_len = 1; + cpu_draw_buf_.command.instance_first_array = 0; + + gpu_print_buf_.command.vertex_len = 0; + gpu_print_buf_.command.vertex_first = 0; + gpu_print_buf_.command.instance_len = 1; + gpu_print_buf_.command.instance_first_array = 0; + gpu_print_buf_used = false; + + gpu_draw_buf_.command.vertex_len = 0; + gpu_draw_buf_.command.vertex_first = 0; + gpu_draw_buf_.command.instance_len = 1; + gpu_draw_buf_.command.instance_first_array = 0; + gpu_draw_buf_used = false; + + modelmat_reset(); +} + +void DebugDraw::modelmat_reset() +{ + model_mat_ = float4x4::identity(); +} + +void DebugDraw::modelmat_set(const float modelmat[4][4]) +{ + 
model_mat_ = modelmat; +} + +GPUStorageBuf *DebugDraw::gpu_draw_buf_get() +{ + BLI_assert(GPU_shader_storage_buffer_objects_support()); + if (!gpu_draw_buf_used) { + gpu_draw_buf_used = true; + gpu_draw_buf_.push_update(); + } + return gpu_draw_buf_; +} + +GPUStorageBuf *DebugDraw::gpu_print_buf_get() +{ + BLI_assert(GPU_shader_storage_buffer_objects_support()); + if (!gpu_print_buf_used) { + gpu_print_buf_used = true; + gpu_print_buf_.push_update(); + } + return gpu_print_buf_; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw functions + * \{ */ + +void DebugDraw::draw_line(float3 v1, float3 v2, float4 color) +{ + draw_line(v1, v2, color_pack(color)); +} + +void DebugDraw::draw_polygon(Span<float3> poly_verts, float4 color) +{ + BLI_assert(!poly_verts.is_empty()); + + uint col = color_pack(color); + float3 v0 = model_mat_ * poly_verts.last(); + for (auto vert : poly_verts) { + float3 v1 = model_mat_ * vert; + draw_line(v0, v1, col); + v0 = v1; + } +} + +void DebugDraw::draw_matrix(const float4x4 m4) +{ + float3 v0 = float3(0.0f, 0.0f, 0.0f); + float3 v1 = float3(1.0f, 0.0f, 0.0f); + float3 v2 = float3(0.0f, 1.0f, 0.0f); + float3 v3 = float3(0.0f, 0.0f, 1.0f); + + mul_project_m4_v3(m4.ptr(), v0); + mul_project_m4_v3(m4.ptr(), v1); + mul_project_m4_v3(m4.ptr(), v2); + mul_project_m4_v3(m4.ptr(), v3); + + draw_line(v0, v1, float4(1.0f, 0.0f, 0.0f, 1.0f)); + draw_line(v0, v2, float4(0.0f, 1.0f, 0.0f, 1.0f)); + draw_line(v0, v3, float4(0.0f, 0.0f, 1.0f, 1.0f)); +} + +void DebugDraw::draw_bbox(const BoundBox &bbox, const float4 color) +{ + uint col = color_pack(color); + draw_line(bbox.vec[0], bbox.vec[1], col); + draw_line(bbox.vec[1], bbox.vec[2], col); + draw_line(bbox.vec[2], bbox.vec[3], col); + draw_line(bbox.vec[3], bbox.vec[0], col); + + draw_line(bbox.vec[4], bbox.vec[5], col); + draw_line(bbox.vec[5], bbox.vec[6], col); + draw_line(bbox.vec[6], bbox.vec[7], col); + draw_line(bbox.vec[7], bbox.vec[4], 
col); + + draw_line(bbox.vec[0], bbox.vec[4], col); + draw_line(bbox.vec[1], bbox.vec[5], col); + draw_line(bbox.vec[2], bbox.vec[6], col); + draw_line(bbox.vec[3], bbox.vec[7], col); +} + +void DebugDraw::draw_matrix_as_bbox(float4x4 mat, const float4 color) +{ + BoundBox bb; + const float min[3] = {-1.0f, -1.0f, -1.0f}, max[3] = {1.0f, 1.0f, 1.0f}; + BKE_boundbox_init_from_minmax(&bb, min, max); + for (auto i : IndexRange(8)) { + mul_project_m4_v3(mat.ptr(), bb.vec[i]); + } + draw_bbox(bb, color); +} + +void DebugDraw::draw_sphere(const float3 center, float radius, const float4 color) +{ + uint col = color_pack(color); + for (auto i : IndexRange(sphere_verts_.size() / 2)) { + float3 v0 = sphere_verts_[i * 2] * radius + center; + float3 v1 = sphere_verts_[i * 2 + 1] * radius + center; + draw_line(v0, v1, col); + } +} + +void DebugDraw::draw_point(const float3 center, float radius, const float4 color) +{ + uint col = color_pack(color); + for (auto i : IndexRange(point_verts_.size() / 2)) { + float3 v0 = point_verts_[i * 2] * radius + center; + float3 v1 = point_verts_[i * 2 + 1] * radius + center; + draw_line(v0, v1, col); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Print functions + * \{ */ + +template<> void DebugDraw::print_value<uint>(const uint &value) +{ + print_value_uint(value, false, false, true); +} +template<> void DebugDraw::print_value<int>(const int &value) +{ + print_value_uint(uint(abs(value)), false, (value < 0), false); +} +template<> void DebugDraw::print_value<bool>(const bool &value) +{ + print_string(value ? 
"true " : "false"); +} +template<> void DebugDraw::print_value<float>(const float &val) +{ + std::stringstream ss; + ss << std::setw(12) << std::to_string(val); + print_string(ss.str()); +} +template<> void DebugDraw::print_value<double>(const double &val) +{ + print_value(float(val)); +} + +template<> void DebugDraw::print_value_hex<uint>(const uint &value) +{ + print_value_uint(value, true, false, false); +} +template<> void DebugDraw::print_value_hex<int>(const int &value) +{ + print_value_uint(uint(value), true, false, false); +} +template<> void DebugDraw::print_value_hex<float>(const float &value) +{ + print_value_uint(*reinterpret_cast<const uint *>(&value), true, false, false); +} +template<> void DebugDraw::print_value_hex<double>(const double &val) +{ + print_value_hex(float(val)); +} + +template<> void DebugDraw::print_value_binary<uint>(const uint &value) +{ + print_value_binary(value); +} +template<> void DebugDraw::print_value_binary<int>(const int &value) +{ + print_value_binary(uint(value)); +} +template<> void DebugDraw::print_value_binary<float>(const float &value) +{ + print_value_binary(*reinterpret_cast<const uint *>(&value)); +} +template<> void DebugDraw::print_value_binary<double>(const double &val) +{ + print_value_binary(float(val)); +} + +template<> void DebugDraw::print_value<float2>(const float2 &value) +{ + print_no_endl("float2(", value[0], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<float3>(const float3 &value) +{ + print_no_endl("float3(", value[0], ", ", value[1], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<float4>(const float4 &value) +{ + print_no_endl("float4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +template<> void DebugDraw::print_value<int2>(const int2 &value) +{ + print_no_endl("int2(", value[0], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<int3>(const int3 &value) +{ + print_no_endl("int3(", value[0], ", ", value[1], ", ", value[1], 
")"); +} +template<> void DebugDraw::print_value<int4>(const int4 &value) +{ + print_no_endl("int4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +template<> void DebugDraw::print_value<uint2>(const uint2 &value) +{ + print_no_endl("uint2(", value[0], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<uint3>(const uint3 &value) +{ + print_no_endl("uint3(", value[0], ", ", value[1], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<uint4>(const uint4 &value) +{ + print_no_endl("uint4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Internals + * + * IMPORTANT: All of these are copied from the shader libs (common_debug_draw_lib.glsl & + * common_debug_print_lib.glsl). They need to be kept in sync to write the same data. + * \{ */ + +void DebugDraw::draw_line(float3 v1, float3 v2, uint color) +{ + DebugDrawBuf &buf = cpu_draw_buf_; + uint index = buf.command.vertex_len; + if (index + 2 < DRW_DEBUG_DRAW_VERT_MAX) { + buf.verts[index + 0] = vert_pack(model_mat_ * v1, color); + buf.verts[index + 1] = vert_pack(model_mat_ * v2, color); + buf.command.vertex_len += 2; + } +} + +/* Keep in sync with drw_debug_color_pack(). 
*/ +uint DebugDraw::color_pack(float4 color) +{ + color = math::clamp(color, 0.0f, 1.0f); + uint result = 0; + result |= uint(color.x * 255.0f) << 0u; + result |= uint(color.y * 255.0f) << 8u; + result |= uint(color.z * 255.0f) << 16u; + result |= uint(color.w * 255.0f) << 24u; + return result; +} + +DRWDebugVert DebugDraw::vert_pack(float3 pos, uint color) +{ + DRWDebugVert vert; + vert.pos0 = *reinterpret_cast<uint32_t *>(&pos.x); + vert.pos1 = *reinterpret_cast<uint32_t *>(&pos.y); + vert.pos2 = *reinterpret_cast<uint32_t *>(&pos.z); + vert.color = color; + return vert; +} + +void DebugDraw::print_newline() +{ + print_col_ = 0u; + print_row_ = ++cpu_print_buf_.command.instance_first_array; +} + +void DebugDraw::print_string_start(uint len) +{ + /* Break before word. */ + if (print_col_ + len > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + print_newline(); + } +} + +/* Copied from gpu_shader_dependency. */ +void DebugDraw::print_string(std::string str) +{ + size_t len_before_pad = str.length(); + /* Pad string to uint size to avoid out of bound reads. */ + while (str.length() % 4 != 0) { + str += " "; + } + + print_string_start(len_before_pad); + for (size_t i = 0; i < len_before_pad; i += 4) { + union { + uint8_t chars[4]; + uint32_t word; + }; + + chars[0] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 0); + chars[1] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 1); + chars[2] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 2); + chars[3] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 3); + + if (i + 4 > len_before_pad) { + chars[len_before_pad - i] = '\0'; + } + print_char4(word); + } +} + +/* Keep in sync with shader. */ +void DebugDraw::print_char4(uint data) +{ + /* Convert into char stream. */ + for (; data != 0u; data >>= 8u) { + uint char1 = data & 0xFFu; + /* Check for null terminator. */ + if (char1 == 0x00) { + break; + } + /* NOTE: Do not skip the header manually like in GPU. 
*/ + uint cursor = cpu_print_buf_.command.vertex_len++; + if (cursor < DRW_DEBUG_PRINT_MAX) { + /* For future usage. (i.e: Color) */ + uint flags = 0u; + uint col = print_col_++; + uint print_header = (flags << 24u) | (print_row_ << 16u) | (col << 8u); + cpu_print_buf_.char_array[cursor] = print_header | char1; + /* Break word. */ + if (print_col_ > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + print_newline(); + } + } + } +} + +void DebugDraw::print_append_char(uint char1, uint &char4) +{ + char4 = (char4 << 8u) | char1; +} + +void DebugDraw::print_append_digit(uint digit, uint &char4) +{ + const uint char_A = 0x41u; + const uint char_0 = 0x30u; + bool is_hexadecimal = digit > 9u; + char4 = (char4 << 8u) | (is_hexadecimal ? (char_A + digit - 10u) : (char_0 + digit)); +} + +void DebugDraw::print_append_space(uint &char4) +{ + char4 = (char4 << 8u) | 0x20u; +} + +void DebugDraw::print_value_binary(uint value) +{ + print_string("0b"); + print_string_start(10u * 4u); + uint digits[10] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}; + uint digit = 0u; + for (uint i = 0u; i < 32u; i++) { + print_append_digit(((value >> i) & 1u), digits[digit / 4u]); + digit++; + if ((i % 4u) == 3u) { + print_append_space(digits[digit / 4u]); + digit++; + } + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 9; j >= 0; j--) { + print_char4(digits[j]); + } +} + +void DebugDraw::print_value_uint(uint value, + const bool hex, + bool is_negative, + const bool is_unsigned) +{ + print_string_start(3u * 4u); + const uint blank_value = hex ? 0x30303030u : 0x20202020u; + const uint prefix = hex ? 0x78302020u : 0x20202020u; + uint digits[3] = {blank_value, blank_value, prefix}; + const uint base = hex ? 16u : 10u; + uint digit = 0u; + /* Add `u` suffix. */ + if (is_unsigned) { + print_append_char('u', digits[digit / 4u]); + digit++; + } + /* Number's digits. 
*/ + for (; value != 0u || digit == uint(is_unsigned); value /= base) { + print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Add negative sign. */ + if (is_negative) { + print_append_char('-', digits[digit / 4u]); + digit++; + } + /* Need to pad to uint alignment because we are issuing chars in "reverse". */ + for (uint i = digit % 4u; i < 4u && i > 0u; i++) { + print_append_space(digits[digit / 4u]); + digit++; + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 2; j >= 0; j--) { + print_char4(digits[j]); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Display + * \{ */ + +void DebugDraw::display_lines() +{ + if (cpu_draw_buf_.command.vertex_len == 0 && gpu_draw_buf_used == false) { + return; + } + GPU_debug_group_begin("Lines"); + cpu_draw_buf_.push_update(); + + float4x4 persmat; + const DRWView *view = DRW_view_get_active(); + DRW_view_persmat_get(view, persmat.ptr(), false); + + drw_state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS); + + GPUBatch *batch = drw_cache_procedural_lines_get(); + GPUShader *shader = DRW_shader_debug_draw_display_get(); + GPU_batch_set_shader(batch, shader); + int slot = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_VERTS); + GPU_shader_uniform_mat4(shader, "persmat", persmat.ptr()); + + if (gpu_draw_buf_used) { + GPU_debug_group_begin("GPU"); + GPU_storagebuf_bind(gpu_draw_buf_, slot); + GPU_batch_draw_indirect(batch, gpu_draw_buf_, 0); + GPU_storagebuf_unbind(gpu_draw_buf_); + GPU_debug_group_end(); + } + + GPU_debug_group_begin("CPU"); + GPU_storagebuf_bind(cpu_draw_buf_, slot); + GPU_batch_draw_indirect(batch, cpu_draw_buf_, 0); + GPU_storagebuf_unbind(cpu_draw_buf_); + GPU_debug_group_end(); + + GPU_debug_group_end(); +} + +void DebugDraw::display_prints() +{ + if (cpu_print_buf_.command.vertex_len == 0 && gpu_print_buf_used == false) { + return; + } + 
GPU_debug_group_begin("Prints"); + cpu_print_buf_.push_update(); + + drw_state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_PROGRAM_POINT_SIZE); + + GPUBatch *batch = drw_cache_procedural_points_get(); + GPUShader *shader = DRW_shader_debug_print_display_get(); + GPU_batch_set_shader(batch, shader); + int slot = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_PRINT); + + if (gpu_print_buf_used) { + GPU_debug_group_begin("GPU"); + GPU_storagebuf_bind(gpu_print_buf_, slot); + GPU_batch_draw_indirect(batch, gpu_print_buf_, 0); + GPU_storagebuf_unbind(gpu_print_buf_); + GPU_debug_group_end(); + } + + GPU_debug_group_begin("CPU"); + GPU_storagebuf_bind(cpu_print_buf_, slot); + GPU_batch_draw_indirect(batch, cpu_print_buf_, 0); + GPU_storagebuf_unbind(cpu_print_buf_); + GPU_debug_group_end(); + + GPU_debug_group_end(); +} + +void DebugDraw::display_to_view() +{ + GPU_debug_group_begin("DebugDraw"); + + display_lines(); + /* Print 3D shapes before text to avoid overlaps. */ + display_prints(); + /* Init again so we don't draw the same thing twice. */ + init(); + + GPU_debug_group_end(); +} + +} // namespace blender::draw + +blender::draw::DebugDraw *DRW_debug_get() +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return nullptr; + } + return reinterpret_cast<blender::draw::DebugDraw *>(DST.debug); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name C-API private + * \{ */ + +void drw_debug_draw() +{ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support() || DST.debug == nullptr) { + return; + } + /* TODO(@fclem): Convenience for now. Will have to move to #DRWManager. */ + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->display_to_view(); +#endif +} + +/** + * NOTE: Init is once per draw manager cycle. + */ +void drw_debug_init() +{ + /* Module should not be used in release builds. */ + /* TODO(@fclem): Hide the functions declarations without using `ifdefs` everywhere. 
*/ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + /* TODO(@fclem): Convenience for now. Will have to move to #DRWManager. */ + if (DST.debug == nullptr) { + DST.debug = reinterpret_cast<DRWDebugModule *>(new blender::draw::DebugDraw()); + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->init(); +#endif +} + +void drw_debug_module_free(DRWDebugModule *module) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + if (module != nullptr) { + delete reinterpret_cast<blender::draw::DebugDraw *>(module); + } +} + +GPUStorageBuf *drw_debug_gpu_draw_buf_get() +{ + return reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->gpu_draw_buf_get(); +} + +GPUStorageBuf *drw_debug_gpu_print_buf_get() +{ + return reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->gpu_print_buf_get(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name C-API public + * \{ */ + +void DRW_debug_modelmat_reset() +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->modelmat_reset(); +} + +void DRW_debug_modelmat(const float modelmat[4][4]) +{ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->modelmat_set(modelmat); +#else + UNUSED_VARS(modelmat); +#endif +} + +void DRW_debug_line_v3v3(const float v1[3], const float v2[3], const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_line(v1, v2, color); +} + +void DRW_debug_polygon_v3(const float (*v)[3], int vert_len, const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_polygon( + blender::Span<float3>((float3 *)v, vert_len), color); +} + +void 
DRW_debug_m4(const float m[4][4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_matrix(m); +} + +void DRW_debug_m4_as_bbox(const float m[4][4], bool invert, const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + blender::float4x4 m4 = m; + if (invert) { + m4 = m4.inverted(); + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_matrix_as_bbox(m4, color); +} + +void DRW_debug_bbox(const BoundBox *bbox, const float color[4]) +{ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_bbox(*bbox, color); +#else + UNUSED_VARS(bbox, color); +#endif +} + +void DRW_debug_sphere(const float center[3], float radius, const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_sphere(center, radius, color); +} + +/** \} */ diff --git a/source/blender/draw/intern/draw_debug.h b/source/blender/draw/intern/draw_debug.h index 333d734edb9..9a56a12242e 100644 --- a/source/blender/draw/intern/draw_debug.h +++ b/source/blender/draw/intern/draw_debug.h @@ -3,21 +3,38 @@ /** \file * \ingroup draw + * + * \brief Simple API to draw debug shapes in the viewport. + * IMPORTANT: This is the legacy API for C. Use draw_debug.hh instead in new C++ code. */ #pragma once +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct DRWDebugModule DRWDebugModule; + struct BoundBox; void DRW_debug_modelmat_reset(void); void DRW_debug_modelmat(const float modelmat[4][4]); +/** + * IMPORTANT: For now there is a limit of DRW_DEBUG_DRAW_VERT_MAX that can be drawn + * using all the draw functions. 
+ */ void DRW_debug_line_v3v3(const float v1[3], const float v2[3], const float color[4]); void DRW_debug_polygon_v3(const float (*v)[3], int vert_len, const float color[4]); /** * \note g_modelmat is still applied on top. */ void DRW_debug_m4(const float m[4][4]); -void DRW_debug_m4_as_bbox(const float m[4][4], const float color[4], bool invert); +void DRW_debug_m4_as_bbox(const float m[4][4], bool invert, const float color[4]); void DRW_debug_bbox(const BoundBox *bbox, const float color[4]); void DRW_debug_sphere(const float center[3], float radius, const float color[4]); + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/draw/intern/draw_debug.hh b/source/blender/draw/intern/draw_debug.hh new file mode 100644 index 00000000000..c83936bf1af --- /dev/null +++ b/source/blender/draw/intern/draw_debug.hh @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + * + * \brief Simple API to draw debug shapes and log in the viewport. + * + * Both CPU and GPU implementation are supported and symmetrical (meaning GPU shader can use it + * too, see common_debug_print/draw_lib.glsl). + * + * NOTE: CPU logging will overlap GPU logging on screen as it is drawn after. + */ + +#pragma once + +#include "BLI_math_vec_types.hh" +#include "BLI_string_ref.hh" +#include "BLI_vector.hh" +#include "DNA_object_types.h" +#include "DRW_gpu_wrapper.hh" + +namespace blender::draw { + +/* Shortcuts to avoid boilerplate code and match shader API. */ +#define drw_debug_line(...) DRW_debug_get()->draw_line(__VA_ARGS__) +#define drw_debug_polygon(...) DRW_debug_get()->draw_polygon(__VA_ARGS__) +#define drw_debug_bbox(...) DRW_debug_get()->draw_bbox(__VA_ARGS__) +#define drw_debug_sphere(...) DRW_debug_get()->draw_sphere(__VA_ARGS__) +#define drw_debug_point(...) DRW_debug_get()->draw_point(__VA_ARGS__) +#define drw_debug_matrix(...) 
DRW_debug_get()->draw_matrix(__VA_ARGS__) +#define drw_debug_matrix_as_bbox(...) DRW_debug_get()->draw_matrix_as_bbox(__VA_ARGS__) +#define drw_print(...) DRW_debug_get()->print(__VA_ARGS__) +#define drw_print_hex(...) DRW_debug_get()->print_hex(__VA_ARGS__) +#define drw_print_binary(...) DRW_debug_get()->print_binary(__VA_ARGS__) +#define drw_print_no_endl(...) DRW_debug_get()->print_no_endl(__VA_ARGS__) + +/* Will log variable along with its name, like the shader version of print(). */ +#define drw_print_id(v_) DRW_debug_get()->print(#v_, "= ", v_) +#define drw_print_id_no_endl(v_) DRW_debug_get()->print_no_endl(#v_, "= ", v_) + +class DebugDraw { + private: + using DebugDrawBuf = StorageBuffer<DRWDebugDrawBuffer>; + using DebugPrintBuf = StorageBuffer<DRWDebugPrintBuffer>; + + /** Data buffers containing all verts or chars to draw. */ + DebugDrawBuf cpu_draw_buf_ = {"DebugDrawBuf-CPU"}; + DebugDrawBuf gpu_draw_buf_ = {"DebugDrawBuf-GPU"}; + DebugPrintBuf cpu_print_buf_ = {"DebugPrintBuf-CPU"}; + DebugPrintBuf gpu_print_buf_ = {"DebugPrintBuf-GPU"}; + /** True if the gpu buffer have been requested and may contain data to draw. */ + bool gpu_print_buf_used = false; + bool gpu_draw_buf_used = false; + /** Matrix applied to all points before drawing. Could be a stack if needed. */ + float4x4 model_mat_; + /** Precomputed shapes verts. */ + Vector<float3> sphere_verts_; + Vector<float3> point_verts_; + /** Cursor position for print functionality. */ + uint print_col_ = 0; + uint print_row_ = 0; + + public: + DebugDraw(); + ~DebugDraw(){}; + + /** + * Resets all buffers and reset model matrix state. + * Not to be called by user. + */ + void init(); + + /** + * Resets model matrix state to identity. + */ + void modelmat_reset(); + /** + * Sets model matrix transform to apply to any vertex passed to drawing functions. + */ + void modelmat_set(const float modelmat[4][4]); + + /** + * Drawing functions that will draw wire-frames with the given color. 
+ */ + void draw_line(float3 v1, float3 v2, float4 color = {1, 0, 0, 1}); + void draw_polygon(Span<float3> poly_verts, float4 color = {1, 0, 0, 1}); + void draw_bbox(const BoundBox &bbox, const float4 color = {1, 0, 0, 1}); + void draw_sphere(const float3 center, float radius, const float4 color = {1, 0, 0, 1}); + void draw_point(const float3 center, float radius = 0.01f, const float4 color = {1, 0, 0, 1}); + /** + * Draw a matrix transformation as 3 colored axes. + */ + void draw_matrix(const float4x4 m4); + /** + * Draw a matrix as a 2 units length bounding box, centered on origin. + */ + void draw_matrix_as_bbox(float4x4 mat, const float4 color = {1, 0, 0, 1}); + + /** + * Will draw all debug shapes and text cached up until now to the current view / frame-buffer. + * Draw buffers will be emptied and ready for new debug data. + */ + void display_to_view(); + + /** + * Log variable or strings inside the viewport. + * Using a unique non string argument will print the variable name with it. + * Concatenate by using multiple arguments. i.e: `print("Looped ", n, "times.")`. + */ + template<typename... Ts> void print(StringRefNull str, Ts... args) + { + print_no_endl(str, args...); + print_newline(); + } + template<typename T> void print(const T &value) + { + print_value(value); + print_newline(); + } + template<typename T> void print_hex(const T &value) + { + print_value_hex(value); + print_newline(); + } + template<typename T> void print_binary(const T &value) + { + print_value_binary(value); + print_newline(); + } + + /** + * Same as `print()` but does not finish the line. + */ + void print_no_endl(std::string arg) + { + print_string(arg); + } + void print_no_endl(StringRef arg) + { + print_string(arg); + } + void print_no_endl(StringRefNull arg) + { + print_string(arg); + } + void print_no_endl(char const *arg) + { + print_string(StringRefNull(arg)); + } + template<typename T> void print_no_endl(T arg) + { + print_value(arg); + } + template<typename T, typename... 
Ts> void print_no_endl(T arg, Ts... args) + { + print_no_endl(arg); + print_no_endl(args...); + } + + /** + * Not to be called by user. Should become private. + */ + GPUStorageBuf *gpu_draw_buf_get(); + GPUStorageBuf *gpu_print_buf_get(); + + private: + uint color_pack(float4 color); + DRWDebugVert vert_pack(float3 pos, uint color); + + void draw_line(float3 v1, float3 v2, uint color); + + void print_newline(); + void print_string_start(uint len); + void print_string(std::string str); + void print_char4(uint data); + void print_append_char(uint char1, uint &char4); + void print_append_digit(uint digit, uint &char4); + void print_append_space(uint &char4); + void print_value_binary(uint value); + void print_value_uint(uint value, const bool hex, bool is_negative, const bool is_unsigned); + + template<typename T> void print_value(const T &value); + template<typename T> void print_value_hex(const T &value); + template<typename T> void print_value_binary(const T &value); + + void display_lines(); + void display_prints(); +}; + +} // namespace blender::draw + +/** + * Ease of use function to get the debug module. + * TODO(fclem): Should be removed once DRWManager is no longer global. + * IMPORTANT: Can return nullptr if storage buffer is not supported. + */ +blender::draw::DebugDraw *DRW_debug_get(); diff --git a/source/blender/draw/intern/draw_defines.h b/source/blender/draw/intern/draw_defines.h new file mode 100644 index 00000000000..3df7e47cffb --- /dev/null +++ b/source/blender/draw/intern/draw_defines.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. + */ + +/** \file + * \ingroup draw + * + * List of defines that are shared with the GPUShaderCreateInfos. We do this to avoid + * dragging larger headers into the createInfo pipeline which would cause problems. 
+ */ + +#pragma once + +#define DRW_VIEW_UBO_SLOT 0 + +#define DRW_RESOURCE_ID_SLOT 11 +#define DRW_OBJ_MAT_SLOT 10 +#define DRW_OBJ_INFOS_SLOT 9 +#define DRW_OBJ_ATTR_SLOT 8 + +#define DRW_DEBUG_PRINT_SLOT 15 +#define DRW_DEBUG_DRAW_SLOT 14 + +#define DRW_COMMAND_GROUP_SIZE 64 +#define DRW_FINALIZE_GROUP_SIZE 64 +/* Must be multiple of 32. Set to 32 for shader simplicity. */ +#define DRW_VISIBILITY_GROUP_SIZE 32 diff --git a/source/blender/draw/intern/draw_hair.cc b/source/blender/draw/intern/draw_hair.cc index 0a3c16e0d71..ceee1c7cb48 100644 --- a/source/blender/draw/intern/draw_hair.cc +++ b/source/blender/draw/intern/draw_hair.cc @@ -22,33 +22,29 @@ #include "GPU_batch.h" #include "GPU_capabilities.h" #include "GPU_compute.h" +#include "GPU_context.h" #include "GPU_material.h" #include "GPU_shader.h" #include "GPU_texture.h" #include "GPU_vertex_buffer.h" +#include "DRW_gpu_wrapper.hh" + #include "draw_hair_private.h" #include "draw_shader.h" - -#ifndef __APPLE__ -# define USE_TRANSFORM_FEEDBACK -# define USE_COMPUTE_SHADERS -#endif +#include "draw_shader_shared.h" BLI_INLINE eParticleRefineShaderType drw_hair_shader_type_get() { -#ifdef USE_COMPUTE_SHADERS if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) { return PART_REFINE_SHADER_COMPUTE; } -#endif -#ifdef USE_TRANSFORM_FEEDBACK - return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; -#endif + if (GPU_transform_feedback_support()) { + return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; + } return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND; } -#ifndef USE_TRANSFORM_FEEDBACK struct ParticleRefineCall { struct ParticleRefineCall *next; GPUVertBuf *vbo; @@ -60,11 +56,11 @@ static ParticleRefineCall *g_tf_calls = nullptr; static int g_tf_id_offset; static int g_tf_target_width; static int g_tf_target_height; -#endif static GPUVertBuf *g_dummy_vbo = nullptr; static GPUTexture *g_dummy_texture = nullptr; -static DRWPass *g_tf_pass; /* XXX can be a problem with multiple DRWManager in the 
future */ +static DRWPass *g_tf_pass; /* XXX can be a problem with multiple #DRWManager in the future */ +static blender::draw::UniformBuffer<CurvesInfos> *g_dummy_curves_info = nullptr; static GPUShader *hair_refine_shader_get(ParticleRefineShader refinement) { @@ -73,26 +69,35 @@ static GPUShader *hair_refine_shader_get(ParticleRefineShader refinement) void DRW_hair_init(void) { -#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS) - g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW); -#else - g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR); -#endif + if (GPU_transform_feedback_support() || GPU_compute_shader_support()) { + g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW); + } + else { + g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR); + } if (g_dummy_vbo == nullptr) { /* initialize vertex format */ GPUVertFormat format = {0}; uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - g_dummy_vbo = GPU_vertbuf_create_with_format(&format); + g_dummy_vbo = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f}; GPU_vertbuf_data_alloc(g_dummy_vbo, 1); GPU_vertbuf_attr_fill(g_dummy_vbo, dummy_id, vert); - /* Create vbo immediately to bind to texture buffer. */ + /* Create VBO immediately to bind to texture buffer. 
*/ GPU_vertbuf_use(g_dummy_vbo); g_dummy_texture = GPU_texture_create_from_vertbuf("hair_dummy_attr", g_dummy_vbo); + + g_dummy_curves_info = MEM_new<blender::draw::UniformBuffer<CurvesInfos>>( + "g_dummy_curves_info"); + memset(g_dummy_curves_info->is_point_attribute, + 0, + sizeof(g_dummy_curves_info->is_point_attribute)); + g_dummy_curves_info->push_update(); } } @@ -135,22 +140,25 @@ static void drw_hair_particle_cache_update_transform_feedback(ParticleHairCache if (final_points_len > 0) { GPUShader *tf_shader = hair_refine_shader_get(PART_REFINE_CATMULL_ROM); -#ifdef USE_TRANSFORM_FEEDBACK - DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create( - tf_shader, g_tf_pass, cache->final[subdiv].proc_buf); -#else - DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); - - ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__); - pr_call->next = g_tf_calls; - pr_call->vbo = cache->final[subdiv].proc_buf; - pr_call->shgrp = tf_shgrp; - pr_call->vert_len = final_points_len; - g_tf_calls = pr_call; - DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); - DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); - DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); -#endif + DRWShadingGroup *tf_shgrp = nullptr; + if (GPU_transform_feedback_support()) { + tf_shgrp = DRW_shgroup_transform_feedback_create( + tf_shader, g_tf_pass, cache->final[subdiv].proc_buf); + } + else { + tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); + + ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__); + pr_call->next = g_tf_calls; + pr_call->vbo = cache->final[subdiv].proc_buf; + pr_call->shgrp = tf_shgrp; + pr_call->vert_len = final_points_len; + g_tf_calls = pr_call; + DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); + DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); + 
DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); + } + BLI_assert(tf_shgrp != nullptr); drw_hair_particle_cache_shgrp_attach_resources(tf_shgrp, cache, subdiv); DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len); @@ -239,7 +247,7 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, DRWShadingGroup *shgrp = DRW_shgroup_create_sub(shgrp_parent); - /* TODO: optimize this. Only bind the ones GPUMaterial needs. */ + /* TODO: optimize this. Only bind the ones #GPUMaterial needs. */ for (int i = 0; i < hair_cache->num_uv_layers; i++) { for (int n = 0; n < MAX_LAYER_NAME_CT && hair_cache->uv_layer_names[i][n][0] != '\0'; n++) { DRW_shgroup_uniform_texture(shgrp, hair_cache->uv_layer_names[i][n], hair_cache->uv_tex[i]); @@ -276,6 +284,8 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, if (hair_cache->length_tex) { DRW_shgroup_uniform_texture(shgrp, "l", hair_cache->length_tex); } + + DRW_shgroup_uniform_block(shgrp, "drw_curves", *g_dummy_curves_info); DRW_shgroup_uniform_int(shgrp, "hairStrandsRes", &hair_cache->final[subdiv].strands_res, 1); DRW_shgroup_uniform_int_copy(shgrp, "hairThicknessRes", thickness_res); DRW_shgroup_uniform_float_copy(shgrp, "hairRadShape", hair_rad_shape); @@ -293,85 +303,122 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, void DRW_hair_update() { -#ifndef USE_TRANSFORM_FEEDBACK - /** - * Workaround to transform feedback not working on mac. - * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). - * - * So instead of using transform feedback we render to a texture, - * read back the result to system memory and re-upload as VBO data. - * It is really not ideal performance wise, but it is the simplest - * and the most local workaround that still uses the power of the GPU. - */ - - if (g_tf_calls == nullptr) { - return; - } + if (!GPU_transform_feedback_support()) { + /** + * Workaround to transform feedback not working on mac. 
+ * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). + * + * So instead of using transform feedback we render to a texture, + * read back the result to system memory and re-upload as VBO data. + * It is really not ideal performance wise, but it is the simplest + * and the most local workaround that still uses the power of the GPU. + */ + + if (g_tf_calls == nullptr) { + return; + } - /* Search ideal buffer size. */ - uint max_size = 0; - for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { - max_size = max_ii(max_size, pr_call->vert_len); - } + /* Search ideal buffer size. */ + uint max_size = 0; + for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { + max_size = max_ii(max_size, pr_call->vert_len); + } + + /* Create target Texture / Frame-buffer */ + /* Don't use max size as it can be really heavy and fail. + * Do chunks of maximum 2048 * 2048 hair points. */ + int width = 2048; + int height = min_ii(width, 1 + max_size / width); + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPUFrameBuffer *fb = nullptr; + GPU_framebuffer_ensure_config(&fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(tex), + }); + + float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"); + + GPU_framebuffer_bind(fb); + while (g_tf_calls != nullptr) { + ParticleRefineCall *pr_call = g_tf_calls; + g_tf_calls = g_tf_calls->next; + + g_tf_id_offset = 0; + while (pr_call->vert_len > 0) { + int max_read_px_len = min_ii(width * height, pr_call->vert_len); + + DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); + /* Read back result to main memory. */ + GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); + /* Upload back to VBO. 
*/ + GPU_vertbuf_use(pr_call->vbo); + GPU_vertbuf_update_sub(pr_call->vbo, + sizeof(float[4]) * g_tf_id_offset, + sizeof(float[4]) * max_read_px_len, + data); + + g_tf_id_offset += max_read_px_len; + pr_call->vert_len -= max_read_px_len; + } - /* Create target Texture / Frame-buffer */ - /* Don't use max size as it can be really heavy and fail. - * Do chunks of maximum 2048 * 2048 hair points. */ - int width = 2048; - int height = min_ii(width, 1 + max_size / width); - GPUTexture *tex = DRW_texture_pool_query_2d( - width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update); - g_tf_target_height = height; - g_tf_target_width = width; - - GPUFrameBuffer *fb = nullptr; - GPU_framebuffer_ensure_config(&fb, - { - GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(tex), - }); - - float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"); - - GPU_framebuffer_bind(fb); - while (g_tf_calls != nullptr) { - ParticleRefineCall *pr_call = g_tf_calls; - g_tf_calls = g_tf_calls->next; - - g_tf_id_offset = 0; - while (pr_call->vert_len > 0) { - int max_read_px_len = min_ii(width * height, pr_call->vert_len); - - DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); - /* Read back result to main memory. */ - GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); - /* Upload back to VBO. */ - GPU_vertbuf_use(pr_call->vbo); - GPU_vertbuf_update_sub(pr_call->vbo, - sizeof(float[4]) * g_tf_id_offset, - sizeof(float[4]) * max_read_px_len, - data); - - g_tf_id_offset += max_read_px_len; - pr_call->vert_len -= max_read_px_len; + MEM_freeN(pr_call); } - MEM_freeN(pr_call); + MEM_freeN(data); + GPU_framebuffer_free(fb); } + else { + /* NOTE(Metal): If compute is not supported, bind a temporary frame-buffer to avoid + * side-effects from rendering in the active buffer. + * We also need to guarantee that a frame-buffer is active to perform any rendering work, + * even if there is no output. 
*/ + GPUFrameBuffer *temp_fb = nullptr; + GPUFrameBuffer *prev_fb = nullptr; + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) { + if (!GPU_compute_shader_support()) { + prev_fb = GPU_framebuffer_active_get(); + char errorOut[256]; + /* if the frame-buffer is invalid we need a dummy frame-buffer to be bound. */ + if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) { + int width = 64; + int height = 64; + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)}); + + GPU_framebuffer_bind(temp_fb); + } + } + } + + /* Just render the pass when using compute shaders or transform feedback. */ + DRW_draw_pass(g_tf_pass); + if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } - MEM_freeN(data); - GPU_framebuffer_free(fb); -#else - /* Just render the pass when using compute shaders or transform feedback. */ - DRW_draw_pass(g_tf_pass); - if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { - GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + /* Release temporary frame-buffer. 
*/ + if (temp_fb != nullptr) { + GPU_framebuffer_free(temp_fb); + } + /* Rebind existing frame-buffer */ + if (prev_fb != nullptr) { + GPU_framebuffer_bind(prev_fb); + } } -#endif } void DRW_hair_free(void) { GPU_VERTBUF_DISCARD_SAFE(g_dummy_vbo); DRW_TEXTURE_FREE_SAFE(g_dummy_texture); + MEM_delete(g_dummy_curves_info); } diff --git a/source/blender/draw/intern/draw_hair_private.h b/source/blender/draw/intern/draw_hair_private.h index 5d84c8863f2..c7e9e1e22de 100644 --- a/source/blender/draw/intern/draw_hair_private.h +++ b/source/blender/draw/intern/draw_hair_private.h @@ -61,9 +61,9 @@ typedef struct ParticleHairCache { GPUTexture *uv_tex[MAX_MTFACE]; char uv_layer_names[MAX_MTFACE][MAX_LAYER_NAME_CT][MAX_LAYER_NAME_LEN]; - GPUVertBuf *proc_col_buf[MAX_MCOL]; - GPUTexture *col_tex[MAX_MCOL]; - char col_layer_names[MAX_MCOL][MAX_LAYER_NAME_CT][MAX_LAYER_NAME_LEN]; + GPUVertBuf **proc_col_buf; + GPUTexture **col_tex; + char (*col_layer_names)[MAX_LAYER_NAME_CT][MAX_LAYER_NAME_LEN]; int num_uv_layers; int num_col_layers; diff --git a/source/blender/draw/intern/draw_handle.hh b/source/blender/draw/intern/draw_handle.hh new file mode 100644 index 00000000000..5f96bfa5dcd --- /dev/null +++ b/source/blender/draw/intern/draw_handle.hh @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * A unique identifier for each object component. + * It is used to access each component data such as matrices and object attributes. + * It is valid only for the current draw, it is not persistent. + * + * The most significant bit is used to encode if the object needs to invert the front face winding + * because of its object matrix handedness. This is handy because this means sorting inside + * #DrawGroup command will put all inverted commands last. + * + * Default value of 0 points toward an non-cull-able object with unit bounding box centered at + * the origin. 
+ */ + +#include "draw_shader_shared.h" + +struct Object; +struct DupliObject; + +namespace blender::draw { + +struct ResourceHandle { + uint raw; + + ResourceHandle() = default; + ResourceHandle(uint raw_) : raw(raw_){}; + ResourceHandle(uint index, bool inverted_handedness) + { + raw = index; + SET_FLAG_FROM_TEST(raw, inverted_handedness, 0x80000000u); + } + + bool has_inverted_handedness() const + { + return (raw & 0x80000000u) != 0; + } + + uint resource_index() const + { + return (raw & 0x7FFFFFFFu); + } +}; + +/* TODO(fclem): Move to somewhere more appropriated after cleaning up the header dependencies. */ +struct ObjectRef { + Object *object; + /** Dupli object that corresponds to the current object. */ + DupliObject *dupli_object; + /** Object that created the dupli-list the current object is part of. */ + Object *dupli_parent; +}; + +}; // namespace blender::draw diff --git a/source/blender/draw/intern/draw_instance_data.c b/source/blender/draw/intern/draw_instance_data.c index 0e4e67f3320..ac2aea4524d 100644 --- a/source/blender/draw/intern/draw_instance_data.c +++ b/source/blender/draw/intern/draw_instance_data.c @@ -27,6 +27,7 @@ #include "BKE_duplilist.h" #include "RNA_access.h" +#include "RNA_path.h" #include "BLI_bitmap.h" #include "BLI_memblock.h" @@ -563,7 +564,8 @@ typedef struct DRWUniformAttrBuf { struct DRWUniformAttrBuf *next_empty; } DRWUniformAttrBuf; -static DRWUniformAttrBuf *drw_uniform_attrs_pool_ensure(GHash *table, GPUUniformAttrList *key) +static DRWUniformAttrBuf *drw_uniform_attrs_pool_ensure(GHash *table, + const GPUUniformAttrList *key) { void **pkey, **pval; @@ -641,23 +643,16 @@ static void drw_uniform_attribute_lookup(GPUUniformAttr *attr, { copy_v4_fl(r_data, 0); - char idprop_name[(sizeof(attr->name) * 2) + 4]; - { - char attr_name_esc[sizeof(attr->name) * 2]; - BLI_str_escape(attr_name_esc, attr->name, sizeof(attr_name_esc)); - SNPRINTF(idprop_name, "[\"%s\"]", attr_name_esc); - } - /* If requesting instance data, check the 
parent particle system and object. */ if (attr->use_dupli) { if (dupli_source && dupli_source->particle_system) { ParticleSettings *settings = dupli_source->particle_system->part; - if (drw_uniform_property_lookup((ID *)settings, idprop_name, r_data) || + if (drw_uniform_property_lookup((ID *)settings, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)settings, attr->name, r_data)) { return; } } - if (drw_uniform_property_lookup((ID *)dupli_parent, idprop_name, r_data) || + if (drw_uniform_property_lookup((ID *)dupli_parent, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)dupli_parent, attr->name, r_data)) { return; } @@ -665,9 +660,9 @@ static void drw_uniform_attribute_lookup(GPUUniformAttr *attr, /* Check the object and mesh. */ if (ob) { - if (drw_uniform_property_lookup((ID *)ob, idprop_name, r_data) || + if (drw_uniform_property_lookup((ID *)ob, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)ob, attr->name, r_data) || - drw_uniform_property_lookup((ID *)ob->data, idprop_name, r_data) || + drw_uniform_property_lookup((ID *)ob->data, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)ob->data, attr->name, r_data)) { return; } @@ -675,7 +670,7 @@ static void drw_uniform_attribute_lookup(GPUUniformAttr *attr, } void drw_uniform_attrs_pool_update(GHash *table, - GPUUniformAttrList *key, + const GPUUniformAttrList *key, DRWResourceHandle *handle, Object *ob, Object *dupli_parent, @@ -696,7 +691,8 @@ void drw_uniform_attrs_pool_update(GHash *table, } } -DRWSparseUniformBuf *DRW_uniform_attrs_pool_find_ubo(GHash *table, struct GPUUniformAttrList *key) +DRWSparseUniformBuf *DRW_uniform_attrs_pool_find_ubo(GHash *table, + const struct GPUUniformAttrList *key) { DRWUniformAttrBuf *buffer = BLI_ghash_lookup(table, key); return buffer ? 
&buffer->ubos : NULL; diff --git a/source/blender/draw/intern/draw_instance_data.h b/source/blender/draw/intern/draw_instance_data.h index 4b5cf63bb3b..9053544d98a 100644 --- a/source/blender/draw/intern/draw_instance_data.h +++ b/source/blender/draw/intern/draw_instance_data.h @@ -106,4 +106,4 @@ struct GHash *DRW_uniform_attrs_pool_new(void); void DRW_uniform_attrs_pool_flush_all(struct GHash *table); void DRW_uniform_attrs_pool_clear_all(struct GHash *table); struct DRWSparseUniformBuf *DRW_uniform_attrs_pool_find_ubo(struct GHash *table, - struct GPUUniformAttrList *key); + const struct GPUUniformAttrList *key); diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index bc9d0a3d02a..9761aa8c789 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -43,6 +43,7 @@ #include "DNA_camera_types.h" #include "DNA_mesh_types.h" #include "DNA_meshdata_types.h" +#include "DNA_userdef_types.h" #include "DNA_world_types.h" #include "ED_gpencil.h" @@ -84,6 +85,7 @@ #include "draw_cache_impl.h" #include "engines/basic/basic_engine.h" +#include "engines/compositor/compositor_engine.h" #include "engines/eevee/eevee_engine.h" #include "engines/eevee_next/eevee_engine.h" #include "engines/external/external_engine.h" @@ -179,7 +181,7 @@ static void drw_task_graph_deinit(void) bool DRW_object_is_renderable(const Object *ob) { - BLI_assert((ob->base_flag & BASE_VISIBLE_DEPSGRAPH) != 0); + BLI_assert((ob->base_flag & BASE_ENABLED_AND_MAYBE_VISIBLE_IN_VIEWPORT) != 0); if (ob->type == OB_MESH) { if ((ob == DST.draw_ctx.object_edit) || DRW_object_is_in_edit_mode(ob)) { @@ -212,17 +214,6 @@ int DRW_object_visibility_in_active_context(const Object *ob) return BKE_object_visibility(ob, mode); } -bool DRW_object_is_flat_normal(const Object *ob) -{ - if (ob->type == OB_MESH) { - const Mesh *me = ob->data; - if (me->mpoly && me->mpoly[0].flag & ME_SMOOTH) { - return false; - } - } - return true; 
-} - bool DRW_object_use_hide_faces(const struct Object *ob) { if (ob->type == OB_MESH) { @@ -235,7 +226,7 @@ bool DRW_object_use_hide_faces(const struct Object *ob) return (me->editflag & ME_EDIT_PAINT_FACE_SEL) != 0; case OB_MODE_VERTEX_PAINT: case OB_MODE_WEIGHT_PAINT: - return (me->editflag & (ME_EDIT_PAINT_FACE_SEL | ME_EDIT_PAINT_VERT_SEL)) != 0; + return true; } } @@ -1010,6 +1001,8 @@ static void drw_engines_init(void) static void drw_engines_cache_init(void) { + DRW_manager_begin_sync(); + DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) { if (data->text_draw_cache) { DRW_text_cache_destroy(data->text_draw_cache); @@ -1081,6 +1074,8 @@ static void drw_engines_cache_finish(void) engine->cache_finish(data); } } + + DRW_manager_end_sync(); } static void drw_engines_draw_scene(void) @@ -1225,6 +1220,31 @@ static void drw_engines_enable_editors(void) } } +static bool is_compositor_enabled(void) +{ + if (!U.experimental.use_realtime_compositor) { + return false; + } + + if (!(DST.draw_ctx.v3d->shading.flag & V3D_SHADING_COMPOSITOR)) { + return false; + } + + if (!(DST.draw_ctx.v3d->shading.type >= OB_MATERIAL)) { + return false; + } + + if (!DST.draw_ctx.scene->use_nodes) { + return false; + } + + if (!DST.draw_ctx.scene->nodetree) { + return false; + } + + return true; +} + static void drw_engines_enable(ViewLayer *UNUSED(view_layer), RenderEngineType *engine_type, bool gpencil_engine_needed) @@ -1237,6 +1257,11 @@ static void drw_engines_enable(ViewLayer *UNUSED(view_layer), if (gpencil_engine_needed && ((drawtype >= OB_SOLID) || !use_xray)) { use_drw_engine(&draw_engine_gpencil_type); } + + if (is_compositor_enabled()) { + use_drw_engine(&draw_engine_compositor_type); + } + drw_engines_enable_overlays(); #ifdef WITH_DRAW_DEBUG @@ -1299,13 +1324,14 @@ void DRW_notify_view_update(const DRWUpdateContext *update_ctx) /* Reset before using it. 
*/ drw_state_prepare_clean_for_draw(&DST); + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = rv3d, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, .object_mode = OB_MODE_OBJECT, @@ -1323,11 +1349,7 @@ void DRW_notify_view_update(const DRWUpdateContext *update_ctx) drw_engines_enable(view_layer, engine_type, gpencil_engine_needed); drw_engines_data_validate(); - DRW_ENABLED_ENGINE_ITER (DST.view_data_active, draw_engine, data) { - if (draw_engine->view_update) { - draw_engine->view_update(data); - } - } + DRW_view_data_engines_view_update(DST.view_data_active); drw_engines_disable(); } @@ -1356,13 +1378,14 @@ static void drw_notify_view_update_offscreen(struct Depsgraph *depsgraph, /* Reset before using it. */ drw_state_prepare_clean_for_draw(&DST); + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = rv3d, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, }; @@ -1379,11 +1402,7 @@ static void drw_notify_view_update_offscreen(struct Depsgraph *depsgraph, drw_engines_enable(view_layer, engine_type, gpencil_engine_needed); drw_engines_data_validate(); - DRW_ENABLED_ENGINE_ITER (DST.view_data_active, draw_engine, data) { - if (draw_engine->view_update) { - draw_engine->view_update(data); - } - } + DRW_view_data_engines_view_update(DST.view_data_active); drw_engines_disable(); } @@ -1608,11 +1627,11 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph, GPUViewport *viewport, const bContext *evil_C) { - Scene *scene = DEG_get_evaluated_scene(depsgraph); ViewLayer *view_layer = DEG_get_evaluated_view_layer(depsgraph); RegionView3D *rv3d = region->regiondata; + 
BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx.evil_C = evil_C; DST.draw_ctx = (DRWContextState){ .region = region, @@ -1620,7 +1639,7 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, @@ -2127,12 +2146,13 @@ void DRW_draw_render_loop_2d_ex(struct Depsgraph *depsgraph, Scene *scene = DEG_get_evaluated_scene(depsgraph); ViewLayer *view_layer = DEG_get_evaluated_view_layer(depsgraph); + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx.evil_C = evil_C; DST.draw_ctx = (DRWContextState){ .region = region, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .depsgraph = depsgraph, .space_data = CTX_wm_space_data(evil_C), @@ -2333,7 +2353,9 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, Scene *scene = DEG_get_evaluated_scene(depsgraph); RenderEngineType *engine_type = ED_view3d_engine_type(scene, v3d->shading.type); ViewLayer *view_layer = DEG_get_evaluated_view_layer(depsgraph); - Object *obact = OBACT(view_layer); + + BKE_view_layer_synced_ensure(scene, view_layer); + Object *obact = BKE_view_layer_active_object_get(view_layer); Object *obedit = use_obedit_skip ? 
NULL : OBEDIT_FROM_OBACT(obact); #ifndef USE_GPU_SELECT UNUSED_VARS(scene, view_layer, v3d, region, rect); @@ -2442,7 +2464,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, drw_engines_world_update(scene); if (use_obedit) { - FOREACH_OBJECT_IN_MODE_BEGIN (view_layer, v3d, object_type, object_mode, ob_iter) { + FOREACH_OBJECT_IN_MODE_BEGIN (scene, view_layer, v3d, object_type, object_mode, ob_iter) { drw_engines_cache_populate(ob_iter); } FOREACH_OBJECT_IN_MODE_END; @@ -2463,7 +2485,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, } if (use_pose_exception && (ob->mode & OB_MODE_POSE)) { - if ((ob->base_flag & BASE_VISIBLE_VIEWLAYER) == 0) { + if ((ob->base_flag & BASE_ENABLED_AND_VISIBLE_IN_DEFAULT_VIEWPORT) == 0) { continue; } } @@ -2564,13 +2586,14 @@ static void drw_draw_depth_loop_impl(struct Depsgraph *depsgraph, DST.options.is_depth = true; /* Instead of 'DRW_context_state_init(C, &DST.draw_ctx)', assign from args */ + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = rv3d, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, }; @@ -2683,7 +2706,7 @@ void DRW_draw_select_id(Depsgraph *depsgraph, ARegion *region, View3D *v3d, cons GPUViewport *viewport = WM_draw_region_get_viewport(region); if (!viewport) { /* Selection engine requires a viewport. - * TODO(germano): This should be done internally in the engine. */ + * TODO(@germano): This should be done internally in the engine. 
*/ sel_ctx->is_dirty = true; sel_ctx->objects_drawn_len = 0; sel_ctx->index_drawn_len = 1; @@ -2697,13 +2720,14 @@ void DRW_draw_select_id(Depsgraph *depsgraph, ARegion *region, View3D *v3d, cons drw_state_prepare_clean_for_draw(&DST); /* Instead of 'DRW_context_state_init(C, &DST.draw_ctx)', assign from args */ + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = region->regiondata, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .depsgraph = depsgraph, }; drw_task_graph_init(); @@ -2959,6 +2983,7 @@ void DRW_engines_register(void) DRW_engine_register(&draw_engine_overlay_type); DRW_engine_register(&draw_engine_select_type); DRW_engine_register(&draw_engine_basic_type); + DRW_engine_register(&draw_engine_compositor_type); #ifdef WITH_DRAW_DEBUG DRW_engine_register(&draw_engine_debug_select_type); #endif @@ -2968,9 +2993,6 @@ void DRW_engines_register(void) /* setup callbacks */ { - BKE_mball_batch_cache_dirty_tag_cb = DRW_mball_batch_cache_dirty_tag; - BKE_mball_batch_cache_free_cb = DRW_mball_batch_cache_free; - BKE_curve_batch_cache_dirty_tag_cb = DRW_curve_batch_cache_dirty_tag; BKE_curve_batch_cache_free_cb = DRW_curve_batch_cache_free; @@ -3039,6 +3061,9 @@ void DRW_engines_free(void) DRW_stats_free(); DRW_globals_free(); + drw_debug_module_free(DST.debug); + DST.debug = NULL; + DRW_UBO_FREE_SAFE(G_draw.block_ubo); DRW_UBO_FREE_SAFE(G_draw.view_ubo); DRW_TEXTURE_FREE_SAFE(G_draw.ramp); diff --git a/source/blender/draw/intern/draw_manager.cc b/source/blender/draw/intern/draw_manager.cc new file mode 100644 index 00000000000..169d86b2ea1 --- /dev/null +++ b/source/blender/draw/intern/draw_manager.cc @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
*/ + +/** \file + * \ingroup draw + */ + +#include "BKE_global.h" +#include "GPU_compute.h" + +#include "draw_debug.hh" +#include "draw_defines.h" +#include "draw_manager.h" +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader.h" + +namespace blender::draw { + +Manager::~Manager() +{ + for (GPUTexture *texture : acquired_textures) { + /* Decrease refcount and free if 0. */ + GPU_texture_free(texture); + } +} + +void Manager::begin_sync() +{ + /* TODO: This means the reference is kept until further redraw or manager tear-down. Instead, + * they should be released after each draw loop. But for now, mimics old DRW behavior. */ + for (GPUTexture *texture : acquired_textures) { + /* Decrease refcount and free if 0. */ + GPU_texture_free(texture); + } + + acquired_textures.clear(); + +#ifdef DEBUG + /* Detect uninitialized data. */ + memset(matrix_buf.data(), 0xF0, resource_len_ * sizeof(*matrix_buf.data())); + memset(bounds_buf.data(), 0xF0, resource_len_ * sizeof(*bounds_buf.data())); + memset(infos_buf.data(), 0xF0, resource_len_ * sizeof(*infos_buf.data())); +#endif + resource_len_ = 0; + attribute_len_ = 0; + /* TODO(fclem): Resize buffers if too big, but with an hysteresis threshold. */ + + object_active = DST.draw_ctx.obact; + + /* Init the 0 resource. */ + resource_handle(float4x4::identity()); +} + +void Manager::end_sync() +{ + GPU_debug_group_begin("Manager.end_sync"); + + matrix_buf.push_update(); + bounds_buf.push_update(); + infos_buf.push_update(); + attributes_buf.push_update(); + attributes_buf_legacy.push_update(); + + /* Useful for debugging the following resource finalize. But will trigger the drawing of the GPU + * debug draw/print buffers for every frame. Not nice for performance. */ + // debug_bind(); + + /* Dispatch compute to finalize the resources on GPU. Save a bit of CPU time. 
*/ + uint thread_groups = divide_ceil_u(resource_len_, DRW_FINALIZE_GROUP_SIZE); + GPUShader *shader = DRW_shader_draw_resource_finalize_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "resource_len", resource_len_); + GPU_storagebuf_bind(matrix_buf, GPU_shader_get_ssbo(shader, "matrix_buf")); + GPU_storagebuf_bind(bounds_buf, GPU_shader_get_ssbo(shader, "bounds_buf")); + GPU_storagebuf_bind(infos_buf, GPU_shader_get_ssbo(shader, "infos_buf")); + GPU_compute_dispatch(shader, thread_groups, 1, 1); + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + + GPU_debug_group_end(); +} + +void Manager::debug_bind() +{ +#ifdef DEBUG + if (DST.debug == nullptr) { + return; + } + GPU_storagebuf_bind(drw_debug_gpu_draw_buf_get(), DRW_DEBUG_DRAW_SLOT); + GPU_storagebuf_bind(drw_debug_gpu_print_buf_get(), DRW_DEBUG_PRINT_SLOT); +# ifndef DISABLE_DEBUG_SHADER_PRINT_BARRIER + /* Add a barrier to allow multiple shader writing to the same buffer. */ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); +# endif +#endif +} + +void Manager::resource_bind() +{ + GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT); + GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT); + GPU_storagebuf_bind(attributes_buf, DRW_OBJ_ATTR_SLOT); + /* 2 is the hardcoded location of the uniform attr UBO. */ + /* TODO(@fclem): Remove this workaround. 
*/ + GPU_uniformbuf_bind(attributes_buf_legacy, 2); +} + +void Manager::submit(PassSimple &pass, View &view) +{ + view.bind(); + + debug_bind(); + + command::RecordingState state; + state.inverted_view = view.is_inverted(); + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_); + + resource_bind(); + + pass.submit(state); + + state.cleanup(); +} + +void Manager::submit(PassMain &pass, View &view) +{ + view.bind(); + + debug_bind(); + + bool freeze_culling = (U.experimental.use_viewport_debug && DST.draw_ctx.v3d && + (DST.draw_ctx.v3d->debug_flag & V3D_DEBUG_FREEZE_CULLING) != 0); + + view.compute_visibility(bounds_buf, resource_len_, freeze_culling); + + command::RecordingState state; + state.inverted_view = view.is_inverted(); + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_, view.visibility_buf_); + + resource_bind(); + + pass.submit(state); + + state.cleanup(); +} + +void Manager::submit(PassSortable &pass, View &view) +{ + pass.sort(); + + this->submit(static_cast<PassMain &>(pass), view); +} + +void Manager::submit(PassSimple &pass) +{ + debug_bind(); + + command::RecordingState state; + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_); + + resource_bind(); + + pass.submit(state); + + state.cleanup(); +} + +Manager::SubmitDebugOutput Manager::submit_debug(PassSimple &pass, View &view) +{ + submit(pass, view); + + pass.draw_commands_buf_.resource_id_buf_.read(); + + Manager::SubmitDebugOutput output; + output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(), + pass.draw_commands_buf_.resource_id_count_}; + /* There is no visibility data for PassSimple. 
*/ + output.visibility = {(uint *)view.visibility_buf_.data(), 0}; + return output; +} + +Manager::SubmitDebugOutput Manager::submit_debug(PassMain &pass, View &view) +{ + submit(pass, view); + + GPU_finish(); + + pass.draw_commands_buf_.resource_id_buf_.read(); + view.visibility_buf_.read(); + + Manager::SubmitDebugOutput output; + output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(), + pass.draw_commands_buf_.resource_id_count_}; + output.visibility = {(uint *)view.visibility_buf_.data(), divide_ceil_u(resource_len_, 32)}; + return output; +} + +Manager::DataDebugOutput Manager::data_debug() +{ + matrix_buf.read(); + bounds_buf.read(); + infos_buf.read(); + + Manager::DataDebugOutput output; + output.matrices = {matrix_buf.data(), resource_len_}; + output.bounds = {bounds_buf.data(), resource_len_}; + output.infos = {infos_buf.data(), resource_len_}; + return output; +} + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h index 6d384c599d8..4f71e665390 100644 --- a/source/blender/draw/intern/draw_manager.h +++ b/source/blender/draw/intern/draw_manager.h @@ -188,6 +188,7 @@ typedef enum { DRW_CMD_DRAW_INSTANCE = 2, DRW_CMD_DRAW_INSTANCE_RANGE = 3, DRW_CMD_DRAW_PROCEDURAL = 4, + DRW_CMD_DRAW_INDIRECT = 5, /* Compute Commands. 
*/ DRW_CMD_COMPUTE = 8, @@ -203,7 +204,7 @@ typedef enum { /* Needs to fit in 4bits */ } eDRWCommandType; -#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_PROCEDURAL +#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_INDIRECT typedef struct DRWCommandDraw { GPUBatch *batch; @@ -232,6 +233,12 @@ typedef struct DRWCommandDrawInstanceRange { uint inst_count; } DRWCommandDrawInstanceRange; +typedef struct DRWCommandDrawIndirect { + GPUBatch *batch; + DRWResourceHandle handle; + GPUStorageBuf *indirect_buf; +} DRWCommandDrawIndirect; + typedef struct DRWCommandCompute { int groups_x_len; int groups_y_len; @@ -286,6 +293,7 @@ typedef union DRWCommand { DRWCommandDrawInstance instance; DRWCommandDrawInstanceRange instance_range; DRWCommandDrawProcedural procedural; + DRWCommandDrawIndirect draw_indirect; DRWCommandCompute compute; DRWCommandComputeRef compute_ref; DRWCommandComputeIndirect compute_indirect; @@ -369,7 +377,7 @@ struct DRWUniform { /* DRW_UNIFORM_INT_COPY */ int ivalue[4]; /* DRW_UNIFORM_BLOCK_OBATTRS */ - struct GPUUniformAttrList *uniform_attrs; + const struct GPUUniformAttrList *uniform_attrs; }; int location; /* Uniform location or binding point for textures and UBO's. */ uint8_t type; /* #DRWUniformType */ @@ -395,7 +403,7 @@ struct DRWShadingGroup { DRWResourceHandle pass_handle; /* Memblock key to parent pass. */ /* Set of uniform attributes used by this shader. */ - struct GPUUniformAttrList *uniform_attrs; + const struct GPUUniformAttrList *uniform_attrs; }; /* This struct is used after cache populate if using the Z sorting. * It will not conflict with the above struct. 
*/ @@ -493,20 +501,6 @@ typedef struct DRWCommandSmallChunk { BLI_STATIC_ASSERT_ALIGN(DRWCommandChunk, 16); #endif -/* ------------- DRAW DEBUG ------------ */ - -typedef struct DRWDebugLine { - struct DRWDebugLine *next; /* linked list */ - float pos[2][3]; - float color[4]; -} DRWDebugLine; - -typedef struct DRWDebugSphere { - struct DRWDebugSphere *next; /* linked list */ - float mat[4][4]; - float color[4]; -} DRWDebugSphere; - /* ------------- Memory Pools ------------ */ /* Contains memory pools information */ @@ -533,10 +527,12 @@ typedef struct DRWData { void *volume_grids_ubos; /* VolumeUniformBufPool */ /** List of smoke textures to free after drawing. */ ListBase smoke_textures; - /** Texture pool to reuse temp texture across engines. */ - /* TODO(@fclem): The pool could be shared even between view-ports. */ + /** + * Texture pool to reuse temp texture across engines. + * TODO(@fclem): The pool could be shared even between view-ports. + */ struct DRWTexturePool *texture_pool; - /** Per stereo view data. Contains engine data and default framebuffers. */ + /** Per stereo view data. Contains engine data and default frame-buffers. */ struct DRWViewData *view_data[2]; /** Per draw-call curves object data. */ struct CurvesUniformBufPool *curves_ubos; @@ -646,11 +642,7 @@ typedef struct DRWManager { GPUDrawList *draw_list; - struct { - /* TODO(@fclem): optimize: use chunks. */ - DRWDebugLine *lines; - DRWDebugSphere *spheres; - } debug; + DRWDebugModule *debug; } DRWManager; extern DRWManager DST; /* TODO: get rid of this and allow multi-threaded rendering. 
*/ @@ -665,6 +657,9 @@ void drw_state_set(DRWState state); void drw_debug_draw(void); void drw_debug_init(void); +void drw_debug_module_free(DRWDebugModule *module); +GPUStorageBuf *drw_debug_gpu_draw_buf_get(void); +GPUStorageBuf *drw_debug_gpu_print_buf_get(void); eDRWCommandType command_type_get(const uint64_t *command_type_bits, int index); @@ -683,9 +678,10 @@ void drw_resource_buffer_finish(DRWData *vmempool); GPUBatch *drw_cache_procedural_points_get(void); GPUBatch *drw_cache_procedural_lines_get(void); GPUBatch *drw_cache_procedural_triangles_get(void); +GPUBatch *drw_cache_procedural_triangle_strips_get(void); void drw_uniform_attrs_pool_update(struct GHash *table, - struct GPUUniformAttrList *key, + const struct GPUUniformAttrList *key, DRWResourceHandle *handle, struct Object *ob, struct Object *dupli_parent, @@ -698,6 +694,9 @@ bool drw_engine_data_engines_data_validate(GPUViewport *viewport, void **engine_ void drw_engine_data_cache_release(GPUViewport *viewport); void drw_engine_data_free(GPUViewport *viewport); +void DRW_manager_begin_sync(void); +void DRW_manager_end_sync(void); + #ifdef __cplusplus } #endif diff --git a/source/blender/draw/intern/draw_manager.hh b/source/blender/draw/intern/draw_manager.hh new file mode 100644 index 00000000000..fbd3d28d3f4 --- /dev/null +++ b/source/blender/draw/intern/draw_manager.hh @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * `draw::Manager` is the interface between scene data and viewport engines. + * + * It holds per component data (`ObjectInfo`, `ObjectMatrices`, ...) indexed per `ResourceHandle`. + * + * \note It is currently work in progress and should replace the old global draw manager. 
+ */ + +#include "BLI_listbase_wrapper.hh" +#include "BLI_sys_types.h" +#include "GPU_material.h" + +#include "draw_resource.hh" +#include "draw_view.hh" + +#include <string> + +namespace blender::draw { + +/* Forward declarations. */ + +namespace detail { +template<typename T> class Pass; +} // namespace detail + +namespace command { +class DrawCommandBuf; +class DrawMultiBuf; +} // namespace command + +using PassSimple = detail::Pass<command::DrawCommandBuf>; +using PassMain = detail::Pass<command::DrawMultiBuf>; +class PassSortable; + +class Manager { + using ObjectMatricesBuf = StorageArrayBuffer<ObjectMatrices, 128>; + using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>; + using ObjectInfosBuf = StorageArrayBuffer<ObjectInfos, 128>; + using ObjectAttributeBuf = StorageArrayBuffer<ObjectAttribute, 128>; + /** + * TODO(@fclem): Remove once we get rid of old EEVEE code-base. + * `DRW_RESOURCE_CHUNK_LEN = 512`. + */ + using ObjectAttributeLegacyBuf = UniformArrayBuffer<float4, 8 * 512>; + + public: + struct SubmitDebugOutput { + /** Indexed by resource id. */ + Span<uint32_t> visibility; + /** Indexed by drawn instance. */ + Span<uint32_t> resource_id; + }; + + struct DataDebugOutput { + /** Indexed by resource id. */ + Span<ObjectMatrices> matrices; + /** Indexed by resource id. */ + Span<ObjectBounds> bounds; + /** Indexed by resource id. */ + Span<ObjectInfos> infos; + }; + + /** + * Buffers containing all object data. Referenced by resource index. + * Exposed as public members for shader access after sync. + */ + ObjectMatricesBuf matrix_buf; + ObjectBoundsBuf bounds_buf; + ObjectInfosBuf infos_buf; + + /** + * Object Attributes are reference by indirection data inside ObjectInfos. + * This is because attribute list is arbitrary. + */ + ObjectAttributeBuf attributes_buf; + /** + * TODO(@fclem): Remove once we get rid of old EEVEE code-base. + * Only here to satisfy bindings. 
+ */ + ObjectAttributeLegacyBuf attributes_buf_legacy; + + /** + * List of textures coming from Image data-blocks. + * They need to be reference-counted in order to avoid being freed in another thread. + */ + Vector<GPUTexture *> acquired_textures; + + private: + /** Number of resource handle recorded. */ + uint resource_len_ = 0; + /** Number of object attribute recorded. */ + uint attribute_len_ = 0; + + Object *object_active = nullptr; + + public: + Manager(){}; + ~Manager(); + + /** + * Create a new resource handle for the given object. Can be called multiple time with the + * same object **successively** without duplicating the data. + */ + ResourceHandle resource_handle(const ObjectRef ref); + /** + * Get resource id for a loose matrix. The draw-calls for this resource handle won't be culled + * and there won't be any associated object info / bounds. Assumes correct handedness / winding. + */ + ResourceHandle resource_handle(const float4x4 &model_matrix); + /** + * Get resource id for a loose matrix with bounds. The draw-calls for this resource handle will + * be culled bute there won't be any associated object info / bounds. Assumes correct handedness + * / winding. + */ + ResourceHandle resource_handle(const float4x4 &model_matrix, + const float3 &bounds_center, + const float3 &bounds_half_extent); + + /** + * Populate additional per resource data on demand. + */ + void extract_object_attributes(ResourceHandle handle, + const ObjectRef &ref, + Span<GPUMaterial *> materials); + + /** + * Submit a pass for drawing. All resource reference will be dereferenced and commands will be + * sent to GPU. + */ + void submit(PassSimple &pass, View &view); + void submit(PassMain &pass, View &view); + void submit(PassSortable &pass, View &view); + /** + * Variant without any view. Must not contain any shader using `draw_view` create info. + */ + void submit(PassSimple &pass); + + /** + * Submit a pass for drawing but read back all data buffers for inspection. 
+ */ + SubmitDebugOutput submit_debug(PassSimple &pass, View &view); + SubmitDebugOutput submit_debug(PassMain &pass, View &view); + + /** + * Check data buffers of the draw manager. Only to be used after end_sync(). + */ + DataDebugOutput data_debug(); + + /** + * Will acquire the texture using ref counting and release it after drawing. To be used for + * texture coming from blender Image. + */ + void acquire_texture(GPUTexture *texture) + { + GPU_texture_ref(texture); + acquired_textures.append(texture); + } + + /** TODO(fclem): The following should become private at some point. */ + void begin_sync(); + void end_sync(); + + void debug_bind(); + void resource_bind(); +}; + +inline ResourceHandle Manager::resource_handle(const ObjectRef ref) +{ + bool is_active_object = (ref.dupli_object ? ref.dupli_parent : ref.object) == object_active; + matrix_buf.get_or_resize(resource_len_).sync(*ref.object); + bounds_buf.get_or_resize(resource_len_).sync(*ref.object); + infos_buf.get_or_resize(resource_len_).sync(ref, is_active_object); + return ResourceHandle(resource_len_++, (ref.object->transflag & OB_NEG_SCALE) != 0); +} + +inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix) +{ + matrix_buf.get_or_resize(resource_len_).sync(model_matrix); + bounds_buf.get_or_resize(resource_len_).sync(); + infos_buf.get_or_resize(resource_len_).sync(); + return ResourceHandle(resource_len_++, false); +} + +inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix, + const float3 &bounds_center, + const float3 &bounds_half_extent) +{ + matrix_buf.get_or_resize(resource_len_).sync(model_matrix); + bounds_buf.get_or_resize(resource_len_).sync(bounds_center, bounds_half_extent); + infos_buf.get_or_resize(resource_len_).sync(); + return ResourceHandle(resource_len_++, false); +} + +inline void Manager::extract_object_attributes(ResourceHandle handle, + const ObjectRef &ref, + Span<GPUMaterial *> materials) +{ + ObjectInfos &infos = 
infos_buf.get_or_resize(handle.resource_index()); + infos.object_attrs_offset = attribute_len_; + + /* Simple cache solution to avoid duplicates. */ + Vector<uint32_t, 4> hash_cache; + + for (const GPUMaterial *mat : materials) { + const GPUUniformAttrList *attr_list = GPU_material_uniform_attributes(mat); + if (attr_list == nullptr) { + continue; + } + + LISTBASE_FOREACH (const GPUUniformAttr *, attr, &attr_list->list) { + /** WATCH: Linear Search. Avoid duplicate attributes across materials. */ + if ((mat != materials.first()) && (hash_cache.first_index_of_try(attr->hash_code) != -1)) { + /* Attribute has already been added to the attribute buffer by another material. */ + continue; + } + hash_cache.append(attr->hash_code); + if (attributes_buf.get_or_resize(attribute_len_).sync(ref, *attr)) { + infos.object_attrs_len++; + attribute_len_++; + } + } + } +} + +} // namespace blender::draw + +/* TODO(@fclem): This is for testing. The manager should be passed to the engine through the + * callbacks. */ +blender::draw::Manager *DRW_manager_get(); +blender::draw::ObjectRef DRW_object_ref_get(Object *object); diff --git a/source/blender/draw/intern/draw_manager_data.c b/source/blender/draw/intern/draw_manager_data.c index 188d9114cd7..c75049508f9 100644 --- a/source/blender/draw/intern/draw_manager_data.c +++ b/source/blender/draw/intern/draw_manager_data.c @@ -17,9 +17,14 @@ #include "BKE_pbvh.h" #include "BKE_volume.h" +/* For debug cursor position. */ +#include "WM_api.h" +#include "wm_window.h" + #include "DNA_curve_types.h" #include "DNA_mesh_types.h" #include "DNA_meta_types.h" +#include "DNA_screen_types.h" #include "BLI_alloca.h" #include "BLI_hash.h" @@ -39,6 +44,16 @@ #include "intern/gpu_codegen.h" +/** + * IMPORTANT: + * In order to be able to write to the same print buffer sequentially, we add a barrier to allow + * multiple shader calls writing to the same buffer. 
+ * However, this adds explicit synchronization events which might change the rest of the + * application behavior and hide some bugs. If you know you are using shader debug print in only + * one shader pass, you can comment this out to remove the aforementioned barrier. + */ +#define DISABLE_DEBUG_SHADER_PRINT_BARRIER + /* -------------------------------------------------------------------- */ /** \name Uniform Buffer Object (DRW_uniformbuffer) * \{ */ @@ -878,6 +893,17 @@ static void drw_command_draw_procedural(DRWShadingGroup *shgroup, cmd->vert_count = vert_count; } +static void drw_command_draw_indirect(DRWShadingGroup *shgroup, + GPUBatch *batch, + DRWResourceHandle handle, + GPUStorageBuf *indirect_buf) +{ + DRWCommandDrawIndirect *cmd = drw_command_create(shgroup, DRW_CMD_DRAW_INDIRECT); + cmd->batch = batch; + cmd->handle = handle; + cmd->indirect_buf = indirect_buf; +} + static void drw_command_set_select_id(DRWShadingGroup *shgroup, GPUVertBuf *buf, uint select_id) { /* Only one can be valid. 
*/ @@ -1005,6 +1031,7 @@ void DRW_shgroup_call_compute_indirect(DRWShadingGroup *shgroup, GPUStorageBuf * drw_command_compute_indirect(shgroup, indirect_buf); } + void DRW_shgroup_barrier(DRWShadingGroup *shgroup, eGPUBarrier type) { BLI_assert(GPU_compute_shader_support()); @@ -1044,6 +1071,38 @@ void DRW_shgroup_call_procedural_triangles(DRWShadingGroup *shgroup, Object *ob, drw_shgroup_call_procedural_add_ex(shgroup, geom, ob, tri_count * 3); } +void DRW_shgroup_call_procedural_indirect(DRWShadingGroup *shgroup, + GPUPrimType primitive_type, + Object *ob, + GPUStorageBuf *indirect_buf) +{ + struct GPUBatch *geom = NULL; + switch (primitive_type) { + case GPU_PRIM_POINTS: + geom = drw_cache_procedural_points_get(); + break; + case GPU_PRIM_LINES: + geom = drw_cache_procedural_lines_get(); + break; + case GPU_PRIM_TRIS: + geom = drw_cache_procedural_triangles_get(); + break; + case GPU_PRIM_TRI_STRIP: + geom = drw_cache_procedural_triangle_strips_get(); + break; + default: + BLI_assert_msg(0, + "Unsupported primitive type in DRW_shgroup_call_procedural_indirect. Add new " + "one as needed."); + break; + } + if (G.f & G_FLAG_PICKSEL) { + drw_command_set_select_id(shgroup, NULL, DST.select_id); + } + DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : NULL, ob); + drw_command_draw_indirect(shgroup, geom, handle, indirect_buf); +} + void DRW_shgroup_call_instances(DRWShadingGroup *shgroup, Object *ob, struct GPUBatch *geom, @@ -1129,16 +1188,15 @@ static void sculpt_draw_cb(DRWSculptCallbackData *scd, GPU_PBVH_Buffers *buffers DRW_shgroup_uniform_vec3( shgrp, "materialDiffuseColor", SCULPT_DEBUG_COLOR(scd->debug_node_nr++), 1); } + /* DRW_shgroup_call_no_cull reuses matrices calculations for all the drawcalls of this * object. 
*/ DRW_shgroup_call_no_cull(shgrp, geom, scd->ob); } } -static void sculpt_debug_cb(void *user_data, - const float bmin[3], - const float bmax[3], - PBVHNodeFlags flag) +static void sculpt_debug_cb( + PBVHNode *node, void *user_data, const float bmin[3], const float bmax[3], PBVHNodeFlags flag) { int *debug_node_nr = (int *)user_data; BoundBox bb; @@ -1153,7 +1211,10 @@ static void sculpt_debug_cb(void *user_data, } #else /* Color coded leaf bounds. */ if (flag & PBVH_Leaf) { - DRW_debug_bbox(&bb, SCULPT_DEBUG_COLOR((*debug_node_nr)++)); + int color = (*debug_node_nr)++; + color += BKE_pbvh_debug_draw_gen_get(node); + + DRW_debug_bbox(&bb, SCULPT_DEBUG_COLOR(color)); } #endif } @@ -1246,8 +1307,8 @@ static void drw_sculpt_generate_calls(DRWSculptCallbackData *scd) DRW_debug_modelmat(scd->ob->obmat); BKE_pbvh_draw_debug_cb( pbvh, - (void (*)( - void *d, const float min[3], const float max[3], PBVHNodeFlags f))sculpt_debug_cb, + (void (*)(PBVHNode * n, void *d, const float min[3], const float max[3], PBVHNodeFlags f)) + sculpt_debug_cb, &debug_node_nr); } } @@ -1466,6 +1527,27 @@ static void drw_shgroup_init(DRWShadingGroup *shgroup, GPUShader *shader) shgroup, view_ubo_location, DRW_UNIFORM_BLOCK, G_draw.view_ubo, 0, 0, 1); } +#ifdef DEBUG + int debug_print_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_PRINT); + if (debug_print_location != -1) { + GPUStorageBuf *buf = drw_debug_gpu_print_buf_get(); + drw_shgroup_uniform_create_ex( + shgroup, debug_print_location, DRW_UNIFORM_STORAGE_BLOCK, buf, 0, 0, 1); +# ifndef DISABLE_DEBUG_SHADER_PRINT_BARRIER + /* Add a barrier to allow multiple shader writing to the same buffer. 
*/ + DRW_shgroup_barrier(shgroup, GPU_BARRIER_SHADER_STORAGE); +# endif + } + + int debug_draw_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_VERTS); + if (debug_draw_location != -1) { + GPUStorageBuf *buf = drw_debug_gpu_draw_buf_get(); + drw_shgroup_uniform_create_ex( + shgroup, debug_draw_location, DRW_UNIFORM_STORAGE_BLOCK, buf, 0, 0, 1); + /* NOTE(fclem): No barrier as ordering is not important. */ + } +#endif + /* Not supported. */ BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW_INV) == -1); BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW) == -1); @@ -1556,7 +1638,7 @@ void DRW_shgroup_add_material_resources(DRWShadingGroup *grp, struct GPUMaterial DRW_shgroup_uniform_block(grp, GPU_UBO_BLOCK_NAME, ubo); } - GPUUniformAttrList *uattrs = GPU_material_uniform_attributes(material); + const GPUUniformAttrList *uattrs = GPU_material_uniform_attributes(material); if (uattrs != NULL) { int loc = GPU_shader_get_uniform_block_binding(grp->shader, GPU_ATTRIBUTE_UBO_BLOCK_NAME); drw_shgroup_uniform_create_ex(grp, loc, DRW_UNIFORM_BLOCK_OBATTRS, uattrs, 0, 0, 1); @@ -1942,6 +2024,13 @@ DRWView *DRW_view_create(const float viewmat[4][4], copy_v4_fl4(view->storage.viewcamtexcofac, 1.0f, 1.0f, 0.0f, 0.0f); + if (DST.draw_ctx.evil_C && DST.draw_ctx.region) { + int region_origin[2] = {DST.draw_ctx.region->winrct.xmin, DST.draw_ctx.region->winrct.ymin}; + struct wmWindow *win = CTX_wm_window(DST.draw_ctx.evil_C); + wm_cursor_position_get(win, &view->storage.mouse_pixel[0], &view->storage.mouse_pixel[1]); + sub_v2_v2v2_int(view->storage.mouse_pixel, view->storage.mouse_pixel, region_origin); + } + DRW_view_update(view, viewmat, winmat, culling_viewmat, culling_winmat); return view; @@ -2041,6 +2130,14 @@ void DRW_view_update(DRWView *view, draw_frustum_bound_sphere_calc( &view->frustum_corners, viewinv, winmat, wininv, &view->frustum_bsphere); + /* TODO(fclem): Deduplicate. 
*/ + for (int i = 0; i < 8; i++) { + copy_v3_v3(view->storage.frustum_corners[i], view->frustum_corners.vec[i]); + } + for (int i = 0; i < 6; i++) { + copy_v4_v4(view->storage.frustum_planes[i], view->frustum_planes[i]); + } + #ifdef DRW_DEBUG_CULLING if (G.debug_value != 0) { DRW_debug_sphere( diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c index e7e0e0ce41f..0e39cc1d3b9 100644 --- a/source/blender/draw/intern/draw_manager_exec.c +++ b/source/blender/draw/intern/draw_manager_exec.c @@ -318,6 +318,7 @@ void DRW_state_reset(void) DRW_state_reset_ex(DRW_STATE_DEFAULT); GPU_texture_unbind_all(); + GPU_texture_image_unbind_all(); GPU_uniformbuf_unbind_all(); GPU_storagebuf_unbind_all(); @@ -874,6 +875,25 @@ static void draw_call_single_do(DRWShadingGroup *shgroup, state->baseinst_loc); } +/* Not to be mistaken with draw_indirect_call which does batch many drawcalls together. This one + * only execute an indirect drawcall with user indirect buffer. */ +static void draw_call_indirect(DRWShadingGroup *shgroup, + DRWCommandsState *state, + GPUBatch *batch, + DRWResourceHandle handle, + GPUStorageBuf *indirect_buf) +{ + draw_call_batching_flush(shgroup, state); + draw_call_resource_bind(state, &handle); + + if (G.f & G_FLAG_PICKSEL) { + GPU_select_load_id(state->select_id); + } + + GPU_batch_set_shader(batch, shgroup->shader); + GPU_batch_draw_indirect(batch, indirect_buf, 0); +} + static void draw_call_batching_start(DRWCommandsState *state) { state->neg_scale = false; @@ -970,6 +990,7 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) /* Unbinding can be costly. Skip in normal condition. 
*/ if (G.debug & G_DEBUG_GPU) { GPU_texture_unbind_all(); + GPU_texture_image_unbind_all(); GPU_uniformbuf_unbind_all(); GPU_storagebuf_unbind_all(); } @@ -996,12 +1017,13 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) while ((cmd = draw_command_iter_step(&iter, &cmd_type))) { switch (cmd_type) { + case DRW_CMD_DRAW_PROCEDURAL: case DRW_CMD_DRWSTATE: case DRW_CMD_STENCIL: draw_call_batching_flush(shgroup, &state); break; case DRW_CMD_DRAW: - case DRW_CMD_DRAW_PROCEDURAL: + case DRW_CMD_DRAW_INDIRECT: case DRW_CMD_DRAW_INSTANCE: if (draw_call_is_culled(&cmd->instance.handle, DST.view_active)) { continue; @@ -1055,6 +1077,13 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) 1, true); break; + case DRW_CMD_DRAW_INDIRECT: + draw_call_indirect(shgroup, + &state, + cmd->draw_indirect.batch, + cmd->draw_indirect.handle, + cmd->draw_indirect.indirect_buf); + break; case DRW_CMD_DRAW_INSTANCE: draw_call_single_do(shgroup, &state, diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c index 4bc3898c5e7..1ada99093c6 100644 --- a/source/blender/draw/intern/draw_manager_shader.c +++ b/source/blender/draw/intern/draw_manager_shader.c @@ -297,6 +297,18 @@ GPUShader *DRW_shader_create_with_lib_ex(const char *vert, return sh; } +GPUShader *DRW_shader_create_compute_with_shaderlib(const char *comp, + const DRWShaderLibrary *lib, + const char *defines, + const char *name) +{ + char *comp_with_lib = DRW_shader_library_create_shader_string(lib, comp); + GPUShader *sh = GPU_shader_create_compute(comp_with_lib, NULL, defines, name); + MEM_SAFE_FREE(comp_with_lib); + + return sh; +} + GPUShader *DRW_shader_create_with_shaderlib_ex(const char *vert, const char *geom, const char *frag, diff --git a/source/blender/draw/intern/draw_pass.hh b/source/blender/draw/intern/draw_pass.hh new file mode 100644 index 00000000000..e1a0a6652ac --- /dev/null +++ 
b/source/blender/draw/intern/draw_pass.hh @@ -0,0 +1,1005 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Passes record draw commands. Commands are executed only when a pass is submitted for execution. + * + * `PassMain`: + * Should be used on heavy load passes such as ones that may contain scene objects. Draw call + * submission is optimized for large number of draw calls. But has a significant overhead per + * #Pass. Use many #PassSub along with a main #Pass to reduce the overhead and allow groupings of + * commands. \note The draw call order inside a batch of multiple draw with the exact same state is + * not guaranteed and is not even deterministic. Use a #PassSimple or #PassSortable if ordering is + * needed. \note As of now, it is also quite limited in the type of draw command it can record + * (no custom vertex count, no custom first vertex). + * + * `PassSimple`: + * Does not have the overhead of #PassMain but does not have the culling and batching optimization. + * It should be used for passes that needs a few commands or that needs guaranteed draw call order. + * + * `Pass<T>::Sub`: + * A lightweight #Pass that lives inside a main #Pass. It can only be created from #Pass.sub() + * and is auto managed. This mean it can be created, filled and thrown away. A #PassSub reference + * is valid until the next #Pass.init() of the parent pass. Commands recorded inside a #PassSub are + * inserted inside the parent #Pass where the sub have been created during submission. + * + * `PassSortable`: + * This is a sort of `PassMain` augmented with a per sub-pass sorting value. They can't directly + * contain draw command, everything needs to be inside sub-passes. Sub-passes are automatically + * sorted before submission. + * + * \note A pass can be recorded once and resubmitted any number of time. 
This can be a good + * optimization for passes that are always the same for each frame. The only thing to be aware of + * is the life time of external resources. If a pass contains draw-calls with non default + * #ResourceHandle (not 0) or a reference to any non static resources + * (#GPUBatch, #PushConstant ref, #ResourceBind ref) it will have to be re-recorded + * if any of these reference becomes invalid. + */ + +#include "BKE_image.h" +#include "BLI_vector.hh" +#include "DRW_gpu_wrapper.hh" +#include "GPU_debug.h" +#include "GPU_material.h" + +#include "draw_command.hh" +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader_shared.h" +#include "draw_state.h" + +#include "intern/gpu_codegen.h" + +namespace blender::draw { + +using namespace blender::draw; +using namespace blender::draw::command; + +class Manager; + +/* -------------------------------------------------------------------- */ +/** \name Pass API + * \{ */ + +namespace detail { + +/** + * Special container that never moves allocated items and has fast indexing. + */ +template<typename T, + /** Numbers of element of type T to allocate together. */ + int64_t block_size = 16> +class SubPassVector { + private: + Vector<std::unique_ptr<Vector<T, block_size>>, 0> blocks_; + + public: + void clear() + { + blocks_.clear(); + } + + int64_t append_and_get_index(T &&elem) + { + /* Do not go over the inline size so that existing members never move. */ + if (blocks_.is_empty() || blocks_.last()->size() == block_size) { + blocks_.append(std::make_unique<Vector<T, block_size>>()); + } + return blocks_.last()->append_and_get_index(std::move(elem)) + + (blocks_.size() - 1) * block_size; + } + + T &operator[](int64_t index) + { + return (*blocks_[index / block_size])[index % block_size]; + } + + const T &operator[](int64_t index) const + { + return (*blocks_[index / block_size])[index % block_size]; + } +}; + +/** + * Public API of a draw pass. 
+ */ +template< + /** Type of command buffer used to create the draw calls. */ + typename DrawCommandBufType> +class PassBase { + friend Manager; + + /** Will use texture own sampler state. */ + static constexpr eGPUSamplerState sampler_auto = GPU_SAMPLER_MAX; + + protected: + /** Highest level of the command stream. Split command stream in different command types. */ + Vector<command::Header, 0> headers_; + /** Commands referenced by headers (which contains their types). */ + Vector<command::Undetermined, 0> commands_; + /* Reference to draw commands buffer. Either own or from parent pass. */ + DrawCommandBufType &draw_commands_buf_; + /* Reference to sub-pass commands buffer. Either own or from parent pass. */ + SubPassVector<PassBase<DrawCommandBufType>> &sub_passes_; + /** Currently bound shader. Used for interface queries. */ + GPUShader *shader_; + + public: + const char *debug_name; + + PassBase(const char *name, + DrawCommandBufType &draw_command_buf, + SubPassVector<PassBase<DrawCommandBufType>> &sub_passes, + GPUShader *shader = nullptr) + : draw_commands_buf_(draw_command_buf), + sub_passes_(sub_passes), + shader_(shader), + debug_name(name){}; + + /** + * Reset the pass command pool. + * \note Implemented in derived class. Not a virtual function to avoid indirection. Here only for + * API readability listing. + */ + void init(); + + /** + * Create a sub-pass inside this pass. + */ + PassBase<DrawCommandBufType> &sub(const char *name); + + /** + * Changes the fixed function pipeline state. + * Starts as DRW_STATE_NO_DRAW at the start of a Pass submission. + * SubPass inherit previous pass state. + * + * IMPORTANT: This does not set the stencil mask/reference values. Add a call to state_stencil() + * to ensure correct behavior of stencil aware draws. + */ + void state_set(DRWState state); + + /** + * Clear the current frame-buffer. 
+ */ + void clear_color(float4 color); + void clear_depth(float depth); + void clear_stencil(uint8_t stencil); + void clear_depth_stencil(float depth, uint8_t stencil); + void clear_color_depth_stencil(float4 color, float depth, uint8_t stencil); + + /** + * Reminders: + * - (compare_mask & reference) is what is tested against (compare_mask & stencil_value) + * stencil_value being the value stored in the stencil buffer. + * - (write-mask & reference) is what gets written if the test condition is fulfilled. + */ + void state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask); + + /** + * Bind a shader. Any following bind() or push_constant() call will use its interface. + */ + void shader_set(GPUShader *shader); + + /** + * Bind a material shader along with its associated resources. Any following bind() or + * push_constant() call will use its interface. + * IMPORTANT: Assumes material is compiled and can be used (no compilation error). + */ + void material_set(Manager &manager, GPUMaterial *material); + + /** + * Record a draw call. + * \note Setting the count or first to -1 will use the values from the batch. + * \note An instance or vertex count of 0 will discard the draw call. It will not be recorded. + */ + void draw(GPUBatch *batch, + uint instance_len = -1, + uint vertex_len = -1, + uint vertex_first = -1, + ResourceHandle handle = {0}); + + /** + * Shorter version for the common case. + * \note Implemented in derived class. Not a virtual function to avoid indirection. + */ + void draw(GPUBatch *batch, ResourceHandle handle); + + /** + * Record a procedural draw call. Geometry is **NOT** source from a GPUBatch. + * \note An instance or vertex count of 0 will discard the draw call. It will not be recorded. + */ + void draw_procedural(GPUPrimType primitive, + uint instance_len, + uint vertex_len, + uint vertex_first = -1, + ResourceHandle handle = {0}); + + /** + * Indirect variants. 
+ * \note If needed, the resource id need to also be set accordingly in the DrawCommand. + */ + void draw_indirect(GPUBatch *batch, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle = {0}); + void draw_procedural_indirect(GPUPrimType primitive, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle = {0}); + + /** + * Record a compute dispatch call. + */ + void dispatch(int3 group_len); + void dispatch(int3 *group_len); + void dispatch(StorageBuffer<DispatchCommand> &indirect_buffer); + + /** + * Record a barrier call to synchronize arbitrary load/store operation between draw calls. + */ + void barrier(eGPUBarrier type); + + /** + * Bind a shader resource. + * + * Reference versions are to be used when the resource might be resize / realloc or even change + * between the time it is referenced and the time it is dereferenced for drawing. + * + * IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure data + * still alive until pass submission. + * + * \note Variations using slot will not query a shader interface and can be used before + * binding a shader. 
+ */ + void bind_image(const char *name, GPUTexture *image); + void bind_image(const char *name, GPUTexture **image); + void bind_image(int slot, GPUTexture *image); + void bind_image(int slot, GPUTexture **image); + void bind_texture(const char *name, GPUTexture *texture, eGPUSamplerState state = sampler_auto); + void bind_texture(const char *name, GPUTexture **texture, eGPUSamplerState state = sampler_auto); + void bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state = sampler_auto); + void bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state = sampler_auto); + void bind_ssbo(const char *name, GPUStorageBuf *buffer); + void bind_ssbo(const char *name, GPUStorageBuf **buffer); + void bind_ssbo(int slot, GPUStorageBuf *buffer); + void bind_ssbo(int slot, GPUStorageBuf **buffer); + void bind_ubo(const char *name, GPUUniformBuf *buffer); + void bind_ubo(const char *name, GPUUniformBuf **buffer); + void bind_ubo(int slot, GPUUniformBuf *buffer); + void bind_ubo(int slot, GPUUniformBuf **buffer); + + /** + * Update a shader constant. + * + * Reference versions are to be used when the resource might change between the time it is + * referenced and the time it is dereferenced for drawing. + * + * IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure data + * still alive until pass submission. + * + * \note bool reference version is expected to take bool1 reference which is aliased to int. 
+ */ + void push_constant(const char *name, const float &data); + void push_constant(const char *name, const float2 &data); + void push_constant(const char *name, const float3 &data); + void push_constant(const char *name, const float4 &data); + void push_constant(const char *name, const int &data); + void push_constant(const char *name, const int2 &data); + void push_constant(const char *name, const int3 &data); + void push_constant(const char *name, const int4 &data); + void push_constant(const char *name, const bool &data); + void push_constant(const char *name, const float4x4 &data); + void push_constant(const char *name, const float *data, int array_len = 1); + void push_constant(const char *name, const float2 *data, int array_len = 1); + void push_constant(const char *name, const float3 *data, int array_len = 1); + void push_constant(const char *name, const float4 *data, int array_len = 1); + void push_constant(const char *name, const int *data, int array_len = 1); + void push_constant(const char *name, const int2 *data, int array_len = 1); + void push_constant(const char *name, const int3 *data, int array_len = 1); + void push_constant(const char *name, const int4 *data, int array_len = 1); + void push_constant(const char *name, const float4x4 *data); + + /** + * Turn the pass into a string for inspection. + */ + std::string serialize(std::string line_prefix = "") const; + + friend std::ostream &operator<<(std::ostream &stream, const PassBase &pass) + { + return stream << pass.serialize(); + } + + protected: + /** + * Internal Helpers + */ + + int push_constant_offset(const char *name); + + void clear(eGPUFrameBufferBits planes, float4 color, float depth, uint8_t stencil); + + GPUBatch *procedural_batch_get(GPUPrimType primitive); + + /** + * Return a new command recorded with the given type. 
+ */ + command::Undetermined &create_command(command::Type type); + + void submit(command::RecordingState &state) const; +}; + +template<typename DrawCommandBufType> class Pass : public detail::PassBase<DrawCommandBufType> { + public: + using Sub = detail::PassBase<DrawCommandBufType>; + + private: + /** Sub-passes referenced by headers. */ + SubPassVector<detail::PassBase<DrawCommandBufType>> sub_passes_main_; + /** Draws are recorded as indirect draws for compatibility with the multi-draw pipeline. */ + DrawCommandBufType draw_commands_buf_main_; + + public: + Pass(const char *name) + : detail::PassBase<DrawCommandBufType>(name, draw_commands_buf_main_, sub_passes_main_){}; + + void init() + { + this->headers_.clear(); + this->commands_.clear(); + this->sub_passes_.clear(); + this->draw_commands_buf_.clear(); + } +}; // namespace blender::draw + +} // namespace detail + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Pass types + * \{ */ + +/** + * Normal pass type. No visibility or draw-call optimization. + */ +// using PassSimple = detail::Pass<DrawCommandBuf>; + +/** + * Main pass type. + * Optimized for many draw calls and sub-pass. + * + * IMPORTANT: To be used only for passes containing lots of draw calls since it has a potentially + * high overhead due to batching and culling optimizations. + */ +// using PassMain = detail::Pass<DrawMultiBuf>; + +/** + * Special pass type for rendering transparent objects. + * The base level can only be composed of sub passes that will be ordered by a sorting value. + */ +class PassSortable : public PassMain { + friend Manager; + + private: + /** Sorting value associated with each sub pass. 
*/ + Vector<float> sorting_values_; + + bool sorted_ = false; + + public: + PassSortable(const char *name_) : PassMain(name_){}; + + void init() + { + sorting_values_.clear(); + sorted_ = false; + PassMain::init(); + } + + PassMain::Sub &sub(const char *name, float sorting_value) + { + int64_t index = sub_passes_.append_and_get_index( + PassBase(name, draw_commands_buf_, sub_passes_, shader_)); + headers_.append({Type::SubPass, static_cast<uint>(index)}); + sorting_values_.append(sorting_value); + return sub_passes_[index]; + } + + std::string serialize(std::string line_prefix = "") const + { + if (sorted_ == false) { + const_cast<PassSortable *>(this)->sort(); + } + return PassMain::serialize(line_prefix); + } + + protected: + void sort() + { + if (sorted_ == false) { + std::sort(headers_.begin(), headers_.end(), [&](Header &a, Header &b) { + BLI_assert(a.type == Type::SubPass && b.type == Type::SubPass); + float a_val = sorting_values_[a.index]; + float b_val = sorting_values_[b.index]; + return a_val < b_val || (a_val == b_val && a.index < b.index); + }); + sorted_ = true; + } + } +}; + +/** \} */ + +namespace detail { + +/* -------------------------------------------------------------------- */ +/** \name PassBase Implementation + * \{ */ + +template<class T> inline command::Undetermined &PassBase<T>::create_command(command::Type type) +{ + int64_t index = commands_.append_and_get_index({}); + headers_.append({type, static_cast<uint>(index)}); + return commands_[index]; +} + +template<class T> +inline void PassBase<T>::clear(eGPUFrameBufferBits planes, + float4 color, + float depth, + uint8_t stencil) +{ + create_command(command::Type::Clear).clear = {(uint8_t)planes, stencil, depth, color}; +} + +template<class T> inline GPUBatch *PassBase<T>::procedural_batch_get(GPUPrimType primitive) +{ + switch (primitive) { + case GPU_PRIM_POINTS: + return drw_cache_procedural_points_get(); + case GPU_PRIM_LINES: + return drw_cache_procedural_lines_get(); + case 
GPU_PRIM_TRIS: + return drw_cache_procedural_triangles_get(); + case GPU_PRIM_TRI_STRIP: + return drw_cache_procedural_triangle_strips_get(); + default: + /* Add new one as needed. */ + BLI_assert_unreachable(); + return nullptr; + } +} + +template<class T> inline PassBase<T> &PassBase<T>::sub(const char *name) +{ + int64_t index = sub_passes_.append_and_get_index( + PassBase(name, draw_commands_buf_, sub_passes_, shader_)); + headers_.append({command::Type::SubPass, static_cast<uint>(index)}); + return sub_passes_[index]; +} + +template<class T> void PassBase<T>::submit(command::RecordingState &state) const +{ + GPU_debug_group_begin(debug_name); + + for (const command::Header &header : headers_) { + switch (header.type) { + default: + case Type::None: + break; + case Type::SubPass: + sub_passes_[header.index].submit(state); + break; + case command::Type::ShaderBind: + commands_[header.index].shader_bind.execute(state); + break; + case command::Type::ResourceBind: + commands_[header.index].resource_bind.execute(); + break; + case command::Type::PushConstant: + commands_[header.index].push_constant.execute(state); + break; + case command::Type::Draw: + commands_[header.index].draw.execute(state); + break; + case command::Type::DrawMulti: + commands_[header.index].draw_multi.execute(state); + break; + case command::Type::DrawIndirect: + commands_[header.index].draw_indirect.execute(state); + break; + case command::Type::Dispatch: + commands_[header.index].dispatch.execute(state); + break; + case command::Type::DispatchIndirect: + commands_[header.index].dispatch_indirect.execute(state); + break; + case command::Type::Barrier: + commands_[header.index].barrier.execute(); + break; + case command::Type::Clear: + commands_[header.index].clear.execute(); + break; + case command::Type::StateSet: + commands_[header.index].state_set.execute(state); + break; + case command::Type::StencilSet: + commands_[header.index].stencil_set.execute(); + break; + } + } + + 
GPU_debug_group_end(); +} + +template<class T> std::string PassBase<T>::serialize(std::string line_prefix) const +{ + std::stringstream ss; + ss << line_prefix << "." << debug_name << std::endl; + line_prefix += " "; + for (const command::Header &header : headers_) { + switch (header.type) { + default: + case Type::None: + break; + case Type::SubPass: + ss << sub_passes_[header.index].serialize(line_prefix); + break; + case Type::ShaderBind: + ss << line_prefix << commands_[header.index].shader_bind.serialize() << std::endl; + break; + case Type::ResourceBind: + ss << line_prefix << commands_[header.index].resource_bind.serialize() << std::endl; + break; + case Type::PushConstant: + ss << line_prefix << commands_[header.index].push_constant.serialize() << std::endl; + break; + case Type::Draw: + ss << line_prefix << commands_[header.index].draw.serialize() << std::endl; + break; + case Type::DrawMulti: + ss << commands_[header.index].draw_multi.serialize(line_prefix); + break; + case Type::DrawIndirect: + ss << line_prefix << commands_[header.index].draw_indirect.serialize() << std::endl; + break; + case Type::Dispatch: + ss << line_prefix << commands_[header.index].dispatch.serialize() << std::endl; + break; + case Type::DispatchIndirect: + ss << line_prefix << commands_[header.index].dispatch_indirect.serialize() << std::endl; + break; + case Type::Barrier: + ss << line_prefix << commands_[header.index].barrier.serialize() << std::endl; + break; + case Type::Clear: + ss << line_prefix << commands_[header.index].clear.serialize() << std::endl; + break; + case Type::StateSet: + ss << line_prefix << commands_[header.index].state_set.serialize() << std::endl; + break; + case Type::StencilSet: + ss << line_prefix << commands_[header.index].stencil_set.serialize() << std::endl; + break; + } + } + return ss.str(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw calls + * \{ */ + +template<class T> +inline 
void PassBase<T>::draw( + GPUBatch *batch, uint instance_len, uint vertex_len, uint vertex_first, ResourceHandle handle) +{ + if (instance_len == 0 || vertex_len == 0) { + return; + } + BLI_assert(shader_); + draw_commands_buf_.append_draw( + headers_, commands_, batch, instance_len, vertex_len, vertex_first, handle); +} + +template<class T> inline void PassBase<T>::draw(GPUBatch *batch, ResourceHandle handle) +{ + this->draw(batch, -1, -1, -1, handle); +} + +template<class T> +inline void PassBase<T>::draw_procedural(GPUPrimType primitive, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) +{ + this->draw(procedural_batch_get(primitive), instance_len, vertex_len, vertex_first, handle); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Indirect draw calls + * \{ */ + +template<class T> +inline void PassBase<T>::draw_indirect(GPUBatch *batch, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle) +{ + BLI_assert(shader_); + create_command(Type::DrawIndirect).draw_indirect = {batch, &indirect_buffer, handle}; +} + +template<class T> +inline void PassBase<T>::draw_procedural_indirect( + GPUPrimType primitive, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle) +{ + this->draw_indirect(procedural_batch_get(primitive), indirect_buffer, handle); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Compute Dispatch Implementation + * \{ */ + +template<class T> inline void PassBase<T>::dispatch(int3 group_len) +{ + BLI_assert(shader_); + create_command(Type::Dispatch).dispatch = {group_len}; +} + +template<class T> inline void PassBase<T>::dispatch(int3 *group_len) +{ + BLI_assert(shader_); + create_command(Type::Dispatch).dispatch = {group_len}; +} + +template<class T> +inline void PassBase<T>::dispatch(StorageBuffer<DispatchCommand> &indirect_buffer) +{ + BLI_assert(shader_); + 
create_command(Type::DispatchIndirect).dispatch_indirect = {&indirect_buffer}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Clear Implementation + * \{ */ + +template<class T> inline void PassBase<T>::clear_color(float4 color) +{ + this->clear(GPU_COLOR_BIT, color, 0.0f, 0); +} + +template<class T> inline void PassBase<T>::clear_depth(float depth) +{ + this->clear(GPU_DEPTH_BIT, float4(0.0f), depth, 0); +} + +template<class T> inline void PassBase<T>::clear_stencil(uint8_t stencil) +{ + this->clear(GPU_STENCIL_BIT, float4(0.0f), 0.0f, stencil); +} + +template<class T> inline void PassBase<T>::clear_depth_stencil(float depth, uint8_t stencil) +{ + this->clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT, float4(0.0f), depth, stencil); +} + +template<class T> +inline void PassBase<T>::clear_color_depth_stencil(float4 color, float depth, uint8_t stencil) +{ + this->clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT | GPU_COLOR_BIT, color, depth, stencil); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Barrier Implementation + * \{ */ + +template<class T> inline void PassBase<T>::barrier(eGPUBarrier type) +{ + create_command(Type::Barrier).barrier = {type}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name State Implementation + * \{ */ + +template<class T> inline void PassBase<T>::state_set(DRWState state) +{ + create_command(Type::StateSet).state_set = {state}; +} + +template<class T> +inline void PassBase<T>::state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask) +{ + create_command(Type::StencilSet).stencil_set = {write_mask, reference, compare_mask}; +} + +template<class T> inline void PassBase<T>::shader_set(GPUShader *shader) +{ + shader_ = shader; + create_command(Type::ShaderBind).shader_bind = {shader}; +} + +template<class T> inline void PassBase<T>::material_set(Manager &manager, GPUMaterial 
*material) +{ + GPUPass *gpupass = GPU_material_get_pass(material); + shader_set(GPU_pass_shader_get(gpupass)); + + /* Bind all textures needed by the material. */ + ListBase textures = GPU_material_textures(material); + for (GPUMaterialTexture *tex : ListBaseWrapper<GPUMaterialTexture>(textures)) { + if (tex->ima) { + /* Image */ + ImageUser *iuser = tex->iuser_available ? &tex->iuser : nullptr; + if (tex->tiled_mapping_name[0]) { + GPUTexture *tiles = BKE_image_get_gpu_tiles(tex->ima, iuser, nullptr); + manager.acquire_texture(tiles); + bind_texture(tex->sampler_name, tiles, (eGPUSamplerState)tex->sampler_state); + + GPUTexture *tile_map = BKE_image_get_gpu_tilemap(tex->ima, iuser, nullptr); + manager.acquire_texture(tile_map); + bind_texture(tex->tiled_mapping_name, tile_map, (eGPUSamplerState)tex->sampler_state); + } + else { + GPUTexture *texture = BKE_image_get_gpu_texture(tex->ima, iuser, nullptr); + manager.acquire_texture(texture); + bind_texture(tex->sampler_name, texture, (eGPUSamplerState)tex->sampler_state); + } + } + else if (tex->colorband) { + /* Color Ramp */ + bind_texture(tex->sampler_name, *tex->colorband); + } + } + + GPUUniformBuf *ubo = GPU_material_uniform_buffer_get(material); + if (ubo != nullptr) { + bind_ubo(GPU_UBO_BLOCK_NAME, ubo); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Resource bind Implementation + * \{ */ + +template<class T> inline int PassBase<T>::push_constant_offset(const char *name) +{ + return GPU_shader_get_uniform(shader_, name); +} + +template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf *buffer) +{ + this->bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer); +} + +template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf *buffer) +{ + this->bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer); +} + +template<class T> +inline void PassBase<T>::bind_texture(const char *name, + 
GPUTexture *texture, + eGPUSamplerState state) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state); +} + +template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture *image) +{ + this->bind_image(GPU_shader_get_texture_binding(shader_, name), image); +} + +template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf *buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf *buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> +inline void PassBase<T>::bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state) +{ + create_command(Type::ResourceBind).resource_bind = {slot, texture, state}; +} + +template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture *image) +{ + create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)}; +} + +template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf **buffer) +{ + this->bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer); +} + +template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf **buffer) +{ + this->bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer); +} + +template<class T> +inline void PassBase<T>::bind_texture(const char *name, + GPUTexture **texture, + eGPUSamplerState state) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state); +} + +template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture **image) +{ + this->bind_image(GPU_shader_get_texture_binding(shader_, name), image); +} + +template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf **buffer) +{ + + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf 
**buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> +inline void PassBase<T>::bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state) +{ + create_command(Type::ResourceBind).resource_bind = {slot, texture, state}; +} + +template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture **image) +{ + create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Push Constant Implementation + * \{ */ + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float2 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float3 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int2 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int3 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void 
PassBase<T>::push_constant(const char *name, const int4 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const bool &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float2 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float3 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float4 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int2 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int3 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int4 *data, int array_len) +{ + 
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 *data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 &data) +{ + /* WORKAROUND: Push 3 consecutive commands to hold the 64 bytes of the float4x4. + * This assumes that all commands are always stored in flat array of memory. */ + Undetermined commands[3]; + + PushConstant &cmd = commands[0].push_constant; + cmd.location = push_constant_offset(name); + cmd.array_len = 1; + cmd.comp_len = 16; + cmd.type = PushConstant::Type::FloatValue; + /* Copy overrides the next 2 commands. We append them as Type::None to not evaluate them. */ + *reinterpret_cast<float4x4 *>(&cmd.float4_value) = data; + + create_command(Type::PushConstant) = commands[0]; + create_command(Type::None) = commands[1]; + create_command(Type::None) = commands[2]; +} + +/** \} */ + +} // namespace detail + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_resource.cc b/source/blender/draw/intern/draw_resource.cc new file mode 100644 index 00000000000..689df4edb31 --- /dev/null +++ b/source/blender/draw/intern/draw_resource.cc @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "DNA_particle_types.h" +#include "RNA_access.h" +#include "RNA_path.h" +#include "RNA_types.h" + +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_shader_shared.h" + +/* -------------------------------------------------------------------- */ +/** \name ObjectAttributes + * \{ */ + +/** + * Extract object attribute from RNA property. + * Returns true if the attribute was correctly extracted. 
+ * This function mirrors lookup_property in cycles/blender/blender_object.cpp + */ +bool ObjectAttribute::id_property_lookup(ID *id, const char *name) +{ + PointerRNA ptr, id_ptr; + PropertyRNA *prop; + + if (id == nullptr) { + return false; + } + + RNA_id_pointer_create(id, &id_ptr); + + if (!RNA_path_resolve(&id_ptr, name, &ptr, &prop)) { + return false; + } + + if (prop == nullptr) { + return false; + } + + PropertyType type = RNA_property_type(prop); + int array_len = RNA_property_array_length(&ptr, prop); + + if (array_len == 0) { + float value; + + if (type == PROP_FLOAT) { + value = RNA_property_float_get(&ptr, prop); + } + else if (type == PROP_INT) { + value = RNA_property_int_get(&ptr, prop); + } + else { + return false; + } + + *reinterpret_cast<float4 *>(&data_x) = float4(value, value, value, 1.0f); + return true; + } + + if (type == PROP_FLOAT && array_len <= 4) { + *reinterpret_cast<float4 *>(&data_x) = float4(0.0f, 0.0f, 0.0f, 1.0f); + RNA_property_float_get_array(&ptr, prop, &data_x); + return true; + } + return false; +} + +/** + * Go through all possible source of the given object uniform attribute. + * Returns true if the attribute was correctly filled. + * This function mirrors lookup_instance_property in cycles/blender/blender_object.cpp + */ +bool ObjectAttribute::sync(const blender::draw::ObjectRef &ref, const GPUUniformAttr &attr) +{ + hash_code = attr.hash_code; + + /* If requesting instance data, check the parent particle system and object. 
*/ + if (attr.use_dupli) { + if ((ref.dupli_object != nullptr) && (ref.dupli_object->particle_system != nullptr)) { + ParticleSettings *settings = ref.dupli_object->particle_system->part; + if (this->id_property_lookup((ID *)settings, attr.name_id_prop) || + this->id_property_lookup((ID *)settings, attr.name)) { + return true; + } + } + if (this->id_property_lookup((ID *)ref.dupli_parent, attr.name_id_prop) || + this->id_property_lookup((ID *)ref.dupli_parent, attr.name)) { + return true; + } + } + + /* Check the object and mesh. */ + if (ref.object != nullptr) { + if (this->id_property_lookup((ID *)ref.object, attr.name_id_prop) || + this->id_property_lookup((ID *)ref.object, attr.name) || + this->id_property_lookup((ID *)ref.object->data, attr.name_id_prop) || + this->id_property_lookup((ID *)ref.object->data, attr.name)) { + return true; + } + } + return false; +} + +/** \} */ diff --git a/source/blender/draw/intern/draw_resource.hh b/source/blender/draw/intern/draw_resource.hh new file mode 100644 index 00000000000..2df38e32ed2 --- /dev/null +++ b/source/blender/draw/intern/draw_resource.hh @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Component / Object level resources like object attributes, matrices, visibility etc... + * Each of them are reference by resource index (#ResourceHandle). 
+ */ + +#include "BKE_curve.h" +#include "BKE_duplilist.h" +#include "BKE_mesh.h" +#include "BKE_object.h" +#include "BKE_volume.h" +#include "BLI_hash.h" +#include "DNA_curve_types.h" +#include "DNA_layer_types.h" +#include "DNA_meta_types.h" +#include "DNA_object_types.h" + +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_shader_shared.h" + +/* -------------------------------------------------------------------- */ +/** \name ObjectMatrices + * \{ */ + +inline void ObjectMatrices::sync(const Object &object) +{ + model = object.obmat; + model_inverse = object.imat; +} + +inline void ObjectMatrices::sync(const float4x4 &model_matrix) +{ + model = model_matrix; + model_inverse = model_matrix.inverted(); +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectMatrices &matrices) +{ + stream << "ObjectMatrices(" << std::endl; + stream << "model=" << matrices.model << ", " << std::endl; + stream << "model_inverse=" << matrices.model_inverse << ")" << std::endl; + return stream; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name ObjectInfos + * \{ */ + +ENUM_OPERATORS(eObjectInfoFlag, OBJECT_NEGATIVE_SCALE) + +inline void ObjectInfos::sync() +{ + object_attrs_len = 0; + object_attrs_offset = 0; + + flag = eObjectInfoFlag::OBJECT_NO_INFO; +} + +inline void ObjectInfos::sync(const blender::draw::ObjectRef ref, bool is_active_object) +{ + object_attrs_len = 0; + object_attrs_offset = 0; + + color = ref.object->color; + index = ref.object->index; + SET_FLAG_FROM_TEST(flag, is_active_object, eObjectInfoFlag::OBJECT_ACTIVE); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_SELECTED, eObjectInfoFlag::OBJECT_SELECTED); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_FROM_DUPLI, eObjectInfoFlag::OBJECT_FROM_DUPLI); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_FROM_SET, eObjectInfoFlag::OBJECT_FROM_SET); + SET_FLAG_FROM_TEST( + flag, ref.object->transflag 
& OB_NEG_SCALE, eObjectInfoFlag::OBJECT_NEGATIVE_SCALE); + + if (ref.dupli_object == nullptr) { + /* TODO(fclem): this is rather costly to do at draw time. Maybe we can + * put it in ob->runtime and make depsgraph ensure it is up to date. */ + random = BLI_hash_int_2d(BLI_hash_string(ref.object->id.name + 2), 0) * + (1.0f / (float)0xFFFFFFFF); + } + else { + random = ref.dupli_object->random_id * (1.0f / (float)0xFFFFFFFF); + } + /* Default values. Set if needed. */ + random = 0.0f; + + if (ref.object->data == nullptr) { + orco_add = float3(0.0f); + orco_mul = float3(1.0f); + return; + } + + switch (GS(reinterpret_cast<ID *>(ref.object->data)->name)) { + case ID_VO: { + BoundBox &bbox = *BKE_volume_boundbox_get(ref.object); + orco_add = (float3(bbox.vec[6]) + float3(bbox.vec[0])) * 0.5f; /* Center. */ + orco_mul = float3(bbox.vec[6]) - float3(bbox.vec[0]); /* Size. */ + break; + } + case ID_ME: { + BKE_mesh_texspace_get(static_cast<Mesh *>(ref.object->data), orco_add, orco_mul); + break; + } + case ID_CU_LEGACY: { + Curve &cu = *static_cast<Curve *>(ref.object->data); + BKE_curve_texspace_ensure(&cu); + orco_add = cu.loc; + orco_mul = cu.size; + break; + } + case ID_MB: { + MetaBall &mb = *static_cast<MetaBall *>(ref.object->data); + orco_add = mb.loc; + orco_mul = mb.size; + break; + } + default: + orco_add = float3(0.0f); + orco_mul = float3(1.0f); + break; + } +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectInfos &infos) +{ + stream << "ObjectInfos("; + if (infos.flag == eObjectInfoFlag::OBJECT_NO_INFO) { + stream << "skipped)" << std::endl; + return stream; + } + stream << "orco_add=" << infos.orco_add << ", "; + stream << "orco_mul=" << infos.orco_mul << ", "; + stream << "color=" << infos.color << ", "; + stream << "index=" << infos.index << ", "; + stream << "random=" << infos.random << ", "; + stream << "flag=" << infos.flag << ")" << std::endl; + return stream; +} + +/** \} */ + +/* 
-------------------------------------------------------------------- */ +/** \name ObjectBounds + * \{ */ + +inline void ObjectBounds::sync() +{ + bounding_sphere.w = -1.0f; /* Disable test. */ +} + +inline void ObjectBounds::sync(Object &ob) +{ + const BoundBox *bbox = BKE_object_boundbox_get(&ob); + if (bbox == nullptr) { + bounding_sphere.w = -1.0f; /* Disable test. */ + return; + } + *reinterpret_cast<float3 *>(&bounding_corners[0]) = bbox->vec[0]; + *reinterpret_cast<float3 *>(&bounding_corners[1]) = bbox->vec[4]; + *reinterpret_cast<float3 *>(&bounding_corners[2]) = bbox->vec[3]; + *reinterpret_cast<float3 *>(&bounding_corners[3]) = bbox->vec[1]; + bounding_sphere.w = 0.0f; /* Enable test. */ +} + +inline void ObjectBounds::sync(const float3 ¢er, const float3 &size) +{ + *reinterpret_cast<float3 *>(&bounding_corners[0]) = center - size; + *reinterpret_cast<float3 *>(&bounding_corners[1]) = center + float3(+size.x, -size.y, -size.z); + *reinterpret_cast<float3 *>(&bounding_corners[2]) = center + float3(-size.x, +size.y, -size.z); + *reinterpret_cast<float3 *>(&bounding_corners[3]) = center + float3(-size.x, -size.y, +size.z); + bounding_sphere.w = 0.0; /* Enable test. 
*/ +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectBounds &bounds) +{ + stream << "ObjectBounds("; + if (bounds.bounding_sphere.w == -1.0f) { + stream << "skipped)" << std::endl; + return stream; + } + stream << std::endl; + stream << ".bounding_corners[0]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[0]) << std::endl; + stream << ".bounding_corners[1]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[1]) << std::endl; + stream << ".bounding_corners[2]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[2]) << std::endl; + stream << ".bounding_corners[3]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[3]) << std::endl; + stream << ".sphere=(pos=" << float3(bounds.bounding_sphere) + << ", rad=" << bounds.bounding_sphere.w << std::endl; + stream << ")" << std::endl; + return stream; +} + +/** \} */ diff --git a/source/blender/draw/intern/draw_shader.cc b/source/blender/draw/intern/draw_shader.cc index 001ceb0ae8d..960348b4a94 100644 --- a/source/blender/draw/intern/draw_shader.cc +++ b/source/blender/draw/intern/draw_shader.cc @@ -17,13 +17,15 @@ #include "draw_shader.h" extern "C" char datatoc_common_hair_lib_glsl[]; - extern "C" char datatoc_common_hair_refine_vert_glsl[]; -extern "C" char datatoc_common_hair_refine_comp_glsl[]; -extern "C" char datatoc_gpu_shader_3D_smooth_color_frag_glsl[]; static struct { struct GPUShader *hair_refine_sh[PART_REFINE_MAX_SHADER]; + struct GPUShader *debug_print_display_sh; + struct GPUShader *debug_draw_display_sh; + struct GPUShader *draw_visibility_compute_sh; + struct GPUShader *draw_resource_finalize_sh; + struct GPUShader *draw_command_generate_sh; } e_data = {{nullptr}}; /* -------------------------------------------------------------------- */ @@ -109,6 +111,47 @@ GPUShader *DRW_shader_curves_refine_get(CurvesEvalShader type, eParticleRefineSh return e_data.hair_refine_sh[type]; } +GPUShader *DRW_shader_debug_print_display_get() +{ + if 
(e_data.debug_print_display_sh == nullptr) { + e_data.debug_print_display_sh = GPU_shader_create_from_info_name("draw_debug_print_display"); + } + return e_data.debug_print_display_sh; +} + +GPUShader *DRW_shader_debug_draw_display_get() +{ + if (e_data.debug_draw_display_sh == nullptr) { + e_data.debug_draw_display_sh = GPU_shader_create_from_info_name("draw_debug_draw_display"); + } + return e_data.debug_draw_display_sh; +} + +GPUShader *DRW_shader_draw_visibility_compute_get() +{ + if (e_data.draw_visibility_compute_sh == nullptr) { + e_data.draw_visibility_compute_sh = GPU_shader_create_from_info_name( + "draw_visibility_compute"); + } + return e_data.draw_visibility_compute_sh; +} + +GPUShader *DRW_shader_draw_resource_finalize_get() +{ + if (e_data.draw_resource_finalize_sh == nullptr) { + e_data.draw_resource_finalize_sh = GPU_shader_create_from_info_name("draw_resource_finalize"); + } + return e_data.draw_resource_finalize_sh; +} + +GPUShader *DRW_shader_draw_command_generate_get() +{ + if (e_data.draw_command_generate_sh == nullptr) { + e_data.draw_command_generate_sh = GPU_shader_create_from_info_name("draw_command_generate"); + } + return e_data.draw_command_generate_sh; +} + /** \} */ void DRW_shaders_free() @@ -116,4 +159,9 @@ void DRW_shaders_free() for (int i = 0; i < PART_REFINE_MAX_SHADER; i++) { DRW_SHADER_FREE_SAFE(e_data.hair_refine_sh[i]); } + DRW_SHADER_FREE_SAFE(e_data.debug_print_display_sh); + DRW_SHADER_FREE_SAFE(e_data.debug_draw_display_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_visibility_compute_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_resource_finalize_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_command_generate_sh); } diff --git a/source/blender/draw/intern/draw_shader.h b/source/blender/draw/intern/draw_shader.h index 63d755cc334..3b8c0425fa9 100644 --- a/source/blender/draw/intern/draw_shader.h +++ b/source/blender/draw/intern/draw_shader.h @@ -30,6 +30,12 @@ struct GPUShader *DRW_shader_hair_refine_get(ParticleRefineShader refinement, 
struct GPUShader *DRW_shader_curves_refine_get(CurvesEvalShader type, eParticleRefineShaderType sh_type); +struct GPUShader *DRW_shader_debug_print_display_get(void); +struct GPUShader *DRW_shader_debug_draw_display_get(void); +struct GPUShader *DRW_shader_draw_visibility_compute_get(void); +struct GPUShader *DRW_shader_draw_resource_finalize_get(void); +struct GPUShader *DRW_shader_draw_command_generate_get(void); + void DRW_shaders_free(void); #ifdef __cplusplus diff --git a/source/blender/draw/intern/draw_shader_shared.h b/source/blender/draw/intern/draw_shader_shared.h index 94c0c53dab7..bedbedcf438 100644 --- a/source/blender/draw/intern/draw_shader_shared.h +++ b/source/blender/draw/intern/draw_shader_shared.h @@ -1,14 +1,42 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef GPU_SHADER +# pragma once + # include "GPU_shader.h" # include "GPU_shader_shared_utils.h" +# include "draw_defines.h" typedef struct ViewInfos ViewInfos; typedef struct ObjectMatrices ObjectMatrices; typedef struct ObjectInfos ObjectInfos; +typedef struct ObjectBounds ObjectBounds; typedef struct VolumeInfos VolumeInfos; typedef struct CurvesInfos CurvesInfos; +typedef struct ObjectAttribute ObjectAttribute; +typedef struct DrawCommand DrawCommand; +typedef struct DispatchCommand DispatchCommand; +typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer; +typedef struct DRWDebugVert DRWDebugVert; +typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer; + +# ifdef __cplusplus +/* C++ only forward declarations. */ +struct Object; +struct ID; +struct GPUUniformAttr; + +namespace blender::draw { + +struct ObjectRef; + +} // namespace blender::draw + +# else /* __cplusplus */ +/* C only forward declarations. */ +typedef enum eObjectInfoFlag eObjectInfoFlag; + +# endif #endif #define DRW_SHADER_SHARED_H @@ -40,9 +68,18 @@ struct ViewInfos { float2 viewport_size_inverse; /** Frustum culling data. */ - /** NOTE: vec3 arrays are padded to vec4. */ + /** \note vec3 array padded to vec4. 
*/ float4 frustum_corners[8]; float4 frustum_planes[6]; + float4 frustum_bound_sphere; + + /** For debugging purpose */ + /* Mouse pixel. */ + int2 mouse_pixel; + + /** True if facing needs to be inverted. */ + bool1 is_inverted; + int _pad0; }; BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) @@ -60,23 +97,89 @@ BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) # define CameraTexCoFactors drw_view.viewcamtexcofac #endif +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug draw shapes + * \{ */ + struct ObjectMatrices { - float4x4 drw_modelMatrix; - float4x4 drw_modelMatrixInverse; + float4x4 model; + float4x4 model_inverse; + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(const Object &object); + void sync(const float4x4 &model_matrix); +#endif +}; +BLI_STATIC_ASSERT_ALIGN(ObjectMatrices, 16) + +enum eObjectInfoFlag { + OBJECT_SELECTED = (1u << 0u), + OBJECT_FROM_DUPLI = (1u << 1u), + OBJECT_FROM_SET = (1u << 2u), + OBJECT_ACTIVE = (1u << 3u), + OBJECT_NEGATIVE_SCALE = (1u << 4u), + /* Avoid skipped info to change culling. */ + OBJECT_NO_INFO = ~OBJECT_NEGATIVE_SCALE }; -BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) struct ObjectInfos { - float4 drw_OrcoTexCoFactors[2]; - float4 drw_ObjectColor; - float4 drw_Infos; +#if defined(GPU_SHADER) && !defined(DRAW_FINALIZE_SHADER) + /* TODO Rename to struct member for glsl too. */ + float4 orco_mul_bias[2]; + float4 color; + float4 infos; +#else + /** Uploaded as center + size. Converted to mul+bias to local coord. 
*/ + float3 orco_add; + uint object_attrs_offset; + float3 orco_mul; + uint object_attrs_len; + + float4 color; + uint index; + uint _pad2; + float random; + eObjectInfoFlag flag; +#endif + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(); + void sync(const blender::draw::ObjectRef ref, bool is_active_object); +#endif }; -BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) +BLI_STATIC_ASSERT_ALIGN(ObjectInfos, 16) + +struct ObjectBounds { + /** + * Uploaded as vertex (0, 4, 3, 1) of the bbox in local space, matching XYZ axis order. + * Then processed by GPU and stored as (0, 4-0, 3-0, 1-0) in world space for faster culling. + */ + float4 bounding_corners[4]; + /** Bounding sphere derived from the bounding corner. Computed on GPU. */ + float4 bounding_sphere; + /** Radius of the inscribed sphere derived from the bounding corner. Computed on GPU. */ +#define _inner_sphere_radius bounding_corners[3].w + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(); + void sync(Object &ob); + void sync(const float3 ¢er, const float3 &size); +#endif +}; +BLI_STATIC_ASSERT_ALIGN(ObjectBounds, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Object attributes + * \{ */ struct VolumeInfos { - /* Object to grid-space. */ + /** Object to grid-space. */ float4x4 grids_xform[DRW_GRID_PER_VOLUME_MAX]; - /* NOTE: vec4 for alignment. Only float3 needed. */ + /** \note vec4 for alignment. Only float3 needed. */ float4 color_mul; float density_scale; float temperature_mul; @@ -86,13 +189,127 @@ struct VolumeInfos { BLI_STATIC_ASSERT_ALIGN(VolumeInfos, 16) struct CurvesInfos { - /* Per attribute scope, follows loading order. - * NOTE: uint as bool in GLSL is 4 bytes. */ - uint is_point_attribute[DRW_ATTRIBUTE_PER_CURVES_MAX]; - int _pad; + /** Per attribute scope, follows loading order. + * \note uint as bool in GLSL is 4 bytes. + * \note GLSL pad arrays of scalar to 16 bytes (std140). 
*/ + uint4 is_point_attribute[DRW_ATTRIBUTE_PER_CURVES_MAX]; }; BLI_STATIC_ASSERT_ALIGN(CurvesInfos, 16) -#define OrcoTexCoFactors (drw_infos[resource_id].drw_OrcoTexCoFactors) -#define ObjectInfo (drw_infos[resource_id].drw_Infos) -#define ObjectColor (drw_infos[resource_id].drw_ObjectColor) +#pragma pack(push, 4) +struct ObjectAttribute { + /* Workaround the padding cost from alignment requirements. + * (see GL spec : 7.6.2.2 Standard Uniform Block Layout) */ + float data_x, data_y, data_z, data_w; + uint hash_code; + +#if !defined(GPU_SHADER) && defined(__cplusplus) + bool sync(const blender::draw::ObjectRef &ref, const GPUUniformAttr &attr); + bool id_property_lookup(ID *id, const char *name); +#endif +}; +#pragma pack(pop) +/** \note we only align to 4 bytes and fetch data manually so make sure + * C++ compiler gives us the same size. */ +BLI_STATIC_ASSERT_ALIGN(ObjectAttribute, 20) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Indirect commands structures. + * \{ */ + +struct DrawCommand { + /* TODO(fclem): Rename */ + uint vertex_len; + uint instance_len; + uint vertex_first; +#if defined(GPU_SHADER) + uint base_index; + /** \note base_index is i_first for non-indexed draw-calls. */ +# define _instance_first_array base_index +#else + union { + uint base_index; + /* Use this instead of instance_first_indexed for non indexed draw calls. */ + uint instance_first_array; + }; +#endif + + uint instance_first_indexed; + + uint _pad0, _pad1, _pad2; +}; +BLI_STATIC_ASSERT_ALIGN(DrawCommand, 16) + +struct DispatchCommand { + uint num_groups_x; + uint num_groups_y; + uint num_groups_z; + uint _pad0; +}; +BLI_STATIC_ASSERT_ALIGN(DispatchCommand, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug print + * \{ */ + +/* Take the header (DrawCommand) into account. 
*/ +#define DRW_DEBUG_PRINT_MAX (8 * 1024) - 4 +/** \note Cannot be more than 255 (because of column encoding). */ +#define DRW_DEBUG_PRINT_WORD_WRAP_COLUMN 120u + +/* The debug print buffer is laid-out as the following struct. + * But we use plain array in shader code instead because of driver issues. */ +struct DRWDebugPrintBuffer { + DrawCommand command; + /** Each character is encoded as 3 `uchar` with char_index, row and column position. */ + uint char_array[DRW_DEBUG_PRINT_MAX]; +}; +BLI_STATIC_ASSERT_ALIGN(DRWDebugPrintBuffer, 16) + +/* Use number of char as vertex count. Equivalent to `DRWDebugPrintBuffer.command.v_count`. */ +#define drw_debug_print_cursor drw_debug_print_buf[0] +/* Reuse first instance as row index as we don't use instancing. Equivalent to + * `DRWDebugPrintBuffer.command.i_first`. */ +#define drw_debug_print_row_shared drw_debug_print_buf[3] +/** Offset to the first data. Equal to: `sizeof(DrawCommand) / sizeof(uint)`. + * This is needed because we bind the whole buffer as a `uint` array. */ +#define drw_debug_print_offset 8 + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug draw shapes + * \{ */ + +struct DRWDebugVert { + /* This is a weird layout, but needed to be able to use DRWDebugVert as + * a DrawCommand and avoid alignment issues. See drw_debug_verts_buf[] definition. */ + uint pos0; + uint pos1; + uint pos2; + uint color; +}; +BLI_STATIC_ASSERT_ALIGN(DRWDebugVert, 16) + +/* Take the header (DrawCommand) into account. */ +#define DRW_DEBUG_DRAW_VERT_MAX (64 * 1024) - 1 + +/* The debug draw buffer is laid-out as the following struct. + * But we use plain array in shader code instead because of driver issues. */ +struct DRWDebugDrawBuffer { + DrawCommand command; + DRWDebugVert verts[DRW_DEBUG_DRAW_VERT_MAX]; +}; +BLI_STATIC_ASSERT_ALIGN(DRWDebugPrintBuffer, 16) + +/* Equivalent to `DRWDebugDrawBuffer.command.v_count`. 
*/ +#define drw_debug_draw_v_count drw_debug_verts_buf[0].pos0 +/** Offset to the first data. Equal to: `sizeof(DrawCommand) / sizeof(DRWDebugVert)`. + * This is needed because we bind the whole buffer as a `DRWDebugVert` array. */ +#define drw_debug_draw_offset 2 + +/** \} */ diff --git a/source/blender/draw/intern/draw_state.h b/source/blender/draw/intern/draw_state.h new file mode 100644 index 00000000000..bf1e63e0852 --- /dev/null +++ b/source/blender/draw/intern/draw_state.h @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/** \file + * \ingroup draw + * + * Internal Pipeline State tracking. It is higher level than GPU state as everything fits a single + * enum. + */ + +/** + * DRWState is a bit-mask that stores the current render state and the desired render state. Based + * on the differences the minimum state changes can be invoked to setup the desired render state. + * + * The Write Stencil, Stencil test, Depth test and Blend state options are mutual exclusive + * therefore they aren't ordered as a bit mask. + */ +typedef enum { + /** To be used for compute passes. */ + DRW_STATE_NO_DRAW = 0, + /** Write mask */ + DRW_STATE_WRITE_DEPTH = (1 << 0), + DRW_STATE_WRITE_COLOR = (1 << 1), + /* Write Stencil. These options are mutual exclusive and packed into 2 bits */ + DRW_STATE_WRITE_STENCIL = (1 << 2), + DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2), + DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2), + /** Depth test. These options are mutual exclusive and packed into 3 bits */ + DRW_STATE_DEPTH_ALWAYS = (1 << 4), + DRW_STATE_DEPTH_LESS = (2 << 4), + DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4), + DRW_STATE_DEPTH_EQUAL = (4 << 4), + DRW_STATE_DEPTH_GREATER = (5 << 4), + DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4), + /** Culling test */ + DRW_STATE_CULL_BACK = (1 << 7), + DRW_STATE_CULL_FRONT = (1 << 8), + /** Stencil test. 
These options are mutually exclusive and packed into 2 bits. */ + DRW_STATE_STENCIL_ALWAYS = (1 << 9), + DRW_STATE_STENCIL_EQUAL = (2 << 9), + DRW_STATE_STENCIL_NEQUAL = (3 << 9), + + /** Blend state. These options are mutual exclusive and packed into 4 bits */ + DRW_STATE_BLEND_ADD = (1 << 11), + /** Same as additive but let alpha accumulate without pre-multiply. */ + DRW_STATE_BLEND_ADD_FULL = (2 << 11), + /** Standard alpha blending. */ + DRW_STATE_BLEND_ALPHA = (3 << 11), + /** Use that if color is already pre-multiply by alpha. */ + DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11), + DRW_STATE_BLEND_BACKGROUND = (5 << 11), + DRW_STATE_BLEND_OIT = (6 << 11), + DRW_STATE_BLEND_MUL = (7 << 11), + DRW_STATE_BLEND_SUB = (8 << 11), + /** Use dual source blending. WARNING: Only one color buffer allowed. */ + DRW_STATE_BLEND_CUSTOM = (9 << 11), + DRW_STATE_LOGIC_INVERT = (10 << 11), + DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11), + + DRW_STATE_IN_FRONT_SELECT = (1 << 27), + DRW_STATE_SHADOW_OFFSET = (1 << 28), + DRW_STATE_CLIP_PLANES = (1 << 29), + DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30), + /** DO NOT USE. Assumed always enabled. Only used internally. 
*/ + DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31), +} DRWState; + +ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE); + +#define DRW_STATE_DEFAULT \ + (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL) +#define DRW_STATE_BLEND_ENABLED \ + (DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \ + DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \ + DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT) +#define DRW_STATE_RASTERIZER_ENABLED \ + (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \ + DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) +#define DRW_STATE_DEPTH_TEST_ENABLED \ + (DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \ + DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL) +#define DRW_STATE_STENCIL_TEST_ENABLED \ + (DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL) +#define DRW_STATE_WRITE_STENCIL_ENABLED \ + (DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \ + DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus + +namespace blender::draw { + +/* -------------------------------------------------------------------- */ +/** \name DRWState to GPU state conversion + * \{ */ + +static inline eGPUWriteMask to_write_mask(DRWState state) +{ + eGPUWriteMask write_mask = GPU_WRITE_NONE; + if (state & DRW_STATE_WRITE_DEPTH) { + write_mask |= GPU_WRITE_DEPTH; + } + if (state & DRW_STATE_WRITE_COLOR) { + write_mask |= GPU_WRITE_COLOR; + } + if (state & DRW_STATE_WRITE_STENCIL_ENABLED) { + write_mask |= GPU_WRITE_STENCIL; + } + return write_mask; +} + +static inline eGPUFaceCullTest to_face_cull_test(DRWState state) +{ + switch (state & (DRW_STATE_CULL_BACK | DRW_STATE_CULL_FRONT)) { + case DRW_STATE_CULL_BACK: + return GPU_CULL_BACK; + case 
DRW_STATE_CULL_FRONT: + return GPU_CULL_FRONT; + default: + return GPU_CULL_NONE; + } +} + +static inline eGPUDepthTest to_depth_test(DRWState state) +{ + switch (state & DRW_STATE_DEPTH_TEST_ENABLED) { + case DRW_STATE_DEPTH_LESS: + return GPU_DEPTH_LESS; + case DRW_STATE_DEPTH_LESS_EQUAL: + return GPU_DEPTH_LESS_EQUAL; + case DRW_STATE_DEPTH_EQUAL: + return GPU_DEPTH_EQUAL; + case DRW_STATE_DEPTH_GREATER: + return GPU_DEPTH_GREATER; + case DRW_STATE_DEPTH_GREATER_EQUAL: + return GPU_DEPTH_GREATER_EQUAL; + case DRW_STATE_DEPTH_ALWAYS: + return GPU_DEPTH_ALWAYS; + default: + return GPU_DEPTH_NONE; + } +} + +static inline eGPUStencilOp to_stencil_op(DRWState state) +{ + switch (state & DRW_STATE_WRITE_STENCIL_ENABLED) { + case DRW_STATE_WRITE_STENCIL: + return GPU_STENCIL_OP_REPLACE; + case DRW_STATE_WRITE_STENCIL_SHADOW_PASS: + return GPU_STENCIL_OP_COUNT_DEPTH_PASS; + case DRW_STATE_WRITE_STENCIL_SHADOW_FAIL: + return GPU_STENCIL_OP_COUNT_DEPTH_FAIL; + default: + return GPU_STENCIL_OP_NONE; + } +} + +static inline eGPUStencilTest to_stencil_test(DRWState state) +{ + switch (state & DRW_STATE_STENCIL_TEST_ENABLED) { + case DRW_STATE_STENCIL_ALWAYS: + return GPU_STENCIL_ALWAYS; + case DRW_STATE_STENCIL_EQUAL: + return GPU_STENCIL_EQUAL; + case DRW_STATE_STENCIL_NEQUAL: + return GPU_STENCIL_NEQUAL; + default: + return GPU_STENCIL_NONE; + } +} + +static inline eGPUBlend to_blend(DRWState state) +{ + switch (state & DRW_STATE_BLEND_ENABLED) { + case DRW_STATE_BLEND_ADD: + return GPU_BLEND_ADDITIVE; + case DRW_STATE_BLEND_ADD_FULL: + return GPU_BLEND_ADDITIVE_PREMULT; + case DRW_STATE_BLEND_ALPHA: + return GPU_BLEND_ALPHA; + case DRW_STATE_BLEND_ALPHA_PREMUL: + return GPU_BLEND_ALPHA_PREMULT; + case DRW_STATE_BLEND_BACKGROUND: + return GPU_BLEND_BACKGROUND; + case DRW_STATE_BLEND_OIT: + return GPU_BLEND_OIT; + case DRW_STATE_BLEND_MUL: + return GPU_BLEND_MULTIPLY; + case DRW_STATE_BLEND_SUB: + return GPU_BLEND_SUBTRACT; + case DRW_STATE_BLEND_CUSTOM: + return 
GPU_BLEND_CUSTOM; + case DRW_STATE_LOGIC_INVERT: + return GPU_BLEND_INVERT; + case DRW_STATE_BLEND_ALPHA_UNDER_PREMUL: + return GPU_BLEND_ALPHA_UNDER_PREMUL; + default: + return GPU_BLEND_NONE; + } +} + +static inline eGPUProvokingVertex to_provoking_vertex(DRWState state) +{ + switch (state & DRW_STATE_FIRST_VERTEX_CONVENTION) { + case DRW_STATE_FIRST_VERTEX_CONVENTION: + return GPU_VERTEX_FIRST; + default: + return GPU_VERTEX_LAST; + } +} + +/** \} */ + +}; // namespace blender::draw + +#endif diff --git a/source/blender/draw/intern/draw_subdivision.h b/source/blender/draw/intern/draw_subdivision.h index 2d9f4713feb..37b025e761d 100644 --- a/source/blender/draw/intern/draw_subdivision.h +++ b/source/blender/draw/intern/draw_subdivision.h @@ -177,6 +177,10 @@ typedef struct DRWSubdivCache { /* UBO to store settings for the various compute shaders. */ struct GPUUniformBuf *ubo; + + /* Extra flags, passed to the UBO. */ + bool is_edit_mode; + bool use_hide; } DRWSubdivCache; /* Only frees the data of the cache, caller is responsible to free the cache itself if necessary. 
@@ -195,6 +199,7 @@ void DRW_create_subdivision(struct Object *ob, const float obmat[4][4], const bool do_final, const bool do_uvedit, + const bool do_cage, const ToolSettings *ts, const bool use_hide); diff --git a/source/blender/draw/intern/draw_texture_pool.cc b/source/blender/draw/intern/draw_texture_pool.cc index b36cb5c809e..017ecec7be2 100644 --- a/source/blender/draw/intern/draw_texture_pool.cc +++ b/source/blender/draw/intern/draw_texture_pool.cc @@ -160,6 +160,19 @@ void DRW_texture_pool_texture_release(DRWTexturePool *pool, GPUTexture *tmp_tex) pool->tmp_tex_released.append(tmp_tex); } +void DRW_texture_pool_take_texture_ownership(DRWTexturePool *pool, GPUTexture *tex) +{ + pool->tmp_tex_acquired.remove_first_occurrence_and_reorder(tex); +} + +void DRW_texture_pool_give_texture_ownership(DRWTexturePool *pool, GPUTexture *tex) +{ + BLI_assert(pool->tmp_tex_acquired.first_index_of_try(tex) == -1 && + pool->tmp_tex_released.first_index_of_try(tex) == -1 && + pool->tmp_tex_pruned.first_index_of_try(tex) == -1); + pool->tmp_tex_acquired.append(tex); +} + void DRW_texture_pool_reset(DRWTexturePool *pool) { pool->last_user_id = -1; diff --git a/source/blender/draw/intern/draw_texture_pool.h b/source/blender/draw/intern/draw_texture_pool.h index 1c30ea88552..9fbbf630833 100644 --- a/source/blender/draw/intern/draw_texture_pool.h +++ b/source/blender/draw/intern/draw_texture_pool.h @@ -26,6 +26,7 @@ void DRW_texture_pool_free(DRWTexturePool *pool); /** * Try to find a texture corresponding to params into the texture pool. * If no texture was found, create one and add it to the pool. + * DEPRECATED: Use DRW_texture_pool_texture_acquire instead and do it just before rendering. */ GPUTexture *DRW_texture_pool_query( DRWTexturePool *pool, int width, int height, eGPUTextureFormat format, void *user); @@ -40,6 +41,22 @@ GPUTexture *DRW_texture_pool_texture_acquire(DRWTexturePool *pool, * Releases a previously acquired texture. 
*/ void DRW_texture_pool_texture_release(DRWTexturePool *pool, GPUTexture *tmp_tex); + +/** + * This effectively remove a texture from the texture pool, giving full ownership to the caller. + * The given texture needs to be been acquired through DRW_texture_pool_texture_acquire(). + * IMPORTANT: This removes the need for a DRW_texture_pool_texture_release() call on this texture. + */ +void DRW_texture_pool_take_texture_ownership(DRWTexturePool *pool, GPUTexture *tex); +/** + * This Inserts a texture into the texture pool, giving full ownership to the texture pool. + * The texture needs not to be in the pool already. + * The texture may be reused in a latter call to DRW_texture_pool_texture_acquire(); + * IMPORTANT: DRW_texture_pool_texture_release() still needs to be called on this texture + * after usage. + */ +void DRW_texture_pool_give_texture_ownership(DRWTexturePool *pool, GPUTexture *tex); + /** * Resets the user bits for each texture in the pool and delete unused ones. */ diff --git a/source/blender/draw/intern/draw_view.c b/source/blender/draw/intern/draw_view.c index 817f97cbea4..35ff8891a0f 100644 --- a/source/blender/draw/intern/draw_view.c +++ b/source/blender/draw/intern/draw_view.c @@ -175,7 +175,7 @@ void DRW_draw_cursor(void) GPU_matrix_scale_2f(U.widget_unit, U.widget_unit); GPUBatch *cursor_batch = DRW_cache_cursor_get(is_aligned); - GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_2D_FLAT_COLOR); + GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_3D_FLAT_COLOR); GPU_batch_set_shader(cursor_batch, shader); GPU_batch_draw(cursor_batch); @@ -241,7 +241,7 @@ void DRW_draw_cursor_2d_ex(const ARegion *region, const float cursor[2]) GPUBatch *cursor_batch = DRW_cache_cursor_get(true); - GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_2D_FLAT_COLOR); + GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_3D_FLAT_COLOR); GPU_batch_set_shader(cursor_batch, shader); GPU_batch_draw(cursor_batch); diff --git 
a/source/blender/draw/intern/draw_view.cc b/source/blender/draw/intern/draw_view.cc new file mode 100644 index 00000000000..cb0e1370c28 --- /dev/null +++ b/source/blender/draw/intern/draw_view.cc @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "BLI_math_geom.h" +#include "GPU_compute.h" +#include "GPU_debug.h" + +#include "draw_debug.hh" +#include "draw_shader.h" +#include "draw_view.hh" + +namespace blender::draw { + +void View::sync(const float4x4 &view_mat, const float4x4 &win_mat) +{ + data_.viewmat = view_mat; + data_.viewinv = view_mat.inverted(); + data_.winmat = win_mat; + data_.wininv = win_mat.inverted(); + data_.persmat = data_.winmat * data_.viewmat; + data_.persinv = data_.persmat.inverted(); + /* Should not be used anymore. */ + data_.viewcamtexcofac = float4(1.0f, 1.0f, 0.0f, 0.0f); + + data_.is_inverted = (is_negative_m4(view_mat.ptr()) == is_negative_m4(win_mat.ptr())); + + update_view_vectors(); + + BoundBox &bound_box = *reinterpret_cast<BoundBox *>(&data_.frustum_corners); + BoundSphere &bound_sphere = *reinterpret_cast<BoundSphere *>(&data_.frustum_bound_sphere); + frustum_boundbox_calc(bound_box); + frustum_culling_planes_calc(); + frustum_culling_sphere_calc(bound_box, bound_sphere); + + dirty_ = true; +} + +void View::frustum_boundbox_calc(BoundBox &bbox) +{ + /* Extract the 8 corners from a Projection Matrix. */ +#if 0 /* Equivalent to this but it has accuracy problems. 
*/ + BKE_boundbox_init_from_minmax(&bbox, float3(-1.0f),float3(1.0f)); + for (int i = 0; i < 8; i++) { + mul_project_m4_v3(data_.wininv.ptr(), bbox.vec[i]); + } +#endif + + float left, right, bottom, top, near, far; + bool is_persp = data_.winmat[3][3] == 0.0f; + + projmat_dimensions(data_.winmat.ptr(), &left, &right, &bottom, &top, &near, &far); + + bbox.vec[0][2] = bbox.vec[3][2] = bbox.vec[7][2] = bbox.vec[4][2] = -near; + bbox.vec[0][0] = bbox.vec[3][0] = left; + bbox.vec[4][0] = bbox.vec[7][0] = right; + bbox.vec[0][1] = bbox.vec[4][1] = bottom; + bbox.vec[7][1] = bbox.vec[3][1] = top; + + /* Get the coordinates of the far plane. */ + if (is_persp) { + float sca_far = far / near; + left *= sca_far; + right *= sca_far; + bottom *= sca_far; + top *= sca_far; + } + + bbox.vec[1][2] = bbox.vec[2][2] = bbox.vec[6][2] = bbox.vec[5][2] = -far; + bbox.vec[1][0] = bbox.vec[2][0] = left; + bbox.vec[6][0] = bbox.vec[5][0] = right; + bbox.vec[1][1] = bbox.vec[5][1] = bottom; + bbox.vec[2][1] = bbox.vec[6][1] = top; + + /* Transform into world space. */ + for (int i = 0; i < 8; i++) { + mul_m4_v3(data_.viewinv.ptr(), bbox.vec[i]); + } +} + +void View::frustum_culling_planes_calc() +{ + planes_from_projmat(data_.persmat.ptr(), + data_.frustum_planes[0], + data_.frustum_planes[5], + data_.frustum_planes[1], + data_.frustum_planes[3], + data_.frustum_planes[4], + data_.frustum_planes[2]); + + /* Normalize. */ + for (int p = 0; p < 6; p++) { + data_.frustum_planes[p].w /= normalize_v3(data_.frustum_planes[p]); + } +} + +void View::frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere) +{ + /* Extract Bounding Sphere */ + if (data_.winmat[3][3] != 0.0f) { + /* Orthographic */ + /* The most extreme points on the near and far plane. (normalized device coords). 
*/ + const float *nearpoint = bbox.vec[0]; + const float *farpoint = bbox.vec[6]; + + /* just use median point */ + mid_v3_v3v3(bsphere.center, farpoint, nearpoint); + bsphere.radius = len_v3v3(bsphere.center, farpoint); + } + else if (data_.winmat[2][0] == 0.0f && data_.winmat[2][1] == 0.0f) { + /* Perspective with symmetrical frustum. */ + + /* We obtain the center and radius of the circumscribed circle of the + * isosceles trapezoid composed by the diagonals of the near and far clipping plane */ + + /* center of each clipping plane */ + float mid_min[3], mid_max[3]; + mid_v3_v3v3(mid_min, bbox.vec[3], bbox.vec[4]); + mid_v3_v3v3(mid_max, bbox.vec[2], bbox.vec[5]); + + /* square length of the diagonals of each clipping plane */ + float a_sq = len_squared_v3v3(bbox.vec[3], bbox.vec[4]); + float b_sq = len_squared_v3v3(bbox.vec[2], bbox.vec[5]); + + /* distance squared between clipping planes */ + float h_sq = len_squared_v3v3(mid_min, mid_max); + + float fac = (4 * h_sq + b_sq - a_sq) / (8 * h_sq); + + /* The goal is to get the smallest sphere, + * not the sphere that passes through each corner */ + CLAMP(fac, 0.0f, 1.0f); + + interp_v3_v3v3(bsphere.center, mid_min, mid_max, fac); + + /* distance from the center to one of the points of the far plane (1, 2, 5, 6) */ + bsphere.radius = len_v3v3(bsphere.center, bbox.vec[1]); + } + else { + /* Perspective with asymmetrical frustum. */ + + /* We put the sphere center on the line that goes from origin + * to the center of the far clipping plane. 
*/ + + /* Detect which of the corner of the far clipping plane is the farthest to the origin */ + float nfar[4]; /* most extreme far point in NDC space */ + float farxy[2]; /* far-point projection onto the near plane */ + float farpoint[3] = {0.0f}; /* most extreme far point in camera coordinate */ + float nearpoint[3]; /* most extreme near point in camera coordinate */ + float farcenter[3] = {0.0f}; /* center of far clipping plane in camera coordinate */ + float F = -1.0f, N; /* square distance of far and near point to origin */ + float f, n; /* distance of far and near point to z axis. f is always > 0 but n can be < 0 */ + float e, s; /* far and near clipping distance (<0) */ + float c; /* slope of center line = distance of far clipping center + * to z axis / far clipping distance. */ + float z; /* projection of sphere center on z axis (<0) */ + + /* Find farthest corner and center of far clip plane. */ + float corner[3] = {1.0f, 1.0f, 1.0f}; /* in clip space */ + for (int i = 0; i < 4; i++) { + float point[3]; + mul_v3_project_m4_v3(point, data_.wininv.ptr(), corner); + float len = len_squared_v3(point); + if (len > F) { + copy_v3_v3(nfar, corner); + copy_v3_v3(farpoint, point); + F = len; + } + add_v3_v3(farcenter, point); + /* rotate by 90 degree to walk through the 4 points of the far clip plane */ + float tmp = corner[0]; + corner[0] = -corner[1]; + corner[1] = tmp; + } + + /* the far center is the average of the far clipping points */ + mul_v3_fl(farcenter, 0.25f); + /* the extreme near point is the opposite point on the near clipping plane */ + copy_v3_fl3(nfar, -nfar[0], -nfar[1], -1.0f); + mul_v3_project_m4_v3(nearpoint, data_.wininv.ptr(), nfar); + /* this is a frustum projection */ + N = len_squared_v3(nearpoint); + e = farpoint[2]; + s = nearpoint[2]; + /* distance to view Z axis */ + f = len_v2(farpoint); + /* get corresponding point on the near plane */ + mul_v2_v2fl(farxy, farpoint, s / e); + /* this formula preserve the sign of n */ + 
sub_v2_v2(nearpoint, farxy); + n = f * s / e - len_v2(nearpoint); + c = len_v2(farcenter) / e; + /* the big formula, it simplifies to (F-N)/(2(e-s)) for the symmetric case */ + z = (F - N) / (2.0f * (e - s + c * (f - n))); + + bsphere.center[0] = farcenter[0] * z / e; + bsphere.center[1] = farcenter[1] * z / e; + bsphere.center[2] = z; + + /* For XR, the view matrix may contain a scale factor. Then, transforming only the center + * into world space after calculating the radius will result in incorrect behavior. */ + mul_m4_v3(data_.viewinv.ptr(), bsphere.center); /* Transform to world space. */ + mul_m4_v3(data_.viewinv.ptr(), farpoint); + bsphere.radius = len_v3v3(bsphere.center, farpoint); + } +} + +void View::set_clip_planes(Span<float4> planes) +{ + BLI_assert(planes.size() <= ARRAY_SIZE(data_.clip_planes)); + int i = 0; + for (const auto &plane : planes) { + data_.clip_planes[i++] = plane; + } +} + +void View::update_viewport_size() +{ + float4 viewport; + GPU_viewport_size_get_f(viewport); + float2 viewport_size = float2(viewport.z, viewport.w); + if (assign_if_different(data_.viewport_size, viewport_size)) { + dirty_ = true; + } +} + +void View::update_view_vectors() +{ + bool is_persp = data_.winmat[3][3] == 0.0f; + + /* Near clip distance. */ + data_.viewvecs[0][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f) : + -(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2]; + + /* Far clip distance. */ + data_.viewvecs[1][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f) : + -(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2]; + + /* View vectors for the corners of the view frustum. 
+ * Can be used to recreate the world space position easily */ + float3 view_vecs[4] = { + {-1.0f, -1.0f, -1.0f}, + {1.0f, -1.0f, -1.0f}, + {-1.0f, 1.0f, -1.0f}, + {-1.0f, -1.0f, 1.0f}, + }; + + /* Convert the view vectors to view space */ + for (int i = 0; i < 4; i++) { + mul_project_m4_v3(data_.wininv.ptr(), view_vecs[i]); + /* Normalized trick see: + * http://www.derschmale.com/2014/01/26/reconstructing-positions-from-the-depth-buffer */ + if (is_persp) { + view_vecs[i].x /= view_vecs[i].z; + view_vecs[i].y /= view_vecs[i].z; + } + } + + /** + * - If orthographic: + * `view_vecs[0]` is the near-bottom-left corner of the frustum and + * `view_vecs[1]` is the vector going from the near-bottom-left corner to + * the far-top-right corner. + * - If perspective: + * `view_vecs[0].xy` and `view_vecs[1].xy` are respectively the bottom-left corner + * when `Z = 1`, and top-left corner if `Z = 1`. + * `view_vecs[0].z` the near clip distance and `view_vecs[1].z` is the (signed) + * distance from the near plane to the far clip plane. + */ + copy_v3_v3(data_.viewvecs[0], view_vecs[0]); + + /* we need to store the differences */ + data_.viewvecs[1][0] = view_vecs[1][0] - view_vecs[0][0]; + data_.viewvecs[1][1] = view_vecs[2][1] - view_vecs[0][1]; + data_.viewvecs[1][2] = view_vecs[3][2] - view_vecs[0][2]; +} + +void View::bind() +{ + update_viewport_size(); + + if (dirty_) { + dirty_ = false; + data_.push_update(); + } + + GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT); +} + +void View::compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze) +{ + if (debug_freeze && frozen_ == false) { + data_freeze_ = static_cast<ViewInfos>(data_); + data_freeze_.push_update(); + } +#ifdef DEBUG + if (debug_freeze) { + drw_debug_matrix_as_bbox(data_freeze_.persinv, float4(0, 1, 0, 1)); + } +#endif + frozen_ = debug_freeze; + + GPU_debug_group_begin("View.compute_visibility"); + + /* TODO(fclem): Early out if visibility hasn't changed. 
*/ + /* TODO(fclem): Resize to nearest pow2 to reduce fragmentation. */ + visibility_buf_.resize(divide_ceil_u(resource_len, 128)); + + uint32_t data = 0xFFFFFFFFu; + GPU_storagebuf_clear(visibility_buf_, GPU_R32UI, GPU_DATA_UINT, &data); + + if (do_visibility_) { + GPUShader *shader = DRW_shader_draw_visibility_compute_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "resource_len", resource_len); + GPU_storagebuf_bind(bounds, GPU_shader_get_ssbo(shader, "bounds_buf")); + GPU_storagebuf_bind(visibility_buf_, GPU_shader_get_ssbo(shader, "visibility_buf")); + GPU_uniformbuf_bind((frozen_) ? data_freeze_ : data_, DRW_VIEW_UBO_SLOT); + GPU_compute_dispatch(shader, divide_ceil_u(resource_len, DRW_VISIBILITY_GROUP_SIZE), 1, 1); + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + if (frozen_) { + /* Bind back the non frozen data. */ + GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT); + } + + GPU_debug_group_end(); +} + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_view.hh b/source/blender/draw/intern/draw_view.hh new file mode 100644 index 00000000000..27e7a7a0028 --- /dev/null +++ b/source/blender/draw/intern/draw_view.hh @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + */ + +#include "DRW_gpu_wrapper.hh" +#include "DRW_render.h" + +#include "draw_shader_shared.h" + +namespace blender::draw { + +class Manager; + +/* TODO: de-duplicate. */ +using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>; +/** \note Using uint4 for declaration but bound as uint. */ +using VisibilityBuf = StorageArrayBuffer<uint4, 1, true>; + +class View { + friend Manager; + + private: + UniformBuffer<ViewInfos> data_; + /** Frozen version of data_ used for debugging culling. */ + UniformBuffer<ViewInfos> data_freeze_; + /** Result of the visibility computation. 1 bit per resource ID. 
*/ + VisibilityBuf visibility_buf_; + + const char *debug_name_; + + bool do_visibility_ = true; + bool dirty_ = true; + bool frozen_ = false; + + public: + View(const char *name) : visibility_buf_(name), debug_name_(name){}; + /* For compatibility with old system. Will be removed at some point. */ + View(const char *name, const DRWView *view) : visibility_buf_(name), debug_name_(name) + { + float4x4 view_mat, win_mat; + DRW_view_viewmat_get(view, view_mat.ptr(), false); + DRW_view_winmat_get(view, win_mat.ptr(), false); + this->sync(view_mat, win_mat); + } + + void set_clip_planes(Span<float4> planes); + + void sync(const float4x4 &view_mat, const float4x4 &win_mat); + + bool is_persp() const + { + return data_.winmat[3][3] == 0.0f; + } + + bool is_inverted() const + { + return data_.is_inverted; + } + + float far_clip() const + { + if (is_persp()) { + return -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f); + } + return -(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2]; + } + + float near_clip() const + { + if (is_persp()) { + return -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f); + } + return -(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2]; + } + + private: + /** Called from draw manager. 
*/ + void bind(); + void compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze); + + void update_view_vectors(); + void update_viewport_size(); + + void frustum_boundbox_calc(BoundBox &bbox); + void frustum_culling_planes_calc(); + void frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere); +}; + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_view_data.cc b/source/blender/draw/intern/draw_view_data.cc index 3dc28dc9a9a..58d826e0218 100644 --- a/source/blender/draw/intern/draw_view_data.cc +++ b/source/blender/draw/intern/draw_view_data.cc @@ -7,6 +7,7 @@ #include "BLI_vector.hh" +#include "GPU_capabilities.h" #include "GPU_viewport.h" #include "DRW_render.h" @@ -16,6 +17,7 @@ #include "draw_manager_text.h" #include "draw_manager.h" +#include "draw_manager.hh" #include "draw_view_data.h" using namespace blender; @@ -33,6 +35,22 @@ struct DRWViewData { Vector<ViewportEngineData> engines; Vector<ViewportEngineData *> enabled_engines; + + /** New per view/viewport manager. Null if not supported by current hardware. */ + draw::Manager *manager = nullptr; + + DRWViewData() + { + /* Only for GL >= 4.3 implementation for now. 
*/ + if (GPU_shader_storage_buffer_objects_support() && GPU_compute_shader_support()) { + manager = new draw::Manager(); + } + }; + + ~DRWViewData() + { + delete manager; + }; }; DRWViewData *DRW_view_data_create(ListBase *engine_types) @@ -197,6 +215,16 @@ void DRW_view_data_free_unused(DRWViewData *view_data) } } +void DRW_view_data_engines_view_update(DRWViewData *view_data) +{ + for (ViewportEngineData &engine_data : view_data->engines) { + DrawEngineType *draw_engine = engine_data.engine_type->draw_engine; + if (draw_engine->view_update) { + draw_engine->view_update(&engine_data); + } + } +} + double *DRW_view_data_cache_time_get(DRWViewData *view_data) { return &view_data->cache_time; @@ -227,3 +255,31 @@ ViewportEngineData *DRW_view_data_enabled_engine_iter_step(DRWEngineIterator *it ViewportEngineData *engine = iterator->engines[iterator->id++]; return engine; } + +draw::Manager *DRW_manager_get() +{ + BLI_assert(DST.view_data_active->manager); + return reinterpret_cast<draw::Manager *>(DST.view_data_active->manager); +} + +draw::ObjectRef DRW_object_ref_get(Object *object) +{ + BLI_assert(DST.view_data_active->manager); + return {object, DST.dupli_source, DST.dupli_parent}; +} + +void DRW_manager_begin_sync() +{ + if (DST.view_data_active->manager == nullptr) { + return; + } + reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->begin_sync(); +} + +void DRW_manager_end_sync() +{ + if (DST.view_data_active->manager == nullptr) { + return; + } + reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->end_sync(); +} diff --git a/source/blender/draw/intern/draw_view_data.h b/source/blender/draw/intern/draw_view_data.h index 918b9e81f87..f2c34c15f08 100644 --- a/source/blender/draw/intern/draw_view_data.h +++ b/source/blender/draw/intern/draw_view_data.h @@ -107,6 +107,7 @@ ViewportEngineData *DRW_view_data_engine_data_get_ensure(DRWViewData *view_data, void DRW_view_data_use_engine(DRWViewData *view_data, struct DrawEngineType 
*engine_type); void DRW_view_data_reset(DRWViewData *view_data); void DRW_view_data_free_unused(DRWViewData *view_data); +void DRW_view_data_engines_view_update(DRWViewData *view_data); double *DRW_view_data_cache_time_get(DRWViewData *view_data); DefaultFramebufferList *DRW_view_data_default_framebuffer_list_get(DRWViewData *view_data); DefaultTextureList *DRW_view_data_default_texture_list_get(DRWViewData *view_data); diff --git a/source/blender/draw/intern/draw_volume.cc b/source/blender/draw/intern/draw_volume.cc index c4e58ab24cb..8f4383a98d8 100644 --- a/source/blender/draw/intern/draw_volume.cc +++ b/source/blender/draw/intern/draw_volume.cc @@ -89,6 +89,10 @@ void DRW_volume_free(void) static GPUTexture *grid_default_texture(eGPUDefaultValue default_value) { + if (g_data.dummy_one == nullptr) { + drw_volume_globals_init(); + } + switch (default_value) { case GPU_DEFAULT_0: return g_data.dummy_zero; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh.hh b/source/blender/draw/intern/mesh_extractors/extract_mesh.hh index 8052b277d45..10b94291e35 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh.hh +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh.hh @@ -29,7 +29,6 @@ struct DRWSubdivCache; enum eMRExtractType { MR_EXTRACT_BMESH, - MR_EXTRACT_MAPPED, MR_EXTRACT_MESH, }; @@ -81,11 +80,18 @@ struct MeshRenderData { BMFace *efa_act_uv; /* Data created on-demand (usually not for #BMesh based data). 
*/ MLoopTri *mlooptri; + const int *material_indices; const float (*vert_normals)[3]; const float (*poly_normals)[3]; + const bool *hide_vert; + const bool *hide_edge; + const bool *hide_poly; float (*loop_normals)[3]; int *lverts, *ledges; + const char *active_color_name; + const char *default_color_name; + struct { int *tri_first_index; int *mat_tri_len; @@ -93,6 +99,82 @@ struct MeshRenderData { } poly_sorted; }; +BLI_INLINE const Mesh *editmesh_final_or_this(const Object *object, const Mesh *me) +{ + if (me->edit_mesh != nullptr) { + Mesh *editmesh_eval_final = BKE_object_get_editmesh_eval_final(object); + if (editmesh_eval_final != nullptr) { + return editmesh_eval_final; + } + } + + return me; +} + +BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->ldata; + break; + case ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->ldata; + break; + } + + BLI_assert(0); + return &me->ldata; +} + +BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->pdata; + break; + case ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->pdata; + break; + } + + BLI_assert(0); + return &me->pdata; +} + +BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->edata; + break; + case ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->edata; + break; + } + + BLI_assert(0); + return &me->edata; +} + +BLI_INLINE const CustomData *mesh_cd_vdata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->vdata; + break; + case 
ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->vdata; + break; + } + + BLI_assert(0); + return &me->vdata; +} + BLI_INLINE BMFace *bm_original_face_get(const MeshRenderData *mr, int idx) { return ((mr->p_origindex != NULL) && (mr->p_origindex[idx] != ORIGINDEX_NONE) && mr->bm) ? diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc index 9824602b129..2f2e59c8c3b 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc @@ -22,7 +22,7 @@ struct MeshExtract_EditUvElem_Data { }; static void extract_edituv_tris_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -59,17 +59,15 @@ static void extract_edituv_tris_iter_looptri_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); - const MPoly *mp = &mr->mpoly[mlt->poly]; - edituv_tri_add(data, - (mp->flag & ME_HIDE) != 0, - (mp->flag & ME_FACE_SEL) != 0, - mlt->tri[0], - mlt->tri[1], - mlt->tri[2]); + const BMFace *efa = bm_original_face_get(mr, mlt->poly); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + + edituv_tri_add(data, mp_hidden, mp_select, mlt->tri[0], mlt->tri[1], mlt->tri[2]); } static void extract_edituv_tris_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -117,7 +115,7 @@ static void extract_edituv_tris_iter_subdiv_bm(const DRWSubdivCache *UNUSED(subd } static void extract_edituv_tris_iter_subdiv_mesh(const DRWSubdivCache *UNUSED(subdiv_cache), - const MeshRenderData *UNUSED(mr), + const MeshRenderData *mr, void *_data, uint subdiv_quad_index, const MPoly *coarse_quad) @@ -125,24 +123,17 @@ static void extract_edituv_tris_iter_subdiv_mesh(const DRWSubdivCache *UNUSED(su MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); const uint loop_idx = subdiv_quad_index * 4; - edituv_tri_add(data, - (coarse_quad->flag & ME_HIDE) != 0, - (coarse_quad->flag & ME_FACE_SEL) != 0, - loop_idx, - loop_idx + 1, - loop_idx + 2); + const BMFace *efa = bm_original_face_get(mr, coarse_quad - mr->mpoly); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; - edituv_tri_add(data, - (coarse_quad->flag & ME_HIDE) != 0, - (coarse_quad->flag & ME_FACE_SEL) != 0, - loop_idx, - loop_idx + 2, - loop_idx + 3); + edituv_tri_add(data, mp_hidden, mp_select, loop_idx, loop_idx + 1, loop_idx + 2); + edituv_tri_add(data, mp_hidden, mp_select, loop_idx, loop_idx + 2, loop_idx + 3); } static void extract_edituv_tris_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -176,7 +167,7 @@ constexpr MeshExtract create_extractor_edituv_tris() * \{ */ static void extract_edituv_lines_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -214,12 +205,24 @@ static void extract_edituv_lines_iter_poly_bm(const MeshRenderData *UNUSED(mr), static void extract_edituv_lines_iter_poly_mesh(const MeshRenderData *mr, const MPoly *mp, - const int UNUSED(mp_index), + const int mp_index, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; + + bool mp_hidden, mp_select; + if (mr->bm) { + const BMFace *efa = bm_original_face_get(mr, mp_index); + mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + mp_select = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + } + else { + mp_hidden = (mr->hide_poly) ? mr->hide_poly[mp_index] : false; + mp_select = (mp->flag & ME_FACE_SEL) != 0; + } + for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; @@ -227,16 +230,12 @@ static void extract_edituv_lines_iter_poly_mesh(const MeshRenderData *mr, const int ml_index_next = (ml_index == ml_index_last) ? 
mp->loopstart : (ml_index + 1); const bool real_edge = (mr->e_origindex == nullptr || mr->e_origindex[ml->e] != ORIGINDEX_NONE); - edituv_edge_add(data, - (mp->flag & ME_HIDE) != 0 || !real_edge, - (mp->flag & ME_FACE_SEL) != 0, - ml_index, - ml_index_next); + edituv_edge_add(data, mp_hidden || !real_edge, mp_select, ml_index, ml_index_next); } } static void extract_edituv_lines_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -266,6 +265,9 @@ static void extract_edituv_lines_iter_subdiv_bm(const DRWSubdivCache *subdiv_cac MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + const bool mp_hidden = BM_elem_flag_test_bool(coarse_poly, BM_ELEM_HIDDEN); + const bool mp_select = BM_elem_flag_test_bool(coarse_poly, BM_ELEM_SELECT); + uint start_loop_idx = subdiv_quad_index * 4; uint end_loop_idx = (subdiv_quad_index + 1) * 4; for (uint loop_idx = start_loop_idx; loop_idx < end_loop_idx; loop_idx++) { @@ -274,8 +276,8 @@ static void extract_edituv_lines_iter_subdiv_bm(const DRWSubdivCache *subdiv_cac (mr->e_origindex == nullptr || mr->e_origindex[edge_origindex] != ORIGINDEX_NONE)); edituv_edge_add(data, - BM_elem_flag_test_bool(coarse_poly, BM_ELEM_HIDDEN) != 0 || !real_edge, - BM_elem_flag_test_bool(coarse_poly, BM_ELEM_SELECT) != 0, + mp_hidden || !real_edge, + mp_select, loop_idx, (loop_idx + 1 == end_loop_idx) ? 
start_loop_idx : (loop_idx + 1)); } @@ -290,6 +292,17 @@ static void extract_edituv_lines_iter_subdiv_mesh(const DRWSubdivCache *subdiv_c MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + bool mp_hidden, mp_select; + if (mr->bm) { + const BMFace *efa = bm_original_face_get(mr, coarse_poly - mr->mpoly); + mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + mp_select = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + } + else { + mp_hidden = (mr->hide_poly) ? mr->hide_poly[coarse_poly - mr->mpoly] : false; + mp_select = (coarse_poly->flag & ME_FACE_SEL) != 0; + } + uint start_loop_idx = subdiv_quad_index * 4; uint end_loop_idx = (subdiv_quad_index + 1) * 4; for (uint loop_idx = start_loop_idx; loop_idx < end_loop_idx; loop_idx++) { @@ -298,8 +311,8 @@ static void extract_edituv_lines_iter_subdiv_mesh(const DRWSubdivCache *subdiv_c (mr->e_origindex == nullptr || mr->e_origindex[edge_origindex] != ORIGINDEX_NONE)); edituv_edge_add(data, - (coarse_poly->flag & ME_HIDE) != 0 || !real_edge, - (coarse_poly->flag & ME_FACE_SEL) != 0, + mp_hidden || !real_edge, + mp_select, loop_idx, (loop_idx + 1 == end_loop_idx) ? 
start_loop_idx : (loop_idx + 1)); } @@ -307,7 +320,7 @@ static void extract_edituv_lines_iter_subdiv_mesh(const DRWSubdivCache *subdiv_c static void extract_edituv_lines_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -341,7 +354,7 @@ constexpr MeshExtract create_extractor_edituv_lines() * \{ */ static void extract_edituv_points_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -378,23 +391,27 @@ static void extract_edituv_points_iter_poly_bm(const MeshRenderData *UNUSED(mr), static void extract_edituv_points_iter_poly_mesh(const MeshRenderData *mr, const MPoly *mp, - const int UNUSED(mp_index), + const int mp_index, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + + const BMFace *efa = bm_original_face_get(mr, mp_index); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; const bool real_vert = !mr->v_origindex || mr->v_origindex[ml->v] != ORIGINDEX_NONE; - edituv_point_add( - data, ((mp->flag & ME_HIDE) != 0) || !real_vert, (mp->flag & ME_FACE_SEL) != 0, ml_index); + edituv_point_add(data, mp_hidden || !real_vert, mp_select, ml_index); } } static void extract_edituv_points_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -444,22 +461,23 @@ static void extract_edituv_points_iter_subdiv_mesh(const DRWSubdivCache *subdiv_ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index); + const BMFace *efa = bm_original_face_get(mr, coarse_quad - mr->mpoly); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + uint start_loop_idx = subdiv_quad_index * 4; uint end_loop_idx = (subdiv_quad_index + 1) * 4; for (uint i = start_loop_idx; i < end_loop_idx; i++) { const int vert_origindex = subdiv_loop_vert_index[i]; const bool real_vert = !mr->v_origindex || (vert_origindex != -1 && mr->v_origindex[vert_origindex] != ORIGINDEX_NONE); - edituv_point_add(data, - ((coarse_quad->flag & ME_HIDE) != 0) || !real_vert, - (coarse_quad->flag & ME_FACE_SEL) != 0, - i); + edituv_point_add(data, mp_hidden || !real_vert, mp_select, i); } } static void extract_edituv_points_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -493,7 +511,7 @@ constexpr MeshExtract create_extractor_edituv_points() * \{ */ static void extract_edituv_fdots_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -533,6 +551,11 @@ static void extract_edituv_fdots_iter_poly_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + + const BMFace *efa = bm_original_face_get(mr, mp_index); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + if (mr->use_subsurf_fdots) { const BLI_bitmap *facedot_tags = mr->me->runtime.subsurf_face_dot_tags; @@ -543,21 +566,17 @@ static void extract_edituv_fdots_iter_poly_mesh(const MeshRenderData *mr, const bool real_fdot = !mr->p_origindex || (mr->p_origindex[mp_index] != ORIGINDEX_NONE); const bool subd_fdot = BLI_BITMAP_TEST(facedot_tags, ml->v); - edituv_facedot_add(data, - ((mp->flag & ME_HIDE) != 0) || !real_fdot || !subd_fdot, - (mp->flag & ME_FACE_SEL) != 0, - mp_index); + edituv_facedot_add(data, mp_hidden || !real_fdot || !subd_fdot, mp_select, mp_index); } } else { const bool real_fdot = !mr->p_origindex || (mr->p_origindex[mp_index] != ORIGINDEX_NONE); - edituv_facedot_add( - data, ((mp->flag & ME_HIDE) != 0) || !real_fdot, (mp->flag & ME_FACE_SEL) != 0, mp_index); + edituv_facedot_add(data, mp_hidden || !real_fdot, mp_select, mp_index); } } static void extract_edituv_fdots_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc index 4eebea1b79f..8dc00617039 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc @@ -15,7 +15,7 @@ namespace blender::draw { * \{ */ static void extract_fdots_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -42,6 +42,8 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, const int mp_index, void *_userdata) { + const bool hidden = mr->use_hide && mr->hide_poly && mr->hide_poly[mp - mr->mpoly]; + GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_userdata); if (mr->use_subsurf_fdots) { const BLI_bitmap *facedot_tags = 
mr->me->runtime.subsurf_face_dot_tags; @@ -50,7 +52,7 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; - if (BLI_BITMAP_TEST(facedot_tags, ml->v) && !(mr->use_hide && (mp->flag & ME_HIDE))) { + if (BLI_BITMAP_TEST(facedot_tags, ml->v) && !hidden) { GPU_indexbuf_set_point_vert(elb, mp_index, mp_index); return; } @@ -58,7 +60,7 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, GPU_indexbuf_set_point_restart(elb, mp_index); } else { - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { + if (!hidden) { GPU_indexbuf_set_point_vert(elb, mp_index, mp_index); } else { @@ -68,7 +70,7 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, } static void extract_fdots_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_userdata) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc index 4e89b34c0a0..9c564c2cdda 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc @@ -18,7 +18,7 @@ namespace blender::draw { * \{ */ static void extract_lines_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -58,16 +58,13 @@ static void extract_lines_iter_poly_mesh(const MeshRenderData *mr, GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data); /* Using poly & loop iterator would complicate accessing the adjacent loop. 
*/ const MLoop *mloop = mr->mloop; - const MEdge *medge = mr->medge; - if (mr->use_hide || (mr->extract_type == MR_EXTRACT_MAPPED) || (mr->e_origindex != nullptr)) { + if (mr->use_hide || (mr->e_origindex != nullptr)) { const int ml_index_last = mp->loopstart + (mp->totloop - 1); int ml_index = ml_index_last, ml_index_next = mp->loopstart; do { const MLoop *ml = &mloop[ml_index]; - const MEdge *med = &medge[ml->e]; - if (!((mr->use_hide && (med->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[ml->e] == ORIGINDEX_NONE)))) { + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[ml->e]) || + ((mr->e_origindex) && (mr->e_origindex[ml->e] == ORIGINDEX_NONE)))) { GPU_indexbuf_set_line_verts(elb, ml->e, ml_index, ml_index_next); } else { @@ -111,9 +108,8 @@ static void extract_lines_iter_ledge_mesh(const MeshRenderData *mr, GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data); const int l_index_offset = mr->edge_len + ledge_index; const int e_index = mr->ledges[ledge_index]; - if (!((mr->use_hide && (med->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[med - mr->medge]) || + ((mr->e_origindex) && (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { const int l_index = mr->loop_len + ledge_index * 2; GPU_indexbuf_set_line_verts(elb, l_index_offset, l_index, l_index + 1); } @@ -132,7 +128,7 @@ static void extract_lines_task_reduce(void *_userdata_to, void *_userdata_from) } static void extract_lines_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *data) { @@ -143,7 +139,7 @@ static void extract_lines_finish(const MeshRenderData *UNUSED(mr), static void extract_lines_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache 
*UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buffer, void *UNUSED(data)) { @@ -183,17 +179,54 @@ static void extract_lines_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, uint *flags_data = static_cast<uint *>(GPU_vertbuf_get_data(flags)); - if (mr->extract_type == MR_EXTRACT_MESH) { - const MEdge *medge = mr->medge; - for (DRWSubdivLooseEdge edge : loose_edges) { - *flags_data++ = (medge[edge.coarse_edge_index].flag & ME_HIDE) != 0; + switch (mr->extract_type) { + case MR_EXTRACT_MESH: { + if (mr->e_origindex == nullptr) { + const bool *hide_edge = mr->hide_edge; + if (hide_edge) { + for (DRWSubdivLooseEdge edge : loose_edges) { + *flags_data++ = hide_edge[edge.coarse_edge_index]; + } + } + else { + MutableSpan<uint>(flags_data, loose_edges.size()).fill(0); + } + } + else { + if (mr->bm) { + for (DRWSubdivLooseEdge edge : loose_edges) { + const BMEdge *bm_edge = bm_original_edge_get(mr, edge.coarse_edge_index); + *flags_data++ = BM_elem_flag_test_bool(bm_edge, BM_ELEM_HIDDEN) != 0; + } + } + else { + const bool *hide_edge = mr->hide_edge; + if (hide_edge) { + for (DRWSubdivLooseEdge edge : loose_edges) { + int e = edge.coarse_edge_index; + + if (mr->e_origindex && mr->e_origindex[e] != ORIGINDEX_NONE) { + *flags_data++ = hide_edge[edge.coarse_edge_index]; + } + else { + *flags_data++ = false; + } + } + } + else { + MutableSpan<uint>(flags_data, loose_edges.size()).fill(0); + } + } + } + break; } - } - else { - BMesh *bm = mr->bm; - for (DRWSubdivLooseEdge edge : loose_edges) { - const BMEdge *bm_edge = BM_edge_at_index(bm, edge.coarse_edge_index); - *flags_data++ = BM_elem_flag_test_bool(bm_edge, BM_ELEM_HIDDEN) != 0; + case MR_EXTRACT_BMESH: { + BMesh *bm = mr->bm; + for (DRWSubdivLooseEdge edge : loose_edges) { + const BMEdge *bm_edge = BM_edge_at_index(bm, edge.coarse_edge_index); + *flags_data++ = BM_elem_flag_test_bool(bm_edge, BM_ELEM_HIDDEN) != 0; + } + break; } } @@ -229,7 +262,7 @@ constexpr MeshExtract create_extractor_lines() /** \name 
Extract Lines and Loose Edges Sub Buffer * \{ */ -static void extract_lines_loose_subbuffer(const MeshRenderData *mr, struct MeshBatchCache *cache) +static void extract_lines_loose_subbuffer(const MeshRenderData *mr, MeshBatchCache *cache) { BLI_assert(cache->final.buff.ibo.lines); /* Multiply by 2 because these are edges indices. */ @@ -241,7 +274,7 @@ static void extract_lines_loose_subbuffer(const MeshRenderData *mr, struct MeshB } static void extract_lines_with_lines_loose_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *data) { @@ -253,7 +286,7 @@ static void extract_lines_with_lines_loose_finish(const MeshRenderData *mr, static void extract_lines_with_lines_loose_finish_subdiv(const struct DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *UNUSED(buf), void *UNUSED(_data)) { @@ -292,7 +325,7 @@ constexpr MeshExtract create_extractor_lines_with_lines_loose() * \{ */ static void extract_lines_loose_only_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { @@ -303,7 +336,7 @@ static void extract_lines_loose_only_init(const MeshRenderData *mr, static void extract_lines_loose_only_init_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc index 9ba9453dada..d6c246c51a9 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc @@ -42,7 +42,7 @@ static void line_adjacency_data_init(MeshExtract_LineAdjacency_Data *data, } static void 
extract_lines_adjacency_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -119,20 +119,21 @@ static void extract_lines_adjacency_iter_looptri_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data); - const MPoly *mp = &mr->mpoly[mlt->poly]; - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - lines_adjacency_triangle(mr->mloop[mlt->tri[0]].v, - mr->mloop[mlt->tri[1]].v, - mr->mloop[mlt->tri[2]].v, - mlt->tri[0], - mlt->tri[1], - mlt->tri[2], - data); + const bool hidden = mr->use_hide && mr->hide_poly && mr->hide_poly[mlt->poly]; + if (hidden) { + return; } + lines_adjacency_triangle(mr->mloop[mlt->tri[0]].v, + mr->mloop[mlt->tri[1]].v, + mr->mloop[mlt->tri[2]].v, + mlt->tri[0], + mlt->tri[1], + mlt->tri[2], + data); } static void extract_lines_adjacency_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { @@ -166,7 +167,7 @@ static void extract_lines_adjacency_finish(const MeshRenderData *UNUSED(mr), static void extract_lines_adjacency_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *_data) { @@ -222,7 +223,7 @@ static void extract_lines_adjacency_iter_subdiv_mesh(const DRWSubdivCache *subdi static void extract_lines_adjacency_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc index 713a533492f..31e5c515129 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc +++ 
b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc @@ -26,7 +26,7 @@ struct MeshExtract_LinePaintMask_Data { }; static void extract_lines_paint_mask_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -47,10 +47,8 @@ static void extract_lines_paint_mask_iter_poly_mesh(const MeshRenderData *mr, const MLoop *ml = &mloop[ml_index]; const int e_index = ml->e; - const MEdge *me = &mr->medge[e_index]; - if (!((mr->use_hide && (me->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[e_index]) || + ((mr->e_origindex) && (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { const int ml_index_last = mp->totloop + mp->loopstart - 1; const int ml_index_other = (ml_index == ml_index_last) ? mp->loopstart : (ml_index + 1); @@ -78,7 +76,7 @@ static void extract_lines_paint_mask_iter_poly_mesh(const MeshRenderData *mr, } static void extract_lines_paint_mask_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -122,11 +120,10 @@ static void extract_lines_paint_mask_iter_subdiv_mesh(const DRWSubdivCache *subd GPU_indexbuf_set_line_restart(&data->elb, subdiv_edge_index); } else { - const MEdge *me = &mr->medge[coarse_edge_index]; - if (!((mr->use_hide && (me->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[coarse_edge_index] == ORIGINDEX_NONE)))) { - const uint ml_index_other = (loop_idx == end_loop_idx) ? start_loop_idx : loop_idx + 1; + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[coarse_edge_index]) || + ((mr->e_origindex) && (mr->e_origindex[coarse_edge_index] == ORIGINDEX_NONE)))) { + const uint ml_index_other = (loop_idx == (end_loop_idx - 1)) ? 
start_loop_idx : + loop_idx + 1; if (coarse_quad->flag & ME_FACE_SEL) { if (BLI_BITMAP_TEST_AND_SET_ATOMIC(data->select_map, coarse_edge_index)) { /* Hide edge as it has more than 2 selected loop. */ @@ -154,7 +151,7 @@ static void extract_lines_paint_mask_iter_subdiv_mesh(const DRWSubdivCache *subd static void extract_lines_paint_mask_finish_subdiv( const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc index e746b37fd30..48eeb86e5ee 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc @@ -19,7 +19,7 @@ namespace blender::draw { * \{ */ static void extract_points_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -43,10 +43,9 @@ BLI_INLINE void vert_set_mesh(GPUIndexBufBuilder *elb, const int v_index, const int l_index) { - const MVert *mv = &mr->mvert[v_index]; - if (!((mr->use_hide && (mv->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex) && - (mr->v_origindex[v_index] == ORIGINDEX_NONE)))) { + const bool hidden = mr->use_hide && mr->hide_vert && mr->hide_vert[v_index]; + + if (!(hidden || ((mr->v_origindex) && (mr->v_origindex[v_index] == ORIGINDEX_NONE)))) { GPU_indexbuf_set_point_vert(elb, v_index, l_index); } else { @@ -131,7 +130,7 @@ static void extract_points_task_reduce(void *_userdata_to, void *_userdata_from) } static void extract_points_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_userdata) { @@ -142,7 +141,7 @@ static void extract_points_finish(const MeshRenderData *UNUSED(mr), 
static void extract_points_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buffer), void *data) { @@ -181,8 +180,7 @@ static void extract_points_iter_subdiv_common(GPUIndexBufBuilder *elb, } } else { - const MVert *mv = &mr->mvert[coarse_vertex_index]; - if (mr->use_hide && (mv->flag & ME_HIDE)) { + if (mr->use_hide && mr->hide_vert && mr->hide_vert[coarse_vertex_index]) { GPU_indexbuf_set_point_restart(elb, coarse_vertex_index); continue; } @@ -285,7 +283,7 @@ static void extract_points_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, static void extract_points_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_userdata) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc index 4c8d1d0002a..2e3e6c7b6b1 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc @@ -25,7 +25,7 @@ static void extract_tris_mat_task_reduce(void *_userdata_to, void *_userdata_fro * \{ */ static void extract_tris_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -81,7 +81,7 @@ static void extract_tris_iter_poly_mesh(const MeshRenderData *mr, } static void extract_tris_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { @@ -111,7 +111,7 @@ static void extract_tris_finish(const MeshRenderData *mr, static void extract_tris_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ 
-157,7 +157,7 @@ constexpr MeshExtract create_extractor_tris() * \{ */ static void extract_tris_single_mat_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -189,17 +189,17 @@ static void extract_tris_single_mat_iter_looptri_mesh(const MeshRenderData *mr, void *_data) { GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_data); - const MPoly *mp = &mr->mpoly[mlt->poly]; - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - GPU_indexbuf_set_tri_verts(elb, mlt_index, mlt->tri[0], mlt->tri[1], mlt->tri[2]); + const bool hidden = mr->use_hide && mr->hide_poly && mr->hide_poly[mlt->poly]; + if (hidden) { + GPU_indexbuf_set_tri_restart(elb, mlt_index); } else { - GPU_indexbuf_set_tri_restart(elb, mlt_index); + GPU_indexbuf_set_tri_verts(elb, mlt_index, mlt->tri[0], mlt->tri[1], mlt->tri[2]); } } static void extract_tris_single_mat_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc index fb6b5e1904b..64ade020418 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc @@ -9,6 +9,7 @@ #include <functional> +#include "BLI_color.hh" #include "BLI_math_vec_types.hh" #include "BLI_string.h" @@ -57,7 +58,6 @@ template<typename AttributeType, typename VBOType> struct AttributeTypeConverter } }; -/* Similar to the one in #extract_mesh_vcol_vbo.cc */ struct gpuMeshCol { ushort r, g, b, a; }; @@ -74,6 +74,18 @@ template<> struct AttributeTypeConverter<MPropCol, gpuMeshCol> { } }; +template<> struct AttributeTypeConverter<ColorGeometry4b, gpuMeshCol> { + static gpuMeshCol convert_value(ColorGeometry4b value) + { + gpuMeshCol result; + result.r = 
unit_float_to_ushort_clamp(BLI_color_from_srgb_table[value.r]); + result.g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[value.g]); + result.b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[value.b]); + result.a = unit_float_to_ushort_clamp(value.a * (1.0f / 255.0f)); + return result; + } +}; + /* Return the number of component for the attribute's value type, or 0 if is it unsupported. */ static uint gpu_component_size_for_attribute_type(eCustomDataType type) { @@ -90,6 +102,7 @@ static uint gpu_component_size_for_attribute_type(eCustomDataType type) case CD_PROP_FLOAT3: return 3; case CD_PROP_COLOR: + case CD_PROP_BYTE_COLOR: return 4; default: return 0; @@ -102,6 +115,7 @@ static GPUVertFetchMode get_fetch_mode_for_type(eCustomDataType type) case CD_PROP_INT32: return GPU_FETCH_INT_TO_FLOAT; case CD_PROP_COLOR: + case CD_PROP_BYTE_COLOR: return GPU_FETCH_INT_TO_FLOAT_UNIT; default: return GPU_FETCH_FLOAT; @@ -114,13 +128,14 @@ static GPUVertCompType get_comp_type_for_type(eCustomDataType type) case CD_PROP_INT32: return GPU_COMP_I32; case CD_PROP_COLOR: + case CD_PROP_BYTE_COLOR: return GPU_COMP_U16; default: return GPU_COMP_F32; } } -static void init_vbo_for_attribute(const MeshRenderData *mr, +static void init_vbo_for_attribute(const MeshRenderData &mr, GPUVertBuf *vbo, const DRW_AttributeRequest &request, bool build_on_device, @@ -132,11 +147,8 @@ static void init_vbo_for_attribute(const MeshRenderData *mr, /* We should not be here if the attribute type is not supported. 
*/ BLI_assert(comp_size != 0); - const CustomData *custom_data = get_custom_data_for_domain(mr, request.domain); char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME]; - const char *layer_name = CustomData_get_layer_name( - custom_data, request.cd_type, request.layer_index); - GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); + GPU_vertformat_safe_attr_name(request.attribute_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); /* Attributes use auto-name. */ BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name); @@ -144,6 +156,13 @@ static void init_vbo_for_attribute(const MeshRenderData *mr, GPU_vertformat_deinterleave(&format); GPU_vertformat_attr_add(&format, attr_name, comp_type, comp_size, fetch_mode); + if (mr.active_color_name && STREQ(request.attribute_name, mr.active_color_name)) { + GPU_vertformat_alias_add(&format, "ac"); + } + if (mr.default_color_name && STREQ(request.attribute_name, mr.default_color_name)) { + GPU_vertformat_alias_add(&format, "c"); + } + if (build_on_device) { GPU_vertbuf_init_build_on_device(vbo, &format, len); } @@ -258,18 +277,15 @@ static void extract_attr_generic(const MeshRenderData *mr, } } -static void extract_attr_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - void *UNUSED(tls_data), - int index) +static void extract_attr_init( + const MeshRenderData *mr, MeshBatchCache *cache, void *buf, void *UNUSED(tls_data), int index) { const DRW_Attributes *attrs_used = &cache->attr_used; const DRW_AttributeRequest &request = attrs_used->requests[index]; GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - init_vbo_for_attribute(mr, vbo, request, false, static_cast<uint32_t>(mr->loop_len)); + init_vbo_for_attribute(*mr, vbo, request, false, static_cast<uint32_t>(mr->loop_len)); /* TODO(@kevindietrich): float3 is used for scalar attributes as the implicit conversion done by * OpenGL to vec4 for a scalar `s` will produce a `vec4(s, 0, 0, 1)`. 
However, following the @@ -297,6 +313,9 @@ static void extract_attr_init(const MeshRenderData *mr, case CD_PROP_COLOR: extract_attr_generic<MPropCol, gpuMeshCol>(mr, vbo, request); break; + case CD_PROP_BYTE_COLOR: + extract_attr_generic<ColorGeometry4b, gpuMeshCol>(mr, vbo, request); + break; default: BLI_assert_unreachable(); } @@ -345,17 +364,24 @@ static void extract_attr_init_subdiv(const DRWSubdivCache *subdiv_cache, case CD_PROP_COLOR: extract_attr_generic<MPropCol, gpuMeshCol>(mr, src_data, request); break; + case CD_PROP_BYTE_COLOR: + extract_attr_generic<ColorGeometry4b, gpuMeshCol>(mr, src_data, request); + break; default: BLI_assert_unreachable(); } GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer); - init_vbo_for_attribute(mr, dst_buffer, request, true, subdiv_cache->num_subdiv_loops); + init_vbo_for_attribute(*mr, dst_buffer, request, true, subdiv_cache->num_subdiv_loops); /* Ensure data is uploaded properly. */ GPU_vertbuf_tag_dirty(src_data); - draw_subdiv_interp_custom_data( - subdiv_cache, src_data, dst_buffer, static_cast<int>(dimensions), 0, false); + draw_subdiv_interp_custom_data(subdiv_cache, + src_data, + dst_buffer, + static_cast<int>(dimensions), + 0, + ELEM(request.cd_type, CD_PROP_COLOR, CD_PROP_BYTE_COLOR)); GPU_vertbuf_discard(src_data); } @@ -364,13 +390,13 @@ static void extract_attr_init_subdiv(const DRWSubdivCache *subdiv_cache, * extract. The overall API does not allow us to pass this in a convenient way. 
*/ #define EXTRACT_INIT_WRAPPER(index) \ static void extract_attr_init##index( \ - const MeshRenderData *mr, struct MeshBatchCache *cache, void *buf, void *tls_data) \ + const MeshRenderData *mr, MeshBatchCache *cache, void *buf, void *tls_data) \ { \ extract_attr_init(mr, cache, buf, tls_data, index); \ } \ static void extract_attr_init_subdiv##index(const DRWSubdivCache *subdiv_cache, \ const MeshRenderData *mr, \ - struct MeshBatchCache *cache, \ + MeshBatchCache *cache, \ void *buf, \ void *tls_data) \ { \ diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc index a11f740239a..50c37f6397c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc @@ -43,7 +43,7 @@ static float loop_edge_factor_get(const float f_no[3], } static void extract_edge_fac_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -167,14 +167,14 @@ static void extract_edge_fac_iter_ledge_mesh(const MeshRenderData *mr, } static void extract_edge_fac_finish(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); MeshExtract_EdgeFac_Data *data = static_cast<MeshExtract_EdgeFac_Data *>(_data); - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { /* Some AMD drivers strangely crash with VBO's with a one byte format. * To workaround we reinitialize the VBO with another format and convert * all bytes to floats. 
*/ @@ -206,7 +206,7 @@ static GPUVertFormat *get_subdiv_edge_fac_format() { static GPUVertFormat format = {0}; if (format.attr_len == 0) { - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); } else { @@ -218,7 +218,7 @@ static GPUVertFormat *get_subdiv_edge_fac_format() static void extract_edge_fac_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -268,7 +268,7 @@ static void extract_edge_fac_loose_geom_subdiv(const DRWSubdivCache *subdiv_cach uint offset = subdiv_cache->num_subdiv_loops; for (int i = 0; i < loose_geom.edge_len; i++) { - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { float loose_edge_fac[2] = {1.0f, 1.0f}; GPU_vertbuf_update_sub(vbo, offset * sizeof(float), sizeof(loose_edge_fac), loose_edge_fac); } diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc index 3bb706e82cd..27fd6546b8c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc @@ -112,7 +112,7 @@ static GPUVertFormat *get_edit_data_format() } static void extract_edit_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc index 6d54fce2a0d..0b9043e3289 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc @@ -43,7 
+43,7 @@ static void extract_edituv_data_init_common(const MeshRenderData *mr, } static void extract_edituv_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc index 5d6dd14b57a..e4714aabf34 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc @@ -74,7 +74,7 @@ static void edituv_get_edituv_stretch_angle(float auv[2][2], } static void extract_edituv_stretch_angle_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -97,7 +97,7 @@ static void extract_edituv_stretch_angle_init(const MeshRenderData *mr, data->cd_ofs = CustomData_get_offset(&mr->bm->ldata, CD_MLOOPUV); } else { - BLI_assert(ELEM(mr->extract_type, MR_EXTRACT_MAPPED, MR_EXTRACT_MESH)); + BLI_assert(mr->extract_type == MR_EXTRACT_MESH); data->luv = (const MLoopUV *)CustomData_get_layer(&mr->me->ldata, CD_MLOOPUV); } } @@ -212,7 +212,7 @@ static GPUVertFormat *get_edituv_stretch_angle_format_subdiv() static void extract_edituv_stretch_angle_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(tls_data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc index 70dcc24f946..9679c0523f8 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc @@ -20,14 +20,14 @@ namespace 
blender::draw { * \{ */ static void extract_edituv_stretch_area_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); static GPUVertFormat format = {0}; if (format.attr_len == 0) { - GPU_vertformat_attr_add(&format, "ratio", GPU_COMP_I16, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + GPU_vertformat_attr_add(&format, "ratio", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); } GPU_vertbuf_init_with_format(vbo, &format); @@ -37,15 +37,14 @@ static void extract_edituv_stretch_area_init(const MeshRenderData *mr, BLI_INLINE float area_ratio_get(float area, float uvarea) { if (area >= FLT_EPSILON && uvarea >= FLT_EPSILON) { - /* Tag inversion by using the sign. */ - return (area > uvarea) ? (uvarea / area) : -(area / uvarea); + return uvarea / area; } return 0.0f; } -BLI_INLINE float area_ratio_to_stretch(float ratio, float tot_ratio, float inv_tot_ratio) +BLI_INLINE float area_ratio_to_stretch(float ratio, float tot_ratio) { - ratio *= (ratio > 0.0f) ? tot_ratio : -inv_tot_ratio; + ratio *= tot_ratio; return (ratio > 1.0f) ? 
(1.0f / ratio) : ratio; } @@ -72,7 +71,7 @@ static void compute_area_ratio(const MeshRenderData *mr, } } else { - BLI_assert(ELEM(mr->extract_type, MR_EXTRACT_MAPPED, MR_EXTRACT_MESH)); + BLI_assert(mr->extract_type == MR_EXTRACT_MESH); const MLoopUV *uv_data = (const MLoopUV *)CustomData_get_layer(&mr->me->ldata, CD_MLOOPUV); const MPoly *mp = mr->mpoly; for (int mp_index = 0; mp_index < mr->poly_len; mp_index++, mp++) { @@ -89,7 +88,7 @@ static void compute_area_ratio(const MeshRenderData *mr, } static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(data)) { @@ -97,14 +96,8 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, float *area_ratio = static_cast<float *>(MEM_mallocN(sizeof(float) * mr->poly_len, __func__)); compute_area_ratio(mr, area_ratio, cache->tot_area, cache->tot_uv_area); - /* Convert in place to avoid an extra allocation */ - uint16_t *poly_stretch = (uint16_t *)area_ratio; - for (int mp_index = 0; mp_index < mr->poly_len; mp_index++) { - poly_stretch[mp_index] = area_ratio[mp_index] * SHRT_MAX; - } - /* Copy face data for each loop. 
*/ - uint16_t *loop_stretch = (uint16_t *)GPU_vertbuf_get_data(vbo); + float *loop_stretch = (float *)GPU_vertbuf_get_data(vbo); if (mr->extract_type == MR_EXTRACT_BMESH) { BMFace *efa; @@ -112,16 +105,16 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, int f, l_index = 0; BM_ITER_MESH_INDEX (efa, &f_iter, mr->bm, BM_FACES_OF_MESH, f) { for (int i = 0; i < efa->len; i++, l_index++) { - loop_stretch[l_index] = poly_stretch[f]; + loop_stretch[l_index] = area_ratio[f]; } } } else { - BLI_assert(ELEM(mr->extract_type, MR_EXTRACT_MAPPED, MR_EXTRACT_MESH)); + BLI_assert(mr->extract_type == MR_EXTRACT_MESH); const MPoly *mp = mr->mpoly; for (int mp_index = 0, l_index = 0; mp_index < mr->poly_len; mp_index++, mp++) { for (int i = 0; i < mp->totloop; i++, l_index++) { - loop_stretch[l_index] = poly_stretch[mp_index]; + loop_stretch[l_index] = area_ratio[mp_index]; } } } @@ -131,7 +124,7 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, static void extract_edituv_stretch_area_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc index 64bec0adad4..27d1975d67b 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc @@ -21,7 +21,7 @@ struct MeshExtract_EditUVFdotData_Data { }; static void extract_fdots_edituv_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc index 
8d189db9f12..c47cde63630 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc @@ -19,7 +19,7 @@ namespace blender::draw { #define NOR_AND_FLAG_HIDDEN -2 static void extract_fdots_nor_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ -34,7 +34,7 @@ static void extract_fdots_nor_init(const MeshRenderData *mr, } static void extract_fdots_nor_finish(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(data)) { @@ -48,8 +48,7 @@ static void extract_fdots_nor_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = BM_face_at_index(mr->bm, f); const bool is_face_hidden = BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { nor[f] = GPU_normal_convert_i10_v3(invalid_normal); nor[f].w = NOR_AND_FLAG_HIDDEN; } @@ -66,8 +65,7 @@ static void extract_fdots_nor_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = bm_original_face_get(mr, f); const bool is_face_hidden = efa && BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { nor[f] = GPU_normal_convert_i10_v3(invalid_normal); nor[f].w = NOR_AND_FLAG_HIDDEN; } @@ -101,7 +99,7 @@ constexpr MeshExtract create_extractor_fdots_nor() * \{ */ static void extract_fdots_nor_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ 
-116,7 +114,7 @@ static void extract_fdots_nor_hq_init(const MeshRenderData *mr, } static void extract_fdots_nor_hq_finish(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(data)) { @@ -130,8 +128,7 @@ static void extract_fdots_nor_hq_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = BM_face_at_index(mr->bm, f); const bool is_face_hidden = BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { normal_float_to_short_v3(&nor[f * 4], invalid_normal); nor[f * 4 + 3] = NOR_AND_FLAG_HIDDEN; } @@ -148,8 +145,7 @@ static void extract_fdots_nor_hq_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = bm_original_face_get(mr, f); const bool is_face_hidden = efa && BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { normal_float_to_short_v3(&nor[f * 4], invalid_normal); nor[f * 4 + 3] = NOR_AND_FLAG_HIDDEN; } diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc index 822b5928c49..c391cb6ca5a 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc @@ -36,7 +36,7 @@ static GPUVertFormat *get_fdots_nor_format_subdiv() } static void extract_fdots_pos_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -101,7 +101,7 @@ static void 
extract_fdots_pos_iter_poly_mesh(const MeshRenderData *mr, static void extract_fdots_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc index de21c63e5fd..b0403cf7c4c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc @@ -22,7 +22,7 @@ struct MeshExtract_FdotUV_Data { }; static void extract_fdots_uv_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc index 42a9a58bbe4..01d07fa5f83 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc @@ -16,7 +16,7 @@ namespace blender::draw { * \{ */ static void extract_lnor_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -62,6 +62,8 @@ static void extract_lnor_iter_poly_mesh(const MeshRenderData *mr, const int mp_index, void *data) { + const bool hidden = mr->hide_poly && mr->hide_poly[mp_index]; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { @@ -78,10 +80,10 @@ static void extract_lnor_iter_poly_mesh(const MeshRenderData *mr, } /* Flag for paint mode overlay. - * Only use MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. 
+ * Only use origindex in edit mode where it is used to display the edge-normals. * In paint mode it will use the un-mapped data to draw the wire-frame. */ - if (mp->flag & ME_HIDE || (mr->edit_bmesh && mr->extract_type == MR_EXTRACT_MAPPED && - (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { + if (hidden || + (mr->edit_bmesh && (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { lnor_data->w = -1; } else if (mp->flag & ME_FACE_SEL) { @@ -105,7 +107,7 @@ static GPUVertFormat *get_subdiv_lnor_format() static void extract_lnor_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -141,7 +143,7 @@ struct gpuHQNor { }; static void extract_lnor_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -185,6 +187,8 @@ static void extract_lnor_hq_iter_poly_mesh(const MeshRenderData *mr, const int mp_index, void *data) { + const bool hidden = mr->hide_poly && mr->hide_poly[mp_index]; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { @@ -201,10 +205,10 @@ static void extract_lnor_hq_iter_poly_mesh(const MeshRenderData *mr, } /* Flag for paint mode overlay. - * Only use #MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. + * Only use origindex in edit mode where it is used to display the edge-normals. * In paint mode it will use the un-mapped data to draw the wire-frame. 
*/ - if (mp->flag & ME_HIDE || (mr->edit_bmesh && mr->extract_type == MR_EXTRACT_MAPPED && - (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { + if (hidden || + (mr->edit_bmesh && (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { lnor_data->w = -1; } else if (mp->flag & ME_FACE_SEL) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc index b57e2f6b807..fe2a02b6b63 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc @@ -23,7 +23,7 @@ namespace blender::draw { * \{ */ static void extract_mesh_analysis_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ -259,7 +259,8 @@ static void statvis_calc_thickness(const MeshRenderData *mr, float *r_thickness) } struct BVHTree_OverlapData { - const Mesh *me; + const MVert *verts; + const MLoop *loops; const MLoopTri *mlooptri; float epsilon; }; @@ -267,7 +268,6 @@ struct BVHTree_OverlapData { static bool bvh_overlap_cb(void *userdata, int index_a, int index_b, int UNUSED(thread)) { struct BVHTree_OverlapData *data = static_cast<struct BVHTree_OverlapData *>(userdata); - const Mesh *me = data->me; const MLoopTri *tri_a = &data->mlooptri[index_a]; const MLoopTri *tri_b = &data->mlooptri[index_b]; @@ -276,12 +276,12 @@ static bool bvh_overlap_cb(void *userdata, int index_a, int index_b, int UNUSED( return false; } - const float *tri_a_co[3] = {me->mvert[me->mloop[tri_a->tri[0]].v].co, - me->mvert[me->mloop[tri_a->tri[1]].v].co, - me->mvert[me->mloop[tri_a->tri[2]].v].co}; - const float *tri_b_co[3] = {me->mvert[me->mloop[tri_b->tri[0]].v].co, - me->mvert[me->mloop[tri_b->tri[1]].v].co, - me->mvert[me->mloop[tri_b->tri[2]].v].co}; + const float *tri_a_co[3] = 
{data->verts[data->loops[tri_a->tri[0]].v].co, + data->verts[data->loops[tri_a->tri[1]].v].co, + data->verts[data->loops[tri_a->tri[2]].v].co}; + const float *tri_b_co[3] = {data->verts[data->loops[tri_b->tri[0]].v].co, + data->verts[data->loops[tri_b->tri[1]].v].co, + data->verts[data->loops[tri_b->tri[2]].v].co}; float ix_pair[2][3]; int verts_shared = 0; @@ -342,7 +342,8 @@ static void statvis_calc_intersect(const MeshRenderData *mr, float *r_intersect) BVHTree *tree = BKE_bvhtree_from_mesh_get(&treeData, mr->me, BVHTREE_FROM_LOOPTRI, 4); struct BVHTree_OverlapData data = {nullptr}; - data.me = mr->me; + data.verts = mr->mvert; + data.loops = mr->mloop; data.mlooptri = mr->mlooptri; data.epsilon = BLI_bvhtree_get_epsilon(tree); @@ -587,7 +588,7 @@ static void statvis_calc_sharp(const MeshRenderData *mr, float *r_sharp) } static void extract_analysis_iter_finish_mesh(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc index 68d838e9e62..4fcbdb1fc7c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc @@ -19,7 +19,7 @@ struct MeshExtract_Orco_Data { }; static void extract_orco_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc index 313744bdd27..a822845c688 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc @@ -28,7 +28,7 @@ struct MeshExtract_PosNor_Data { }; static void 
extract_pos_nor_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -83,10 +83,11 @@ static void extract_pos_nor_iter_poly_bm(const MeshRenderData *mr, static void extract_pos_nor_iter_poly_mesh(const MeshRenderData *mr, const MPoly *mp, - const int UNUSED(mp_index), + const int mp_index, void *_data) { MeshExtract_PosNor_Data *data = static_cast<MeshExtract_PosNor_Data *>(_data); + const bool poly_hidden = mr->hide_poly && mr->hide_poly[mp_index]; const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; @@ -95,12 +96,12 @@ static void extract_pos_nor_iter_poly_mesh(const MeshRenderData *mr, PosNorLoop *vert = &data->vbo_data[ml_index]; const MVert *mv = &mr->mvert[ml->v]; + const bool vert_hidden = mr->hide_vert && mr->hide_vert[ml->v]; copy_v3_v3(vert->pos, mv->co); vert->nor = data->normals[ml->v].low; /* Flag for paint mode overlay. */ - if (mp->flag & ME_HIDE || mv->flag & ME_HIDE || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex) && - (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { + if (poly_hidden || vert_hidden || + ((mr->v_origindex) && (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { vert->nor.w = -1; } else if (mv->flag & SELECT) { @@ -171,7 +172,7 @@ static void extract_pos_nor_iter_lvert_mesh(const MeshRenderData *mr, } static void extract_pos_nor_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *_data) { @@ -201,7 +202,7 @@ static GPUVertFormat *get_custom_normals_format() static void extract_pos_nor_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -372,7 +373,7 @@ struct MeshExtract_PosNorHQ_Data { }; static void extract_pos_nor_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + 
MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -432,20 +433,22 @@ static void extract_pos_nor_hq_iter_poly_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_PosNorHQ_Data *data = static_cast<MeshExtract_PosNorHQ_Data *>(_data); + const bool poly_hidden = mr->hide_poly && mr->hide_poly[mp - mr->mpoly]; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; + const bool vert_hidden = mr->hide_vert && mr->hide_vert[ml->v]; PosNorHQLoop *vert = &data->vbo_data[ml_index]; const MVert *mv = &mr->mvert[ml->v]; copy_v3_v3(vert->pos, mv->co); copy_v3_v3_short(vert->nor, data->normals[ml->v].high); /* Flag for paint mode overlay. */ - if (mp->flag & ME_HIDE || mv->flag & ME_HIDE || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex) && - (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { + if (poly_hidden || vert_hidden || + ((mr->v_origindex) && (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { vert->nor[3] = -1; } else if (mv->flag & SELECT) { @@ -521,7 +524,7 @@ static void extract_pos_nor_hq_iter_lvert_mesh(const MeshRenderData *mr, } static void extract_pos_nor_hq_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc index 0d959e324f8..6202fdd312d 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc @@ -9,6 +9,7 @@ #include "BLI_string.h" +#include "BKE_mesh.h" #include "BKE_paint.h" #include "draw_subdivision.h" @@ -31,7 +32,7 @@ static GPUVertFormat *get_sculpt_data_format() } static void extract_sculpt_data_init(const 
MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ -113,7 +114,7 @@ static void extract_sculpt_data_init(const MeshRenderData *mr, static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buffer, void *UNUSED(data)) { @@ -128,6 +129,9 @@ static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache, GPUVertBuf *subdiv_mask_vbo = nullptr; const float *cd_mask = (const float *)CustomData_get_layer(cd_vdata, CD_PAINT_MASK); + const Span<MPoly> coarse_polys = coarse_mesh->polys(); + const Span<MLoop> coarse_loops = coarse_mesh->loops(); + if (cd_mask) { GPUVertFormat mask_format = {0}; GPU_vertformat_attr_add(&mask_format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); @@ -138,11 +142,11 @@ static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache, float *v_mask = static_cast<float *>(GPU_vertbuf_get_data(mask_vbo)); for (int i = 0; i < coarse_mesh->totpoly; i++) { - const MPoly *mpoly = &coarse_mesh->mpoly[i]; + const MPoly *mpoly = &coarse_polys[i]; for (int loop_index = mpoly->loopstart; loop_index < mpoly->loopstart + mpoly->totloop; loop_index++) { - const MLoop *ml = &coarse_mesh->mloop[loop_index]; + const MLoop *ml = &coarse_loops[loop_index]; *v_mask++ = cd_mask[ml->v]; } } diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc index 6230e1974be..9e0d171c9e4 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc @@ -30,7 +30,7 @@ static void extract_select_idx_init_impl(const MeshRenderData *UNUSED(mr), } static void extract_select_idx_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), 
+ MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -366,7 +366,7 @@ constexpr MeshExtract create_extractor_vert_idx() } static void extract_fdot_idx_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc index a275f247cad..f7655658bdd 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc @@ -19,7 +19,7 @@ struct SkinRootData { }; static void extract_skin_roots_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc index 83453d6ef38..049fa416523 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc @@ -25,7 +25,7 @@ namespace blender::draw { * \{ */ static void extract_tan_init_common(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, GPUVertFormat *format, GPUVertCompType comp_type, GPUVertFetchMode fetch_mode, @@ -161,7 +161,7 @@ static void extract_tan_init_common(const MeshRenderData *mr, } static void extract_tan_ex_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, GPUVertBuf *vbo, const bool do_hq) { @@ -235,7 +235,7 @@ static void extract_tan_ex_init(const MeshRenderData *mr, } static void extract_tan_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { @@ -254,7 +254,7 @@ static GPUVertFormat *get_coarse_tan_format() static void 
extract_tan_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -344,7 +344,7 @@ constexpr MeshExtract create_extractor_tan() * \{ */ static void extract_tan_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc index ddb8ed9b25b..6606912850d 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc @@ -19,7 +19,7 @@ namespace blender::draw { /* Initialize the vertex format to be used for UVs. Return true if any UV layer is * found, false otherwise. */ static bool mesh_extract_uv_format_init(GPUVertFormat *format, - struct MeshBatchCache *cache, + MeshBatchCache *cache, CustomData *cd_ldata, eMRExtractType extract_type, uint32_t &r_uv_layers) @@ -72,7 +72,7 @@ static bool mesh_extract_uv_format_init(GPUVertFormat *format, } static void extract_uv_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { @@ -120,7 +120,7 @@ static void extract_uv_init(const MeshRenderData *mr, static void extract_uv_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc deleted file mode 100644 index 84ab20f8f90..00000000000 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc +++ /dev/null @@ -1,387 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2021 Blender Foundation. 
All rights reserved. */ - -/** \file - * \ingroup draw - */ - -#include "MEM_guardedalloc.h" - -#include "BKE_attribute.h" -#include "BLI_string.h" -#include "BLI_vector.hh" - -#include "draw_subdivision.h" -#include "extract_mesh.hh" - -namespace blender::draw { - -struct VColRef { - const CustomDataLayer *layer; - eAttrDomain domain; -}; - -/** Get all vcol layers as AttributeRefs. - * - * \param vcol_layers: bitmask to filter vcol layers by, each bit - * corresponds to the integer position of the attribute - * within the global color attribute list. - */ -static Vector<VColRef> get_vcol_refs(const CustomData *cd_vdata, - const CustomData *cd_ldata, - const uint vcol_layers) -{ - Vector<VColRef> refs; - uint layeri = 0; - - auto buildList = [&](const CustomData *cdata, eAttrDomain domain) { - for (int i = 0; i < cdata->totlayer; i++) { - const CustomDataLayer *layer = cdata->layers + i; - - if (!(CD_TYPE_AS_MASK(layer->type) & CD_MASK_COLOR_ALL)) { - continue; - } - - if (layer->flag & CD_FLAG_TEMPORARY) { - continue; - } - - if (!(vcol_layers & (1UL << layeri))) { - layeri++; - continue; - } - - VColRef ref = {}; - ref.domain = domain; - ref.layer = layer; - - refs.append(ref); - layeri++; - } - }; - - buildList(cd_vdata, ATTR_DOMAIN_POINT); - buildList(cd_ldata, ATTR_DOMAIN_CORNER); - - return refs; -} - -/* ---------------------------------------------------------------------- */ -/** \name Extract VCol - * \{ */ - -/* Initialize the common vertex format for vcol for coarse and subdivided meshes. 
*/ -static void init_vcol_format(GPUVertFormat *format, - const MeshBatchCache *cache, - const CustomData *cd_vdata, - const CustomData *cd_ldata, - const CustomDataLayer *active, - const CustomDataLayer *render) -{ - GPU_vertformat_deinterleave(format); - - const uint32_t vcol_layers = cache->cd_used.vcol; - - Vector<VColRef> refs = get_vcol_refs(cd_vdata, cd_ldata, vcol_layers); - - for (const VColRef &ref : refs) { - char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME]; - - GPU_vertformat_safe_attr_name(ref.layer->name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); - - /* VCol layer name. */ - BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name); - GPU_vertformat_attr_add(format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); - - /* Active layer name. */ - if (ref.layer == active) { - GPU_vertformat_alias_add(format, "ac"); - } - - /* Active render layer name. */ - if (ref.layer == render) { - GPU_vertformat_alias_add(format, "c"); - } - } -} - -/* Vertex format for vertex colors, only used during the coarse data upload for the subdivision - * case. */ -static GPUVertFormat *get_coarse_vcol_format() -{ - static GPUVertFormat format = {0}; - if (format.attr_len == 0) { - GPU_vertformat_attr_add(&format, "cCol", GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); - GPU_vertformat_alias_add(&format, "c"); - GPU_vertformat_alias_add(&format, "ac"); - } - return &format; -} - -struct gpuMeshVcol { - ushort r, g, b, a; -}; - -static void extract_vcol_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - void *UNUSED(tls_data)) -{ - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - GPUVertFormat format = {0}; - - const CustomData *cd_vdata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->vdata : - &mr->me->vdata; - const CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? 
&mr->bm->ldata : - &mr->me->ldata; - - Mesh me_query = blender::dna::shallow_zero_initialize(); - - BKE_id_attribute_copy_domains_temp( - ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - - const CustomDataLayer *active_color = BKE_id_attributes_active_color_get(&me_query.id); - const CustomDataLayer *render_color = BKE_id_attributes_render_color_get(&me_query.id); - - const uint32_t vcol_layers = cache->cd_used.vcol; - init_vcol_format(&format, cache, cd_vdata, cd_ldata, active_color, render_color); - - GPU_vertbuf_init_with_format(vbo, &format); - GPU_vertbuf_data_alloc(vbo, mr->loop_len); - - gpuMeshVcol *vcol_data = (gpuMeshVcol *)GPU_vertbuf_get_data(vbo); - - Vector<VColRef> refs = get_vcol_refs(cd_vdata, cd_ldata, vcol_layers); - - for (const VColRef &ref : refs) { - const CustomData *cdata = ref.domain == ATTR_DOMAIN_POINT ? cd_vdata : cd_ldata; - - if (mr->extract_type == MR_EXTRACT_BMESH) { - int cd_ofs = ref.layer->offset; - - if (cd_ofs == -1) { - vcol_data += ref.domain == ATTR_DOMAIN_POINT ? mr->bm->totvert : mr->bm->totloop; - continue; - } - - BMIter iter; - const bool is_byte = ref.layer->type == CD_PROP_BYTE_COLOR; - const bool is_point = ref.domain == ATTR_DOMAIN_POINT; - - BMFace *f; - BM_ITER_MESH (f, &iter, mr->bm, BM_FACES_OF_MESH) { - const BMLoop *l_iter = f->l_first; - do { - const BMElem *elem = is_point ? 
reinterpret_cast<const BMElem *>(l_iter->v) : - reinterpret_cast<const BMElem *>(l_iter); - if (is_byte) { - const MLoopCol *mloopcol = (const MLoopCol *)BM_ELEM_CD_GET_VOID_P(elem, cd_ofs); - vcol_data->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->r]); - vcol_data->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->g]); - vcol_data->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->b]); - vcol_data->a = unit_float_to_ushort_clamp(mloopcol->a * (1.0f / 255.0f)); - vcol_data++; - } - else { - const MPropCol *mpcol = (const MPropCol *)BM_ELEM_CD_GET_VOID_P(elem, cd_ofs); - vcol_data->r = unit_float_to_ushort_clamp(mpcol->color[0]); - vcol_data->g = unit_float_to_ushort_clamp(mpcol->color[1]); - vcol_data->b = unit_float_to_ushort_clamp(mpcol->color[2]); - vcol_data->a = unit_float_to_ushort_clamp(mpcol->color[3]); - vcol_data++; - } - } while ((l_iter = l_iter->next) != f->l_first); - } - } - else { - int totloop = mr->loop_len; - const int idx = CustomData_get_named_layer_index(cdata, ref.layer->type, ref.layer->name); - - const MLoopCol *mcol = nullptr; - const MPropCol *pcol = nullptr; - const MLoop *mloop = mr->mloop; - - if (ref.layer->type == CD_PROP_COLOR) { - pcol = static_cast<const MPropCol *>(cdata->layers[idx].data); - } - else { - mcol = static_cast<const MLoopCol *>(cdata->layers[idx].data); - } - - const bool is_corner = ref.domain == ATTR_DOMAIN_CORNER; - - for (int i = 0; i < totloop; i++, mloop++) { - const int v_i = is_corner ? 
i : mloop->v; - - if (mcol) { - vcol_data->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[v_i].r]); - vcol_data->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[v_i].g]); - vcol_data->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[v_i].b]); - vcol_data->a = unit_float_to_ushort_clamp(mcol[v_i].a * (1.0f / 255.0f)); - vcol_data++; - } - else if (pcol) { - vcol_data->r = unit_float_to_ushort_clamp(pcol[v_i].color[0]); - vcol_data->g = unit_float_to_ushort_clamp(pcol[v_i].color[1]); - vcol_data->b = unit_float_to_ushort_clamp(pcol[v_i].color[2]); - vcol_data->a = unit_float_to_ushort_clamp(pcol[v_i].color[3]); - vcol_data++; - } - } - } - } -} - -static void extract_vcol_init_subdiv(const DRWSubdivCache *subdiv_cache, - const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buffer, - void *UNUSED(data)) -{ - GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer); - const Mesh *coarse_mesh = subdiv_cache->mesh; - - bool extract_bmesh = mr->extract_type == MR_EXTRACT_BMESH; - - const CustomData *cd_vdata = extract_bmesh ? &coarse_mesh->edit_mesh->bm->vdata : - &coarse_mesh->vdata; - const CustomData *cd_ldata = extract_bmesh ? &coarse_mesh->edit_mesh->bm->ldata : - &coarse_mesh->ldata; - const int totloop = extract_bmesh ? 
coarse_mesh->edit_mesh->bm->totloop : coarse_mesh->totloop; - - Mesh me_query = blender::dna::shallow_copy(*coarse_mesh); - BKE_id_attribute_copy_domains_temp( - ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - - const CustomDataLayer *active_color = BKE_id_attributes_active_color_get(&me_query.id); - const CustomDataLayer *render_color = BKE_id_attributes_render_color_get(&me_query.id); - - GPUVertFormat format = {0}; - init_vcol_format( - &format, cache, &coarse_mesh->vdata, &coarse_mesh->ldata, active_color, render_color); - - GPU_vertbuf_init_build_on_device(dst_buffer, &format, subdiv_cache->num_subdiv_loops); - - GPUVertBuf *src_data = GPU_vertbuf_calloc(); - /* Dynamic as we upload and interpolate layers one at a time. */ - GPU_vertbuf_init_with_format_ex(src_data, get_coarse_vcol_format(), GPU_USAGE_DYNAMIC); - - GPU_vertbuf_data_alloc(src_data, totloop); - - gpuMeshVcol *mesh_vcol = (gpuMeshVcol *)GPU_vertbuf_get_data(src_data); - - const uint vcol_layers = cache->cd_used.vcol; - - Vector<VColRef> refs = get_vcol_refs(cd_vdata, cd_ldata, vcol_layers); - - /* Index of the vertex color layer in the compact buffer. Used vertex color layers are stored in - * a single buffer. */ - int pack_layer_index = 0; - for (const VColRef &ref : refs) { - /* Include stride in offset, we use a stride of 2 since colors are packed into 2 uints. */ - const int dst_offset = (int)subdiv_cache->num_subdiv_loops * 2 * pack_layer_index++; - - const CustomData *cdata = ref.domain == ATTR_DOMAIN_POINT ? 
cd_vdata : cd_ldata; - int layer_i = CustomData_get_named_layer_index(cdata, ref.layer->type, ref.layer->name); - - if (layer_i == -1) { - printf("%s: missing color layer %s\n", __func__, ref.layer->name); - continue; - } - - gpuMeshVcol *vcol = mesh_vcol; - - const bool is_vert = ref.domain == ATTR_DOMAIN_POINT; - - if (extract_bmesh) { - BMesh *bm = coarse_mesh->edit_mesh->bm; - BMIter iter; - BMFace *f; - int cd_ofs = cdata->layers[layer_i].offset; - const bool is_byte = ref.layer->type == CD_PROP_BYTE_COLOR; - - BM_ITER_MESH (f, &iter, bm, BM_FACES_OF_MESH) { - const BMLoop *l_iter = f->l_first; - - do { - const BMElem *elem = is_vert ? reinterpret_cast<const BMElem *>(l_iter->v) : - reinterpret_cast<const BMElem *>(l_iter); - - if (is_byte) { - const MLoopCol *mcol2 = static_cast<const MLoopCol *>( - BM_ELEM_CD_GET_VOID_P(elem, cd_ofs)); - - vcol->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol2->r]); - vcol->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol2->g]); - vcol->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol2->b]); - vcol->a = unit_float_to_ushort_clamp(mcol2->a * (1.0f / 255.0f)); - } - else { - const MPropCol *pcol2 = static_cast<const MPropCol *>( - BM_ELEM_CD_GET_VOID_P(elem, cd_ofs)); - - vcol->r = unit_float_to_ushort_clamp(pcol2->color[0]); - vcol->g = unit_float_to_ushort_clamp(pcol2->color[1]); - vcol->b = unit_float_to_ushort_clamp(pcol2->color[2]); - vcol->a = unit_float_to_ushort_clamp(pcol2->color[3]); - } - - vcol++; - } while ((l_iter = l_iter->next) != f->l_first); - } - } - else { - const MLoop *ml = coarse_mesh->mloop; - const MLoopCol *mcol = nullptr; - const MPropCol *pcol = nullptr; - - if (ref.layer->type == CD_PROP_COLOR) { - pcol = static_cast<const MPropCol *>(cdata->layers[layer_i].data); - } - else { - mcol = static_cast<const MLoopCol *>(cdata->layers[layer_i].data); - } - - for (int ml_index = 0; ml_index < coarse_mesh->totloop; ml_index++, vcol++, ml++) { - int idx = is_vert ? 
ml->v : ml_index; - - if (mcol) { - vcol->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[idx].r]); - vcol->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[idx].g]); - vcol->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[idx].b]); - vcol->a = unit_float_to_ushort_clamp(mcol[idx].a * (1.0f / 255.0f)); - } - else if (pcol) { - vcol->r = unit_float_to_ushort_clamp(pcol[idx].color[0]); - vcol->g = unit_float_to_ushort_clamp(pcol[idx].color[1]); - vcol->b = unit_float_to_ushort_clamp(pcol[idx].color[2]); - vcol->a = unit_float_to_ushort_clamp(pcol[idx].color[3]); - } - } - } - - /* Ensure data is uploaded properly. */ - GPU_vertbuf_tag_dirty(src_data); - draw_subdiv_interp_custom_data(subdiv_cache, src_data, dst_buffer, 4, dst_offset, true); - } - - GPU_vertbuf_discard(src_data); -} - -constexpr MeshExtract create_extractor_vcol() -{ - MeshExtract extractor = {nullptr}; - extractor.init = extract_vcol_init; - extractor.init_subdiv = extract_vcol_init_subdiv; - extractor.data_type = MR_DATA_NONE; - extractor.data_size = 0; - extractor.use_threading = false; - extractor.mesh_buffer_offset = offsetof(MeshBufferList, vbo.vcol); - return extractor; -} - -/** \} */ - -} // namespace blender::draw - -const MeshExtract extract_vcol = blender::draw::create_extractor_vcol(); diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc index c64cca4dff5..4db5a8c23a4 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc @@ -8,6 +8,7 @@ #include "MEM_guardedalloc.h" #include "BKE_deform.h" +#include "BKE_mesh.h" #include "draw_subdivision.h" #include "extract_mesh.hh" @@ -79,7 +80,7 @@ static float evaluate_vertex_weight(const MDeformVert *dvert, const DRW_MeshWeig } static void extract_weights_init(const MeshRenderData *mr, - struct 
MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *tls_data) { @@ -105,7 +106,7 @@ static void extract_weights_init(const MeshRenderData *mr, data->cd_ofs = CustomData_get_offset(&mr->bm->vdata, CD_MDEFORMVERT); } else { - data->dvert = (const MDeformVert *)CustomData_get_layer(&mr->me->vdata, CD_MDEFORMVERT); + data->dvert = mr->me->deform_verts().data(); data->cd_ofs = -1; } } @@ -154,7 +155,7 @@ static void extract_weights_iter_poly_mesh(const MeshRenderData *mr, static void extract_weights_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *_data) { @@ -171,8 +172,9 @@ static void extract_weights_init_subdiv(const DRWSubdivCache *subdiv_cache, extract_weights_init(mr, cache, coarse_weights, _data); if (mr->extract_type != MR_EXTRACT_BMESH) { - for (int i = 0; i < coarse_mesh->totpoly; i++) { - const MPoly *mpoly = &coarse_mesh->mpoly[i]; + const Span<MPoly> coarse_polys = coarse_mesh->polys(); + for (const int i : coarse_polys.index_range()) { + const MPoly *mpoly = &coarse_polys[i]; extract_weights_iter_poly_mesh(mr, mpoly, i, _data); } } diff --git a/source/blender/draw/intern/shaders/common_aabb_lib.glsl b/source/blender/draw/intern/shaders/common_aabb_lib.glsl new file mode 100644 index 00000000000..b5f664a6779 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_aabb_lib.glsl @@ -0,0 +1,59 @@ + +#pragma BLENDER_REQUIRE(common_shape_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Axis Aligned Bound Box + * \{ */ + +struct AABB { + vec3 min, max; +}; + +AABB aabb_init_min_max() +{ + AABB aabb; + aabb.min = vec3(1.0e30); + aabb.max = vec3(-1.0e30); + return aabb; +} + +void aabb_merge(inout AABB aabb, vec3 v) +{ + aabb.min = min(aabb.min, v); + aabb.max = max(aabb.max, v); +} + +/** + * Return true if there is any intersection. 
+ */ +bool aabb_intersect(AABB a, AABB b) +{ + return all(greaterThanEqual(min(a.max, b.max), max(a.min, b.min))); +} + +/** + * Compute intersect intersection volume of \a a and \a b. + * Return true if the resulting volume is not empty. + */ +bool aabb_clip(AABB a, AABB b, out AABB c) +{ + c.min = max(a.min, b.min); + c.max = min(a.max, b.max); + return all(greaterThanEqual(c.max, c.min)); +} + +Box aabb_to_box(AABB aabb) +{ + Box box; + box.corners[0] = aabb.min; + box.corners[1] = vec3(aabb.max.x, aabb.min.y, aabb.min.z); + box.corners[2] = vec3(aabb.max.x, aabb.max.y, aabb.min.z); + box.corners[3] = vec3(aabb.min.x, aabb.max.y, aabb.min.z); + box.corners[4] = vec3(aabb.min.x, aabb.min.y, aabb.max.z); + box.corners[5] = vec3(aabb.max.x, aabb.min.y, aabb.max.z); + box.corners[6] = aabb.max; + box.corners[7] = vec3(aabb.min.x, aabb.max.y, aabb.max.z); + return box; +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_attribute_lib.glsl b/source/blender/draw/intern/shaders/common_attribute_lib.glsl index ce5e49c7f63..6b5b6fcc846 100644 --- a/source/blender/draw/intern/shaders/common_attribute_lib.glsl +++ b/source/blender/draw/intern/shaders/common_attribute_lib.glsl @@ -25,3 +25,4 @@ float attr_load_float(sampler3D tex); float attr_load_temperature_post(float attr); vec4 attr_load_color_post(vec4 attr); +vec4 attr_load_uniform(vec4 attr, const uint attr_hash); diff --git a/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl new file mode 100644 index 00000000000..3287897e73c --- /dev/null +++ b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl @@ -0,0 +1,215 @@ + +/** + * Debugging drawing library + * + * Quick way to draw debug geometry. All input should be in world space and + * will be rendered in the default view. No additional setup required. + **/ + +/** Global switch option. 
*/ +bool drw_debug_draw_enable = true; +const vec4 drw_debug_default_color = vec4(1.0, 0.0, 0.0, 1.0); + +/* -------------------------------------------------------------------- */ +/** \name Internals + * \{ */ + +uint drw_debug_start_draw(uint v_needed) +{ + uint vertid = atomicAdd(drw_debug_draw_v_count, v_needed); + vertid += drw_debug_draw_offset; + return vertid; +} + +uint drw_debug_color_pack(vec4 color) +{ + color = clamp(color, 0.0, 1.0); + uint result = 0; + result |= uint(color.x * 255.0) << 0u; + result |= uint(color.y * 255.0) << 8u; + result |= uint(color.z * 255.0) << 16u; + result |= uint(color.w * 255.0) << 24u; + return result; +} + +void drw_debug_line(inout uint vertid, vec3 v1, vec3 v2, uint color) +{ + drw_debug_verts_buf[vertid++] = DRWDebugVert( + floatBitsToUint(v1.x), floatBitsToUint(v1.y), floatBitsToUint(v1.z), color); + drw_debug_verts_buf[vertid++] = DRWDebugVert( + floatBitsToUint(v2.x), floatBitsToUint(v2.y), floatBitsToUint(v2.z), color); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name API + * \{ */ + +/** + * Draw a line. + */ +void drw_debug_line(vec3 v1, vec3 v2, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + const uint v_needed = 2; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + drw_debug_line(vertid, v1, v2, drw_debug_color_pack(color)); + } +} +void drw_debug_line(vec3 v1, vec3 v2) +{ + drw_debug_line(v1, v2, drw_debug_default_color); +} + +/** + * Draw a quad contour. 
+ */ +void drw_debug_quad(vec3 v1, vec3 v2, vec3 v3, vec3 v4, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + const uint v_needed = 8; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + uint pcolor = drw_debug_color_pack(color); + drw_debug_line(vertid, v1, v2, pcolor); + drw_debug_line(vertid, v2, v3, pcolor); + drw_debug_line(vertid, v3, v4, pcolor); + drw_debug_line(vertid, v4, v1, pcolor); + } +} +void drw_debug_quad(vec3 v1, vec3 v2, vec3 v3, vec3 v4) +{ + drw_debug_quad(v1, v2, v3, v4, drw_debug_default_color); +} + +/** + * Draw a point as octahedron wireframe. + */ +void drw_debug_point(vec3 p, float radius, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + vec3 c = vec3(radius, -radius, 0); + vec3 v1 = p + c.xzz; + vec3 v2 = p + c.zxz; + vec3 v3 = p + c.yzz; + vec3 v4 = p + c.zyz; + vec3 v5 = p + c.zzx; + vec3 v6 = p + c.zzy; + + const uint v_needed = 12 * 2; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + uint pcolor = drw_debug_color_pack(color); + drw_debug_line(vertid, v1, v2, pcolor); + drw_debug_line(vertid, v2, v3, pcolor); + drw_debug_line(vertid, v3, v4, pcolor); + drw_debug_line(vertid, v4, v1, pcolor); + drw_debug_line(vertid, v1, v5, pcolor); + drw_debug_line(vertid, v2, v5, pcolor); + drw_debug_line(vertid, v3, v5, pcolor); + drw_debug_line(vertid, v4, v5, pcolor); + drw_debug_line(vertid, v1, v6, pcolor); + drw_debug_line(vertid, v2, v6, pcolor); + drw_debug_line(vertid, v3, v6, pcolor); + drw_debug_line(vertid, v4, v6, pcolor); + } +} +void drw_debug_point(vec3 p, float radius) +{ + drw_debug_point(p, radius, drw_debug_default_color); +} +void drw_debug_point(vec3 p) +{ + drw_debug_point(p, 0.01); +} + +/** + * Draw a sphere wireframe as 3 axes circle. 
+ */ +void drw_debug_sphere(vec3 p, float radius, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + const int circle_resolution = 16; + const uint v_needed = circle_resolution * 2 * 3; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + uint pcolor = drw_debug_color_pack(color); + for (int axis = 0; axis < 3; axis++) { + for (int edge = 0; edge < circle_resolution; edge++) { + float angle1 = (2.0 * 3.141592) * float(edge + 0) / float(circle_resolution); + vec3 p1 = vec3(cos(angle1), sin(angle1), 0.0) * radius; + p1 = vec3(p1[(0 + axis) % 3], p1[(1 + axis) % 3], p1[(2 + axis) % 3]); + + float angle2 = (2.0 * 3.141592) * float(edge + 1) / float(circle_resolution); + vec3 p2 = vec3(cos(angle2), sin(angle2), 0.0) * radius; + p2 = vec3(p2[(0 + axis) % 3], p2[(1 + axis) % 3], p2[(2 + axis) % 3]); + + drw_debug_line(vertid, p + p1, p + p2, pcolor); + } + } + } +} +void drw_debug_sphere(vec3 p, float radius) +{ + drw_debug_sphere(p, radius, drw_debug_default_color); +} + +/** + * Draw a matrix transformation as 3 colored axes. + */ +void drw_debug_matrix(mat4 mat, vec4 color) +{ + vec4 p[4] = vec4[4](vec4(0, 0, 0, 1), vec4(1, 0, 0, 1), vec4(0, 1, 0, 1), vec4(0, 0, 1, 1)); + for (int i = 0; i < 4; i++) { + p[i] = mat * p[i]; + p[i].xyz /= p[i].w; + } + drw_debug_line(p[0].xyz, p[0].xyz, vec4(1, 0, 0, 1)); + drw_debug_line(p[0].xyz, p[1].xyz, vec4(0, 1, 0, 1)); + drw_debug_line(p[0].xyz, p[2].xyz, vec4(0, 0, 1, 1)); +} +void drw_debug_matrix(mat4 mat) +{ + drw_debug_matrix(mat, drw_debug_default_color); +} + +/** + * Draw a matrix as a 2 units length bounding box, centered on origin. 
+ */ +void drw_debug_matrix_as_bbox(mat4 mat, vec4 color) +{ + vec4 p[8] = vec4[8](vec4(-1, -1, -1, 1), + vec4(1, -1, -1, 1), + vec4(1, 1, -1, 1), + vec4(-1, 1, -1, 1), + vec4(-1, -1, 1, 1), + vec4(1, -1, 1, 1), + vec4(1, 1, 1, 1), + vec4(-1, 1, 1, 1)); + for (int i = 0; i < 8; i++) { + p[i] = mat * p[i]; + p[i].xyz /= p[i].w; + } + drw_debug_quad(p[0].xyz, p[1].xyz, p[2].xyz, p[3].xyz, color); + drw_debug_line(p[0].xyz, p[4].xyz, color); + drw_debug_line(p[1].xyz, p[5].xyz, color); + drw_debug_line(p[2].xyz, p[6].xyz, color); + drw_debug_line(p[3].xyz, p[7].xyz, color); + drw_debug_quad(p[4].xyz, p[5].xyz, p[6].xyz, p[7].xyz, color); +} +void drw_debug_matrix_as_bbox(mat4 mat) +{ + drw_debug_matrix_as_bbox(mat, drw_debug_default_color); +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_debug_print_lib.glsl b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl new file mode 100644 index 00000000000..89d1729b52d --- /dev/null +++ b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl @@ -0,0 +1,388 @@ + +/** + * Debug print implementation for shaders. + * + * `print()`: + * Log variable or strings inside the viewport. + * Using a unique non string argument will print the variable name with it. + * Concatenate by using multiple arguments. i.e: `print("Looped ", n, "times.")`. + * `drw_print_no_endl()`: + * Same as `print()` but does not finish the line. + * `drw_print_value()`: + * Display only the value of a variable. Does not finish the line. + * `drw_print_value_hex()`: + * Display only the hex representation of a variable. Does not finish the line. + * `drw_print_value_binary()`: Display only the binary representation of a + * variable. Does not finish the line. + * + * IMPORTANT: As it is now, it is not yet thread safe. Only print from one thread. You can use the + * IS_DEBUG_MOUSE_FRAGMENT macro in fragment shader to filter using mouse position or + * IS_FIRST_INVOCATION in compute shaders. 
+ * + * NOTE: Floating point representation might not be very precise (see drw_print_value(float)). + * + * IMPORTANT: Multipler drawcalls can write to the buffer in sequence (if they are from different + * shgroups). However, we add barriers to support this case and it might change the application + * behavior. Uncomment DISABLE_DEBUG_SHADER_drw_print_BARRIER to remove the barriers if that + * happens. But then you are limited to a single invocation output. + * + * IMPORTANT: All of these are copied to the CPU debug libs (draw_debug.cc). They need to be kept + * in sync to write the same data. + */ + +/** Global switch option when you want to silence all prints from all shaders at once. */ +bool drw_debug_print_enable = true; + +/* Set drw_print_col to max value so we will start by creating a new line and get the correct + * threadsafe row. */ +uint drw_print_col = DRW_DEBUG_PRINT_WORD_WRAP_COLUMN; +uint drw_print_row = 0u; + +void drw_print_newline() +{ + if (!drw_debug_print_enable) { + return; + } + drw_print_col = 0u; + drw_print_row = atomicAdd(drw_debug_print_row_shared, 1u) + 1u; +} + +void drw_print_string_start(uint len) +{ + if (!drw_debug_print_enable) { + return; + } + /* Break before word. */ + if (drw_print_col + len > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + drw_print_newline(); + } +} + +void drw_print_char4(uint data) +{ + if (!drw_debug_print_enable) { + return; + } + /* Convert into char stream. */ + for (; data != 0u; data >>= 8u) { + uint char1 = data & 0xFFu; + /* Check for null terminator. */ + if (char1 == 0x00) { + break; + } + uint cursor = atomicAdd(drw_debug_print_cursor, 1u); + cursor += drw_debug_print_offset; + if (cursor < DRW_DEBUG_PRINT_MAX) { + /* For future usage. (i.e: Color) */ + uint flags = 0u; + uint col = drw_print_col++; + uint drw_print_header = (flags << 24u) | (drw_print_row << 16u) | (col << 8u); + drw_debug_print_buf[cursor] = drw_print_header | char1; + /* Break word. 
*/ + if (drw_print_col > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + drw_print_newline(); + } + } + } +} + +/** + * NOTE(fclem): Strange behavior emerge when trying to increment the digit + * counter inside the append function. It looks like the compiler does not see + * it is referenced as an index for char4 and thus do not capture the right + * reference. I do not know if this is undefined behavior. As a matter of + * precaution, we implement all the append function separately. This behavior + * was observed on both Mesa & amdgpu-pro. + */ +/* Using ascii char code. Expect char1 to be less or equal to 0xFF. Appends chars to the right. */ +void drw_print_append_char(uint char1, inout uint char4) +{ + char4 = (char4 << 8u) | char1; +} + +void drw_print_append_digit(uint digit, inout uint char4) +{ + const uint char_A = 0x41u; + const uint char_0 = 0x30u; + bool is_hexadecimal = digit > 9u; + char4 = (char4 << 8u) | (is_hexadecimal ? (char_A + digit - 10u) : (char_0 + digit)); +} + +void drw_print_append_space(inout uint char4) +{ + char4 = (char4 << 8u) | 0x20u; +} + +void drw_print_value_binary(uint value) +{ + drw_print_no_endl("0b"); + drw_print_string_start(10u * 4u); + uint digits[10] = uint[10](0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u); + uint digit = 0u; + for (uint i = 0u; i < 32u; i++) { + drw_print_append_digit(((value >> i) & 1u), digits[digit / 4u]); + digit++; + if ((i % 4u) == 3u) { + drw_print_append_space(digits[digit / 4u]); + digit++; + } + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 9; j >= 0; j--) { + drw_print_char4(digits[j]); + } +} + +void drw_print_value_binary(int value) +{ + drw_print_value_binary(uint(value)); +} + +void drw_print_value_binary(float value) +{ + drw_print_value_binary(floatBitsToUint(value)); +} + +void drw_print_value_uint(uint value, const bool hex, bool is_negative, const bool is_unsigned) +{ + drw_print_string_start(3u * 4u); + const uint blank_value = hex ? 
0x30303030u : 0x20202020u; + const uint prefix = hex ? 0x78302020u : 0x20202020u; + uint digits[3] = uint[3](blank_value, blank_value, prefix); + const uint base = hex ? 16u : 10u; + uint digit = 0u; + /* Add `u` suffix. */ + if (is_unsigned) { + drw_print_append_char('u', digits[digit / 4u]); + digit++; + } + /* Number's digits. */ + for (; value != 0u || digit == uint(is_unsigned); value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Add negative sign. */ + if (is_negative) { + drw_print_append_char('-', digits[digit / 4u]); + digit++; + } + /* Need to pad to uint alignment because we are issuing chars in "reverse". */ + for (uint i = digit % 4u; i < 4u && i > 0u; i++) { + drw_print_append_space(digits[digit / 4u]); + digit++; + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 2; j >= 0; j--) { + drw_print_char4(digits[j]); + } +} + +void drw_print_value_hex(uint value) +{ + drw_print_value_uint(value, true, false, false); +} + +void drw_print_value_hex(int value) +{ + drw_print_value_uint(uint(value), true, false, false); +} + +void drw_print_value_hex(float value) +{ + drw_print_value_uint(floatBitsToUint(value), true, false, false); +} + +void drw_print_value(uint value) +{ + drw_print_value_uint(value, false, false, true); +} + +void drw_print_value(int value) +{ + drw_print_value_uint(uint(abs(value)), false, (value < 0), false); +} + +void drw_print_value(bool value) +{ + if (value) { + drw_print_no_endl("true "); + } + else { + drw_print_no_endl("false"); + } +} + +/* NOTE(@fclem): This is homebrew and might not be 100% accurate (accuracy has + * not been tested and might dependent on compiler implementation). If unsure, + * use drw_print_value_hex and transcribe the value manually with another tool. */ +void drw_print_value(float val) +{ + /* We pad the string to match normal float values length. 
*/ + if (isnan(val)) { + drw_print_no_endl(" NaN"); + return; + } + if (isinf(val)) { + if (sign(val) < 0.0) { + drw_print_no_endl(" -Inf"); + } + else { + drw_print_no_endl(" Inf"); + } + return; + } + + /* Adjusted for significant digits (6) with sign (1), decimal separator (1) + * and exponent (4). */ + const float significant_digits = 6.0; + drw_print_string_start(3u * 4u); + uint digits[3] = uint[3](0x20202020u, 0x20202020u, 0x20202020u); + + float exponent = floor(log(abs(val)) / log(10.0)); + bool display_exponent = exponent >= (significant_digits) || + exponent <= (-significant_digits + 1.0); + + float int_significant_digits = min(exponent + 1.0, significant_digits); + float dec_significant_digits = max(0.0, significant_digits - int_significant_digits); + /* Power to get to the rounding point. */ + float rounding_power = dec_significant_digits; + + if (val == 0.0 || isinf(exponent)) { + display_exponent = false; + int_significant_digits = dec_significant_digits = 1.0; + } + /* Remap to keep significant numbers count. */ + if (display_exponent) { + int_significant_digits = 1.0; + dec_significant_digits = significant_digits - int_significant_digits; + rounding_power = -exponent + dec_significant_digits; + } + /* Round at the last significant digit. */ + val = round(val * pow(10.0, rounding_power)); + /* Get back to final exponent. */ + val *= pow(10.0, -dec_significant_digits); + + float int_part; + float dec_part = modf(val, int_part); + + dec_part *= pow(10.0, dec_significant_digits); + + const uint base = 10u; + uint digit = 0u; + /* Exponent */ + uint value = uint(abs(exponent)); + if (display_exponent) { + for (int i = 0; value != 0u || i == 0; i++, value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Exponent sign. */ + uint sign_char = (exponent < 0.0) ? '-' : '+'; + drw_print_append_char(sign_char, digits[digit / 4u]); + digit++; + /* Exponent `e` suffix. 
*/ + drw_print_append_char(0x65u, digits[digit / 4u]); + digit++; + } + /* Decimal part. */ + value = uint(abs(dec_part)); +#if 0 /* We don't do that because it makes unstable values really hard to \ + read. */ + /* Trim trailing zeros. */ + while ((value % base) == 0u) { + value /= base; + if (value == 0u) { + break; + } + } +#endif + if (value != 0u) { + for (int i = 0; value != 0u || i == 0; i++, value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Point separator. */ + drw_print_append_char('.', digits[digit / 4u]); + digit++; + } + /* Integer part. */ + value = uint(abs(int_part)); + for (int i = 0; value != 0u || i == 0; i++, value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Negative sign. */ + if (val < 0.0) { + drw_print_append_char('-', digits[digit / 4u]); + digit++; + } + /* Need to pad to uint alignment because we are issuing chars in "reverse". */ + for (uint i = digit % 4u; i < 4u && i > 0u; i++) { + drw_print_append_space(digits[digit / 4u]); + digit++; + } + /* Numbers are written from right to left. So we need to reverse the order. 
*/ + for (int j = 2; j >= 0; j--) { + drw_print_char4(digits[j]); + } +} + +void drw_print_value(vec2 value) +{ + drw_print_no_endl("vec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(vec3 value) +{ + drw_print_no_endl("vec3(", value[0], ", ", value[1], ", ", value[1], ")"); +} + +void drw_print_value(vec4 value) +{ + drw_print_no_endl("vec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +void drw_print_value(ivec2 value) +{ + drw_print_no_endl("ivec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(ivec3 value) +{ + drw_print_no_endl("ivec3(", value[0], ", ", value[1], ", ", value[1], ")"); +} + +void drw_print_value(ivec4 value) +{ + drw_print_no_endl("ivec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +void drw_print_value(uvec2 value) +{ + drw_print_no_endl("uvec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(uvec3 value) +{ + drw_print_no_endl("uvec3(", value[0], ", ", value[1], ", ", value[1], ")"); +} + +void drw_print_value(uvec4 value) +{ + drw_print_no_endl("uvec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +void drw_print_value(bvec2 value) +{ + drw_print_no_endl("bvec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(bvec3 value) +{ + drw_print_no_endl("bvec3(", value[0], ", ", value[1], ", ", value[1], ")"); +} + +void drw_print_value(bvec4 value) +{ + drw_print_no_endl("bvec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} diff --git a/source/blender/draw/intern/shaders/common_debug_shape_lib.glsl b/source/blender/draw/intern/shaders/common_debug_shape_lib.glsl new file mode 100644 index 00000000000..538c55ce544 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_debug_shape_lib.glsl @@ -0,0 +1,57 @@ + +/** + * Debug drawing of shapes. 
+ */ + +#pragma BLENDER_REQUIRE(common_debug_draw_lib.glsl) +#pragma BLENDER_REQUIRE(common_shape_lib.glsl) + +void drw_debug(Box shape, vec4 color) +{ + drw_debug_quad(shape.corners[0], shape.corners[1], shape.corners[2], shape.corners[3], color); + drw_debug_line(shape.corners[0], shape.corners[4], color); + drw_debug_line(shape.corners[1], shape.corners[5], color); + drw_debug_line(shape.corners[2], shape.corners[6], color); + drw_debug_line(shape.corners[3], shape.corners[7], color); + drw_debug_quad(shape.corners[4], shape.corners[5], shape.corners[6], shape.corners[7], color); +} +void drw_debug(Box shape) +{ + drw_debug(shape, drw_debug_default_color); +} + +void drw_debug(Frustum shape, vec4 color) +{ + drw_debug_quad(shape.corners[0], shape.corners[1], shape.corners[2], shape.corners[3], color); + drw_debug_line(shape.corners[0], shape.corners[4], color); + drw_debug_line(shape.corners[1], shape.corners[5], color); + drw_debug_line(shape.corners[2], shape.corners[6], color); + drw_debug_line(shape.corners[3], shape.corners[7], color); + drw_debug_quad(shape.corners[4], shape.corners[5], shape.corners[6], shape.corners[7], color); +} +void drw_debug(Frustum shape) +{ + drw_debug(shape, drw_debug_default_color); +} + +void drw_debug(Pyramid shape, vec4 color) +{ + drw_debug_line(shape.corners[0], shape.corners[1], color); + drw_debug_line(shape.corners[0], shape.corners[2], color); + drw_debug_line(shape.corners[0], shape.corners[3], color); + drw_debug_line(shape.corners[0], shape.corners[4], color); + drw_debug_quad(shape.corners[1], shape.corners[2], shape.corners[3], shape.corners[4], color); +} +void drw_debug(Pyramid shape) +{ + drw_debug(shape, drw_debug_default_color); +} + +void drw_debug(Sphere shape, vec4 color) +{ + drw_debug_sphere(shape.center, shape.radius, color); +} +void drw_debug(Sphere shape) +{ + drw_debug(shape, drw_debug_default_color); +} diff --git a/source/blender/draw/intern/shaders/common_hair_lib.glsl 
b/source/blender/draw/intern/shaders/common_hair_lib.glsl index e235da91e8d..b82df4a51dc 100644 --- a/source/blender/draw/intern/shaders/common_hair_lib.glsl +++ b/source/blender/draw/intern/shaders/common_hair_lib.glsl @@ -164,16 +164,15 @@ float hair_shaperadius(float shape, float root, float tip, float time) in float dummy; # endif -void hair_get_pos_tan_binor_time(bool is_persp, - mat4 invmodel_mat, - vec3 camera_pos, - vec3 camera_z, - out vec3 wpos, - out vec3 wtan, - out vec3 wbinor, - out float time, - out float thickness, - out float thick_time) +void hair_get_center_pos_tan_binor_time(bool is_persp, + mat4 invmodel_mat, + vec3 camera_pos, + vec3 camera_z, + out vec3 wpos, + out vec3 wtan, + out vec3 wbinor, + out float time, + out float thickness) { int id = hair_get_base_id(); vec4 data = texelFetch(hairPointBuffer, id); @@ -202,15 +201,27 @@ void hair_get_pos_tan_binor_time(bool is_persp, wbinor = normalize(cross(camera_vec, wtan)); thickness = hair_shaperadius(hairRadShape, hairRadRoot, hairRadTip, time); +} +void hair_get_pos_tan_binor_time(bool is_persp, + mat4 invmodel_mat, + vec3 camera_pos, + vec3 camera_z, + out vec3 wpos, + out vec3 wtan, + out vec3 wbinor, + out float time, + out float thickness, + out float thick_time) +{ + hair_get_center_pos_tan_binor_time( + is_persp, invmodel_mat, camera_pos, camera_z, wpos, wtan, wbinor, time, thickness); if (hairThicknessRes > 1) { thick_time = float(gl_VertexID % hairThicknessRes) / float(hairThicknessRes - 1); thick_time = thickness * (thick_time * 2.0 - 1.0); - /* Take object scale into account. * NOTE: This only works fine with uniform scaling. 
*/ float scale = 1.0 / length(mat3(invmodel_mat) * wbinor); - wpos += wbinor * thick_time * scale; } else { diff --git a/source/blender/draw/intern/shaders/common_intersect_lib.glsl b/source/blender/draw/intern/shaders/common_intersect_lib.glsl new file mode 100644 index 00000000000..83223f89277 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_intersect_lib.glsl @@ -0,0 +1,466 @@ + +/** + * Intersection library used for culling. + * Results are meant to be conservative. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(common_shape_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Plane extraction functions. + * \{ */ + +/** \a v1 and \a v2 are vectors on the plane. \a p is a point on the plane. */ +vec4 isect_plane_setup(vec3 p, vec3 v1, vec3 v2) +{ + vec3 normal_to_plane = normalize(cross(v1, v2)); + return vec4(normal_to_plane, -dot(normal_to_plane, p)); +} + +struct IsectPyramid { + vec3 corners[5]; + vec4 planes[5]; +}; + +IsectPyramid isect_data_setup(Pyramid shape) +{ + vec3 A1 = shape.corners[1] - shape.corners[0]; + vec3 A2 = shape.corners[2] - shape.corners[0]; + vec3 A3 = shape.corners[3] - shape.corners[0]; + vec3 A4 = shape.corners[4] - shape.corners[0]; + vec3 S4 = shape.corners[4] - shape.corners[1]; + vec3 S2 = shape.corners[2] - shape.corners[1]; + + IsectPyramid data; + data.planes[0] = isect_plane_setup(shape.corners[0], A2, A1); + data.planes[1] = isect_plane_setup(shape.corners[0], A3, A2); + data.planes[2] = isect_plane_setup(shape.corners[0], A4, A3); + data.planes[3] = isect_plane_setup(shape.corners[0], A1, A4); + data.planes[4] = isect_plane_setup(shape.corners[1], S2, S4); + for (int i = 0; i < 5; i++) { + data.corners[i] = shape.corners[i]; + } + return data; +} + +struct IsectBox { + vec3 corners[8]; + vec4 planes[6]; +}; + +IsectBox isect_data_setup(Box shape) +{ + vec3 A1 = shape.corners[1] - 
shape.corners[0]; + vec3 A3 = shape.corners[3] - shape.corners[0]; + vec3 A4 = shape.corners[4] - shape.corners[0]; + + IsectBox data; + data.planes[0] = isect_plane_setup(shape.corners[0], A3, A1); + data.planes[1] = isect_plane_setup(shape.corners[0], A4, A3); + data.planes[2] = isect_plane_setup(shape.corners[0], A1, A4); + /* Assumes that the box is actually a box! */ + data.planes[3] = vec4(-data.planes[0].xyz, -dot(-data.planes[0].xyz, shape.corners[6])); + data.planes[4] = vec4(-data.planes[1].xyz, -dot(-data.planes[1].xyz, shape.corners[6])); + data.planes[5] = vec4(-data.planes[2].xyz, -dot(-data.planes[2].xyz, shape.corners[6])); + for (int i = 0; i < 8; i++) { + data.corners[i] = shape.corners[i]; + } + return data; +} + +/* Construct box from 1 corner point + 3 side vectors. */ +IsectBox isect_data_setup(vec3 origin, vec3 side_x, vec3 side_y, vec3 side_z) +{ + IsectBox data; + data.corners[0] = origin; + data.corners[1] = origin + side_x; + data.corners[2] = origin + side_y + side_x; + data.corners[3] = origin + side_y; + data.corners[4] = data.corners[0] + side_z; + data.corners[5] = data.corners[1] + side_z; + data.corners[6] = data.corners[2] + side_z; + data.corners[7] = data.corners[3] + side_z; + + data.planes[0] = isect_plane_setup(data.corners[0], side_y, side_z); + data.planes[1] = isect_plane_setup(data.corners[0], side_x, side_y); + data.planes[2] = isect_plane_setup(data.corners[0], side_z, side_x); + /* Assumes that the box is actually a box! 
*/ + data.planes[3] = vec4(-data.planes[0].xyz, -dot(-data.planes[0].xyz, data.corners[6])); + data.planes[4] = vec4(-data.planes[1].xyz, -dot(-data.planes[1].xyz, data.corners[6])); + data.planes[5] = vec4(-data.planes[2].xyz, -dot(-data.planes[2].xyz, data.corners[6])); + + return data; +} + +struct IsectFrustum { + vec3 corners[8]; + vec4 planes[6]; +}; + +IsectFrustum isect_data_setup(Frustum shape) +{ + vec3 A1 = shape.corners[1] - shape.corners[0]; + vec3 A3 = shape.corners[3] - shape.corners[0]; + vec3 A4 = shape.corners[4] - shape.corners[0]; + vec3 B5 = shape.corners[5] - shape.corners[6]; + vec3 B7 = shape.corners[7] - shape.corners[6]; + vec3 B2 = shape.corners[2] - shape.corners[6]; + + IsectFrustum data; + data.planes[0] = isect_plane_setup(shape.corners[0], A3, A1); + data.planes[1] = isect_plane_setup(shape.corners[0], A4, A3); + data.planes[2] = isect_plane_setup(shape.corners[0], A1, A4); + data.planes[3] = isect_plane_setup(shape.corners[6], B7, B5); + data.planes[4] = isect_plane_setup(shape.corners[6], B5, B2); + data.planes[5] = isect_plane_setup(shape.corners[6], B2, B7); + for (int i = 0; i < 8; i++) { + data.corners[i] = shape.corners[i]; + } + return data; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name View Intersection functions. + * \{ */ + +bool intersect_view(Pyramid pyramid) +{ + bool intersects = true; + + /* Do Pyramid vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 5; ++v) { + float test = dot(drw_view.frustum_planes[p], vec4(pyramid.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Pyramid planes. 
*/ + IsectPyramid i_pyramid = isect_data_setup(pyramid); + for (int p = 0; p < 5; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_pyramid.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect_view(Box box) +{ + bool intersects = true; + + /* Do Box vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(drw_view.frustum_planes[p], vec4(box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Box planes. */ + IsectBox i_box = isect_data_setup(box); + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_box.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + return intersects; +} + +bool intersect_view(IsectBox i_box) +{ + bool intersects = true; + + /* Do Box vertices vs Frustum planes. 
*/ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(drw_view.frustum_planes[p], vec4(i_box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_box.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + return intersects; +} + +bool intersect_view(Sphere sphere) +{ + bool intersects = true; + + for (int p = 0; p < 6 && intersects; ++p) { + float dist_to_plane = dot(drw_view.frustum_planes[p], vec4(sphere.center, 1.0)); + if (dist_to_plane < -sphere.radius) { + intersects = false; + } + } + /* TODO reject false positive. */ + return intersects; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Shape vs. Shape Intersection functions. + * \{ */ + +bool intersect(IsectPyramid i_pyramid, Box box) +{ + bool intersects = true; + + /* Do Box vertices vs Pyramid planes. 
*/ + for (int p = 0; p < 5; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_pyramid.planes[p], vec4(box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Pyramid vertices vs Box planes. */ + IsectBox i_box = isect_data_setup(box); + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 5; ++v) { + float test = dot(i_box.planes[p], vec4(i_pyramid.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect(IsectFrustum i_frustum, Pyramid pyramid) +{ + bool intersects = true; + + /* Do Pyramid vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 5; ++v) { + float test = dot(i_frustum.planes[p], vec4(pyramid.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Pyramid planes. 
*/ + IsectPyramid i_pyramid = isect_data_setup(pyramid); + for (int p = 0; p < 5; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_pyramid.planes[p], vec4(i_frustum.corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect(IsectFrustum i_frustum, Box box) +{ + bool intersects = true; + + /* Do Box vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_frustum.planes[p], vec4(box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Box planes. 
*/ + IsectBox i_box = isect_data_setup(box); + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_box.planes[p], vec4(i_frustum.corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + return intersects; +} + +bool intersect(IsectFrustum i_frustum, Sphere sphere) +{ + bool intersects = true; + for (int p = 0; p < 6; ++p) { + float dist_to_plane = dot(i_frustum.planes[p], vec4(sphere.center, 1.0)); + if (dist_to_plane < -sphere.radius) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect(Cone cone, Sphere sphere) +{ + /** + * Following "Improve Tile-based Light Culling with Spherical-sliced Cone" + * by Eric Zhang + * https://lxjk.github.io/2018/03/25/Improve-Tile-based-Light-Culling-with-Spherical-sliced-Cone.html + */ + float sphere_distance = length(sphere.center); + float sphere_distance_rcp = safe_rcp(sphere_distance); + float sphere_sin = saturate(sphere.radius * sphere_distance_rcp); + float sphere_cos = sqrt(1.0 - sphere_sin * sphere_sin); + float cone_aperture_sin = sqrt(1.0 - cone.angle_cos * cone.angle_cos); + + float cone_sphere_center_cos = dot(sphere.center * sphere_distance_rcp, cone.direction); + /* cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B). */ + float cone_sphere_angle_sum_cos = (sphere.radius > sphere_distance) ? + -1.0 : + (cone.angle_cos * sphere_cos - + cone_aperture_sin * sphere_sin); + /* Comparing cosines instead of angles since we are interested + * only in the monotonic region [0 .. M_PI / 2]. This saves costly acos() calls. 
*/ + bool intersects = (cone_sphere_center_cos >= cone_sphere_angle_sum_cos); + + return intersects; +} + +bool intersect(Circle circle_a, Circle circle_b) +{ + return distance_squared(circle_a.center, circle_b.center) < + sqr(circle_a.radius + circle_b.radius); +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_math_geom_lib.glsl b/source/blender/draw/intern/shaders/common_math_geom_lib.glsl index 6d4452c18c8..71460c39285 100644 --- a/source/blender/draw/intern/shaders/common_math_geom_lib.glsl +++ b/source/blender/draw/intern/shaders/common_math_geom_lib.glsl @@ -5,63 +5,88 @@ /** \name Math intersection & projection functions. * \{ */ -float point_plane_projection_dist(vec3 lineorigin, vec3 planeorigin, vec3 planenormal) +vec4 plane_from_quad(vec3 v0, vec3 v1, vec3 v2, vec3 v3) { - return dot(planenormal, planeorigin - lineorigin); + vec3 nor = normalize(cross(v2 - v1, v0 - v1) + cross(v0 - v3, v2 - v3)); + return vec4(nor, -dot(nor, v2)); } -float line_plane_intersect_dist(vec3 lineorigin, - vec3 linedirection, - vec3 planeorigin, - vec3 planenormal) +vec4 plane_from_tri(vec3 v0, vec3 v1, vec3 v2) { - return dot(planenormal, planeorigin - lineorigin) / dot(planenormal, linedirection); + vec3 nor = normalize(cross(v2 - v1, v0 - v1)); + return vec4(nor, -dot(nor, v2)); } -float line_plane_intersect_dist(vec3 lineorigin, vec3 linedirection, vec4 plane) +float point_plane_projection_dist(vec3 line_origin, vec3 plane_origin, vec3 plane_normal) +{ + return dot(plane_normal, plane_origin - line_origin); +} + +float point_line_projection_dist(vec2 point, vec2 line_origin, vec2 line_normal) +{ + return dot(line_normal, line_origin - point); +} + +float line_plane_intersect_dist(vec3 line_origin, + vec3 line_direction, + vec3 plane_origin, + vec3 plane_normal) +{ + return dot(plane_normal, plane_origin - line_origin) / dot(plane_normal, line_direction); +} + +float line_plane_intersect_dist(vec3 line_origin, vec3 line_direction, vec4 plane) { vec3 
plane_co = plane.xyz * (-plane.w / len_squared(plane.xyz)); - vec3 h = lineorigin - plane_co; - return -dot(plane.xyz, h) / dot(plane.xyz, linedirection); + vec3 h = line_origin - plane_co; + return -dot(plane.xyz, h) / dot(plane.xyz, line_direction); } -vec3 line_plane_intersect(vec3 lineorigin, vec3 linedirection, vec3 planeorigin, vec3 planenormal) +vec3 line_plane_intersect(vec3 line_origin, + vec3 line_direction, + vec3 plane_origin, + vec3 plane_normal) { - float dist = line_plane_intersect_dist(lineorigin, linedirection, planeorigin, planenormal); - return lineorigin + linedirection * dist; + float dist = line_plane_intersect_dist(line_origin, line_direction, plane_origin, plane_normal); + return line_origin + line_direction * dist; } -vec3 line_plane_intersect(vec3 lineorigin, vec3 linedirection, vec4 plane) +vec3 line_plane_intersect(vec3 line_origin, vec3 line_direction, vec4 plane) { - float dist = line_plane_intersect_dist(lineorigin, linedirection, plane); - return lineorigin + linedirection * dist; + float dist = line_plane_intersect_dist(line_origin, line_direction, plane); + return line_origin + line_direction * dist; } -float line_aligned_plane_intersect_dist(vec3 lineorigin, vec3 linedirection, vec3 planeorigin) +float line_aligned_plane_intersect_dist(vec3 line_origin, vec3 line_direction, vec3 plane_origin) { /* aligned plane normal */ - vec3 L = planeorigin - lineorigin; - float diskdist = length(L); - vec3 planenormal = -normalize(L); - return -diskdist / dot(planenormal, linedirection); + vec3 L = plane_origin - line_origin; + float disk_dist = length(L); + vec3 plane_normal = -normalize(L); + return -disk_dist / dot(plane_normal, line_direction); } -vec3 line_aligned_plane_intersect(vec3 lineorigin, vec3 linedirection, vec3 planeorigin) +vec3 line_aligned_plane_intersect(vec3 line_origin, vec3 line_direction, vec3 plane_origin) { - float dist = line_aligned_plane_intersect_dist(lineorigin, linedirection, planeorigin); + float dist = 
line_aligned_plane_intersect_dist(line_origin, line_direction, plane_origin); if (dist < 0) { /* if intersection is behind we fake the intersection to be * really far and (hopefully) not inside the radius of interest */ dist = 1e16; } - return lineorigin + linedirection * dist; + return line_origin + line_direction * dist; } -float line_unit_sphere_intersect_dist(vec3 lineorigin, vec3 linedirection) +/** + * Returns intersection distance between the unit sphere and the line + * with the assumption that \a line_origin is contained in the unit sphere. + * It will always return the farthest intersection. + */ +float line_unit_sphere_intersect_dist(vec3 line_origin, vec3 line_direction) { - float a = dot(linedirection, linedirection); - float b = dot(linedirection, lineorigin); - float c = dot(lineorigin, lineorigin) - 1; + float a = dot(line_direction, line_direction); + float b = dot(line_direction, line_origin); + float c = dot(line_origin, line_origin) - 1; float dist = 1e15; float determinant = b * b - a * c; @@ -72,22 +97,63 @@ float line_unit_sphere_intersect_dist(vec3 lineorigin, vec3 linedirection) return dist; } -float line_unit_box_intersect_dist(vec3 lineorigin, vec3 linedirection) +/** + * Returns intersection distance between the unit box and the line + * with the assumption that \a line_origin is contained in the unit box. + * In other words, it will always return the farthest intersection.
+ */ +float line_unit_box_intersect_dist(vec3 line_origin, vec3 line_direction) { /* https://seblagarde.wordpress.com/2012/09/29/image-based-lighting-approaches-and-parallax-corrected-cubemap/ */ - vec3 firstplane = (vec3(1.0) - lineorigin) / linedirection; - vec3 secondplane = (vec3(-1.0) - lineorigin) / linedirection; - vec3 furthestplane = max(firstplane, secondplane); + vec3 first_plane = (vec3(1.0) - line_origin) / line_direction; + vec3 second_plane = (vec3(-1.0) - line_origin) / line_direction; + vec3 farthest_plane = max(first_plane, second_plane); + + return min_v3(farthest_plane); +} + +float line_unit_box_intersect_dist_safe(vec3 line_origin, vec3 line_direction) +{ + vec3 safe_line_direction = max(vec3(1e-8), abs(line_direction)) * + select(vec3(1.0), -vec3(1.0), lessThan(line_direction, vec3(0.0))); + return line_unit_box_intersect_dist(line_origin, safe_line_direction); +} + +/** + * Same as line_unit_box_intersect_dist but for 2D case. + */ +float line_unit_square_intersect_dist(vec2 line_origin, vec2 line_direction) +{ + vec2 first_plane = (vec2(1.0) - line_origin) / line_direction; + vec2 second_plane = (vec2(-1.0) - line_origin) / line_direction; + vec2 farthest_plane = max(first_plane, second_plane); - return min_v3(furthestplane); + return min_v2(farthest_plane); } -float line_unit_box_intersect_dist_safe(vec3 lineorigin, vec3 linedirection) +float line_unit_square_intersect_dist_safe(vec2 line_origin, vec2 line_direction) { - vec3 safe_linedirection = max(vec3(1e-8), abs(linedirection)) * - select(vec3(1.0), -vec3(1.0), lessThan(linedirection, vec3(0.0))); - return line_unit_box_intersect_dist(lineorigin, safe_linedirection); + vec2 safe_line_direction = max(vec2(1e-8), abs(line_direction)) * + select(vec2(1.0), -vec2(1.0), lessThan(line_direction, vec2(0.0))); + return line_unit_square_intersect_dist(line_origin, safe_line_direction); +} + +/** + * Returns clipping distance (intersection with the nearest plane) with the given axis-aligned + * 
bound box along \a line_direction. + * Safe even if \a line_direction is degenerate. + * It assumes that an intersection exists (i.e: that \a line_direction points towards the AABB). + */ +float line_aabb_clipping_dist(vec3 line_origin, vec3 line_direction, vec3 aabb_min, vec3 aabb_max) +{ + vec3 safe_dir = select(line_direction, vec3(1e-5), lessThan(abs(line_direction), vec3(1e-5))); + vec3 dir_inv = 1.0 / safe_dir; + + vec3 first_plane = (aabb_min - line_origin) * dir_inv; + vec3 second_plane = (aabb_max - line_origin) * dir_inv; + vec3 nearest_plane = min(first_plane, second_plane); + return max_v3(nearest_plane); } /** \} */ @@ -98,8 +164,8 @@ float line_unit_box_intersect_dist_safe(vec3 lineorigin, vec3 linedirection) void make_orthonormal_basis(vec3 N, out vec3 T, out vec3 B) { - vec3 UpVector = abs(N.z) < 0.99999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); - T = normalize(cross(UpVector, N)); + vec3 up_vector = abs(N.z) < 0.99999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); + T = normalize(cross(up_vector, N)); B = cross(N, T); } diff --git a/source/blender/draw/intern/shaders/common_math_lib.glsl b/source/blender/draw/intern/shaders/common_math_lib.glsl index 51f3c890df8..5842df424be 100644 --- a/source/blender/draw/intern/shaders/common_math_lib.glsl +++ b/source/blender/draw/intern/shaders/common_math_lib.glsl @@ -17,6 +17,7 @@ #define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ #define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ #define FLT_MAX 3.402823e+38 +#define FLT_MIN 1.175494e-38 vec3 mul(mat3 m, vec3 v) { @@ -116,8 +117,8 @@ bool flag_test(int flag, int val) { return (flag & val) != 0; } void set_flag_from_test(inout uint value, bool test, uint flag) { if (test) { value |= flag; } else { value &= ~flag; } } void set_flag_from_test(inout int value, bool test, int flag) { if (test) { value |= flag; } else { value &= ~flag; } } -#define weighted_sum(val0, val1, val2, val3, weights) ((val0 * weights[0] + val1 * weights[1] + val2 * weights[2] + 
val3 * weights[3]) * safe_rcp(sum(weights))); -#define weighted_sum_array(val, weights) ((val[0] * weights[0] + val[1] * weights[1] + val[2] * weights[2] + val[3] * weights[3]) * safe_rcp(sum(weights))); +#define weighted_sum(val0, val1, val2, val3, weights) ((val0 * weights[0] + val1 * weights[1] + val2 * weights[2] + val3 * weights[3]) * safe_rcp(sum(weights))) +#define weighted_sum_array(val, weights) ((val[0] * weights[0] + val[1] * weights[1] + val[2] * weights[2] + val[3] * weights[3]) * safe_rcp(sum(weights))) /* clang-format on */ @@ -130,12 +131,17 @@ void set_flag_from_test(inout int value, bool test, int flag) { if (test) { valu #define in_texture_range(texel, tex) \ (all(greaterThanEqual(texel, ivec2(0))) && all(lessThan(texel, textureSize(tex, 0).xy))) -uint divide_ceil_u(uint visible_count, uint divisor) +uint divide_ceil(uint visible_count, uint divisor) { return (visible_count + (divisor - 1u)) / divisor; } -int divide_ceil_i(int visible_count, int divisor) +int divide_ceil(int visible_count, int divisor) +{ + return (visible_count + (divisor - 1)) / divisor; +} + +ivec2 divide_ceil(ivec2 visible_count, ivec2 divisor) { return (visible_count + (divisor - 1)) / divisor; } diff --git a/source/blender/draw/intern/shaders/common_shape_lib.glsl b/source/blender/draw/intern/shaders/common_shape_lib.glsl new file mode 100644 index 00000000000..f2c8bf0faaf --- /dev/null +++ b/source/blender/draw/intern/shaders/common_shape_lib.glsl @@ -0,0 +1,202 @@ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) + +/** + * Geometric shape structures. + * Some constructors might seems redundant but are here to make the API cleaner and + * allow for more than one constructor per type. 
+ */ + +/* ---------------------------------------------------------------------- */ +/** \name Circle + * \{ */ + +struct Circle { + vec2 center; + float radius; +}; + +Circle shape_circle(vec2 center, float radius) +{ + return Circle(center, radius); +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Sphere + * \{ */ + +struct Sphere { + vec3 center; + float radius; +}; + +Sphere shape_sphere(vec3 center, float radius) +{ + return Sphere(center, radius); +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Box + * \{ */ + +struct Box { + vec3 corners[8]; +}; + +/* Construct box from 4 basis points. */ +Box shape_box(vec3 v000, vec3 v100, vec3 v010, vec3 v001) +{ + v100 -= v000; + v010 -= v000; + v001 -= v000; + Box box; + box.corners[0] = v000; + box.corners[1] = v000 + v100; + box.corners[2] = v000 + v010 + v100; + box.corners[3] = v000 + v010; + box.corners[4] = box.corners[0] + v001; + box.corners[5] = box.corners[1] + v001; + box.corners[6] = box.corners[2] + v001; + box.corners[7] = box.corners[3] + v001; + return box; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Square Pyramid + * \{ */ + +struct Pyramid { + /* Apex is the first. Base vertices are in clockwise order from front view. */ + vec3 corners[5]; +}; + +/** + * Regular Square Pyramid (can be oblique). + * Use this corner order. 
+ * (Top-Down View of the pyramid) + * <pre> + * + * Y + * | + * | + * .-----X + * + * 4-----------3 + * | \ / | + * | \ / | + * | 0 | + * | / \ | + * | / \ | + * 1-----------2 + * </pre> + * base_corner_00 is vertex 1 + * base_corner_01 is vertex 2 + * base_corner_10 is vertex 4 + */ +Pyramid shape_pyramid(vec3 apex, vec3 base_corner_00, vec3 base_corner_01, vec3 base_corner_10) +{ + Pyramid pyramid; + pyramid.corners[0] = apex; + pyramid.corners[1] = base_corner_00; + pyramid.corners[2] = base_corner_01; + pyramid.corners[3] = base_corner_10 + (base_corner_01 - base_corner_00); + pyramid.corners[4] = base_corner_10; + return pyramid; +} + +/** + * Regular Square Pyramid. + * <pre> + * + * Y + * | + * | + * .-----X + * + * 4-----Y-----3 + * | \ | / | + * | \ | / | + * | 0-----X + * | / \ | + * | / \ | + * 1-----------2 + * </pre> + * base_center_pos_x is vector from base center to X + * base_center_pos_y is vector from base center to Y + */ +Pyramid shape_pyramid_non_oblique(vec3 apex, + vec3 base_center, + vec3 base_center_pos_x, + vec3 base_center_pos_y) +{ + Pyramid pyramid; + pyramid.corners[0] = apex; + pyramid.corners[1] = base_center - base_center_pos_x - base_center_pos_y; + pyramid.corners[2] = base_center + base_center_pos_x - base_center_pos_y; + pyramid.corners[3] = base_center + base_center_pos_x + base_center_pos_y; + pyramid.corners[4] = base_center - base_center_pos_x + base_center_pos_y; + return pyramid; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Frustum + * \{ */ + +struct Frustum { + vec3 corners[8]; +}; + +/** + * Use this corner order. 
+ * <pre> + * + * Z Y + * | / + * |/ + * .-----X + * 2----------6 + * /| /| + * / | / | + * 1----------5 | + * | | | | + * | 3-------|--7 + * | / | / + * |/ |/ + * 0----------4 + * </pre> + */ +Frustum shape_frustum(vec3 corners[8]) +{ + Frustum frustum; + for (int i = 0; i < 8; i++) { + frustum.corners[i] = corners[i]; + } + return frustum; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Cone + * \{ */ + +/* Cone at origin with no height. */ +struct Cone { + vec3 direction; + float angle_cos; +}; + +Cone shape_cone(vec3 direction, float angle_cosine) +{ + return Cone(direction, angle_cosine); +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl index 3244b7960d8..eacdf8e6333 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl @@ -35,7 +35,7 @@ void emit_line(uint line_offset, uint quad_index, uint start_loop_index, uint co uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index, coarse_poly_count); - if (is_face_hidden(coarse_quad_index) || + if (use_hide && is_face_hidden(coarse_quad_index) || (input_origindex[vertex_index] == ORIGINDEX_NONE && optimal_display)) { output_lines[line_offset + 0] = 0xffffffff; output_lines[line_offset + 1] = 0xffffffff; diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl index ce3c8478d3f..a46d69eca88 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl @@ -45,7 +45,7 @@ void main() int triangle_loop_index = (int(quad_index) + mat_offset) * 6; #endif - if (is_face_hidden(coarse_quad_index)) { + if (use_hide && 
is_face_hidden(coarse_quad_index)) { output_tris[triangle_loop_index + 0] = 0xffffffff; output_tris[triangle_loop_index + 1] = 0xffffffff; output_tris[triangle_loop_index + 2] = 0xffffffff; diff --git a/source/blender/draw/intern/shaders/common_subdiv_lib.glsl b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl index d76a7369f79..4183b4a1cd3 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_lib.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl @@ -36,6 +36,10 @@ layout(std140) uniform shader_data /* Total number of elements to process. */ uint total_dispatch_size; + + bool is_edit_mode; + + bool use_hide; }; uint get_global_invocation_index() diff --git a/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl index e146ccb343a..81e346863c2 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl @@ -427,7 +427,7 @@ void main() output_nors[coarse_quad_index] = fnor; # endif - if (is_face_hidden(coarse_quad_index)) { + if (use_hide && is_face_hidden(coarse_quad_index)) { output_indices[coarse_quad_index] = 0xffffffff; } else { diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl index f5c4c7895aa..97c07704c06 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl @@ -26,6 +26,23 @@ bool is_face_selected(uint coarse_quad_index) return (extra_coarse_face_data[coarse_quad_index] & coarse_face_select_mask) != 0; } +bool is_face_hidden(uint coarse_quad_index) +{ + return (extra_coarse_face_data[coarse_quad_index] & coarse_face_hidden_mask) != 0; +} + +/* Flag for paint mode overlay and normals drawing in edit-mode. 
*/ +float get_loop_flag(uint coarse_quad_index, int vert_origindex) +{ + if (is_face_hidden(coarse_quad_index) || (is_edit_mode && vert_origindex == -1)) { + return -1.0; + } + if (is_face_selected(coarse_quad_index)) { + return 1.0; + } + return 0.0; +} + void main() { /* We execute for each quad. */ @@ -44,7 +61,11 @@ void main() /* Face is smooth, use vertex normals. */ for (int i = 0; i < 4; i++) { PosNorLoop pos_nor_loop = pos_nor[start_loop_index + i]; - output_lnor[start_loop_index + i] = get_normal_and_flag(pos_nor_loop); + int origindex = input_vert_origindex[start_loop_index + i]; + LoopNormal loop_normal = get_normal_and_flag(pos_nor_loop); + loop_normal.flag = get_loop_flag(coarse_quad_index, origindex); + + output_lnor[start_loop_index + i] = loop_normal; } } else { @@ -68,11 +89,7 @@ void main() for (int i = 0; i < 4; i++) { int origindex = input_vert_origindex[start_loop_index + i]; - float flag = 0.0; - if (origindex == -1) { - flag = -1.0; - } - loop_normal.flag = flag; + loop_normal.flag = get_loop_flag(coarse_quad_index, origindex); output_lnor[start_loop_index + i] = loop_normal; } diff --git a/source/blender/draw/intern/shaders/common_view_lib.glsl b/source/blender/draw/intern/shaders/common_view_lib.glsl index 8eecaa46b58..6521476c3a7 100644 --- a/source/blender/draw/intern/shaders/common_view_lib.glsl +++ b/source/blender/draw/intern/shaders/common_view_lib.glsl @@ -37,6 +37,9 @@ layout(std140) uniform viewBlock # endif #endif +#define IS_DEBUG_MOUSE_FRAGMENT (ivec2(gl_FragCoord) == drw_view.mouse_pixel) +#define IS_FIRST_INVOCATION (gl_GlobalInvocationID == uvec3(0)) + #define ViewNear (ViewVecs[0].w) #define ViewFar (ViewVecs[1].w) @@ -152,7 +155,11 @@ uniform int drw_ResourceID; # define PASS_RESOURCE_ID # elif defined(GPU_VERTEX_SHADER) -# define resource_id gpu_InstanceIndex +# if defined(UNIFORM_RESOURCE_ID_NEW) +# define resource_id drw_ResourceID +# else +# define resource_id gpu_InstanceIndex +# endif # define PASS_RESOURCE_ID 
drw_ResourceID_iface.resource_index = resource_id; # elif defined(GPU_GEOMETRY_SHADER) @@ -200,8 +207,8 @@ flat in int resourceIDFrag; # ifndef DRW_SHADER_SHARED_H struct ObjectMatrices { - mat4 drw_modelMatrix; - mat4 drw_modelMatrixInverse; + mat4 model; + mat4 model_inverse; }; # endif /* DRW_SHADER_SHARED_H */ @@ -211,8 +218,8 @@ layout(std140) uniform modelBlock ObjectMatrices drw_matrices[DRW_RESOURCE_CHUNK_LEN]; }; -# define ModelMatrix (drw_matrices[resource_id].drw_modelMatrix) -# define ModelMatrixInverse (drw_matrices[resource_id].drw_modelMatrixInverse) +# define ModelMatrix (drw_matrices[resource_id].model) +# define ModelMatrixInverse (drw_matrices[resource_id].model_inverse) # endif /* USE_GPU_SHADER_CREATE_INFO */ #else /* GPU_INTEL */ diff --git a/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl b/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl new file mode 100644 index 00000000000..3e640540777 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl @@ -0,0 +1,84 @@ + +/** + * Convert DrawPrototype into draw commands. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +#define atomicAddAndGet(dst, val) (atomicAdd(dst, val) + val) + +/* This is only called by the last thread executed over the group's prototype draws. */ +void write_draw_call(DrawGroup group, uint group_id) +{ + DrawCommand cmd; + cmd.vertex_len = group.vertex_len; + cmd.vertex_first = group.vertex_first; + if (group.base_index != -1) { + cmd.base_index = group.base_index; + cmd.instance_first_indexed = group.start; + } + else { + cmd._instance_first_array = group.start; + } + /* Back-facing command. */ + cmd.instance_len = group_buf[group_id].back_facing_counter; + command_buf[group_id * 2 + 0] = cmd; + /* Front-facing command. */ + cmd.instance_len = group_buf[group_id].front_facing_counter; + command_buf[group_id * 2 + 1] = cmd; + + /* Reset the counters for a next command gen dispatch. 
Avoids resending the whole data just + * for this purpose. Only the last thread will execute this so it is thread-safe. */ + group_buf[group_id].front_facing_counter = 0u; + group_buf[group_id].back_facing_counter = 0u; + group_buf[group_id].total_counter = 0u; +} + +void main() +{ + uint proto_id = gl_GlobalInvocationID.x; + if (proto_id >= prototype_len) { + return; + } + + DrawPrototype proto = prototype_buf[proto_id]; + uint group_id = proto.group_id; + bool is_inverted = (proto.resource_handle & 0x80000000u) != 0; + uint resource_index = (proto.resource_handle & 0x7FFFFFFFu); + + /* Visibility test result. */ + bool is_visible = ((visibility_buf[resource_index / 32u] & (1u << (resource_index % 32u)))) != 0; + + DrawGroup group = group_buf[group_id]; + + if (!is_visible) { + /* Skip the draw but still count towards the completion. */ + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + return; + } + + uint back_facing_len = group.len - group.front_facing_len; + uint front_facing_len = group.front_facing_len; + uint dst_index = group.start; + if (is_inverted) { + uint offset = atomicAdd(group_buf[group_id].back_facing_counter, proto.instance_len); + dst_index += offset; + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + } + else { + uint offset = atomicAdd(group_buf[group_id].front_facing_counter, proto.instance_len); + dst_index += back_facing_len + offset; + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + } + + for (uint i = dst_index; i < dst_index + proto.instance_len; i++) { + /* Fill resource_id buffer for each instance of this draw */ + resource_id_buf[i] = resource_index; + } +} diff --git a/source/blender/draw/intern/shaders/draw_debug_draw_display_frag.glsl 
b/source/blender/draw/intern/shaders/draw_debug_draw_display_frag.glsl new file mode 100644 index 00000000000..3fc5294b024 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_draw_display_frag.glsl @@ -0,0 +1,9 @@ + +/** + * Display debug edge list. + **/ + +void main() +{ + out_color = interp.color; +} diff --git a/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl b/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl new file mode 100644 index 00000000000..4061dda5d1c --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl @@ -0,0 +1,15 @@ + +/** + * Display debug edge list. + **/ + +void main() +{ + /* Skip the first vertex containing header data. */ + DRWDebugVert vert = drw_debug_verts_buf[gl_VertexID + 2]; + vec3 pos = uintBitsToFloat(uvec3(vert.pos0, vert.pos1, vert.pos2)); + vec4 col = vec4((uvec4(vert.color) >> uvec4(0, 8, 16, 24)) & 0xFFu) / 255.0; + + interp.color = col; + gl_Position = persmat * vec4(pos, 1.0); +} diff --git a/source/blender/draw/intern/shaders/draw_debug_info.hh b/source/blender/draw/intern/shaders/draw_debug_info.hh new file mode 100644 index 00000000000..ce450bb1210 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_info.hh @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "draw_defines.h" +#include "gpu_shader_create_info.hh" + +/* -------------------------------------------------------------------- */ +/** \name Debug print + * + * Allows print() function to have logging support inside shaders. 
+ * \{ */ + +GPU_SHADER_CREATE_INFO(draw_debug_print) + .typedef_source("draw_shader_shared.h") + .storage_buf(DRW_DEBUG_PRINT_SLOT, Qualifier::READ_WRITE, "uint", "drw_debug_print_buf[]"); + +GPU_SHADER_INTERFACE_INFO(draw_debug_print_display_iface, "").flat(Type::UINT, "char_index"); + +GPU_SHADER_CREATE_INFO(draw_debug_print_display) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .storage_buf(7, Qualifier::READ, "uint", "drw_debug_print_buf[]") + .vertex_out(draw_debug_print_display_iface) + .fragment_out(0, Type::VEC4, "out_color") + .vertex_source("draw_debug_print_display_vert.glsl") + .fragment_source("draw_debug_print_display_frag.glsl") + .additional_info("draw_view"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug draw shapes + * + * Allows to draw lines and points just like the DRW_debug module functions. + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_debug_draw) + .typedef_source("draw_shader_shared.h") + .storage_buf(DRW_DEBUG_DRAW_SLOT, + Qualifier::READ_WRITE, + "DRWDebugVert", + "drw_debug_verts_buf[]"); + +GPU_SHADER_INTERFACE_INFO(draw_debug_draw_display_iface, "interp").flat(Type::VEC4, "color"); + +GPU_SHADER_CREATE_INFO(draw_debug_draw_display) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .storage_buf(6, Qualifier::READ, "DRWDebugVert", "drw_debug_verts_buf[]") + .vertex_out(draw_debug_draw_display_iface) + .fragment_out(0, Type::VEC4, "out_color") + .push_constant(Type::MAT4, "persmat") + .vertex_source("draw_debug_draw_display_vert.glsl") + .fragment_source("draw_debug_draw_display_frag.glsl") + .additional_info("draw_view"); + +/** \} */ diff --git a/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl b/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl new file mode 100644 index 00000000000..4e0d980637f --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl 
@@ -0,0 +1,133 @@ + +/** + * Display characters using an ascii table. + **/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +bool char_intersect(uvec2 bitmap_position) +{ + /* Using 8x8 = 64bits = uvec2. */ + uvec2 ascii_bitmap[96] = uvec2[96](uvec2(0x00000000u, 0x00000000u), + uvec2(0x18001800u, 0x183c3c18u), + uvec2(0x00000000u, 0x36360000u), + uvec2(0x7f363600u, 0x36367f36u), + uvec2(0x301f0c00u, 0x0c3e031eu), + uvec2(0x0c666300u, 0x00633318u), + uvec2(0x3b336e00u, 0x1c361c6eu), + uvec2(0x00000000u, 0x06060300u), + uvec2(0x060c1800u, 0x180c0606u), + uvec2(0x180c0600u, 0x060c1818u), + uvec2(0x3c660000u, 0x00663cffu), + uvec2(0x0c0c0000u, 0x000c0c3fu), + uvec2(0x000c0c06u, 0x00000000u), + uvec2(0x00000000u, 0x0000003fu), + uvec2(0x000c0c00u, 0x00000000u), + uvec2(0x06030100u, 0x6030180cu), + uvec2(0x6f673e00u, 0x3e63737bu), + uvec2(0x0c0c3f00u, 0x0c0e0c0cu), + uvec2(0x06333f00u, 0x1e33301cu), + uvec2(0x30331e00u, 0x1e33301cu), + uvec2(0x7f307800u, 0x383c3633u), + uvec2(0x30331e00u, 0x3f031f30u), + uvec2(0x33331e00u, 0x1c06031fu), + uvec2(0x0c0c0c00u, 0x3f333018u), + uvec2(0x33331e00u, 0x1e33331eu), + uvec2(0x30180e00u, 0x1e33333eu), + uvec2(0x000c0c00u, 0x000c0c00u), + uvec2(0x000c0c06u, 0x000c0c00u), + uvec2(0x060c1800u, 0x180c0603u), + uvec2(0x003f0000u, 0x00003f00u), + uvec2(0x180c0600u, 0x060c1830u), + uvec2(0x0c000c00u, 0x1e333018u), + uvec2(0x7b031e00u, 0x3e637b7bu), + uvec2(0x3f333300u, 0x0c1e3333u), + uvec2(0x66663f00u, 0x3f66663eu), + uvec2(0x03663c00u, 0x3c660303u), + uvec2(0x66361f00u, 0x1f366666u), + uvec2(0x16467f00u, 0x7f46161eu), + uvec2(0x16060f00u, 0x7f46161eu), + uvec2(0x73667c00u, 0x3c660303u), + uvec2(0x33333300u, 0x3333333fu), + uvec2(0x0c0c1e00u, 0x1e0c0c0cu), + uvec2(0x33331e00u, 0x78303030u), + uvec2(0x36666700u, 0x6766361eu), + uvec2(0x46667f00u, 0x0f060606u), + uvec2(0x6b636300u, 0x63777f7fu), + uvec2(0x73636300u, 0x63676f7bu), + uvec2(0x63361c00u, 0x1c366363u), + uvec2(0x06060f00u, 0x3f66663eu), + uvec2(0x3b1e3800u, 0x1e333333u), + 
uvec2(0x36666700u, 0x3f66663eu), + uvec2(0x38331e00u, 0x1e33070eu), + uvec2(0x0c0c1e00u, 0x3f2d0c0cu), + uvec2(0x33333f00u, 0x33333333u), + uvec2(0x331e0c00u, 0x33333333u), + uvec2(0x7f776300u, 0x6363636bu), + uvec2(0x1c366300u, 0x6363361cu), + uvec2(0x0c0c1e00u, 0x3333331eu), + uvec2(0x4c667f00u, 0x7f633118u), + uvec2(0x06061e00u, 0x1e060606u), + uvec2(0x30604000u, 0x03060c18u), + uvec2(0x18181e00u, 0x1e181818u), + uvec2(0x00000000u, 0x081c3663u), + uvec2(0x000000ffu, 0x00000000u), + uvec2(0x00000000u, 0x0c0c1800u), + uvec2(0x3e336e00u, 0x00001e30u), + uvec2(0x66663b00u, 0x0706063eu), + uvec2(0x03331e00u, 0x00001e33u), + uvec2(0x33336e00u, 0x3830303eu), + uvec2(0x3f031e00u, 0x00001e33u), + uvec2(0x06060f00u, 0x1c36060fu), + uvec2(0x333e301fu, 0x00006e33u), + uvec2(0x66666700u, 0x0706366eu), + uvec2(0x0c0c1e00u, 0x0c000e0cu), + uvec2(0x3033331eu, 0x30003030u), + uvec2(0x1e366700u, 0x07066636u), + uvec2(0x0c0c1e00u, 0x0e0c0c0cu), + uvec2(0x7f6b6300u, 0x0000337fu), + uvec2(0x33333300u, 0x00001f33u), + uvec2(0x33331e00u, 0x00001e33u), + uvec2(0x663e060fu, 0x00003b66u), + uvec2(0x333e3078u, 0x00006e33u), + uvec2(0x66060f00u, 0x00003b6eu), + uvec2(0x1e301f00u, 0x00003e03u), + uvec2(0x0c2c1800u, 0x080c3e0cu), + uvec2(0x33336e00u, 0x00003333u), + uvec2(0x331e0c00u, 0x00003333u), + uvec2(0x7f7f3600u, 0x0000636bu), + uvec2(0x1c366300u, 0x00006336u), + uvec2(0x333e301fu, 0x00003333u), + uvec2(0x0c263f00u, 0x00003f19u), + uvec2(0x0c0c3800u, 0x380c0c07u), + uvec2(0x18181800u, 0x18181800u), + uvec2(0x0c0c0700u, 0x070c0c38u), + uvec2(0x00000000u, 0x6e3b0000u), + uvec2(0x00000000u, 0x00000000u)); + + if (!in_range_inclusive(bitmap_position, uvec2(0), uvec2(7))) { + return false; + } + uint char_bits = ascii_bitmap[char_index][bitmap_position.y >> 2u & 1u]; + char_bits = (char_bits >> ((bitmap_position.y & 3u) * 8u + bitmap_position.x)); + return (char_bits & 1u) != 0u; +} + +void main() +{ + uvec2 bitmap_position = uvec2(gl_PointCoord.xy * 8.0); + /* Point coord start from top 
left corner. But layout is from bottom to top. */ + bitmap_position.y = 7 - bitmap_position.y; + + if (char_intersect(bitmap_position)) { + out_color = vec4(1); + } + else if (char_intersect(bitmap_position + uvec2(0, 1))) { + /* Shadow */ + out_color = vec4(0, 0, 0, 1); + } + else { + /* Transparent Background for ease of read. */ + out_color = vec4(0, 0, 0, 0.2); + } +} diff --git a/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl b/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl new file mode 100644 index 00000000000..cb379056e2b --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl @@ -0,0 +1,29 @@ + +/** + * Display characters using an ascii table. Outputs one point per character. + **/ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) + +void main() +{ + /* Skip first 4 chars containing header data. */ + uint char_data = drw_debug_print_buf[gl_VertexID + 8]; + char_index = (char_data & 0xFFu) - 0x20u; + + /* Discard invalid chars. */ + if (char_index >= 96u) { + gl_Position = vec4(-1); + gl_PointSize = 0.0; + return; + } + uint row = (char_data >> 16u) & 0xFFu; + uint col = (char_data >> 8u) & 0xFFu; + + float char_size = 16.0; + /* Change anchor point to the top left. 
*/ + vec2 pos_on_screen = char_size * vec2(col, row) + char_size * 4; + gl_Position = vec4( + pos_on_screen * drw_view.viewport_size_inverse * vec2(2.0, -2.0) - vec2(1.0, -1.0), 0, 1); + gl_PointSize = char_size; +} diff --git a/source/blender/draw/intern/shaders/draw_object_infos_info.hh b/source/blender/draw/intern/shaders/draw_object_infos_info.hh index 8fd55ea351f..31fee018fbc 100644 --- a/source/blender/draw/intern/shaders/draw_object_infos_info.hh +++ b/source/blender/draw/intern/shaders/draw_object_infos_info.hh @@ -1,10 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" GPU_SHADER_CREATE_INFO(draw_object_infos) .typedef_source("draw_shader_shared.h") .define("OBINFO_LIB") + .define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)") + .define("ObjectInfo", "(drw_infos[resource_id].infos)") + .define("ObjectColor", "(drw_infos[resource_id].color)") .uniform_buf(1, "ObjectInfos", "drw_infos[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH); GPU_SHADER_CREATE_INFO(draw_volume_infos) @@ -14,3 +18,19 @@ GPU_SHADER_CREATE_INFO(draw_volume_infos) GPU_SHADER_CREATE_INFO(draw_curves_infos) .typedef_source("draw_shader_shared.h") .uniform_buf(2, "CurvesInfos", "drw_curves", Frequency::BATCH); + +GPU_SHADER_CREATE_INFO(draw_object_infos_new) + .typedef_source("draw_shader_shared.h") + .define("OBINFO_LIB") + .define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)") + .define("ObjectInfo", "(drw_infos[resource_id].infos)") + .define("ObjectColor", "(drw_infos[resource_id].color)") + .storage_buf(DRW_OBJ_INFOS_SLOT, Qualifier::READ, "ObjectInfos", "drw_infos[]"); + +/** \note Requires draw_object_infos_new. 
*/ +GPU_SHADER_CREATE_INFO(draw_object_attribute_new) + .define("OBATTR_LIB") + .define("ObjectAttributeStart", "(drw_infos[resource_id].orco_mul_bias[0].w)") + .define("ObjectAttributeLen", "(drw_infos[resource_id].orco_mul_bias[1].w)") + .storage_buf(DRW_OBJ_ATTR_SLOT, Qualifier::READ, "ObjectAttribute", "drw_attrs[]") + .additional_info("draw_object_infos_new"); diff --git a/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl b/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl new file mode 100644 index 00000000000..511d4e49651 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl @@ -0,0 +1,64 @@ + +/** + * Finish computation of a few draw resource after sync. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +void main() +{ + uint resource_id = gl_GlobalInvocationID.x; + if (resource_id >= resource_len) { + return; + } + + mat4 model_mat = matrix_buf[resource_id].model; + ObjectInfos infos = infos_buf[resource_id]; + ObjectBounds bounds = bounds_buf[resource_id]; + + if (bounds.bounding_sphere.w != -1.0) { + /* Convert corners to origin + sides in world space. */ + vec3 p0 = bounds.bounding_corners[0].xyz; + vec3 p01 = bounds.bounding_corners[1].xyz - p0; + vec3 p02 = bounds.bounding_corners[2].xyz - p0; + vec3 p03 = bounds.bounding_corners[3].xyz - p0; + /* Avoid flat box. */ + p01.x = max(p01.x, 1e-4); + p02.y = max(p02.y, 1e-4); + p03.z = max(p03.z, 1e-4); + vec3 diagonal = p01 + p02 + p03; + vec3 center = p0 + diagonal * 0.5; + float min_axis = min_v3(abs(diagonal)); + bounds_buf[resource_id].bounding_sphere.xyz = transform_point(model_mat, center); + /* We have to apply scaling to the diagonal. 
*/ + bounds_buf[resource_id].bounding_sphere.w = length(transform_direction(model_mat, diagonal)) * + 0.5; + bounds_buf[resource_id]._inner_sphere_radius = min_axis; + bounds_buf[resource_id].bounding_corners[0].xyz = transform_point(model_mat, p0); + bounds_buf[resource_id].bounding_corners[1].xyz = transform_direction(model_mat, p01); + bounds_buf[resource_id].bounding_corners[2].xyz = transform_direction(model_mat, p02); + bounds_buf[resource_id].bounding_corners[3].xyz = transform_direction(model_mat, p03); + /* Always have correct handedness in the corners vectors. */ + if (flag_test(infos.flag, OBJECT_NEGATIVE_SCALE)) { + bounds_buf[resource_id].bounding_corners[0].xyz += + bounds_buf[resource_id].bounding_corners[1].xyz; + bounds_buf[resource_id].bounding_corners[1].xyz = + -bounds_buf[resource_id].bounding_corners[1].xyz; + } + + /* TODO: Bypass test for very large objects (see T67319). */ + if (bounds_buf[resource_id].bounding_sphere.w > 1e12) { + bounds_buf[resource_id].bounding_sphere.w = -1.0; + } + } + + vec3 loc = infos.orco_add; /* Box center. */ + vec3 size = infos.orco_mul; /* Box half-extent. */ + /* This is what the original computation looks like. + * Simplify to a nice MADD in shading code. 
*/ + // orco = (pos - loc) / size; + // orco = pos * (1.0 / size) + (-loc / size); + vec3 size_inv = safe_rcp(size); + infos_buf[resource_id].orco_add = -loc * size_inv; + infos_buf[resource_id].orco_mul = size_inv; +} diff --git a/source/blender/draw/intern/shaders/draw_view_info.hh b/source/blender/draw/intern/shaders/draw_view_info.hh index 0400521c53d..c522c607791 100644 --- a/source/blender/draw/intern/shaders/draw_view_info.hh +++ b/source/blender/draw/intern/shaders/draw_view_info.hh @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" /* -------------------------------------------------------------------- */ @@ -44,13 +45,13 @@ GPU_SHADER_CREATE_INFO(draw_resource_handle) * \{ */ GPU_SHADER_CREATE_INFO(draw_view) - .uniform_buf(0, "ViewInfos", "drw_view", Frequency::PASS) + .uniform_buf(DRW_VIEW_UBO_SLOT, "ViewInfos", "drw_view", Frequency::PASS) .typedef_source("draw_shader_shared.h"); GPU_SHADER_CREATE_INFO(draw_modelmat) .uniform_buf(8, "ObjectMatrices", "drw_matrices[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH) - .define("ModelMatrix", "(drw_matrices[resource_id].drw_modelMatrix)") - .define("ModelMatrixInverse", "(drw_matrices[resource_id].drw_modelMatrixInverse)") + .define("ModelMatrix", "(drw_matrices[resource_id].model)") + .define("ModelMatrixInverse", "(drw_matrices[resource_id].model_inverse)") .additional_info("draw_view"); GPU_SHADER_CREATE_INFO(draw_modelmat_legacy) @@ -136,3 +137,77 @@ GPU_SHADER_CREATE_INFO(draw_gpencil) .additional_info("draw_modelmat", "draw_resource_id_uniform", "draw_object_infos"); /** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Internal Draw Manager usage + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_resource_finalize) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .define("DRAW_FINALIZE_SHADER") + .local_group_size(DRW_FINALIZE_GROUP_SIZE) + .storage_buf(0, 
Qualifier::READ, "ObjectMatrices", "matrix_buf[]") + .storage_buf(1, Qualifier::READ_WRITE, "ObjectBounds", "bounds_buf[]") + .storage_buf(2, Qualifier::READ_WRITE, "ObjectInfos", "infos_buf[]") + .push_constant(Type::INT, "resource_len") + .compute_source("draw_resource_finalize_comp.glsl"); + +GPU_SHADER_CREATE_INFO(draw_visibility_compute) + .do_static_compilation(true) + .local_group_size(DRW_VISIBILITY_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "ObjectBounds", "bounds_buf[]") + .storage_buf(1, Qualifier::READ_WRITE, "uint", "visibility_buf[]") + .push_constant(Type::INT, "resource_len") + .compute_source("draw_visibility_comp.glsl") + .additional_info("draw_view"); + +GPU_SHADER_CREATE_INFO(draw_command_generate) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .typedef_source("draw_command_shared.hh") + .local_group_size(DRW_COMMAND_GROUP_SIZE) + .storage_buf(0, Qualifier::READ_WRITE, "DrawGroup", "group_buf[]") + .storage_buf(1, Qualifier::READ, "uint", "visibility_buf[]") + .storage_buf(2, Qualifier::READ, "DrawPrototype", "prototype_buf[]") + .storage_buf(3, Qualifier::WRITE, "DrawCommand", "command_buf[]") + .storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::WRITE, "uint", "resource_id_buf[]") + .push_constant(Type::INT, "prototype_len") + .compute_source("draw_command_generate_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Resource ID + * New implementation using gl_BaseInstance and storage buffers. + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_resource_id_new) + .define("UNIFORM_RESOURCE_ID_NEW") + .storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::READ, "int", "resource_id_buf[]") + .define("drw_ResourceID", "resource_id_buf[gpu_BaseInstance + gl_InstanceID]"); + +/** + * Workaround the lack of gl_BaseInstance by binding the resource_id_buf as vertex buf. 
+ */ +GPU_SHADER_CREATE_INFO(draw_resource_id_fallback) + .define("UNIFORM_RESOURCE_ID_NEW") + .vertex_in(15, Type::INT, "drw_ResourceID"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Object Resources + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_modelmat_new) + .typedef_source("draw_shader_shared.h") + .storage_buf(DRW_OBJ_MAT_SLOT, Qualifier::READ, "ObjectMatrices", "drw_matrix_buf[]") + .define("drw_ModelMatrixInverse", "drw_matrix_buf[resource_id].model_inverse") + .define("drw_ModelMatrix", "drw_matrix_buf[resource_id].model") + /* TODO For compatibility with old shaders. To be removed. */ + .define("ModelMatrixInverse", "drw_ModelMatrixInverse") + .define("ModelMatrix", "drw_ModelMatrix") + .additional_info("draw_resource_id_new"); + +/** \} */ diff --git a/source/blender/draw/intern/shaders/draw_visibility_comp.glsl b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl new file mode 100644 index 00000000000..86add2d1fe2 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl @@ -0,0 +1,46 @@ + +/** + * Compute visibility of each resource bounds for a given view. + */ +/* TODO(fclem): This could be augmented by a 2 pass occlusion culling system. 
*/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersect_lib.glsl) + +shared uint shared_result; + +void mask_visibility_bit() +{ + uint bit = 1u << gl_LocalInvocationID.x; + atomicAnd(visibility_buf[gl_WorkGroupID.x], ~bit); +} + +void main() +{ + if (gl_GlobalInvocationID.x >= resource_len) { + return; + } + + ObjectBounds bounds = bounds_buf[gl_GlobalInvocationID.x]; + + if (bounds.bounding_sphere.w != -1.0) { + IsectBox box = isect_data_setup(bounds.bounding_corners[0].xyz, + bounds.bounding_corners[1].xyz, + bounds.bounding_corners[2].xyz, + bounds.bounding_corners[3].xyz); + Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w); + Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius); + + if (intersect_view(inscribed_sphere) == true) { + /* Visible. */ + } + else if (intersect_view(bounding_sphere) == false) { + /* Not visible. */ + mask_visibility_bit(); + } + else if (intersect_view(box) == false) { + /* Not visible. 
*/ + mask_visibility_bit(); + } + } +} diff --git a/source/blender/draw/tests/draw_pass_test.cc b/source/blender/draw/tests/draw_pass_test.cc new file mode 100644 index 00000000000..394ca8bd3cf --- /dev/null +++ b/source/blender/draw/tests/draw_pass_test.cc @@ -0,0 +1,441 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include "testing/testing.h" + +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader.h" +#include "draw_testing.hh" + +#include <bitset> + +namespace blender::draw { + +static void test_draw_pass_all_commands() +{ + Texture tex; + tex.ensure_2d(GPU_RGBA16, int2(1)); + + UniformBuffer<uint4> ubo; + ubo.push_update(); + + StorageBuffer<uint4> ssbo; + ssbo.push_update(); + + float4 color(1.0f, 1.0f, 1.0f, 0.0f); + int3 dispatch_size(1); + + PassSimple pass = {"test.all_commands"}; + pass.init(); + pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL); + pass.clear_color_depth_stencil(float4(0.25f, 0.5f, 100.0f, -2000.0f), 0.5f, 0xF0); + pass.state_stencil(0x80, 0x0F, 0x8F); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.bind_texture("image", tex); + pass.bind_texture("image", &tex); + pass.bind_image("missing_image", tex); /* Should not crash. */ + pass.bind_image("missing_image", &tex); /* Should not crash. */ + pass.bind_ubo("missing_ubo", ubo); /* Should not crash. */ + pass.bind_ubo("missing_ubo", &ubo); /* Should not crash. */ + pass.bind_ssbo("missing_ssbo", ssbo); /* Should not crash. */ + pass.bind_ssbo("missing_ssbo", &ssbo); /* Should not crash. */ + pass.push_constant("color", color); + pass.push_constant("color", &color); + pass.push_constant("ModelViewProjectionMatrix", float4x4::identity()); + pass.draw_procedural(GPU_PRIM_TRIS, 1, 3); + + /* Should not crash even if shader is not a compute. This is because we only serialize. */ + /* TODO(fclem): Use real compute shader. 
*/ + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.dispatch(dispatch_size); + pass.dispatch(&dispatch_size); + pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); + + /* Change references. */ + color[3] = 1.0f; + dispatch_size = int3(2); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.all_commands" << std::endl; + expected << " .state_set(6)" << std::endl; + expected << " .clear(color=(0.25, 0.5, 100, -2000), depth=0.5, stencil=0b11110000))" + << std::endl; + expected << " .stencil_set(write_mask=0b10000000, compare_mask=0b00001111, reference=0b10001111" + << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl; + expected << " .bind_texture(0)" << std::endl; + expected << " .bind_texture_ref(0)" << std::endl; + expected << " .bind_image(-1)" << std::endl; + expected << " .bind_image_ref(-1)" << std::endl; + expected << " .bind_uniform_buf(-1)" << std::endl; + expected << " .bind_uniform_buf_ref(-1)" << std::endl; + expected << " .bind_storage_buf(-1)" << std::endl; + expected << " .bind_storage_buf_ref(-1)" << std::endl; + expected << " .push_constant(1, data=(1, 1, 1, 0))" << std::endl; + expected << " .push_constant(1, data=(1, 1, 1, 1))" << std::endl; + expected << " .push_constant(0, data=(" << std::endl; + expected << "( 1.000000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 1.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 1.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << " .draw(inst_len=1, vert_len=3, vert_first=0, res_id=0)" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl; + expected << " .dispatch(1, 1, 1)" << std::endl; + expected << " .dispatch_ref(2, 2, 2)" << std::endl; + expected << " .barrier(4)" << std::endl; + + 
EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_all_commands) + +static void test_draw_pass_sub_ordering() +{ + PassSimple pass = {"test.sub_ordering"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.push_constant("test_pass", 1); + + PassSimple::Sub &sub1 = pass.sub("Sub1"); + sub1.push_constant("test_sub1", 11); + + PassSimple::Sub &sub2 = pass.sub("Sub2"); + sub2.push_constant("test_sub2", 21); + + /* Will execute after both sub. */ + pass.push_constant("test_pass", 2); + + /* Will execute after sub1. */ + sub2.push_constant("test_sub2", 22); + + /* Will execute before sub2. */ + sub1.push_constant("test_sub1", 12); + + /* Will execute before end of pass. */ + sub2.push_constant("test_sub2", 23); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.sub_ordering" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl; + expected << " .push_constant(-1, data=1)" << std::endl; + expected << " .Sub1" << std::endl; + expected << " .push_constant(-1, data=11)" << std::endl; + expected << " .push_constant(-1, data=12)" << std::endl; + expected << " .Sub2" << std::endl; + expected << " .push_constant(-1, data=21)" << std::endl; + expected << " .push_constant(-1, data=22)" << std::endl; + expected << " .push_constant(-1, data=23)" << std::endl; + expected << " .push_constant(-1, data=2)" << std::endl; + + EXPECT_EQ(result, expected.str()); +} +DRAW_TEST(draw_pass_sub_ordering) + +static void test_draw_pass_simple_draw() +{ + PassSimple pass = {"test.simple_draw"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + /* Each draw procedural type uses a different batch. Groups are drawn in correct order. 
*/ + pass.draw_procedural(GPU_PRIM_TRIS, 1, 10, 1, {1}); + pass.draw_procedural(GPU_PRIM_POINTS, 4, 20, 2, {2}); + pass.draw_procedural(GPU_PRIM_TRIS, 2, 30, 3, {3}); + pass.draw_procedural(GPU_PRIM_POINTS, 5, 40, 4, ResourceHandle(4, true)); + pass.draw_procedural(GPU_PRIM_LINES, 1, 50, 5, {5}); + pass.draw_procedural(GPU_PRIM_POINTS, 6, 60, 6, {5}); + pass.draw_procedural(GPU_PRIM_TRIS, 3, 70, 7, {6}); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.simple_draw" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl; + expected << " .draw(inst_len=1, vert_len=10, vert_first=1, res_id=1)" << std::endl; + expected << " .draw(inst_len=4, vert_len=20, vert_first=2, res_id=2)" << std::endl; + expected << " .draw(inst_len=2, vert_len=30, vert_first=3, res_id=3)" << std::endl; + expected << " .draw(inst_len=5, vert_len=40, vert_first=4, res_id=4)" << std::endl; + expected << " .draw(inst_len=1, vert_len=50, vert_first=5, res_id=5)" << std::endl; + expected << " .draw(inst_len=6, vert_len=60, vert_first=6, res_id=5)" << std::endl; + expected << " .draw(inst_len=3, vert_len=70, vert_first=7, res_id=6)" << std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_simple_draw) + +static void test_draw_pass_multi_draw() +{ + PassMain pass = {"test.multi_draw"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + /* Each draw procedural type uses a different batch. Groups are drawn in reverse order. 
*/ + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, {1}); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, {2}); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, {3}); + pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, ResourceHandle(4, true)); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, {5}); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, {5}); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, {6}); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.multi_draw" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl; + expected << " .draw_multi(3)" << std::endl; + expected << " .group(id=2, len=1)" << std::endl; + expected << " .proto(instance_len=1, resource_id=5, front_face)" << std::endl; + expected << " .group(id=1, len=15)" << std::endl; + expected << " .proto(instance_len=5, resource_id=4, back_face)" << std::endl; + expected << " .proto(instance_len=6, resource_id=5, front_face)" << std::endl; + expected << " .proto(instance_len=4, resource_id=2, front_face)" << std::endl; + expected << " .group(id=0, len=6)" << std::endl; + expected << " .proto(instance_len=3, resource_id=6, front_face)" << std::endl; + expected << " .proto(instance_len=2, resource_id=3, front_face)" << std::endl; + expected << " .proto(instance_len=1, resource_id=1, front_face)" << std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_multi_draw) + +static void test_draw_pass_sortable() +{ + PassSortable pass = {"test.sortable"}; + pass.init(); + + pass.sub("Sub3", 3.0f); + pass.sub("Sub2", 2.0f); + pass.sub("Sub5", 4.0f); + pass.sub("Sub4", 3.0f); + pass.sub("Sub1", 1.0f); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.sortable" << std::endl; + expected << " .Sub1" << std::endl; + expected << " .Sub2" << std::endl; + expected << " .Sub3" << std::endl; + expected << " .Sub4" << std::endl; + expected << " .Sub5" << 
std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_sortable) + +static void test_draw_resource_id_gen() +{ + float4x4 win_mat; + orthographic_m4(win_mat.ptr(), -1, 1, -1, 1, -1, 1); + + View view("test_view"); + view.sync(float4x4::identity(), win_mat); + + Manager drw; + + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + drw.begin_sync(); + ResourceHandle handle1 = drw.resource_handle(obmat_1); + ResourceHandle handle2 = drw.resource_handle(obmat_1); + ResourceHandle handle3 = drw.resource_handle(obmat_2); + drw.resource_handle(obmat_2, float3(2), float3(1)); + drw.end_sync(); + + StringRefNull expected = "2 1 1 1 1 3 3 1 1 1 1 1 3 2 2 2 2 2 2 1 1 1 "; + + { + /* Computed on CPU. */ + PassSimple pass = {"test.resource_id"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, handle1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + + std::stringstream result; + for (auto val : debug.resource_id) { + result << val << " "; + } + + EXPECT_EQ(result.str(), expected); + } + { + /* Same thing with PassMain (computed on GPU) */ + PassSimple pass = {"test.resource_id"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, handle3); + 
pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, handle1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + + std::stringstream result; + for (auto val : debug.resource_id) { + result << val << " "; + } + + EXPECT_EQ(result.str(), expected); + } + + DRW_shape_cache_free(); + DRW_shaders_free(); +} +DRAW_TEST(draw_resource_id_gen) + +static void test_draw_visibility() +{ + float4x4 win_mat; + orthographic_m4(win_mat.ptr(), -1, 1, -1, 1, -1, 1); + + View view("test_view"); + view.sync(float4x4::identity(), win_mat); + + Manager drw; + + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + drw.begin_sync(); /* Default {0} always visible. */ + drw.resource_handle(obmat_1); /* No bounds, always visible. */ + drw.resource_handle(obmat_1, float3(3), float3(1)); /* Out of view. */ + drw.resource_handle(obmat_2, float3(0), float3(1)); /* Inside view. */ + drw.end_sync(); + + PassMain pass = {"test.visibility"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + Vector<uint32_t> expected_visibility = {0}; + + std::stringstream result; + for (auto val : debug.visibility) { + result << std::bitset<32>(val); + } + + EXPECT_EQ(result.str(), "11111111111111111111111111111011"); + + DRW_shape_cache_free(); + DRW_shaders_free(); +} +DRAW_TEST(draw_visibility) + +static void test_draw_manager_sync() +{ + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + /* TODO find a way to create a minimum object to test resource handle creation on it. 
*/ + Manager drw; + + drw.begin_sync(); + drw.resource_handle(obmat_1); + drw.resource_handle(obmat_2, float3(2), float3(1)); + drw.end_sync(); + + Manager::DataDebugOutput debug = drw.data_debug(); + + std::stringstream result; + for (const auto &val : debug.matrices) { + result << val; + } + for (const auto &val : debug.bounds) { + result << val; + } + for (const auto &val : debug.infos) { + result << val; + } + + std::stringstream expected; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( 1.000000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 1.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 1.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( 1.000000, -0.000000, 0.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 1.000000, -0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, -0.000000, 1.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 0.000000, -0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( -0.500000, -0.000000, -0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, -0.500000, -0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, -0.000000, -0.500000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( -2.000000, 0.000000, -0.000000, -0.000000)" << std::endl; + expected << "( 0.000000, -2.000000, 0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, 0.000000, -2.000000, 0.000000)" << 
std::endl; + expected << "( -0.000000, -0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( 0.500000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.500000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.500000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( 2.000000, -0.000000, 0.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 2.000000, -0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, -0.000000, 2.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 0.000000, -0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectBounds(skipped)" << std::endl; + expected << "ObjectBounds(skipped)" << std::endl; + expected << "ObjectBounds(" << std::endl; + expected << ".bounding_corners[0](0.5, 0.5, 0.5)" << std::endl; + expected << ".bounding_corners[1](1, 0, 0)" << std::endl; + expected << ".bounding_corners[2](0, 1, 0)" << std::endl; + expected << ".bounding_corners[3](0, 0, 1)" << std::endl; + expected << ".sphere=(pos=(1, 1, 1), rad=0.866025" << std::endl; + expected << ")" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + + EXPECT_EQ(result.str(), expected.str()); + + DRW_shaders_free(); +} +DRAW_TEST(draw_manager_sync) + +} // namespace blender::draw diff --git a/source/blender/draw/tests/shaders_test.cc b/source/blender/draw/tests/shaders_test.cc index 2bc0c9af895..892fd999fb5 100644 --- a/source/blender/draw/tests/shaders_test.cc +++ 
b/source/blender/draw/tests/shaders_test.cc @@ -256,6 +256,7 @@ static void test_overlay_glsl_shaders() EXPECT_NE(OVERLAY_shader_uniform_color(), nullptr); EXPECT_NE(OVERLAY_shader_outline_prepass(false), nullptr); EXPECT_NE(OVERLAY_shader_outline_prepass(true), nullptr); + EXPECT_NE(OVERLAY_shader_outline_prepass_curves(), nullptr); EXPECT_NE(OVERLAY_shader_outline_prepass_gpencil(), nullptr); EXPECT_NE(OVERLAY_shader_outline_prepass_pointcloud(), nullptr); EXPECT_NE(OVERLAY_shader_extra_grid(), nullptr); @@ -270,6 +271,7 @@ static void test_overlay_glsl_shaders() EXPECT_NE(OVERLAY_shader_particle_dot(), nullptr); EXPECT_NE(OVERLAY_shader_particle_shape(), nullptr); EXPECT_NE(OVERLAY_shader_sculpt_mask(), nullptr); + EXPECT_NE(OVERLAY_shader_sculpt_curves_selection(), nullptr); EXPECT_NE(OVERLAY_shader_volume_velocity(false, false), nullptr); EXPECT_NE(OVERLAY_shader_volume_velocity(false, true), nullptr); EXPECT_NE(OVERLAY_shader_volume_velocity(true, false), nullptr); @@ -358,6 +360,8 @@ static void test_eevee_glsl_shaders_static() EXPECT_NE(EEVEE_shaders_volumes_integration_sh_get(), nullptr); EXPECT_NE(EEVEE_shaders_volumes_resolve_sh_get(false), nullptr); EXPECT_NE(EEVEE_shaders_volumes_resolve_sh_get(true), nullptr); + EXPECT_NE(EEVEE_shaders_volumes_resolve_comp_sh_get(false), nullptr); + EXPECT_NE(EEVEE_shaders_volumes_resolve_comp_sh_get(true), nullptr); EXPECT_NE(EEVEE_shaders_volumes_accum_sh_get(), nullptr); EXPECT_NE(EEVEE_shaders_studiolight_probe_sh_get(), nullptr); EXPECT_NE(EEVEE_shaders_studiolight_background_sh_get(), nullptr); @@ -397,6 +401,7 @@ static void test_basic_glsl_shaders() eGPUShaderConfig sh_cfg = static_cast<eGPUShaderConfig>(i); BASIC_shaders_depth_sh_get(sh_cfg); BASIC_shaders_pointcloud_depth_sh_get(sh_cfg); + BASIC_shaders_curves_depth_sh_get(sh_cfg); BASIC_shaders_depth_conservative_sh_get(sh_cfg); BASIC_shaders_pointcloud_depth_conservative_sh_get(sh_cfg); } |