diff options
48 files changed, 4742 insertions, 165 deletions
diff --git a/release/scripts/addons b/release/scripts/addons -Subproject 25ffc6f430fc995b1c046b01acba1c3e6c1896b +Subproject 67f1fbca1482d9d9362a4001332e785c3fd5d23 diff --git a/release/scripts/startup/bl_ui/space_userpref.py b/release/scripts/startup/bl_ui/space_userpref.py index cd11938e146..49f0fef5849 100644 --- a/release/scripts/startup/bl_ui/space_userpref.py +++ b/release/scripts/startup/bl_ui/space_userpref.py @@ -2327,6 +2327,7 @@ class USERPREF_PT_experimental_debugging(ExperimentalPanel, Panel): ({"property": "use_cycles_debug"}, None), ({"property": "show_asset_debug_info"}, None), ({"property": "use_asset_indexing"}, None), + ({"property": "use_viewport_debug"}, None), ), ) diff --git a/release/scripts/startup/bl_ui/space_view3d.py b/release/scripts/startup/bl_ui/space_view3d.py index b1b5738aecd..b2fa8e4d64f 100644 --- a/release/scripts/startup/bl_ui/space_view3d.py +++ b/release/scripts/startup/bl_ui/space_view3d.py @@ -7809,6 +7809,25 @@ class VIEW3D_PT_curves_sculpt_grow_shrink_scaling(Panel): layout.prop(brush.curves_sculpt_settings, "minimum_length") +class VIEW3D_PT_viewport_debug(Panel): + bl_space_type = 'VIEW_3D' + bl_region_type = 'HEADER' + bl_parent_id = 'VIEW3D_PT_overlay' + bl_label = "Viewport Debug" + + @classmethod + def poll(cls, context): + prefs = context.preferences + return prefs.experimental.use_viewport_debug + + def draw(self, context): + layout = self.layout + view = context.space_data + overlay = view.overlay + + layout.prop(overlay, "use_debug_freeze_view_culling") + + classes = ( VIEW3D_HT_header, VIEW3D_HT_tool_header, @@ -8046,6 +8065,7 @@ classes = ( TOPBAR_PT_annotation_layers, VIEW3D_PT_curves_sculpt_add_shape, VIEW3D_PT_curves_sculpt_grow_shrink_scaling, + VIEW3D_PT_viewport_debug, ) diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt index 939e302b3d2..5704c9e6774 100644 --- a/source/blender/draw/CMakeLists.txt +++ b/source/blender/draw/CMakeLists.txt @@ -79,19 +79,21 @@ set(SRC 
intern/draw_cache_impl_subdivision.cc intern/draw_cache_impl_volume.c intern/draw_color_management.cc + intern/draw_command.cc intern/draw_common.c intern/draw_curves.cc intern/draw_debug.cc intern/draw_fluid.c intern/draw_hair.cc intern/draw_instance_data.c - intern/draw_manager.c intern/draw_manager_data.c intern/draw_manager_exec.c intern/draw_manager_profiling.c intern/draw_manager_shader.c intern/draw_manager_text.c intern/draw_manager_texture.c + intern/draw_manager.c + intern/draw_manager.cc intern/draw_select_buffer.c intern/draw_shader.cc intern/draw_texture_pool.cc @@ -206,28 +208,32 @@ set(SRC intern/DRW_gpu_wrapper.hh intern/DRW_render.h intern/draw_attributes.h - intern/draw_cache.h intern/draw_cache_extract.hh intern/draw_cache_impl.h intern/draw_cache_inline.h + intern/draw_cache.h intern/draw_color_management.h - intern/draw_common.h + intern/draw_command.hh intern/draw_common_shader_shared.h + intern/draw_common.h intern/draw_curves_private.h intern/draw_debug.h intern/draw_debug.hh intern/draw_hair_private.h intern/draw_instance_data.h - intern/draw_manager.h intern/draw_manager_profiling.h intern/draw_manager_testing.h intern/draw_manager_text.h - intern/draw_shader.h + intern/draw_manager.h + intern/draw_manager.hh + intern/draw_pass.hh intern/draw_shader_shared.h + intern/draw_shader.h intern/draw_subdivision.h intern/draw_texture_pool.h - intern/draw_view.h intern/draw_view_data.h + intern/draw_view.cc + intern/draw_view.h intern/mesh_extractors/extract_mesh.hh intern/smaa_textures.h engines/basic/basic_engine.h @@ -496,14 +502,19 @@ set(GLSL_SRC intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl intern/shaders/common_view_clipping_lib.glsl intern/shaders/common_view_lib.glsl + intern/shaders/draw_command_generate_comp.glsl intern/shaders/draw_debug_draw_display_frag.glsl intern/shaders/draw_debug_draw_display_vert.glsl intern/shaders/draw_debug_info.hh intern/shaders/draw_debug_print_display_frag.glsl 
intern/shaders/draw_debug_print_display_vert.glsl + intern/shaders/draw_resource_finalize_comp.glsl + intern/shaders/draw_visibility_comp.glsl intern/draw_common_shader_shared.h + intern/draw_command_shared.hh intern/draw_shader_shared.h + intern/draw_defines.h engines/gpencil/shaders/gpencil_frag.glsl engines/gpencil/shaders/gpencil_vert.glsl @@ -708,6 +719,7 @@ if(WITH_GTESTS) if(WITH_OPENGL_DRAW_TESTS) set(TEST_SRC tests/draw_testing.cc + tests/draw_pass_test.cc tests/shaders_test.cc tests/draw_testing.hh diff --git a/source/blender/draw/intern/DRW_gpu_wrapper.hh b/source/blender/draw/intern/DRW_gpu_wrapper.hh index 8ed6594c31e..d9122657144 100644 --- a/source/blender/draw/intern/DRW_gpu_wrapper.hh +++ b/source/blender/draw/intern/DRW_gpu_wrapper.hh @@ -238,6 +238,11 @@ class StorageCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable GPU_storagebuf_clear_to_zero(ssbo_); } + void read() + { + GPU_storagebuf_read(ssbo_, this->data_); + } + operator GPUStorageBuf *() const { return ssbo_; @@ -850,6 +855,32 @@ class TextureFromPool : public Texture, NonMovable { GPUTexture *stencil_view() = delete; }; +/** + * Dummy type to bind texture as image. + * It is just a GPUTexture in disguise. 
+ */ +class Image {}; + +static inline Image *as_image(GPUTexture *tex) +{ + return reinterpret_cast<Image *>(tex); +} + +static inline Image **as_image(GPUTexture **tex) +{ + return reinterpret_cast<Image **>(tex); +} + +static inline GPUTexture *as_texture(Image *img) +{ + return reinterpret_cast<GPUTexture *>(img); +} + +static inline GPUTexture **as_texture(Image **img) +{ + return reinterpret_cast<GPUTexture **>(img); +} + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h index 30c1144739e..7b80ffd2b88 100644 --- a/source/blender/draw/intern/DRW_render.h +++ b/source/blender/draw/intern/DRW_render.h @@ -41,6 +41,7 @@ #include "draw_debug.h" #include "draw_manager_profiling.h" +#include "draw_state.h" #include "draw_view_data.h" #include "MEM_guardedalloc.h" @@ -288,83 +289,6 @@ void DRW_shader_library_free(DRWShaderLibrary *lib); /* Batches */ -/** - * DRWState is a bit-mask that stores the current render state and the desired render state. Based - * on the differences the minimum state changes can be invoked to setup the desired render state. - * - * The Write Stencil, Stencil test, Depth test and Blend state options are mutual exclusive - * therefore they aren't ordered as a bit mask. - */ -typedef enum { - /** To be used for compute passes. */ - DRW_STATE_NO_DRAW = 0, - /** Write mask */ - DRW_STATE_WRITE_DEPTH = (1 << 0), - DRW_STATE_WRITE_COLOR = (1 << 1), - /* Write Stencil. These options are mutual exclusive and packed into 2 bits */ - DRW_STATE_WRITE_STENCIL = (1 << 2), - DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2), - DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2), - /** Depth test. 
These options are mutual exclusive and packed into 3 bits */ - DRW_STATE_DEPTH_ALWAYS = (1 << 4), - DRW_STATE_DEPTH_LESS = (2 << 4), - DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4), - DRW_STATE_DEPTH_EQUAL = (4 << 4), - DRW_STATE_DEPTH_GREATER = (5 << 4), - DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4), - /** Culling test */ - DRW_STATE_CULL_BACK = (1 << 7), - DRW_STATE_CULL_FRONT = (1 << 8), - /** Stencil test. These options are mutually exclusive and packed into 2 bits. */ - DRW_STATE_STENCIL_ALWAYS = (1 << 9), - DRW_STATE_STENCIL_EQUAL = (2 << 9), - DRW_STATE_STENCIL_NEQUAL = (3 << 9), - - /** Blend state. These options are mutual exclusive and packed into 4 bits */ - DRW_STATE_BLEND_ADD = (1 << 11), - /** Same as additive but let alpha accumulate without pre-multiply. */ - DRW_STATE_BLEND_ADD_FULL = (2 << 11), - /** Standard alpha blending. */ - DRW_STATE_BLEND_ALPHA = (3 << 11), - /** Use that if color is already pre-multiply by alpha. */ - DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11), - DRW_STATE_BLEND_BACKGROUND = (5 << 11), - DRW_STATE_BLEND_OIT = (6 << 11), - DRW_STATE_BLEND_MUL = (7 << 11), - DRW_STATE_BLEND_SUB = (8 << 11), - /** Use dual source blending. WARNING: Only one color buffer allowed. */ - DRW_STATE_BLEND_CUSTOM = (9 << 11), - DRW_STATE_LOGIC_INVERT = (10 << 11), - DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11), - - DRW_STATE_IN_FRONT_SELECT = (1 << 27), - DRW_STATE_SHADOW_OFFSET = (1 << 28), - DRW_STATE_CLIP_PLANES = (1 << 29), - DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30), - /** DO NOT USE. Assumed always enabled. Only used internally. 
*/ - DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31), -} DRWState; - -ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE); - -#define DRW_STATE_DEFAULT \ - (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL) -#define DRW_STATE_BLEND_ENABLED \ - (DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \ - DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \ - DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT) -#define DRW_STATE_RASTERIZER_ENABLED \ - (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \ - DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) -#define DRW_STATE_DEPTH_TEST_ENABLED \ - (DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \ - DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL) -#define DRW_STATE_STENCIL_TEST_ENABLED \ - (DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL) -#define DRW_STATE_WRITE_STENCIL_ENABLED \ - (DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \ - DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) - typedef enum { DRW_ATTR_INT, DRW_ATTR_FLOAT, diff --git a/source/blender/draw/intern/draw_command.cc b/source/blender/draw/intern/draw_command.cc new file mode 100644 index 00000000000..7d5ea5c2048 --- /dev/null +++ b/source/blender/draw/intern/draw_command.cc @@ -0,0 +1,600 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
*/ + +/** \file + * \ingroup draw + */ + +#include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_compute.h" +#include "GPU_debug.h" + +#include "draw_command.hh" +#include "draw_shader.h" +#include "draw_view.hh" + +#include <bitset> +#include <sstream> + +namespace blender::draw::command { + +/* -------------------------------------------------------------------- */ +/** \name Commands Execution + * \{ */ + +void ShaderBind::execute(RecordingState &state) const +{ + if (assign_if_different(state.shader, shader)) { + GPU_shader_bind(shader); + } +} + +void ResourceBind::execute() const +{ + if (slot == -1) { + return; + } + switch (type) { + case ResourceBind::Type::Sampler: + GPU_texture_bind_ex(is_reference ? *texture_ref : texture, sampler, slot, false); + break; + case ResourceBind::Type::Image: + GPU_texture_image_bind(is_reference ? *texture_ref : texture, slot); + break; + case ResourceBind::Type::UniformBuf: + GPU_uniformbuf_bind(is_reference ? *uniform_buf_ref : uniform_buf, slot); + break; + case ResourceBind::Type::StorageBuf: + GPU_storagebuf_bind(is_reference ? 
*storage_buf_ref : storage_buf, slot); + break; + } +} + +void PushConstant::execute(RecordingState &state) const +{ + if (location == -1) { + return; + } + switch (type) { + case PushConstant::Type::IntValue: + GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int4_value); + break; + case PushConstant::Type::IntReference: + GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int_ref); + break; + case PushConstant::Type::FloatValue: + GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float4_value); + break; + case PushConstant::Type::FloatReference: + GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float_ref); + break; + } +} + +void Draw::execute(RecordingState &state) const +{ + state.front_facing_set(handle.has_inverted_handedness()); + + if (GPU_shader_draw_parameters_support() == false) { + GPU_batch_resource_id_buf_set(batch, state.resource_id_buf); + } + + GPU_batch_set_shader(batch, state.shader); + GPU_batch_draw_advanced(batch, vertex_first, vertex_len, 0, instance_len); +} + +void DrawMulti::execute(RecordingState &state) const +{ + DrawMultiBuf::DrawCommandBuf &indirect_buf = multi_draw_buf->command_buf_; + DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_; + + uint group_index = this->group_first; + while (group_index != (uint)-1) { + const DrawGroup &group = groups[group_index]; + + if (group.vertex_len > 0) { + if (GPU_shader_draw_parameters_support() == false) { + GPU_batch_resource_id_buf_set(group.gpu_batch, state.resource_id_buf); + } + + GPU_batch_set_shader(group.gpu_batch, state.shader); + + constexpr intptr_t stride = sizeof(DrawCommand); + /* We have 2 indirect command reserved per draw group. */ + intptr_t offset = stride * group_index * 2; + + /* Draw negatively scaled geometry first. 
*/ + if (group.len - group.front_facing_len > 0) { + state.front_facing_set(true); + GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset); + } + + if (group.front_facing_len > 0) { + state.front_facing_set(false); + GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset + stride); + } + } + + group_index = group.next; + } +} + +void DrawIndirect::execute(RecordingState &state) const +{ + state.front_facing_set(handle.has_inverted_handedness()); + + GPU_batch_draw_indirect(batch, *indirect_buf, 0); +} + +void Dispatch::execute(RecordingState &state) const +{ + if (is_reference) { + GPU_compute_dispatch(state.shader, size_ref->x, size_ref->y, size_ref->z); + } + else { + GPU_compute_dispatch(state.shader, size.x, size.y, size.z); + } +} + +void DispatchIndirect::execute(RecordingState &state) const +{ + GPU_compute_dispatch_indirect(state.shader, *indirect_buf); +} + +void Barrier::execute() const +{ + GPU_memory_barrier(type); +} + +void Clear::execute() const +{ + GPUFrameBuffer *fb = GPU_framebuffer_active_get(); + GPU_framebuffer_clear(fb, (eGPUFrameBufferBits)clear_channels, color, depth, stencil); +} + +void StateSet::execute(RecordingState &recording_state) const +{ + /** + * Does not support locked state for the moment and never should. + * Better implement a less hacky selection! + */ + BLI_assert(DST.state_lock == 0); + + if (!assign_if_different(recording_state.pipeline_state, new_state)) { + return; + } + + /* Keep old API working. Keep the state tracking in sync. */ + /* TODO(fclem): Move at the end of a pass. */ + DST.state = new_state; + + GPU_state_set(to_write_mask(new_state), + to_blend(new_state), + to_face_cull_test(new_state), + to_depth_test(new_state), + to_stencil_test(new_state), + to_stencil_op(new_state), + to_provoking_vertex(new_state)); + + if (new_state & DRW_STATE_SHADOW_OFFSET) { + GPU_shadow_offset(true); + } + else { + GPU_shadow_offset(false); + } + + /* TODO: this should be part of shader state. 
*/ + if (new_state & DRW_STATE_CLIP_PLANES) { + GPU_clip_distances(recording_state.view_clip_plane_count); + } + else { + GPU_clip_distances(0); + } + + if (new_state & DRW_STATE_IN_FRONT_SELECT) { + /* XXX `GPU_depth_range` is not a perfect solution + * since very distant geometries can still be occluded. + * Also the depth test precision of these geometries is impaired. + * However, it solves the selection for the vast majority of cases. */ + GPU_depth_range(0.0f, 0.01f); + } + else { + GPU_depth_range(0.0f, 1.0f); + } + + if (new_state & DRW_STATE_PROGRAM_POINT_SIZE) { + GPU_program_point_size(true); + } + else { + GPU_program_point_size(false); + } +} + +void StencilSet::execute() const +{ + GPU_stencil_write_mask_set(write_mask); + GPU_stencil_compare_mask_set(compare_mask); + GPU_stencil_reference_set(reference); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Commands Serialization for debugging + * \{ */ + +std::string ShaderBind::serialize() const +{ + return std::string(".shader_bind(") + GPU_shader_get_name(shader) + ")"; +} + +std::string ResourceBind::serialize() const +{ + switch (type) { + case Type::Sampler: + return std::string(".bind_texture") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + + (sampler != GPU_SAMPLER_MAX ? ", sampler=" + std::to_string(sampler) : "") + ")"; + case Type::Image: + return std::string(".bind_image") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + ")"; + case Type::UniformBuf: + return std::string(".bind_uniform_buf") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + ")"; + case Type::StorageBuf: + return std::string(".bind_storage_buf") + (is_reference ? 
"_ref" : "") + "(" + + std::to_string(slot) + ")"; + default: + BLI_assert_unreachable(); + return ""; + } +} + +std::string PushConstant::serialize() const +{ + std::stringstream ss; + for (int i = 0; i < array_len; i++) { + switch (comp_len) { + case 1: + switch (type) { + case Type::IntValue: + ss << int1_value; + break; + case Type::IntReference: + ss << int_ref[i]; + break; + case Type::FloatValue: + ss << float1_value; + break; + case Type::FloatReference: + ss << float_ref[i]; + break; + } + break; + case 2: + switch (type) { + case Type::IntValue: + ss << int2_value; + break; + case Type::IntReference: + ss << int2_ref[i]; + break; + case Type::FloatValue: + ss << float2_value; + break; + case Type::FloatReference: + ss << float2_ref[i]; + break; + } + break; + case 3: + switch (type) { + case Type::IntValue: + ss << int3_value; + break; + case Type::IntReference: + ss << int3_ref[i]; + break; + case Type::FloatValue: + ss << float3_value; + break; + case Type::FloatReference: + ss << float3_ref[i]; + break; + } + break; + case 4: + switch (type) { + case Type::IntValue: + ss << int4_value; + break; + case Type::IntReference: + ss << int4_ref[i]; + break; + case Type::FloatValue: + ss << float4_value; + break; + case Type::FloatReference: + ss << float4_ref[i]; + break; + } + break; + case 16: + switch (type) { + case Type::IntValue: + case Type::IntReference: + BLI_assert_unreachable(); + break; + case Type::FloatValue: + ss << *reinterpret_cast<const float4x4 *>(&float4_value); + break; + case Type::FloatReference: + ss << *float4x4_ref; + break; + } + break; + } + if (i < array_len - 1) { + ss << ", "; + } + } + + return std::string(".push_constant(") + std::to_string(location) + ", data=" + ss.str() + ")"; +} + +std::string Draw::serialize() const +{ + std::string inst_len = (instance_len == (uint)-1) ? "from_batch" : std::to_string(instance_len); + std::string vert_len = (vertex_len == (uint)-1) ? 
"from_batch" : std::to_string(vertex_len); + std::string vert_first = (vertex_first == (uint)-1) ? "from_batch" : + std::to_string(vertex_first); + return std::string(".draw(inst_len=") + inst_len + ", vert_len=" + vert_len + + ", vert_first=" + vert_first + ", res_id=" + std::to_string(handle.resource_index()) + + ")"; +} + +std::string DrawMulti::serialize(std::string line_prefix) const +{ + DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_; + + MutableSpan<DrawPrototype> prototypes(multi_draw_buf->prototype_buf_.data(), + multi_draw_buf->prototype_count_); + + /* This emulates the GPU sorting but without the unstable draw order. */ + std::sort( + prototypes.begin(), prototypes.end(), [](const DrawPrototype &a, const DrawPrototype &b) { + return (a.group_id < b.group_id) || + (a.group_id == b.group_id && a.resource_handle > b.resource_handle); + }); + + /* Compute prefix sum to have correct offsets. */ + uint prefix_sum = 0u; + for (DrawGroup &group : groups) { + group.start = prefix_sum; + prefix_sum += group.front_proto_len + group.back_proto_len; + } + + std::stringstream ss; + + uint group_len = 0; + uint group_index = this->group_first; + while (group_index != (uint)-1) { + const DrawGroup &grp = groups[group_index]; + + ss << std::endl << line_prefix << " .group(id=" << group_index << ", len=" << grp.len << ")"; + + intptr_t offset = grp.start; + + if (grp.back_proto_len > 0) { + for (DrawPrototype &proto : prototypes.slice({offset, grp.back_proto_len})) { + BLI_assert(proto.group_id == group_index); + ResourceHandle handle(proto.resource_handle); + BLI_assert(handle.has_inverted_handedness()); + ss << std::endl + << line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len) + << ", resource_id=" << std::to_string(handle.resource_index()) << ", back_face)"; + } + offset += grp.back_proto_len; + } + + if (grp.front_proto_len > 0) { + for (DrawPrototype &proto : prototypes.slice({offset, grp.front_proto_len})) { + 
BLI_assert(proto.group_id == group_index); + ResourceHandle handle(proto.resource_handle); + BLI_assert(!handle.has_inverted_handedness()); + ss << std::endl + << line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len) + << ", resource_id=" << std::to_string(handle.resource_index()) << ", front_face)"; + } + } + + group_index = grp.next; + group_len++; + } + + ss << std::endl; + + return line_prefix + ".draw_multi(" + std::to_string(group_len) + ")" + ss.str(); +} + +std::string DrawIndirect::serialize() const +{ + return std::string(".draw_indirect()"); +} + +std::string Dispatch::serialize() const +{ + int3 sz = is_reference ? *size_ref : size; + return std::string(".dispatch") + (is_reference ? "_ref" : "") + "(" + std::to_string(sz.x) + + ", " + std::to_string(sz.y) + ", " + std::to_string(sz.z) + ")"; +} + +std::string DispatchIndirect::serialize() const +{ + return std::string(".dispatch_indirect()"); +} + +std::string Barrier::serialize() const +{ + /* TOOD(fclem): Better serialization... */ + return std::string(".barrier(") + std::to_string(type) + ")"; +} + +std::string Clear::serialize() const +{ + std::stringstream ss; + if (eGPUFrameBufferBits(clear_channels) & GPU_COLOR_BIT) { + ss << "color=" << color; + if (eGPUFrameBufferBits(clear_channels) & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) { + ss << ", "; + } + } + if (eGPUFrameBufferBits(clear_channels) & GPU_DEPTH_BIT) { + ss << "depth=" << depth; + if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) { + ss << ", "; + } + } + if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) { + ss << "stencil=0b" << std::bitset<8>(stencil) << ")"; + } + return std::string(".clear(") + ss.str() + ")"; +} + +std::string StateSet::serialize() const +{ + /* TOOD(fclem): Better serialization... 
*/ + return std::string(".state_set(") + std::to_string(new_state) + ")"; +} + +std::string StencilSet::serialize() const +{ + std::stringstream ss; + ss << ".stencil_set(write_mask=0b" << std::bitset<8>(write_mask) << ", compare_mask=0b" + << std::bitset<8>(compare_mask) << ", reference=0b" << std::bitset<8>(reference); + return ss.str(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Commands buffers binding / command / resource ID generation + * \{ */ + +void DrawCommandBuf::bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands) +{ + UNUSED_VARS(headers, commands); + + resource_id_count_ = 0; + + for (const Header &header : headers) { + if (header.type != Type::Draw) { + continue; + } + + Draw &cmd = commands[header.index].draw; + + int batch_vert_len, batch_vert_first, batch_base_index, batch_inst_len; + /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ + GPU_batch_draw_parameter_get( + cmd.batch, &batch_vert_len, &batch_vert_first, &batch_base_index, &batch_inst_len); + /* Instancing attributes are not supported using the new pipeline since we use the base + * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ + BLI_assert(batch_inst_len == 1); + + if (cmd.vertex_len == (uint)-1) { + cmd.vertex_len = batch_vert_len; + } + + if (cmd.handle.raw > 0) { + /* Save correct offset to start of resource_id buffer region for this draw. */ + uint instance_first = resource_id_count_; + resource_id_count_ += cmd.instance_len; + /* Ensure the buffer is big enough. */ + resource_id_buf_.get_or_resize(resource_id_count_ - 1); + + /* Copy the resource id for all instances. 
*/ + uint index = cmd.handle.resource_index(); + for (int i = instance_first; i < (instance_first + cmd.instance_len); i++) { + resource_id_buf_[i] = index; + } + } + } + + resource_id_buf_.push_update(); + + if (GPU_shader_draw_parameters_support() == false) { + state.resource_id_buf = resource_id_buf_; + } + else { + GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT); + } +} + +void DrawMultiBuf::bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + VisibilityBuf &visibility_buf) +{ + UNUSED_VARS(headers, commands); + + GPU_debug_group_begin("DrawMultiBuf.bind"); + + resource_id_count_ = 0u; + for (DrawGroup &group : MutableSpan<DrawGroup>(group_buf_.data(), group_count_)) { + /* Compute prefix sum of all instance of previous group. */ + group.start = resource_id_count_; + resource_id_count_ += group.len; + + int batch_inst_len; + /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ + GPU_batch_draw_parameter_get(group.gpu_batch, + &group.vertex_len, + &group.vertex_first, + &group.base_index, + &batch_inst_len); + + /* Instancing attributes are not supported using the new pipeline since we use the base + * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ + BLI_assert(batch_inst_len == 1); + UNUSED_VARS_NDEBUG(batch_inst_len); + + /* Now that we got the batch infos, we can set the counters to 0. */ + group.total_counter = group.front_facing_counter = group.back_facing_counter = 0; + } + + group_buf_.push_update(); + prototype_buf_.push_update(); + /* Allocate enough for the expansion pass. */ + resource_id_buf_.get_or_resize(resource_id_count_); + /* Two command per group. 
*/ + command_buf_.get_or_resize(group_count_ * 2); + + if (prototype_count_ > 0) { + GPUShader *shader = DRW_shader_draw_command_generate_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "prototype_len", prototype_count_); + GPU_storagebuf_bind(group_buf_, GPU_shader_get_ssbo(shader, "group_buf")); + GPU_storagebuf_bind(visibility_buf, GPU_shader_get_ssbo(shader, "visibility_buf")); + GPU_storagebuf_bind(prototype_buf_, GPU_shader_get_ssbo(shader, "prototype_buf")); + GPU_storagebuf_bind(command_buf_, GPU_shader_get_ssbo(shader, "command_buf")); + GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT); + GPU_compute_dispatch(shader, divide_ceil_u(prototype_count_, DRW_COMMAND_GROUP_SIZE), 1, 1); + if (GPU_shader_draw_parameters_support() == false) { + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + state.resource_id_buf = resource_id_buf_; + } + else { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + } + + GPU_debug_group_end(); +} + +/** \} */ + +}; // namespace blender::draw::command diff --git a/source/blender/draw/intern/draw_command.hh b/source/blender/draw/intern/draw_command.hh new file mode 100644 index 00000000000..e24a620bb73 --- /dev/null +++ b/source/blender/draw/intern/draw_command.hh @@ -0,0 +1,533 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Commands stored inside draw passes. Converted into GPU commands upon pass submission. + * + * Draw calls (primitive rendering commands) are managed by either `DrawCommandBuf` or + * `DrawMultiBuf`. See implementation details at their definition. 
+ */ + +#include "BKE_global.h" +#include "BLI_map.hh" +#include "DRW_gpu_wrapper.hh" + +#include "draw_command_shared.hh" +#include "draw_handle.hh" +#include "draw_state.h" +#include "draw_view.hh" + +namespace blender::draw::command { + +class DrawCommandBuf; +class DrawMultiBuf; + +/* -------------------------------------------------------------------- */ +/** \name Recording State + * \{ */ + +/** + * Command recording state. + * Keep track of several states and avoid redundant state changes. + */ +struct RecordingState { + GPUShader *shader = nullptr; + bool front_facing = true; + bool inverted_view = false; + DRWState pipeline_state = DRW_STATE_NO_DRAW; + int view_clip_plane_count = 0; + /** Used for gl_BaseInstance workaround. */ + GPUStorageBuf *resource_id_buf = nullptr; + + void front_facing_set(bool facing) + { + /* Facing is inverted if view is not in expected handedness. */ + facing = this->inverted_view == facing; + /* Remove redundant changes. */ + if (assign_if_different(this->front_facing, facing)) { + GPU_front_facing(!facing); + } + } + + void cleanup() + { + if (front_facing == false) { + GPU_front_facing(false); + } + + if (G.debug & G_DEBUG_GPU) { + GPU_storagebuf_unbind_all(); + GPU_texture_image_unbind_all(); + GPU_texture_unbind_all(); + GPU_uniformbuf_unbind_all(); + } + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Regular Commands + * \{ */ + +enum class Type : uint8_t { + /** + * None Type commands are either uninitialized or are repurposed as data storage. + * They are skipped during submission. + */ + None = 0, + + /** Commands stored as Undetermined in regular command buffer. */ + Barrier, + Clear, + Dispatch, + DispatchIndirect, + Draw, + DrawIndirect, + PushConstant, + ResourceBind, + ShaderBind, + StateSet, + StencilSet, + + /** Special commands stored in separate buffers. 
*/ + SubPass, + DrawMulti, +}; + +/** + * The index of the group is implicit since it is known by the one who want to + * access it. This also allows to have an indexed object to split the command + * stream. + */ +struct Header { + /** Command type. */ + Type type; + /** Command index in command heap of this type. */ + uint index; +}; + +struct ShaderBind { + GPUShader *shader; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct ResourceBind { + eGPUSamplerState sampler; + int slot; + bool is_reference; + + enum class Type : uint8_t { + Sampler = 0, + Image, + UniformBuf, + StorageBuf, + } type; + + union { + /** TODO: Use draw::Texture|StorageBuffer|UniformBuffer as resources as they will give more + * debug info. */ + GPUUniformBuf *uniform_buf; + GPUUniformBuf **uniform_buf_ref; + GPUStorageBuf *storage_buf; + GPUStorageBuf **storage_buf_ref; + /** NOTE: Texture is used for both Sampler and Image binds. */ + GPUTexture *texture; + GPUTexture **texture_ref; + }; + + ResourceBind() = default; + + ResourceBind(int slot_, GPUUniformBuf *res) + : slot(slot_), is_reference(false), type(Type::UniformBuf), uniform_buf(res){}; + ResourceBind(int slot_, GPUUniformBuf **res) + : slot(slot_), is_reference(true), type(Type::UniformBuf), uniform_buf_ref(res){}; + ResourceBind(int slot_, GPUStorageBuf *res) + : slot(slot_), is_reference(false), type(Type::StorageBuf), storage_buf(res){}; + ResourceBind(int slot_, GPUStorageBuf **res) + : slot(slot_), is_reference(true), type(Type::StorageBuf), storage_buf_ref(res){}; + ResourceBind(int slot_, draw::Image *res) + : slot(slot_), is_reference(false), type(Type::Image), texture(draw::as_texture(res)){}; + ResourceBind(int slot_, draw::Image **res) + : slot(slot_), is_reference(true), type(Type::Image), texture_ref(draw::as_texture(res)){}; + ResourceBind(int slot_, GPUTexture *res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(false), type(Type::Sampler), 
texture(res){}; + ResourceBind(int slot_, GPUTexture **res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(true), type(Type::Sampler), texture_ref(res){}; + + void execute() const; + std::string serialize() const; +}; + +struct PushConstant { + int location; + uint8_t array_len; + uint8_t comp_len; + enum class Type : uint8_t { + IntValue = 0, + FloatValue, + IntReference, + FloatReference, + } type; + /** + * IMPORTANT: Data is at the end of the struct as it can span over the next commands. + * These next commands are not real commands but just memory to hold the data and are not + * referenced by any Command::Header. + * This is a hack to support float4x4 copy. + */ + union { + int int1_value; + int2 int2_value; + int3 int3_value; + int4 int4_value; + float float1_value; + float2 float2_value; + float3 float3_value; + float4 float4_value; + const int *int_ref; + const int2 *int2_ref; + const int3 *int3_ref; + const int4 *int4_ref; + const float *float_ref; + const float2 *float2_ref; + const float3 *float3_ref; + const float4 *float4_ref; + const float4x4 *float4x4_ref; + }; + + PushConstant() = default; + + PushConstant(int loc, const float &val) + : location(loc), array_len(1), comp_len(1), type(Type::FloatValue), float1_value(val){}; + PushConstant(int loc, const float2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::FloatValue), float2_value(val){}; + PushConstant(int loc, const float3 &val) + : location(loc), array_len(1), comp_len(3), type(Type::FloatValue), float3_value(val){}; + PushConstant(int loc, const float4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::FloatValue), float4_value(val){}; + + PushConstant(int loc, const int &val) + : location(loc), array_len(1), comp_len(1), type(Type::IntValue), int1_value(val){}; + PushConstant(int loc, const int2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::IntValue), int2_value(val){}; + PushConstant(int loc, const int3 &val) + : 
location(loc), array_len(1), comp_len(3), type(Type::IntValue), int3_value(val){}; + PushConstant(int loc, const int4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::IntValue), int4_value(val){}; + + PushConstant(int loc, const float *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::FloatReference), float_ref(val){}; + PushConstant(int loc, const float2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::FloatReference), float2_ref(val){}; + PushConstant(int loc, const float3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::FloatReference), float3_ref(val){}; + PushConstant(int loc, const float4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::FloatReference), float4_ref(val){}; + PushConstant(int loc, const float4x4 *val) + : location(loc), array_len(1), comp_len(16), type(Type::FloatReference), float4x4_ref(val){}; + + PushConstant(int loc, const int *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::IntReference), int_ref(val){}; + PushConstant(int loc, const int2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::IntReference), int2_ref(val){}; + PushConstant(int loc, const int3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::IntReference), int3_ref(val){}; + PushConstant(int loc, const int4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::IntReference), int4_ref(val){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Draw { + GPUBatch *batch; + uint instance_len; + uint vertex_len; + uint vertex_first; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DrawMulti { + GPUBatch *batch; + DrawMultiBuf *multi_draw_buf; + uint group_first; + uint uuid; + + void execute(RecordingState &state) const; + std::string serialize(std::string line_prefix) 
const; +}; + +struct DrawIndirect { + GPUBatch *batch; + GPUStorageBuf **indirect_buf; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Dispatch { + bool is_reference; + union { + int3 size; + int3 *size_ref; + }; + + Dispatch() = default; + + Dispatch(int3 group_len) : is_reference(false), size(group_len){}; + Dispatch(int3 *group_len) : is_reference(true), size_ref(group_len){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DispatchIndirect { + GPUStorageBuf **indirect_buf; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Barrier { + eGPUBarrier type; + + void execute() const; + std::string serialize() const; +}; + +struct Clear { + uint8_t clear_channels; /* #eGPUFrameBufferBits. But want to save some bits. */ + uint8_t stencil; + float depth; + float4 color; + + void execute() const; + std::string serialize() const; +}; + +struct StateSet { + DRWState new_state; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct StencilSet { + uint write_mask; + uint compare_mask; + uint reference; + + void execute() const; + std::string serialize() const; +}; + +union Undetermined { + ShaderBind shader_bind; + ResourceBind resource_bind; + PushConstant push_constant; + Draw draw; + DrawMulti draw_multi; + DrawIndirect draw_indirect; + Dispatch dispatch; + DispatchIndirect dispatch_indirect; + Barrier barrier; + Clear clear; + StateSet state_set; + StencilSet stencil_set; +}; + +/** Try to keep the command size as low as possible for performance. */ +BLI_STATIC_ASSERT(sizeof(Undetermined) <= 24, "One of the command type is too large.") + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Commands + * + * A draw command buffer used to issue single draw commands without instance merging or any + * other optimizations. 
+ * + * It still uses a ResourceIdBuf to keep the same shader interface as multi draw commands. + * + * \{ */ + +class DrawCommandBuf { + friend Manager; + + private: + using ResourceIdBuf = StorageArrayBuffer<uint, 128, false>; + + /** Array of resource id. One per instance. Generated on GPU and send to GPU. */ + ResourceIdBuf resource_id_buf_; + /** Used items in the resource_id_buf_. Not it's allocated length. */ + uint resource_id_count_ = 0; + + public: + void clear(){}; + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + vertex_first = vertex_first != -1 ? vertex_first : 0; + instance_len = instance_len != -1 ? instance_len : 1; + + int64_t index = commands.append_and_get_index({}); + headers.append({Type::Draw, static_cast<uint>(index)}); + commands[index].draw = {batch, instance_len, vertex_len, vertex_first, handle}; + } + + void bind(RecordingState &state, Vector<Header, 0> &headers, Vector<Undetermined, 0> &commands); +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Multi Draw Commands + * + * For efficient rendering of large scene we strive to minimize the number of draw call and state + * changes. To this end, we group many rendering commands and sort them per render state using + * `DrawGroup` as a container. This is done automatically for any successive commands with the + * same state. + * + * A `DrawGroup` is the combination of a `GPUBatch` (VBO state) and a `command::DrawMulti` + * (Pipeline State). + * + * Inside each `DrawGroup` all instances of a same `GPUBatch` is merged into a single indirect + * command. + * + * To support this arbitrary reordering, we only need to know the offset of all the commands for a + * specific `DrawGroup`. This is done on CPU by doing a simple prefix sum. 
The result is pushed to + * GPU and used on CPU to issue the right command indirect. + * + * Each draw command is stored in an unsorted array of `DrawPrototype` and sent directly to the + * GPU. + * + * A command generation compute shader then go over each `DrawPrototype`. For each it adds it (or + * not depending on visibility) to the correct draw command using the offset of the `DrawGroup` + * computed on CPU. After that, it also outputs one resource ID for each instance inside a + * `DrawPrototype`. + * + * \{ */ + +class DrawMultiBuf { + friend Manager; + friend DrawMulti; + + private: + using DrawGroupBuf = StorageArrayBuffer<DrawGroup, 16>; + using DrawPrototypeBuf = StorageArrayBuffer<DrawPrototype, 16>; + using DrawCommandBuf = StorageArrayBuffer<DrawCommand, 16, true>; + using ResourceIdBuf = StorageArrayBuffer<uint, 128, true>; + + using DrawGroupKey = std::pair<uint, GPUBatch *>; + using DrawGroupMap = Map<DrawGroupKey, uint>; + /** Maps a DrawMulti command and a gpu batch to their unique DrawGroup command. */ + DrawGroupMap group_ids_; + + /** DrawGroup Command heap. Uploaded to GPU for sorting. */ + DrawGroupBuf group_buf_ = {"DrawGroupBuf"}; + /** Command Prototypes. Unsorted */ + DrawPrototypeBuf prototype_buf_ = {"DrawPrototypeBuf"}; + /** Command list generated by the sorting / compaction steps. Lives on GPU. */ + DrawCommandBuf command_buf_ = {"DrawCommandBuf"}; + /** Array of resource id. One per instance. Lives on GPU. */ + ResourceIdBuf resource_id_buf_ = {"ResourceIdBuf"}; + /** Give unique ID to each header so we can use that as hash key. */ + uint header_id_counter_ = 0; + /** Number of groups inside group_buf_. */ + uint group_count_ = 0; + /** Number of prototype command inside prototype_buf_. */ + uint prototype_count_ = 0; + /** Used items in the resource_id_buf_. Not it's allocated length. 
*/ + uint resource_id_count_ = 0; + + public: + void clear() + { + header_id_counter_ = 0; + group_count_ = 0; + prototype_count_ = 0; + group_ids_.clear(); + } + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + /* Unsupported for now. Use PassSimple. */ + BLI_assert(vertex_first == 0 || vertex_first == -1); + BLI_assert(vertex_len == -1); + + instance_len = instance_len != -1 ? instance_len : 1; + + /* If there was some state changes since previous call, we have to create another command. */ + if (headers.is_empty() || headers.last().type != Type::DrawMulti) { + uint index = commands.append_and_get_index({}); + headers.append({Type::DrawMulti, index}); + commands[index].draw_multi = {batch, this, (uint)-1, header_id_counter_++}; + } + + DrawMulti &cmd = commands.last().draw_multi; + + uint &group_id = group_ids_.lookup_or_add(DrawGroupKey(cmd.uuid, batch), (uint)-1); + + bool inverted = handle.has_inverted_handedness(); + + if (group_id == (uint)-1) { + uint new_group_id = group_count_++; + + DrawGroup &group = group_buf_.get_or_resize(new_group_id); + group.next = cmd.group_first; + group.len = instance_len; + group.front_facing_len = inverted ? 0 : instance_len; + group.gpu_batch = batch; + group.front_proto_len = 0; + group.back_proto_len = 0; + /* For serialization only. */ + (inverted ? group.back_proto_len : group.front_proto_len)++; + /* Append to list. */ + cmd.group_first = new_group_id; + group_id = new_group_id; + } + else { + DrawGroup &group = group_buf_[group_id]; + group.len += instance_len; + group.front_facing_len += inverted ? 0 : instance_len; + /* For serialization only. */ + (inverted ? 
group.back_proto_len : group.front_proto_len)++; + } + + DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++); + draw.group_id = group_id; + draw.resource_handle = handle.raw; + draw.instance_len = instance_len; + } + + void bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + VisibilityBuf &visibility_buf); +}; + +/** \} */ + +}; // namespace blender::draw::command
\ No newline at end of file diff --git a/source/blender/draw/intern/draw_command_shared.hh b/source/blender/draw/intern/draw_command_shared.hh new file mode 100644 index 00000000000..22d1facfb09 --- /dev/null +++ b/source/blender/draw/intern/draw_command_shared.hh @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#ifndef GPU_SHADER +# include "BLI_span.hh" +# include "GPU_shader_shared_utils.h" + +namespace blender::draw::command { + +struct RecordingState; + +#endif + +/* -------------------------------------------------------------------- */ +/** \name Multi Draw + * \{ */ + +/** + * A DrawGroup allow to split the command stream into batch-able chunks of commands with + * the same render state. + */ +struct DrawGroup { + /** Index of next DrawGroup from the same header. */ + uint next; + + /** Index of the first instances after sorting. */ + uint start; + /** Total number of instances (including inverted facing). Needed to issue the draw call. */ + uint len; + /** Number of non inverted scaling instances in this Group. */ + uint front_facing_len; + + /** GPUBatch values to be copied to DrawCommand after sorting (if not overriden). */ + int vertex_len; + int vertex_first; + int base_index; + + /** Atomic counters used during command sorting. */ + uint total_counter; + +#ifndef GPU_SHADER + /* NOTE: Union just to make sure the struct has always the same size on all platform. */ + union { + struct { + /** For debug printing only. */ + uint front_proto_len; + uint back_proto_len; + /** Needed to create the correct draw call. */ + GPUBatch *gpu_batch; + }; + struct { +#endif + uint front_facing_counter; + uint back_facing_counter; + uint _pad0, _pad1; +#ifndef GPU_SHADER + }; + }; +#endif +}; +BLI_STATIC_ASSERT_ALIGN(DrawGroup, 16) + +/** + * Representation of a future draw call inside a DrawGroup. 
This #DrawPrototype is then + * converted into #DrawCommand on GPU after visibility and compaction. Multiple + * #DrawPrototype might get merged into the same final #DrawCommand. + */ +struct DrawPrototype { + /* Reference to parent DrawGroup to get the GPUBatch vertex / instance count. */ + uint group_id; + /* Resource handle associated with this call. Also reference visibility. */ + uint resource_handle; + /* Number of instances. */ + uint instance_len; + uint _pad0; +}; +BLI_STATIC_ASSERT_ALIGN(DrawPrototype, 16) + +/** \} */ + +#ifndef GPU_SHADER +}; // namespace blender::draw::command +#endif diff --git a/source/blender/draw/intern/draw_common_shader_shared.h b/source/blender/draw/intern/draw_common_shader_shared.h index c9819d9da87..57cb7880ce6 100644 --- a/source/blender/draw/intern/draw_common_shader_shared.h +++ b/source/blender/draw/intern/draw_common_shader_shared.h @@ -19,7 +19,7 @@ typedef struct GlobalsUboStorage GlobalsUboStorage; #define UBO_LAST_COLOR color_uv_shadow /* Used as ubo but colors can be directly referenced as well */ -/* NOTE: Also keep all color as vec4 and between #UBO_FIRST_COLOR and #UBO_LAST_COLOR. */ +/* \note Also keep all color as vec4 and between #UBO_FIRST_COLOR and #UBO_LAST_COLOR. 
*/ struct GlobalsUboStorage { /* UBOs data needs to be 16 byte aligned (size of vec4) */ float4 color_wire; diff --git a/source/blender/draw/intern/draw_debug.cc b/source/blender/draw/intern/draw_debug.cc index ab78db5d913..9cb79d73812 100644 --- a/source/blender/draw/intern/draw_debug.cc +++ b/source/blender/draw/intern/draw_debug.cc @@ -63,26 +63,26 @@ DebugDraw::DebugDraw() void DebugDraw::init() { - cpu_print_buf_.command.v_count = 0; - cpu_print_buf_.command.v_first = 0; - cpu_print_buf_.command.i_count = 1; - cpu_print_buf_.command.i_first = 0; - - cpu_draw_buf_.command.v_count = 0; - cpu_draw_buf_.command.v_first = 0; - cpu_draw_buf_.command.i_count = 1; - cpu_draw_buf_.command.i_first = 0; - - gpu_print_buf_.command.v_count = 0; - gpu_print_buf_.command.v_first = 0; - gpu_print_buf_.command.i_count = 1; - gpu_print_buf_.command.i_first = 0; + cpu_print_buf_.command.vertex_len = 0; + cpu_print_buf_.command.vertex_first = 0; + cpu_print_buf_.command.instance_len = 1; + cpu_print_buf_.command.instance_first_array = 0; + + cpu_draw_buf_.command.vertex_len = 0; + cpu_draw_buf_.command.vertex_first = 0; + cpu_draw_buf_.command.instance_len = 1; + cpu_draw_buf_.command.instance_first_array = 0; + + gpu_print_buf_.command.vertex_len = 0; + gpu_print_buf_.command.vertex_first = 0; + gpu_print_buf_.command.instance_len = 1; + gpu_print_buf_.command.instance_first_array = 0; gpu_print_buf_used = false; - gpu_draw_buf_.command.v_count = 0; - gpu_draw_buf_.command.v_first = 0; - gpu_draw_buf_.command.i_count = 1; - gpu_draw_buf_.command.i_first = 0; + gpu_draw_buf_.command.vertex_len = 0; + gpu_draw_buf_.command.vertex_first = 0; + gpu_draw_buf_.command.instance_len = 1; + gpu_draw_buf_.command.instance_first_array = 0; gpu_draw_buf_used = false; modelmat_reset(); @@ -323,11 +323,11 @@ template<> void DebugDraw::print_value<uint4>(const uint4 &value) void DebugDraw::draw_line(float3 v1, float3 v2, uint color) { DebugDrawBuf &buf = cpu_draw_buf_; - uint index = 
buf.command.v_count; + uint index = buf.command.vertex_len; if (index + 2 < DRW_DEBUG_DRAW_VERT_MAX) { buf.verts[index + 0] = vert_pack(model_mat_ * v1, color); buf.verts[index + 1] = vert_pack(model_mat_ * v2, color); - buf.command.v_count += 2; + buf.command.vertex_len += 2; } } @@ -356,7 +356,7 @@ DRWDebugVert DebugDraw::vert_pack(float3 pos, uint color) void DebugDraw::print_newline() { print_col_ = 0u; - print_row_ = ++cpu_print_buf_.command.i_first; + print_row_ = ++cpu_print_buf_.command.instance_first_array; } void DebugDraw::print_string_start(uint len) @@ -406,7 +406,7 @@ void DebugDraw::print_char4(uint data) break; } /* NOTE: Do not skip the header manually like in GPU. */ - uint cursor = cpu_print_buf_.command.v_count++; + uint cursor = cpu_print_buf_.command.vertex_len++; if (cursor < DRW_DEBUG_PRINT_MAX) { /* For future usage. (i.e: Color) */ uint flags = 0u; @@ -504,7 +504,7 @@ void DebugDraw::print_value_uint(uint value, void DebugDraw::display_lines() { - if (cpu_draw_buf_.command.v_count == 0 && gpu_draw_buf_used == false) { + if (cpu_draw_buf_.command.vertex_len == 0 && gpu_draw_buf_used == false) { return; } GPU_debug_group_begin("Lines"); @@ -541,7 +541,7 @@ void DebugDraw::display_lines() void DebugDraw::display_prints() { - if (cpu_print_buf_.command.v_count == 0 && gpu_print_buf_used == false) { + if (cpu_print_buf_.command.vertex_len == 0 && gpu_print_buf_used == false) { return; } GPU_debug_group_begin("Prints"); diff --git a/source/blender/draw/intern/draw_defines.h b/source/blender/draw/intern/draw_defines.h new file mode 100644 index 00000000000..3df7e47cffb --- /dev/null +++ b/source/blender/draw/intern/draw_defines.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. + */ + +/** \file + * \ingroup draw + * + * List of defines that are shared with the GPUShaderCreateInfos. We do this to avoid + * dragging larger headers into the createInfo pipeline which would cause problems. 
+ */ + +#pragma once + +#define DRW_VIEW_UBO_SLOT 0 + +#define DRW_RESOURCE_ID_SLOT 11 +#define DRW_OBJ_MAT_SLOT 10 +#define DRW_OBJ_INFOS_SLOT 9 +#define DRW_OBJ_ATTR_SLOT 8 + +#define DRW_DEBUG_PRINT_SLOT 15 +#define DRW_DEBUG_DRAW_SLOT 14 + +#define DRW_COMMAND_GROUP_SIZE 64 +#define DRW_FINALIZE_GROUP_SIZE 64 +/* Must be multiple of 32. Set to 32 for shader simplicity. */ +#define DRW_VISIBILITY_GROUP_SIZE 32 diff --git a/source/blender/draw/intern/draw_handle.hh b/source/blender/draw/intern/draw_handle.hh new file mode 100644 index 00000000000..5f96bfa5dcd --- /dev/null +++ b/source/blender/draw/intern/draw_handle.hh @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * A unique identifier for each object component. + * It is used to access each component data such as matrices and object attributes. + * It is valid only for the current draw, it is not persistent. + * + * The most significant bit is used to encode if the object needs to invert the front face winding + * because of its object matrix handedness. This is handy because this means sorting inside + * #DrawGroup command will put all inverted commands last. + * + * Default value of 0 points toward an non-cull-able object with unit bounding box centered at + * the origin. + */ + +#include "draw_shader_shared.h" + +struct Object; +struct DupliObject; + +namespace blender::draw { + +struct ResourceHandle { + uint raw; + + ResourceHandle() = default; + ResourceHandle(uint raw_) : raw(raw_){}; + ResourceHandle(uint index, bool inverted_handedness) + { + raw = index; + SET_FLAG_FROM_TEST(raw, inverted_handedness, 0x80000000u); + } + + bool has_inverted_handedness() const + { + return (raw & 0x80000000u) != 0; + } + + uint resource_index() const + { + return (raw & 0x7FFFFFFFu); + } +}; + +/* TODO(fclem): Move to somewhere more appropriated after cleaning up the header dependencies. 
*/ +struct ObjectRef { + Object *object; + /** Dupli object that corresponds to the current object. */ + DupliObject *dupli_object; + /** Object that created the dupli-list the current object is part of. */ + Object *dupli_parent; +}; + +}; // namespace blender::draw diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index f44cd33fb2b..799d0544e34 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -1001,6 +1001,8 @@ static void drw_engines_init(void) static void drw_engines_cache_init(void) { + DRW_manager_begin_sync(); + DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) { if (data->text_draw_cache) { DRW_text_cache_destroy(data->text_draw_cache); @@ -1072,6 +1074,8 @@ static void drw_engines_cache_finish(void) engine->cache_finish(data); } } + + DRW_manager_end_sync(); } static void drw_engines_draw_scene(void) diff --git a/source/blender/draw/intern/draw_manager.cc b/source/blender/draw/intern/draw_manager.cc new file mode 100644 index 00000000000..8fb2ffb39e8 --- /dev/null +++ b/source/blender/draw/intern/draw_manager.cc @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "BKE_global.h" +#include "GPU_compute.h" + +#include "draw_debug.hh" +#include "draw_defines.h" +#include "draw_manager.h" +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader.h" + +namespace blender::draw { + +Manager::~Manager() +{ + for (GPUTexture *texture : acquired_textures) { + /* Decrease refcount and free if 0. */ + GPU_texture_free(texture); + } +} + +void Manager::begin_sync() +{ + /* TODO: This means the reference is kept until further redraw or manager teardown. Instead, they + * should be released after each draw loop. But for now, mimics old DRW behavior. */ + for (GPUTexture *texture : acquired_textures) { + /* Decrease refcount and free if 0. 
*/ + GPU_texture_free(texture); + } + + acquired_textures.clear(); + +#ifdef DEBUG + /* Detect non-init data. */ + memset(matrix_buf.data(), 0xF0, resource_len_ * sizeof(*matrix_buf.data())); + memset(bounds_buf.data(), 0xF0, resource_len_ * sizeof(*bounds_buf.data())); + memset(infos_buf.data(), 0xF0, resource_len_ * sizeof(*infos_buf.data())); +#endif + resource_len_ = 0; + /* TODO(fclem): Resize buffers if too big, but with an hysteresis threshold. */ + + object_active = DST.draw_ctx.obact; + + /* Init the 0 resource. */ + resource_handle(float4x4::identity()); +} + +void Manager::end_sync() +{ + GPU_debug_group_begin("Manager.end_sync"); + + matrix_buf.push_update(); + bounds_buf.push_update(); + infos_buf.push_update(); + + debug_bind(); + + /* Dispatch compute to finalize the resources on GPU. Save a bit of CPU time. */ + uint thread_groups = divide_ceil_u(resource_len_, DRW_FINALIZE_GROUP_SIZE); + GPUShader *shader = DRW_shader_draw_resource_finalize_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "resource_len", resource_len_); + GPU_storagebuf_bind(matrix_buf, GPU_shader_get_ssbo(shader, "matrix_buf")); + GPU_storagebuf_bind(bounds_buf, GPU_shader_get_ssbo(shader, "bounds_buf")); + GPU_storagebuf_bind(infos_buf, GPU_shader_get_ssbo(shader, "infos_buf")); + GPU_compute_dispatch(shader, thread_groups, 1, 1); + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + + GPU_debug_group_end(); +} + +void Manager::debug_bind() +{ +#ifdef DEBUG + if (DST.debug == nullptr) { + return; + } + GPU_storagebuf_bind(drw_debug_gpu_draw_buf_get(), DRW_DEBUG_DRAW_SLOT); + GPU_storagebuf_bind(drw_debug_gpu_print_buf_get(), DRW_DEBUG_PRINT_SLOT); +# ifndef DISABLE_DEBUG_SHADER_PRINT_BARRIER + /* Add a barrier to allow multiple shader writing to the same buffer. 
*/ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); +# endif +#endif +} + +void Manager::submit(PassSimple &pass, View &view) +{ + view.bind(); + + debug_bind(); + + command::RecordingState state; + state.inverted_view = view.is_inverted(); + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_); + + GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT); + GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT); + // GPU_storagebuf_bind(attribute_buf, DRW_OBJ_ATTR_SLOT); /* TODO */ + + pass.submit(state); + + state.cleanup(); +} + +void Manager::submit(PassMain &pass, View &view) +{ + view.bind(); + + debug_bind(); + + bool freeze_culling = (U.experimental.use_viewport_debug && DST.draw_ctx.v3d && + (DST.draw_ctx.v3d->debug_flag & V3D_DEBUG_FREEZE_CULLING) != 0); + + view.compute_visibility(bounds_buf, resource_len_, freeze_culling); + + command::RecordingState state; + state.inverted_view = view.is_inverted(); + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_, view.visibility_buf_); + + GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT); + GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT); + // GPU_storagebuf_bind(attribute_buf, DRW_OBJ_ATTR_SLOT); /* TODO */ + + pass.submit(state); + + state.cleanup(); +} + +void Manager::submit(PassSortable &pass, View &view) +{ + pass.sort(); + + this->submit(static_cast<PassMain &>(pass), view); +} + +void Manager::submit(PassSimple &pass) +{ + debug_bind(); + + command::RecordingState state; + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_); + + GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT); + GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT); + // GPU_storagebuf_bind(attribute_buf, DRW_OBJ_ATTR_SLOT); /* TODO */ + + pass.submit(state); + + state.cleanup(); +} + +Manager::SubmitDebugOutput Manager::submit_debug(PassSimple &pass, View &view) +{ + submit(pass, view); + + pass.draw_commands_buf_.resource_id_buf_.read(); + + Manager::SubmitDebugOutput output; + 
output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(), + pass.draw_commands_buf_.resource_id_count_}; + /* There is no visibility data for PassSimple. */ + output.visibility = {(uint *)view.visibility_buf_.data(), 0}; + return output; +} + +Manager::SubmitDebugOutput Manager::submit_debug(PassMain &pass, View &view) +{ + submit(pass, view); + + GPU_finish(); + + pass.draw_commands_buf_.resource_id_buf_.read(); + view.visibility_buf_.read(); + + Manager::SubmitDebugOutput output; + output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(), + pass.draw_commands_buf_.resource_id_count_}; + output.visibility = {(uint *)view.visibility_buf_.data(), divide_ceil_u(resource_len_, 32)}; + return output; +} + +Manager::DataDebugOutput Manager::data_debug() +{ + matrix_buf.read(); + bounds_buf.read(); + infos_buf.read(); + + Manager::DataDebugOutput output; + output.matrices = {matrix_buf.data(), resource_len_}; + output.bounds = {bounds_buf.data(), resource_len_}; + output.infos = {infos_buf.data(), resource_len_}; + return output; +} + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h index a29f2fa7507..83ebe1b3c3b 100644 --- a/source/blender/draw/intern/draw_manager.h +++ b/source/blender/draw/intern/draw_manager.h @@ -694,6 +694,9 @@ bool drw_engine_data_engines_data_validate(GPUViewport *viewport, void **engine_ void drw_engine_data_cache_release(GPUViewport *viewport); void drw_engine_data_free(GPUViewport *viewport); +void DRW_manager_begin_sync(void); +void DRW_manager_end_sync(void); + #ifdef __cplusplus } #endif diff --git a/source/blender/draw/intern/draw_manager.hh b/source/blender/draw/intern/draw_manager.hh new file mode 100644 index 00000000000..5f110b8bb6b --- /dev/null +++ b/source/blender/draw/intern/draw_manager.hh @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
*/ + +#pragma once + +/** \file + * \ingroup draw + * + * `draw::Manager` is the interface between scene data and viewport engines. + * + * It holds per component data (`ObjectInfo`, `ObjectMatrices`, ...) indexed per `ResourceHandle`. + * + * \note It is currently work in progress and should replace the old global draw manager. + */ + +#include "BLI_sys_types.h" + +#include "draw_resource.hh" +#include "draw_view.hh" + +#include <string> + +namespace blender::draw { + +/* Forward declarations. */ + +namespace detail { +template<typename T> class Pass; +} // namespace detail + +namespace command { +class DrawCommandBuf; +class DrawMultiBuf; +} // namespace command + +using PassSimple = detail::Pass<command::DrawCommandBuf>; +using PassMain = detail::Pass<command::DrawMultiBuf>; +class PassSortable; + +class Manager { + using ObjectMatricesBuf = StorageArrayBuffer<ObjectMatrices, 128>; + using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>; + using ObjectInfosBuf = StorageArrayBuffer<ObjectInfos, 128>; + + public: + struct SubmitDebugOutput { + /** Indexed by resource id. */ + Span<uint32_t> visibility; + /** Indexed by drawn instance. */ + Span<uint32_t> resource_id; + }; + + struct DataDebugOutput { + /** Indexed by resource id. */ + Span<ObjectMatrices> matrices; + /** Indexed by resource id. */ + Span<ObjectBounds> bounds; + /** Indexed by resource id. */ + Span<ObjectInfos> infos; + }; + + /** + * Buffers containing all object data. Referenced by resource index. + * Exposed as public members for shader access after sync. + */ + ObjectMatricesBuf matrix_buf; + ObjectBoundsBuf bounds_buf; + ObjectInfosBuf infos_buf; + + /** List of textures coming from Image data-blocks. They need to be refcounted in order to avoid + * beeing freed in another thread. 
*/ + Vector<GPUTexture *> acquired_textures; + + private: + uint resource_len_ = 0; + Object *object = nullptr; + + Object *object_active = nullptr; + + public: + Manager(){}; + ~Manager(); + + /** + * Create a new resource handle for the given object. Can be called multiple time with the + * same object **successively** without duplicating the data. + */ + ResourceHandle resource_handle(const ObjectRef ref); + /** + * Get resource id for a loose matrix. The draw-calls for this resource handle won't be culled + * and there won't be any associated object info / bounds. Assumes correct handedness / winding. + */ + ResourceHandle resource_handle(const float4x4 &model_matrix); + /** + * Get resource id for a loose matrix with bounds. The draw-calls for this resource handle will + * be culled bute there won't be any associated object info / bounds. Assumes correct handedness + * / winding. + */ + ResourceHandle resource_handle(const float4x4 &model_matrix, + const float3 &bounds_center, + const float3 &bounds_half_extent); + + /** + * Populate additional per resource data on demand. + */ + void extract_object_attributes(ResourceHandle handle, + Object &object, + Span<GPUMaterial *> materials); + + /** + * Submit a pass for drawing. All resource reference will be dereferenced and commands will be + * sent to GPU. + */ + void submit(PassSimple &pass, View &view); + void submit(PassMain &pass, View &view); + void submit(PassSortable &pass, View &view); + /** + * Variant without any view. Must not contain any shader using `draw_view` create info. + */ + void submit(PassSimple &pass); + + /** + * Submit a pass for drawing but read back all data buffers for inspection. + */ + SubmitDebugOutput submit_debug(PassSimple &pass, View &view); + SubmitDebugOutput submit_debug(PassMain &pass, View &view); + + /** + * Check data buffers of the draw manager. Only to be used after end_sync(). 
+ */ + DataDebugOutput data_debug(); + + /** + * Will acquire the texture using ref counting and release it after drawing. To be used for + * texture coming from blender Image. + */ + void acquire_texture(GPUTexture *texture) + { + GPU_texture_ref(texture); + acquired_textures.append(texture); + } + + /** TODO(fclem): The following should become private at some point. */ + void begin_sync(); + void end_sync(); + + void debug_bind(); +}; + +inline ResourceHandle Manager::resource_handle(const ObjectRef ref) +{ + bool is_active_object = (ref.dupli_object ? ref.dupli_parent : ref.object) == object_active; + matrix_buf.get_or_resize(resource_len_).sync(*ref.object); + bounds_buf.get_or_resize(resource_len_).sync(*ref.object); + infos_buf.get_or_resize(resource_len_).sync(ref, is_active_object); + return ResourceHandle(resource_len_++, (ref.object->transflag & OB_NEG_SCALE) != 0); +} + +inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix) +{ + matrix_buf.get_or_resize(resource_len_).sync(model_matrix); + bounds_buf.get_or_resize(resource_len_).sync(); + infos_buf.get_or_resize(resource_len_).sync(); + return ResourceHandle(resource_len_++, false); +} + +inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix, + const float3 &bounds_center, + const float3 &bounds_half_extent) +{ + matrix_buf.get_or_resize(resource_len_).sync(model_matrix); + bounds_buf.get_or_resize(resource_len_).sync(bounds_center, bounds_half_extent); + infos_buf.get_or_resize(resource_len_).sync(); + return ResourceHandle(resource_len_++, false); +} + +inline void Manager::extract_object_attributes(ResourceHandle handle, + Object &object, + Span<GPUMaterial *> materials) +{ + /* TODO */ + (void)handle; + (void)object; + (void)materials; +} + +} // namespace blender::draw + +/* TODO(@fclem): This is for testing. The manager should be passed to the engine through the + * callbacks. 
*/ +blender::draw::Manager *DRW_manager_get(); +blender::draw::ObjectRef DRW_object_ref_get(Object *object); diff --git a/source/blender/draw/intern/draw_pass.hh b/source/blender/draw/intern/draw_pass.hh new file mode 100644 index 00000000000..65faa9febbc --- /dev/null +++ b/source/blender/draw/intern/draw_pass.hh @@ -0,0 +1,1004 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Passes record draw commands. Commands are executed only when a pass is submitted for execution. + * + * `PassMain`: + * Should be used on heavy load passes such as ones that may contain scene objects. Draw call + * submission is optimized for large number of draw calls. But has a significant overhead per + * #Pass. Use many #PassSub along with a main #Pass to reduce the overhead and allow groupings of + * commands. \note The draw call order inside a batch of multiple draw with the exact same state is + * not guaranteed and is not even deterministic. Use a PassSimple or PassSortable if ordering is + * needed. \note As of now, it is also quite limited in the type of draw command it can record + * (no custom vertex count, no custom first vertex). + * + * `PassSimple`: + * Does not have the overhead of #PassMain but does not have the culling and batching optimization. + * It should be used for passes that needs a few commands or that needs guaranteed draw call order. + * + * `Pass<T>::Sub`: + * A lightweight #Pass that lives inside a main #Pass. It can only be created from #Pass.sub() + * and is auto managed. This mean it can be created, filled and thrown away. A #PassSub reference + * is valid until the next #Pass.init() of the parent pass. Commands recorded inside a #PassSub are + * inserted inside the parent #Pass where the sub have been created durring submission. + * + * `PassSortable`: + * This is a sort of `PassMain` augmented with a per sub-pass sorting value. 
They can't directly + * contain draw command, everything needs to be inside sub-passes. Sub-passes are automatically + * sorted before submission. + * + * \note A pass can be recorded once and resubmitted any number of time. This can be a good + * optimization for passes that are always the same for each frame. The only thing to be aware of + * is the life time of external resources. If a pass contains draw-calls with non default + * ResourceHandle (not 0) or a reference to any non static resources (GPUBatch, PushConstant ref, + * ResourceBind ref) it will have to be re-recorded if any of these reference becomes invalid. + */ + +#include "BKE_image.h" +#include "BLI_vector.hh" +#include "DRW_gpu_wrapper.hh" +#include "GPU_debug.h" +#include "GPU_material.h" + +#include "draw_command.hh" +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader_shared.h" +#include "draw_state.h" + +#include "intern/gpu_codegen.h" + +namespace blender::draw { + +using namespace blender::draw; +using namespace blender::draw::command; + +class Manager; + +/* -------------------------------------------------------------------- */ +/** \name Pass API + * \{ */ + +namespace detail { + +/** + * Special container that never moves allocated items and has fast indexing. + */ +template<typename T, + /** Numbers of element of type T to allocate together. */ + int64_t block_size = 16> +class SubPassVector { + private: + Vector<std::unique_ptr<Vector<T, block_size>>, 0> blocks_; + + public: + void clear() + { + blocks_.clear(); + } + + int64_t append_and_get_index(T &&elem) + { + /* Do not go over the inline size so that existing members never move. 
*/ + if (blocks_.is_empty() || blocks_.last()->size() == block_size) { + blocks_.append(std::make_unique<Vector<T, block_size>>()); + } + return blocks_.last()->append_and_get_index(std::move(elem)) + + (blocks_.size() - 1) * block_size; + } + + T &operator[](int64_t index) + { + return (*blocks_[index / block_size])[index % block_size]; + } + + const T &operator[](int64_t index) const + { + return (*blocks_[index / block_size])[index % block_size]; + } +}; + +/** + * Public API of a draw pass. + */ +template< + /** Type of command buffer used to create the draw calls. */ + typename DrawCommandBufType> +class PassBase { + friend Manager; + + /** Will use texture own sampler state. */ + static constexpr eGPUSamplerState sampler_auto = GPU_SAMPLER_MAX; + + protected: + /** Highest level of the command stream. Split command stream in different command types. */ + Vector<command::Header, 0> headers_; + /** Commands referenced by headers (which contains their types). */ + Vector<command::Undetermined, 0> commands_; + /* Reference to draw commands buffer. Either own or from parent pass. */ + DrawCommandBufType &draw_commands_buf_; + /* Reference to sub-pass commands buffer. Either own or from parent pass. */ + SubPassVector<PassBase<DrawCommandBufType>> &sub_passes_; + /** Currently bound shader. Used for interface queries. */ + GPUShader *shader_; + + public: + const char *debug_name; + + PassBase(const char *name, + DrawCommandBufType &draw_command_buf, + SubPassVector<PassBase<DrawCommandBufType>> &sub_passes, + GPUShader *shader = nullptr) + : draw_commands_buf_(draw_command_buf), + sub_passes_(sub_passes), + shader_(shader), + debug_name(name){}; + + /** + * Reset the pass command pool. + * \note Implemented in derived class. Not a virtual function to avoid indirection. Here only for + * API readability listing. + */ + void init(); + + /** + * Create a sub-pass inside this pass. 
+ */ + PassBase<DrawCommandBufType> &sub(const char *name); + + /** + * Changes the fixed function pipeline state. + * Starts as DRW_STATE_NO_DRAW at the start of a Pass submission. + * SubPass inherit previous pass state. + * + * IMPORTANT: This does not set the stencil mask/reference values. Add a call to state_stencil() + * to ensure correct behavior of stencil aware draws. + */ + void state_set(DRWState state); + + /** + * Clear the current frame-buffer. + */ + void clear_color(float4 color); + void clear_depth(float depth); + void clear_stencil(uint8_t stencil); + void clear_depth_stencil(float depth, uint8_t stencil); + void clear_color_depth_stencil(float4 color, float depth, uint8_t stencil); + + /** + * Reminders: + * - (compare_mask & reference) is what is tested against (compare_mask & stencil_value) + * stencil_value being the value stored in the stencil buffer. + * - (write-mask & reference) is what gets written if the test condition is fulfilled. + */ + void state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask); + + /** + * Bind a shader. Any following bind() or push_constant() call will use its interface. + */ + void shader_set(GPUShader *shader); + + /** + * Bind a material shader along with its associated resources. Any following bind() or + * push_constant() call will use its interface. + * IMPORTANT: Assumes material is compiled and can be used (no compilation error). + */ + void material_set(Manager &manager, GPUMaterial *material); + + /** + * Record a draw call. + * \note Setting the count or first to -1 will use the values from the batch. + * \note An instance or vertex count of 0 will discard the draw call. It will not be recorded. + */ + void draw(GPUBatch *batch, + uint instance_len = -1, + uint vertex_len = -1, + uint vertex_first = -1, + ResourceHandle handle = {0}); + + /** + * Shorter version for the common case. + * \note Implemented in derived class. Not a virtual function to avoid indirection. 
+ */ + void draw(GPUBatch *batch, ResourceHandle handle); + + /** + * Record a procedural draw call. Geometry is **NOT** source from a GPUBatch. + * \note An instance or vertex count of 0 will discard the draw call. It will not be recorded. + */ + void draw_procedural(GPUPrimType primitive, + uint instance_len, + uint vertex_len, + uint vertex_first = -1, + ResourceHandle handle = {0}); + + /** + * Indirect variants. + * \note If needed, the resource id need to also be set accordingly in the DrawCommand. + */ + void draw_indirect(GPUBatch *batch, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle = {0}); + void draw_procedural_indirect(GPUPrimType primitive, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle = {0}); + + /** + * Record a compute dispatch call. + */ + void dispatch(int3 group_len); + void dispatch(int3 *group_len); + void dispatch(StorageBuffer<DispatchCommand> &indirect_buffer); + + /** + * Record a barrier call to synchronize arbitrary load/store operation between draw calls. + */ + void barrier(eGPUBarrier type); + + /** + * Bind a shader resource. + * + * Reference versions are to be used when the resource might be resize / realloc or even change + * between the time it is referenced and the time it is dereferenced for drawing. + * + * IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure data + * still alive until pass submission. + * + * \note Variations using slot will not query a shader interface and can be used before + * binding a shader. 
+ */ + void bind_image(const char *name, GPUTexture *image); + void bind_image(const char *name, GPUTexture **image); + void bind_image(int slot, GPUTexture *image); + void bind_image(int slot, GPUTexture **image); + void bind_texture(const char *name, GPUTexture *texture, eGPUSamplerState state = sampler_auto); + void bind_texture(const char *name, GPUTexture **texture, eGPUSamplerState state = sampler_auto); + void bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state = sampler_auto); + void bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state = sampler_auto); + void bind_ssbo(const char *name, GPUStorageBuf *buffer); + void bind_ssbo(const char *name, GPUStorageBuf **buffer); + void bind_ssbo(int slot, GPUStorageBuf *buffer); + void bind_ssbo(int slot, GPUStorageBuf **buffer); + void bind_ubo(const char *name, GPUUniformBuf *buffer); + void bind_ubo(const char *name, GPUUniformBuf **buffer); + void bind_ubo(int slot, GPUUniformBuf *buffer); + void bind_ubo(int slot, GPUUniformBuf **buffer); + + /** + * Update a shader constant. + * + * Reference versions are to be used when the resource might change between the time it is + * referenced and the time it is dereferenced for drawing. + * + * IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure data + * still alive until pass submission. + * + * \note bool reference version is expected to take bool1 reference which is aliased to int. 
+ */ + void push_constant(const char *name, const float &data); + void push_constant(const char *name, const float2 &data); + void push_constant(const char *name, const float3 &data); + void push_constant(const char *name, const float4 &data); + void push_constant(const char *name, const int &data); + void push_constant(const char *name, const int2 &data); + void push_constant(const char *name, const int3 &data); + void push_constant(const char *name, const int4 &data); + void push_constant(const char *name, const bool &data); + void push_constant(const char *name, const float4x4 &data); + void push_constant(const char *name, const float *data, int array_len = 1); + void push_constant(const char *name, const float2 *data, int array_len = 1); + void push_constant(const char *name, const float3 *data, int array_len = 1); + void push_constant(const char *name, const float4 *data, int array_len = 1); + void push_constant(const char *name, const int *data, int array_len = 1); + void push_constant(const char *name, const int2 *data, int array_len = 1); + void push_constant(const char *name, const int3 *data, int array_len = 1); + void push_constant(const char *name, const int4 *data, int array_len = 1); + void push_constant(const char *name, const float4x4 *data); + + /** + * Turn the pass into a string for inspection. + */ + std::string serialize(std::string line_prefix = "") const; + + friend std::ostream &operator<<(std::ostream &stream, const PassBase &pass) + { + return stream << pass.serialize(); + } + + protected: + /** + * Internal Helpers + */ + + int push_constant_offset(const char *name); + + void clear(eGPUFrameBufferBits planes, float4 color, float depth, uint8_t stencil); + + GPUBatch *procedural_batch_get(GPUPrimType primitive); + + /** + * Return a new command recorded with the given type. 
+ */ + command::Undetermined &create_command(command::Type type); + + void submit(command::RecordingState &state) const; +}; + +template<typename DrawCommandBufType> class Pass : public detail::PassBase<DrawCommandBufType> { + public: + using Sub = detail::PassBase<DrawCommandBufType>; + + private: + /** Sub-passes referenced by headers. */ + SubPassVector<detail::PassBase<DrawCommandBufType>> sub_passes_main_; + /** Draws are recorded as indirect draws for compatibility with the multi-draw pipeline. */ + DrawCommandBufType draw_commands_buf_main_; + + public: + Pass(const char *name) + : detail::PassBase<DrawCommandBufType>(name, draw_commands_buf_main_, sub_passes_main_){}; + + void init() + { + this->headers_.clear(); + this->commands_.clear(); + this->sub_passes_.clear(); + this->draw_commands_buf_.clear(); + } +}; // namespace blender::draw + +} // namespace detail + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Pass types + * \{ */ + +/** + * Normal pass type. No visibility or draw-call optimisation. + */ +// using PassSimple = detail::Pass<DrawCommandBuf>; + +/** + * Main pass type. + * Optimized for many draw calls and sub-pass. + * + * IMPORTANT: To be used only for passes containing lots of draw calls since it has a potentially + * high overhead due to batching and culling optimizations. + */ +// using PassMain = detail::Pass<DrawMultiBuf>; + +/** + * Special pass type for rendering transparent objects. + * The base level can only be composed of sub passes that will be ordered by a sorting value. + */ +class PassSortable : public PassMain { + friend Manager; + + private: + /** Sorting value associated with each sub pass. 
*/ + Vector<float> sorting_values_; + + bool sorted_ = false; + + public: + PassSortable(const char *name_) : PassMain(name_){}; + + void init() + { + sorting_values_.clear(); + sorted_ = false; + PassMain::init(); + } + + PassMain::Sub &sub(const char *name, float sorting_value) + { + int64_t index = sub_passes_.append_and_get_index( + PassBase(name, draw_commands_buf_, sub_passes_, shader_)); + headers_.append({Type::SubPass, static_cast<uint>(index)}); + sorting_values_.append(sorting_value); + return sub_passes_[index]; + } + + std::string serialize(std::string line_prefix = "") const + { + if (sorted_ == false) { + const_cast<PassSortable *>(this)->sort(); + } + return PassMain::serialize(line_prefix); + } + + protected: + void sort() + { + if (sorted_ == false) { + std::sort(headers_.begin(), headers_.end(), [&](Header &a, Header &b) { + BLI_assert(a.type == Type::SubPass && b.type == Type::SubPass); + float a_val = sorting_values_[a.index]; + float b_val = sorting_values_[b.index]; + return a_val < b_val || (a_val == b_val && a.index < b.index); + }); + sorted_ = true; + } + } +}; + +/** \} */ + +namespace detail { + +/* -------------------------------------------------------------------- */ +/** \name PassBase Implementation + * \{ */ + +template<class T> inline command::Undetermined &PassBase<T>::create_command(command::Type type) +{ + int64_t index = commands_.append_and_get_index({}); + headers_.append({type, static_cast<uint>(index)}); + return commands_[index]; +} + +template<class T> +inline void PassBase<T>::clear(eGPUFrameBufferBits planes, + float4 color, + float depth, + uint8_t stencil) +{ + create_command(command::Type::Clear).clear = {(uint8_t)planes, stencil, depth, color}; +} + +template<class T> inline GPUBatch *PassBase<T>::procedural_batch_get(GPUPrimType primitive) +{ + switch (primitive) { + case GPU_PRIM_POINTS: + return drw_cache_procedural_points_get(); + case GPU_PRIM_LINES: + return drw_cache_procedural_lines_get(); + case 
GPU_PRIM_TRIS: + return drw_cache_procedural_triangles_get(); + case GPU_PRIM_TRI_STRIP: + return drw_cache_procedural_triangle_strips_get(); + default: + /* Add new one as needed. */ + BLI_assert_unreachable(); + return nullptr; + } +} + +template<class T> inline PassBase<T> &PassBase<T>::sub(const char *name) +{ + int64_t index = sub_passes_.append_and_get_index( + PassBase(name, draw_commands_buf_, sub_passes_, shader_)); + headers_.append({command::Type::SubPass, static_cast<uint>(index)}); + return sub_passes_[index]; +} + +template<class T> void PassBase<T>::submit(command::RecordingState &state) const +{ + GPU_debug_group_begin(debug_name); + + for (const command::Header &header : headers_) { + switch (header.type) { + default: + case Type::None: + break; + case Type::SubPass: + sub_passes_[header.index].submit(state); + break; + case command::Type::ShaderBind: + commands_[header.index].shader_bind.execute(state); + break; + case command::Type::ResourceBind: + commands_[header.index].resource_bind.execute(); + break; + case command::Type::PushConstant: + commands_[header.index].push_constant.execute(state); + break; + case command::Type::Draw: + commands_[header.index].draw.execute(state); + break; + case command::Type::DrawMulti: + commands_[header.index].draw_multi.execute(state); + break; + case command::Type::DrawIndirect: + commands_[header.index].draw_indirect.execute(state); + break; + case command::Type::Dispatch: + commands_[header.index].dispatch.execute(state); + break; + case command::Type::DispatchIndirect: + commands_[header.index].dispatch_indirect.execute(state); + break; + case command::Type::Barrier: + commands_[header.index].barrier.execute(); + break; + case command::Type::Clear: + commands_[header.index].clear.execute(); + break; + case command::Type::StateSet: + commands_[header.index].state_set.execute(state); + break; + case command::Type::StencilSet: + commands_[header.index].stencil_set.execute(); + break; + } + } + + 
GPU_debug_group_end(); +} + +template<class T> std::string PassBase<T>::serialize(std::string line_prefix) const +{ + std::stringstream ss; + ss << line_prefix << "." << debug_name << std::endl; + line_prefix += " "; + for (const command::Header &header : headers_) { + switch (header.type) { + default: + case Type::None: + break; + case Type::SubPass: + ss << sub_passes_[header.index].serialize(line_prefix); + break; + case Type::ShaderBind: + ss << line_prefix << commands_[header.index].shader_bind.serialize() << std::endl; + break; + case Type::ResourceBind: + ss << line_prefix << commands_[header.index].resource_bind.serialize() << std::endl; + break; + case Type::PushConstant: + ss << line_prefix << commands_[header.index].push_constant.serialize() << std::endl; + break; + case Type::Draw: + ss << line_prefix << commands_[header.index].draw.serialize() << std::endl; + break; + case Type::DrawMulti: + ss << commands_[header.index].draw_multi.serialize(line_prefix); + break; + case Type::DrawIndirect: + ss << line_prefix << commands_[header.index].draw_indirect.serialize() << std::endl; + break; + case Type::Dispatch: + ss << line_prefix << commands_[header.index].dispatch.serialize() << std::endl; + break; + case Type::DispatchIndirect: + ss << line_prefix << commands_[header.index].dispatch_indirect.serialize() << std::endl; + break; + case Type::Barrier: + ss << line_prefix << commands_[header.index].barrier.serialize() << std::endl; + break; + case Type::Clear: + ss << line_prefix << commands_[header.index].clear.serialize() << std::endl; + break; + case Type::StateSet: + ss << line_prefix << commands_[header.index].state_set.serialize() << std::endl; + break; + case Type::StencilSet: + ss << line_prefix << commands_[header.index].stencil_set.serialize() << std::endl; + break; + } + } + return ss.str(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw calls + * \{ */ + +template<class T> +inline 
void PassBase<T>::draw( + GPUBatch *batch, uint instance_len, uint vertex_len, uint vertex_first, ResourceHandle handle) +{ + if (instance_len == 0 || vertex_len == 0) { + return; + } + BLI_assert(shader_); + draw_commands_buf_.append_draw( + headers_, commands_, batch, instance_len, vertex_len, vertex_first, handle); +} + +template<class T> inline void PassBase<T>::draw(GPUBatch *batch, ResourceHandle handle) +{ + this->draw(batch, -1, -1, -1, handle); +} + +template<class T> +inline void PassBase<T>::draw_procedural(GPUPrimType primitive, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) +{ + this->draw(procedural_batch_get(primitive), instance_len, vertex_len, vertex_first, handle); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Indirect draw calls + * \{ */ + +template<class T> +inline void PassBase<T>::draw_indirect(GPUBatch *batch, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle) +{ + BLI_assert(shader_); + create_command(Type::DrawIndirect).draw_indirect = {batch, &indirect_buffer, handle}; +} + +template<class T> +inline void PassBase<T>::draw_procedural_indirect( + GPUPrimType primitive, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle) +{ + this->draw_indirect(procedural_batch_get(primitive), indirect_buffer, handle); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Compute Dispatch Implementation + * \{ */ + +template<class T> inline void PassBase<T>::dispatch(int3 group_len) +{ + BLI_assert(shader_); + create_command(Type::Dispatch).dispatch = {group_len}; +} + +template<class T> inline void PassBase<T>::dispatch(int3 *group_len) +{ + BLI_assert(shader_); + create_command(Type::Dispatch).dispatch = {group_len}; +} + +template<class T> +inline void PassBase<T>::dispatch(StorageBuffer<DispatchCommand> &indirect_buffer) +{ + BLI_assert(shader_); + 
create_command(Type::DispatchIndirect).dispatch_indirect = {&indirect_buffer}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Clear Implementation + * \{ */ + +template<class T> inline void PassBase<T>::clear_color(float4 color) +{ + this->clear(GPU_COLOR_BIT, color, 0.0f, 0); +} + +template<class T> inline void PassBase<T>::clear_depth(float depth) +{ + this->clear(GPU_DEPTH_BIT, float4(0.0f), depth, 0); +} + +template<class T> inline void PassBase<T>::clear_stencil(uint8_t stencil) +{ + this->clear(GPU_STENCIL_BIT, float4(0.0f), 0.0f, stencil); +} + +template<class T> inline void PassBase<T>::clear_depth_stencil(float depth, uint8_t stencil) +{ + this->clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT, float4(0.0f), depth, stencil); +} + +template<class T> +inline void PassBase<T>::clear_color_depth_stencil(float4 color, float depth, uint8_t stencil) +{ + this->clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT | GPU_COLOR_BIT, color, depth, stencil); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Barrier Implementation + * \{ */ + +template<class T> inline void PassBase<T>::barrier(eGPUBarrier type) +{ + create_command(Type::Barrier).barrier = {type}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name State Implementation + * \{ */ + +template<class T> inline void PassBase<T>::state_set(DRWState state) +{ + create_command(Type::StateSet).state_set = {state}; +} + +template<class T> +inline void PassBase<T>::state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask) +{ + create_command(Type::StencilSet).stencil_set = {write_mask, reference, compare_mask}; +} + +template<class T> inline void PassBase<T>::shader_set(GPUShader *shader) +{ + shader_ = shader; + create_command(Type::ShaderBind).shader_bind = {shader}; +} + +template<class T> inline void PassBase<T>::material_set(Manager &manager, GPUMaterial 
*material) +{ + GPUPass *gpupass = GPU_material_get_pass(material); + shader_set(GPU_pass_shader_get(gpupass)); + + /* Bind all textures needed by the material. */ + ListBase textures = GPU_material_textures(material); + for (GPUMaterialTexture *tex : ListBaseWrapper<GPUMaterialTexture>(textures)) { + if (tex->ima) { + /* Image */ + ImageUser *iuser = tex->iuser_available ? &tex->iuser : nullptr; + if (tex->tiled_mapping_name[0]) { + GPUTexture *tiles = BKE_image_get_gpu_tiles(tex->ima, iuser, nullptr); + manager.acquire_texture(tiles); + bind_texture(tex->sampler_name, tiles, (eGPUSamplerState)tex->sampler_state); + + GPUTexture *tile_map = BKE_image_get_gpu_tilemap(tex->ima, iuser, nullptr); + manager.acquire_texture(tile_map); + bind_texture(tex->tiled_mapping_name, tile_map, (eGPUSamplerState)tex->sampler_state); + } + else { + GPUTexture *texture = BKE_image_get_gpu_texture(tex->ima, iuser, nullptr); + manager.acquire_texture(texture); + bind_texture(tex->sampler_name, texture, (eGPUSamplerState)tex->sampler_state); + } + } + else if (tex->colorband) { + /* Color Ramp */ + bind_texture(tex->sampler_name, *tex->colorband); + } + } + + GPUUniformBuf *ubo = GPU_material_uniform_buffer_get(material); + if (ubo != nullptr) { + bind_ubo(GPU_UBO_BLOCK_NAME, ubo); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Resource bind Implementation + * \{ */ + +template<class T> inline int PassBase<T>::push_constant_offset(const char *name) +{ + return GPU_shader_get_uniform(shader_, name); +} + +template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf *buffer) +{ + this->bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer); +} + +template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf *buffer) +{ + this->bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer); +} + +template<class T> +inline void PassBase<T>::bind_texture(const char *name, + 
GPUTexture *texture, + eGPUSamplerState state) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state); +} + +template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture *image) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), image); +} + +template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf *buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf *buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> +inline void PassBase<T>::bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state) +{ + create_command(Type::ResourceBind).resource_bind = {slot, texture, state}; +} + +template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture *image) +{ + create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)}; +} + +template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf **buffer) +{ + this->bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer); +} + +template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf **buffer) +{ + this->bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer); +} + +template<class T> +inline void PassBase<T>::bind_texture(const char *name, + GPUTexture **texture, + eGPUSamplerState state) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state); +} + +template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture **image) +{ + this->bind_image(GPU_shader_get_texture_binding(shader_, name), image); +} + +template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf **buffer) +{ + + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf 
**buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> +inline void PassBase<T>::bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state) +{ + create_command(Type::ResourceBind).resource_bind = {slot, texture, state}; +} + +template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture **image) +{ + create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Push Constant Implementation + * \{ */ + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float2 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float3 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int2 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int3 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void 
PassBase<T>::push_constant(const char *name, const int4 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const bool &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float2 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float3 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float4 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int2 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int3 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int4 *data, int array_len) +{ + 
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 *data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 &data) +{ + /* WORKAROUND: Push 3 consecutive commands to hold the 64 bytes of the float4x4. + * This assumes that all commands are always stored in flat array of memory. */ + Undetermined commands[3]; + + PushConstant &cmd = commands[0].push_constant; + cmd.location = push_constant_offset(name); + cmd.array_len = 1; + cmd.comp_len = 16; + cmd.type = PushConstant::Type::FloatValue; + /* Copy overrides the next 2 commands. We append them as Type::None to not evaluate them. */ + *reinterpret_cast<float4x4 *>(&cmd.float4_value) = data; + + create_command(Type::PushConstant) = commands[0]; + create_command(Type::None) = commands[1]; + create_command(Type::None) = commands[2]; +} + +/** \} */ + +} // namespace detail + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_resource.hh b/source/blender/draw/intern/draw_resource.hh new file mode 100644 index 00000000000..503833e8a6d --- /dev/null +++ b/source/blender/draw/intern/draw_resource.hh @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Component / Object level resources like object attributes, matrices, visibility etc... + * Each of them are reference by resource index (#ResourceHandle). 
+ */ + +#include "BKE_curve.h" +#include "BKE_duplilist.h" +#include "BKE_mesh.h" +#include "BKE_object.h" +#include "BKE_volume.h" +#include "BLI_hash.h" +#include "DNA_curve_types.h" +#include "DNA_layer_types.h" +#include "DNA_meta_types.h" +#include "DNA_object_types.h" + +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_shader_shared.h" + +/* -------------------------------------------------------------------- */ +/** \name ObjectMatrices + * \{ */ + +inline void ObjectMatrices::sync(const Object &object) +{ + model = object.obmat; + model_inverse = object.imat; +} + +inline void ObjectMatrices::sync(const float4x4 &model_matrix) +{ + model = model_matrix; + model_inverse = model_matrix.inverted(); +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectMatrices &matrices) +{ + stream << "ObjectMatrices(" << std::endl; + stream << "model=" << matrices.model << ", " << std::endl; + stream << "model_inverse=" << matrices.model_inverse << ")" << std::endl; + return stream; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name ObjectInfos + * \{ */ + +ENUM_OPERATORS(eObjectInfoFlag, OBJECT_NEGATIVE_SCALE) + +inline void ObjectInfos::sync() +{ + flag = eObjectInfoFlag::OBJECT_NO_INFO; +} + +inline void ObjectInfos::sync(const blender::draw::ObjectRef ref, bool is_active_object) +{ + color = ref.object->color; + index = ref.object->index; + SET_FLAG_FROM_TEST(flag, is_active_object, eObjectInfoFlag::OBJECT_ACTIVE); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_SELECTED, eObjectInfoFlag::OBJECT_SELECTED); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_FROM_DUPLI, eObjectInfoFlag::OBJECT_FROM_DUPLI); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_FROM_SET, eObjectInfoFlag::OBJECT_FROM_SET); + SET_FLAG_FROM_TEST( + flag, ref.object->transflag & OB_NEG_SCALE, eObjectInfoFlag::OBJECT_NEGATIVE_SCALE); + + if (ref.dupli_object == nullptr) { + /* 
TODO(fclem): this is rather costly to do at draw time. Maybe we can + * put it in ob->runtime and make depsgraph ensure it is up to date. */ + random = BLI_hash_int_2d(BLI_hash_string(ref.object->id.name + 2), 0) * (1.0f / 0xFFFFFFFF); + } + else { + random = ref.dupli_object->random_id * (1.0f / 0xFFFFFFFF); + } + /* FIXME(review): this unconditionally overwrites the `random` value just computed in the if/else above — the default assignment should happen before that conditional, not after it. */ + random = 0.0f; + + if (ref.object->data == nullptr) { + orco_add = float3(0.0f); + orco_mul = float3(1.0f); + return; + } + + switch (GS(reinterpret_cast<ID *>(ref.object->data)->name)) { + case ID_VO: { + BoundBox &bbox = *BKE_volume_boundbox_get(ref.object); + orco_add = (float3(bbox.vec[6]) + float3(bbox.vec[0])) * 0.5f; /* Center. */ + orco_mul = float3(bbox.vec[6]) - float3(bbox.vec[0]); /* Size. */ + break; + } + case ID_ME: { + BKE_mesh_texspace_get(static_cast<Mesh *>(ref.object->data), orco_add, orco_mul); + break; + } + case ID_CU_LEGACY: { + Curve &cu = *static_cast<Curve *>(ref.object->data); + BKE_curve_texspace_ensure(&cu); + orco_add = cu.loc; + orco_mul = cu.size; + break; + } + case ID_MB: { + MetaBall &mb = *static_cast<MetaBall *>(ref.object->data); + orco_add = mb.loc; + orco_mul = mb.size; + break; + } + default: + orco_add = float3(0.0f); + orco_mul = float3(1.0f); + break; + } +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectInfos &infos) +{ + stream << "ObjectInfos("; + if (infos.flag == eObjectInfoFlag::OBJECT_NO_INFO) { + stream << "skipped)" << std::endl; + return stream; + } + stream << "orco_add=" << infos.orco_add << ", "; + stream << "orco_mul=" << infos.orco_mul << ", "; + stream << "color=" << infos.color << ", "; + stream << "index=" << infos.index << ", "; + stream << "random=" << infos.random << ", "; + stream << "flag=" << infos.flag << ")" << std::endl; + return stream; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name ObjectBounds + * \{ */ + +inline void ObjectBounds::sync() +{ + bounding_sphere.w = 
-1.0f; /* Disable test. */ +} + +inline void ObjectBounds::sync(Object &ob) +{ + const BoundBox *bbox = BKE_object_boundbox_get(&ob); + if (bbox == nullptr) { + bounding_sphere.w = -1.0f; /* Disable test. */ + return; + } + *reinterpret_cast<float3 *>(&bounding_corners[0]) = bbox->vec[0]; + *reinterpret_cast<float3 *>(&bounding_corners[1]) = bbox->vec[4]; + *reinterpret_cast<float3 *>(&bounding_corners[2]) = bbox->vec[3]; + *reinterpret_cast<float3 *>(&bounding_corners[3]) = bbox->vec[1]; + bounding_sphere.w = 0.0f; /* Enable test. */ +} + +inline void ObjectBounds::sync(const float3 ¢er, const float3 &size) +{ + *reinterpret_cast<float3 *>(&bounding_corners[0]) = center - size; + *reinterpret_cast<float3 *>(&bounding_corners[1]) = center + float3(+size.x, -size.y, -size.z); + *reinterpret_cast<float3 *>(&bounding_corners[2]) = center + float3(-size.x, +size.y, -size.z); + *reinterpret_cast<float3 *>(&bounding_corners[3]) = center + float3(-size.x, -size.y, +size.z); + bounding_sphere.w = 0.0; /* Enable test. 
*/ +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectBounds &bounds) +{ + stream << "ObjectBounds("; + if (bounds.bounding_sphere.w == -1.0f) { + stream << "skipped)" << std::endl; + return stream; + } + stream << std::endl; + stream << ".bounding_corners[0]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[0]) << std::endl; + stream << ".bounding_corners[1]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[1]) << std::endl; + stream << ".bounding_corners[2]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[2]) << std::endl; + stream << ".bounding_corners[3]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[3]) << std::endl; + stream << ".sphere=(pos=" << float3(bounds.bounding_sphere) + << ", rad=" << bounds.bounding_sphere.w << std::endl; + stream << ")" << std::endl; + return stream; +} + +/** \} */ diff --git a/source/blender/draw/intern/draw_shader.cc b/source/blender/draw/intern/draw_shader.cc index ecb30d54b64..960348b4a94 100644 --- a/source/blender/draw/intern/draw_shader.cc +++ b/source/blender/draw/intern/draw_shader.cc @@ -17,15 +17,15 @@ #include "draw_shader.h" extern "C" char datatoc_common_hair_lib_glsl[]; - extern "C" char datatoc_common_hair_refine_vert_glsl[]; -extern "C" char datatoc_common_hair_refine_comp_glsl[]; -extern "C" char datatoc_gpu_shader_3D_smooth_color_frag_glsl[]; static struct { struct GPUShader *hair_refine_sh[PART_REFINE_MAX_SHADER]; struct GPUShader *debug_print_display_sh; struct GPUShader *debug_draw_display_sh; + struct GPUShader *draw_visibility_compute_sh; + struct GPUShader *draw_resource_finalize_sh; + struct GPUShader *draw_command_generate_sh; } e_data = {{nullptr}}; /* -------------------------------------------------------------------- */ @@ -127,6 +127,31 @@ GPUShader *DRW_shader_debug_draw_display_get() return e_data.debug_draw_display_sh; } +GPUShader *DRW_shader_draw_visibility_compute_get() +{ + if 
(e_data.draw_visibility_compute_sh == nullptr) { + e_data.draw_visibility_compute_sh = GPU_shader_create_from_info_name( + "draw_visibility_compute"); + } + return e_data.draw_visibility_compute_sh; +} + +GPUShader *DRW_shader_draw_resource_finalize_get() +{ + if (e_data.draw_resource_finalize_sh == nullptr) { + e_data.draw_resource_finalize_sh = GPU_shader_create_from_info_name("draw_resource_finalize"); + } + return e_data.draw_resource_finalize_sh; +} + +GPUShader *DRW_shader_draw_command_generate_get() +{ + if (e_data.draw_command_generate_sh == nullptr) { + e_data.draw_command_generate_sh = GPU_shader_create_from_info_name("draw_command_generate"); + } + return e_data.draw_command_generate_sh; +} + /** \} */ void DRW_shaders_free() @@ -136,4 +161,7 @@ void DRW_shaders_free() } DRW_SHADER_FREE_SAFE(e_data.debug_print_display_sh); DRW_SHADER_FREE_SAFE(e_data.debug_draw_display_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_visibility_compute_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_resource_finalize_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_command_generate_sh); } diff --git a/source/blender/draw/intern/draw_shader.h b/source/blender/draw/intern/draw_shader.h index dabb4b3327f..3b8c0425fa9 100644 --- a/source/blender/draw/intern/draw_shader.h +++ b/source/blender/draw/intern/draw_shader.h @@ -32,6 +32,9 @@ struct GPUShader *DRW_shader_curves_refine_get(CurvesEvalShader type, struct GPUShader *DRW_shader_debug_print_display_get(void); struct GPUShader *DRW_shader_debug_draw_display_get(void); +struct GPUShader *DRW_shader_draw_visibility_compute_get(void); +struct GPUShader *DRW_shader_draw_resource_finalize_get(void); +struct GPUShader *DRW_shader_draw_command_generate_get(void); void DRW_shaders_free(void); diff --git a/source/blender/draw/intern/draw_shader_shared.h b/source/blender/draw/intern/draw_shader_shared.h index 90a6475c42b..00d54311548 100644 --- a/source/blender/draw/intern/draw_shader_shared.h +++ b/source/blender/draw/intern/draw_shader_shared.h @@ -5,18 
+5,35 @@ # include "GPU_shader.h" # include "GPU_shader_shared_utils.h" +# include "draw_defines.h" typedef struct ViewInfos ViewInfos; typedef struct ObjectMatrices ObjectMatrices; typedef struct ObjectInfos ObjectInfos; +typedef struct ObjectBounds ObjectBounds; typedef struct VolumeInfos VolumeInfos; typedef struct CurvesInfos CurvesInfos; typedef struct DrawCommand DrawCommand; -typedef struct DrawCommandIndexed DrawCommandIndexed; typedef struct DispatchCommand DispatchCommand; typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer; typedef struct DRWDebugVert DRWDebugVert; typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer; + +# ifdef __cplusplus +/* C++ only forward declarations. */ +struct Object; + +namespace blender::draw { + +struct ObjectRef; + +} // namespace blender::draw + +# else /* __cplusplus */ +/* C only forward declarations. */ +typedef enum eObjectInfoFlag eObjectInfoFlag; + +# endif #endif #define DRW_SHADER_SHARED_H @@ -48,15 +65,18 @@ struct ViewInfos { float2 viewport_size_inverse; /** Frustum culling data. */ - /** NOTE: vec3 arrays are padded to vec4. */ + /** \note vec3 array padded to vec4. */ float4 frustum_corners[8]; float4 frustum_planes[6]; + float4 frustum_bound_sphere; /** For debugging purpose */ /* Mouse pixel. */ int2 mouse_pixel; - int2 _pad0; + /** True if facing needs to be inverted. 
*/ + bool1 is_inverted; + int _pad0; }; BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) @@ -74,23 +94,89 @@ BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) # define CameraTexCoFactors drw_view.viewcamtexcofac #endif +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug draw shapes + * \{ */ + struct ObjectMatrices { - float4x4 drw_modelMatrix; - float4x4 drw_modelMatrixInverse; + float4x4 model; + float4x4 model_inverse; + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(const Object &object); + void sync(const float4x4 &model_matrix); +#endif +}; +BLI_STATIC_ASSERT_ALIGN(ObjectMatrices, 16) + +enum eObjectInfoFlag { + OBJECT_SELECTED = (1u << 0u), + OBJECT_FROM_DUPLI = (1u << 1u), + OBJECT_FROM_SET = (1u << 2u), + OBJECT_ACTIVE = (1u << 3u), + OBJECT_NEGATIVE_SCALE = (1u << 4u), + /* Avoid skipped info to change culling. */ + OBJECT_NO_INFO = ~OBJECT_NEGATIVE_SCALE }; -BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) struct ObjectInfos { - float4 drw_OrcoTexCoFactors[2]; - float4 drw_ObjectColor; - float4 drw_Infos; +#if defined(GPU_SHADER) && !defined(DRAW_FINALIZE_SHADER) + /* TODO Rename to struct member for glsl too. */ + float4 orco_mul_bias[2]; + float4 color; + float4 infos; +#else + /** Uploaded as center + size. Converted to mul+bias to local coord. */ + float3 orco_add; + float _pad0; + float3 orco_mul; + float _pad1; + + float4 color; + uint index; + uint _pad2; + float random; + eObjectInfoFlag flag; +#endif + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(); + void sync(const blender::draw::ObjectRef ref, bool is_active_object); +#endif }; -BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) +BLI_STATIC_ASSERT_ALIGN(ObjectInfos, 16) + +struct ObjectBounds { + /** + * Uploaded as vertex (0, 4, 3, 1) of the bbox in local space, matching XYZ axis order. + * Then processed by GPU and stored as (0, 4-0, 3-0, 1-0) in world space for faster culling. 
+ */ + float4 bounding_corners[4]; + /** Bounding sphere derived from the bounding corner. Computed on GPU. */ + float4 bounding_sphere; + /** Radius of the inscribed sphere derived from the bounding corner. Computed on GPU. */ +#define _inner_sphere_radius bounding_corners[3].w + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(); + void sync(Object &ob); + void sync(const float3 ¢er, const float3 &size); +#endif +}; +BLI_STATIC_ASSERT_ALIGN(ObjectBounds, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Object attributes + * \{ */ struct VolumeInfos { - /* Object to grid-space. */ + /** Object to grid-space. */ float4x4 grids_xform[DRW_GRID_PER_VOLUME_MAX]; - /* NOTE: vec4 for alignment. Only float3 needed. */ + /** \note vec4 for alignment. Only float3 needed. */ float4 color_mul; float density_scale; float temperature_mul; @@ -100,38 +186,41 @@ struct VolumeInfos { BLI_STATIC_ASSERT_ALIGN(VolumeInfos, 16) struct CurvesInfos { - /* Per attribute scope, follows loading order. - * NOTE: uint as bool in GLSL is 4 bytes. - * NOTE: GLSL pad arrays of scalar to 16 bytes (std140). */ + /** Per attribute scope, follows loading order. + * \note uint as bool in GLSL is 4 bytes. + * \note GLSL pad arrays of scalar to 16 bytes (std140). */ uint4 is_point_attribute[DRW_ATTRIBUTE_PER_CURVES_MAX]; }; BLI_STATIC_ASSERT_ALIGN(CurvesInfos, 16) -#define OrcoTexCoFactors (drw_infos[resource_id].drw_OrcoTexCoFactors) -#define ObjectInfo (drw_infos[resource_id].drw_Infos) -#define ObjectColor (drw_infos[resource_id].drw_ObjectColor) +/** \} */ -/* Indirect commands structures. */ +/* -------------------------------------------------------------------- */ +/** \name Indirect commands structures. 
+ * \{ */ struct DrawCommand { - uint v_count; - uint i_count; - uint v_first; - uint i_first; -}; -BLI_STATIC_ASSERT_ALIGN(DrawCommand, 16) - -struct DrawCommandIndexed { - uint v_count; - uint i_count; - uint v_first; + /* TODO(fclem): Rename */ + uint vertex_len; + uint instance_len; + uint vertex_first; +#if defined(GPU_SHADER) uint base_index; - uint i_first; - uint _pad0; - uint _pad1; - uint _pad2; + /** \note base_index is i_first for non-indexed draw-calls. */ +# define _instance_first_array base_index +#else + union { + uint base_index; + /* Use this instead of instance_first_indexed for non indexed draw calls. */ + uint instance_first_array; + }; +#endif + + uint instance_first_indexed; + + uint _pad0, _pad1, _pad2; }; -BLI_STATIC_ASSERT_ALIGN(DrawCommandIndexed, 16) +BLI_STATIC_ASSERT_ALIGN(DrawCommand, 16) struct DispatchCommand { uint num_groups_x; @@ -141,13 +230,15 @@ struct DispatchCommand { }; BLI_STATIC_ASSERT_ALIGN(DispatchCommand, 16) +/** \} */ + /* -------------------------------------------------------------------- */ /** \name Debug print * \{ */ /* Take the header (DrawCommand) into account. */ #define DRW_DEBUG_PRINT_MAX (8 * 1024) - 4 -/* NOTE: Cannot be more than 255 (because of column encoding). */ +/** \note Cannot be more than 255 (because of column encoding). */ #define DRW_DEBUG_PRINT_WORD_WRAP_COLUMN 120u /* The debug print buffer is laid-out as the following struct. @@ -164,6 +255,9 @@ BLI_STATIC_ASSERT_ALIGN(DRWDebugPrintBuffer, 16) /* Reuse first instance as row index as we don't use instancing. Equivalent to * `DRWDebugPrintBuffer.command.i_first`. */ #define drw_debug_print_row_shared drw_debug_print_buf[3] +/** Offset to the first data. Equal to: sizeof(DrawCommand) / sizeof(uint). + * This is needed because we bind the whole buffer as a `uint` array. */ +#define drw_debug_print_offset 8 /** \} */ @@ -194,5 +288,8 @@ BLI_STATIC_ASSERT_ALIGN(DRWDebugPrintBuffer, 16) /* Equivalent to `DRWDebugDrawBuffer.command.v_count`. 
*/ #define drw_debug_draw_v_count drw_debug_verts_buf[0].pos0 +/** Offset to the first data. Equal to: sizeof(DrawCommand) / sizeof(DRWDebugVert). + * This is needed because we bind the whole buffer as a `DRWDebugVert` array. */ +#define drw_debug_draw_offset 2 /** \} */ diff --git a/source/blender/draw/intern/draw_state.h b/source/blender/draw/intern/draw_state.h new file mode 100644 index 00000000000..bf1e63e0852 --- /dev/null +++ b/source/blender/draw/intern/draw_state.h @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/** \file + * \ingroup draw + * + * Internal Pipeline State tracking. It is higher level than GPU state as everything fits a single + * enum. + */ + +/** + * DRWState is a bit-mask that stores the current render state and the desired render state. Based + * on the differences the minimum state changes can be invoked to setup the desired render state. + * + * The Write Stencil, Stencil test, Depth test and Blend state options are mutual exclusive + * therefore they aren't ordered as a bit mask. + */ +typedef enum { + /** To be used for compute passes. */ + DRW_STATE_NO_DRAW = 0, + /** Write mask */ + DRW_STATE_WRITE_DEPTH = (1 << 0), + DRW_STATE_WRITE_COLOR = (1 << 1), + /* Write Stencil. These options are mutual exclusive and packed into 2 bits */ + DRW_STATE_WRITE_STENCIL = (1 << 2), + DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2), + DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2), + /** Depth test. These options are mutual exclusive and packed into 3 bits */ + DRW_STATE_DEPTH_ALWAYS = (1 << 4), + DRW_STATE_DEPTH_LESS = (2 << 4), + DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4), + DRW_STATE_DEPTH_EQUAL = (4 << 4), + DRW_STATE_DEPTH_GREATER = (5 << 4), + DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4), + /** Culling test */ + DRW_STATE_CULL_BACK = (1 << 7), + DRW_STATE_CULL_FRONT = (1 << 8), + /** Stencil test. 
These options are mutually exclusive and packed into 2 bits. */ + DRW_STATE_STENCIL_ALWAYS = (1 << 9), + DRW_STATE_STENCIL_EQUAL = (2 << 9), + DRW_STATE_STENCIL_NEQUAL = (3 << 9), + + /** Blend state. These options are mutual exclusive and packed into 4 bits */ + DRW_STATE_BLEND_ADD = (1 << 11), + /** Same as additive but let alpha accumulate without pre-multiply. */ + DRW_STATE_BLEND_ADD_FULL = (2 << 11), + /** Standard alpha blending. */ + DRW_STATE_BLEND_ALPHA = (3 << 11), + /** Use that if color is already pre-multiply by alpha. */ + DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11), + DRW_STATE_BLEND_BACKGROUND = (5 << 11), + DRW_STATE_BLEND_OIT = (6 << 11), + DRW_STATE_BLEND_MUL = (7 << 11), + DRW_STATE_BLEND_SUB = (8 << 11), + /** Use dual source blending. WARNING: Only one color buffer allowed. */ + DRW_STATE_BLEND_CUSTOM = (9 << 11), + DRW_STATE_LOGIC_INVERT = (10 << 11), + DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11), + + DRW_STATE_IN_FRONT_SELECT = (1 << 27), + DRW_STATE_SHADOW_OFFSET = (1 << 28), + DRW_STATE_CLIP_PLANES = (1 << 29), + DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30), + /** DO NOT USE. Assumed always enabled. Only used internally. 
*/ + DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31), +} DRWState; + +ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE); + +#define DRW_STATE_DEFAULT \ + (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL) +#define DRW_STATE_BLEND_ENABLED \ + (DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \ + DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \ + DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT) +#define DRW_STATE_RASTERIZER_ENABLED \ + (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \ + DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) +#define DRW_STATE_DEPTH_TEST_ENABLED \ + (DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \ + DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL) +#define DRW_STATE_STENCIL_TEST_ENABLED \ + (DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL) +#define DRW_STATE_WRITE_STENCIL_ENABLED \ + (DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \ + DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus + +namespace blender::draw { + +/* -------------------------------------------------------------------- */ +/** \name DRWState to GPU state conversion + * \{ */ + +static inline eGPUWriteMask to_write_mask(DRWState state) +{ + eGPUWriteMask write_mask = GPU_WRITE_NONE; + if (state & DRW_STATE_WRITE_DEPTH) { + write_mask |= GPU_WRITE_DEPTH; + } + if (state & DRW_STATE_WRITE_COLOR) { + write_mask |= GPU_WRITE_COLOR; + } + if (state & DRW_STATE_WRITE_STENCIL_ENABLED) { + write_mask |= GPU_WRITE_STENCIL; + } + return write_mask; +} + +static inline eGPUFaceCullTest to_face_cull_test(DRWState state) +{ + switch (state & (DRW_STATE_CULL_BACK | DRW_STATE_CULL_FRONT)) { + case DRW_STATE_CULL_BACK: + return GPU_CULL_BACK; + case 
DRW_STATE_CULL_FRONT: + return GPU_CULL_FRONT; + default: + return GPU_CULL_NONE; + } +} + +static inline eGPUDepthTest to_depth_test(DRWState state) +{ + switch (state & DRW_STATE_DEPTH_TEST_ENABLED) { + case DRW_STATE_DEPTH_LESS: + return GPU_DEPTH_LESS; + case DRW_STATE_DEPTH_LESS_EQUAL: + return GPU_DEPTH_LESS_EQUAL; + case DRW_STATE_DEPTH_EQUAL: + return GPU_DEPTH_EQUAL; + case DRW_STATE_DEPTH_GREATER: + return GPU_DEPTH_GREATER; + case DRW_STATE_DEPTH_GREATER_EQUAL: + return GPU_DEPTH_GREATER_EQUAL; + case DRW_STATE_DEPTH_ALWAYS: + return GPU_DEPTH_ALWAYS; + default: + return GPU_DEPTH_NONE; + } +} + +static inline eGPUStencilOp to_stencil_op(DRWState state) +{ + switch (state & DRW_STATE_WRITE_STENCIL_ENABLED) { + case DRW_STATE_WRITE_STENCIL: + return GPU_STENCIL_OP_REPLACE; + case DRW_STATE_WRITE_STENCIL_SHADOW_PASS: + return GPU_STENCIL_OP_COUNT_DEPTH_PASS; + case DRW_STATE_WRITE_STENCIL_SHADOW_FAIL: + return GPU_STENCIL_OP_COUNT_DEPTH_FAIL; + default: + return GPU_STENCIL_OP_NONE; + } +} + +static inline eGPUStencilTest to_stencil_test(DRWState state) +{ + switch (state & DRW_STATE_STENCIL_TEST_ENABLED) { + case DRW_STATE_STENCIL_ALWAYS: + return GPU_STENCIL_ALWAYS; + case DRW_STATE_STENCIL_EQUAL: + return GPU_STENCIL_EQUAL; + case DRW_STATE_STENCIL_NEQUAL: + return GPU_STENCIL_NEQUAL; + default: + return GPU_STENCIL_NONE; + } +} + +static inline eGPUBlend to_blend(DRWState state) +{ + switch (state & DRW_STATE_BLEND_ENABLED) { + case DRW_STATE_BLEND_ADD: + return GPU_BLEND_ADDITIVE; + case DRW_STATE_BLEND_ADD_FULL: + return GPU_BLEND_ADDITIVE_PREMULT; + case DRW_STATE_BLEND_ALPHA: + return GPU_BLEND_ALPHA; + case DRW_STATE_BLEND_ALPHA_PREMUL: + return GPU_BLEND_ALPHA_PREMULT; + case DRW_STATE_BLEND_BACKGROUND: + return GPU_BLEND_BACKGROUND; + case DRW_STATE_BLEND_OIT: + return GPU_BLEND_OIT; + case DRW_STATE_BLEND_MUL: + return GPU_BLEND_MULTIPLY; + case DRW_STATE_BLEND_SUB: + return GPU_BLEND_SUBTRACT; + case DRW_STATE_BLEND_CUSTOM: + return 
GPU_BLEND_CUSTOM; + case DRW_STATE_LOGIC_INVERT: + return GPU_BLEND_INVERT; + case DRW_STATE_BLEND_ALPHA_UNDER_PREMUL: + return GPU_BLEND_ALPHA_UNDER_PREMUL; + default: + return GPU_BLEND_NONE; + } +} + +static inline eGPUProvokingVertex to_provoking_vertex(DRWState state) +{ + switch (state & DRW_STATE_FIRST_VERTEX_CONVENTION) { + case DRW_STATE_FIRST_VERTEX_CONVENTION: + return GPU_VERTEX_FIRST; + default: + return GPU_VERTEX_LAST; + } +} + +/** \} */ + +}; // namespace blender::draw + +#endif diff --git a/source/blender/draw/intern/draw_view.cc b/source/blender/draw/intern/draw_view.cc new file mode 100644 index 00000000000..326e8629e52 --- /dev/null +++ b/source/blender/draw/intern/draw_view.cc @@ -0,0 +1,332 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "BLI_math_geom.h" +#include "GPU_compute.h" +#include "GPU_debug.h" + +#include "draw_debug.hh" +#include "draw_shader.h" +#include "draw_view.hh" + +namespace blender::draw { + +void View::sync(const float4x4 &view_mat, const float4x4 &win_mat) +{ + data_.viewmat = view_mat; + data_.viewinv = view_mat.inverted(); + data_.winmat = win_mat; + data_.wininv = win_mat.inverted(); + data_.persmat = data_.winmat * data_.viewmat; + data_.persinv = data_.persmat.inverted(); + /* Should not be used anymore. */ + data_.viewcamtexcofac = float4(1.0f, 1.0f, 0.0f, 0.0f); + + data_.is_inverted = (is_negative_m4(view_mat.ptr()) == is_negative_m4(win_mat.ptr())); + + update_view_vectors(); + + BoundBox &bound_box = *reinterpret_cast<BoundBox *>(&data_.frustum_corners); + BoundSphere &bound_sphere = *reinterpret_cast<BoundSphere *>(&data_.frustum_bound_sphere); + frustum_boundbox_calc(bound_box); + frustum_culling_planes_calc(); + frustum_culling_sphere_calc(bound_box, bound_sphere); + + dirty_ = true; +} + +void View::frustum_boundbox_calc(BoundBox &bbox) +{ + /* Extract the 8 corners from a Projection Matrix. 
*/ +#if 0 /* Equivalent to this but it has accuracy problems. */ + BKE_boundbox_init_from_minmax(&bbox, float3(-1.0f),float3(1.0f)); + for (int i = 0; i < 8; i++) { + mul_project_m4_v3(data_.wininv.ptr(), bbox.vec[i]); + } +#endif + + float left, right, bottom, top, near, far; + bool is_persp = data_.winmat[3][3] == 0.0f; + + projmat_dimensions(data_.winmat.ptr(), &left, &right, &bottom, &top, &near, &far); + + bbox.vec[0][2] = bbox.vec[3][2] = bbox.vec[7][2] = bbox.vec[4][2] = -near; + bbox.vec[0][0] = bbox.vec[3][0] = left; + bbox.vec[4][0] = bbox.vec[7][0] = right; + bbox.vec[0][1] = bbox.vec[4][1] = bottom; + bbox.vec[7][1] = bbox.vec[3][1] = top; + + /* Get the coordinates of the far plane. */ + if (is_persp) { + float sca_far = far / near; + left *= sca_far; + right *= sca_far; + bottom *= sca_far; + top *= sca_far; + } + + bbox.vec[1][2] = bbox.vec[2][2] = bbox.vec[6][2] = bbox.vec[5][2] = -far; + bbox.vec[1][0] = bbox.vec[2][0] = left; + bbox.vec[6][0] = bbox.vec[5][0] = right; + bbox.vec[1][1] = bbox.vec[5][1] = bottom; + bbox.vec[2][1] = bbox.vec[6][1] = top; + + /* Transform into world space. */ + for (int i = 0; i < 8; i++) { + mul_m4_v3(data_.viewinv.ptr(), bbox.vec[i]); + } +} + +void View::frustum_culling_planes_calc() +{ + planes_from_projmat(data_.persmat.ptr(), + data_.frustum_planes[0], + data_.frustum_planes[5], + data_.frustum_planes[1], + data_.frustum_planes[3], + data_.frustum_planes[4], + data_.frustum_planes[2]); + + /* Normalize. */ + for (int p = 0; p < 6; p++) { + data_.frustum_planes[p].w /= normalize_v3(data_.frustum_planes[p]); + } +} + +void View::frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere) +{ + /* Extract Bounding Sphere */ + if (data_.winmat[3][3] != 0.0f) { + /* Orthographic */ + /* The most extreme points on the near and far plane. (normalized device coords). 
*/ + const float *nearpoint = bbox.vec[0]; + const float *farpoint = bbox.vec[6]; + + /* just use median point */ + mid_v3_v3v3(bsphere.center, farpoint, nearpoint); + bsphere.radius = len_v3v3(bsphere.center, farpoint); + } + else if (data_.winmat[2][0] == 0.0f && data_.winmat[2][1] == 0.0f) { + /* Perspective with symmetrical frustum. */ + + /* We obtain the center and radius of the circumscribed circle of the + * isosceles trapezoid composed by the diagonals of the near and far clipping plane */ + + /* center of each clipping plane */ + float mid_min[3], mid_max[3]; + mid_v3_v3v3(mid_min, bbox.vec[3], bbox.vec[4]); + mid_v3_v3v3(mid_max, bbox.vec[2], bbox.vec[5]); + + /* square length of the diagonals of each clipping plane */ + float a_sq = len_squared_v3v3(bbox.vec[3], bbox.vec[4]); + float b_sq = len_squared_v3v3(bbox.vec[2], bbox.vec[5]); + + /* distance squared between clipping planes */ + float h_sq = len_squared_v3v3(mid_min, mid_max); + + float fac = (4 * h_sq + b_sq - a_sq) / (8 * h_sq); + + /* The goal is to get the smallest sphere, + * not the sphere that passes through each corner */ + CLAMP(fac, 0.0f, 1.0f); + + interp_v3_v3v3(bsphere.center, mid_min, mid_max, fac); + + /* distance from the center to one of the points of the far plane (1, 2, 5, 6) */ + bsphere.radius = len_v3v3(bsphere.center, bbox.vec[1]); + } + else { + /* Perspective with asymmetrical frustum. */ + + /* We put the sphere center on the line that goes from origin + * to the center of the far clipping plane. 
*/ + + /* Detect which of the corner of the far clipping plane is the farthest to the origin */ + float nfar[4]; /* most extreme far point in NDC space */ + float farxy[2]; /* far-point projection onto the near plane */ + float farpoint[3] = {0.0f}; /* most extreme far point in camera coordinate */ + float nearpoint[3]; /* most extreme near point in camera coordinate */ + float farcenter[3] = {0.0f}; /* center of far clipping plane in camera coordinate */ + float F = -1.0f, N; /* square distance of far and near point to origin */ + float f, n; /* distance of far and near point to z axis. f is always > 0 but n can be < 0 */ + float e, s; /* far and near clipping distance (<0) */ + float c; /* slope of center line = distance of far clipping center + * to z axis / far clipping distance. */ + float z; /* projection of sphere center on z axis (<0) */ + + /* Find farthest corner and center of far clip plane. */ + float corner[3] = {1.0f, 1.0f, 1.0f}; /* in clip space */ + for (int i = 0; i < 4; i++) { + float point[3]; + mul_v3_project_m4_v3(point, data_.wininv.ptr(), corner); + float len = len_squared_v3(point); + if (len > F) { + copy_v3_v3(nfar, corner); + copy_v3_v3(farpoint, point); + F = len; + } + add_v3_v3(farcenter, point); + /* rotate by 90 degree to walk through the 4 points of the far clip plane */ + float tmp = corner[0]; + corner[0] = -corner[1]; + corner[1] = tmp; + } + + /* the far center is the average of the far clipping points */ + mul_v3_fl(farcenter, 0.25f); + /* the extreme near point is the opposite point on the near clipping plane */ + copy_v3_fl3(nfar, -nfar[0], -nfar[1], -1.0f); + mul_v3_project_m4_v3(nearpoint, data_.wininv.ptr(), nfar); + /* this is a frustum projection */ + N = len_squared_v3(nearpoint); + e = farpoint[2]; + s = nearpoint[2]; + /* distance to view Z axis */ + f = len_v2(farpoint); + /* get corresponding point on the near plane */ + mul_v2_v2fl(farxy, farpoint, s / e); + /* this formula preserve the sign of n */ + 
sub_v2_v2(nearpoint, farxy); + n = f * s / e - len_v2(nearpoint); + c = len_v2(farcenter) / e; + /* the big formula, it simplifies to (F-N)/(2(e-s)) for the symmetric case */ + z = (F - N) / (2.0f * (e - s + c * (f - n))); + + bsphere.center[0] = farcenter[0] * z / e; + bsphere.center[1] = farcenter[1] * z / e; + bsphere.center[2] = z; + + /* For XR, the view matrix may contain a scale factor. Then, transforming only the center + * into world space after calculating the radius will result in incorrect behavior. */ + mul_m4_v3(data_.viewinv.ptr(), bsphere.center); /* Transform to world space. */ + mul_m4_v3(data_.viewinv.ptr(), farpoint); + bsphere.radius = len_v3v3(bsphere.center, farpoint); + } +} + +void View::set_clip_planes(Span<float4> planes) +{ + BLI_assert(planes.size() <= ARRAY_SIZE(data_.clip_planes)); + int i = 0; + for (const auto &plane : planes) { + data_.clip_planes[i++] = plane; + } +} + +void View::update_viewport_size() +{ + float4 viewport; + GPU_viewport_size_get_f(viewport); + float2 viewport_size = float2(viewport.z, viewport.w); + if (assign_if_different(data_.viewport_size, viewport_size)) { + dirty_ = true; + } +} + +void View::update_view_vectors() +{ + bool is_persp = data_.winmat[3][3] == 0.0f; + + /* Near clip distance. */ + data_.viewvecs[0][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f) : + -(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2]; + + /* Far clip distance. */ + data_.viewvecs[1][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f) : + -(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2]; + + /* View vectors for the corners of the view frustum. 
+ * Can be used to recreate the world space position easily */ + float3 view_vecs[4] = { + {-1.0f, -1.0f, -1.0f}, + {1.0f, -1.0f, -1.0f}, + {-1.0f, 1.0f, -1.0f}, + {-1.0f, -1.0f, 1.0f}, + }; + + /* Convert the view vectors to view space */ + for (int i = 0; i < 4; i++) { + mul_project_m4_v3(data_.wininv.ptr(), view_vecs[i]); + /* Normalized trick see: + * http://www.derschmale.com/2014/01/26/reconstructing-positions-from-the-depth-buffer */ + if (is_persp) { + view_vecs[i].x /= view_vecs[i].z; + view_vecs[i].y /= view_vecs[i].z; + } + } + + /** + * If ortho : view_vecs[0] is the near-bottom-left corner of the frustum and + * view_vecs[1] is the vector going from the near-bottom-left corner to + * the far-top-right corner. + * If Persp : view_vecs[0].xy and view_vecs[1].xy are respectively the bottom-left corner + * when Z = 1, and top-left corner if Z = 1. + * view_vecs[0].z the near clip distance and view_vecs[1].z is the (signed) + * distance from the near plane to the far clip plane. + */ + copy_v3_v3(data_.viewvecs[0], view_vecs[0]); + + /* we need to store the differences */ + data_.viewvecs[1][0] = view_vecs[1][0] - view_vecs[0][0]; + data_.viewvecs[1][1] = view_vecs[2][1] - view_vecs[0][1]; + data_.viewvecs[1][2] = view_vecs[3][2] - view_vecs[0][2]; +} + +void View::bind() +{ + update_viewport_size(); + + if (dirty_) { + dirty_ = false; + data_.push_update(); + } + + GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT); +} + +void View::compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze) +{ + if (debug_freeze && frozen_ == false) { + data_freeze_ = static_cast<ViewInfos>(data_); + data_freeze_.push_update(); + } +#ifdef DEBUG + if (debug_freeze) { + drw_debug_matrix_as_bbox(data_freeze_.persinv, float4(0, 1, 0, 1)); + } +#endif + frozen_ = debug_freeze; + + GPU_debug_group_begin("View.compute_visibility"); + + /* TODO(fclem): Early out if visibility hasn't changed. */ + /* TODO(fclem): Resize to nearest pow2 to reduce fragmentation. 
*/ + visibility_buf_.resize(divide_ceil_u(resource_len, 128)); + + uint32_t data = 0xFFFFFFFFu; + GPU_storagebuf_clear(visibility_buf_, GPU_R32UI, GPU_DATA_UINT, &data); + + if (do_visibility_) { + GPUShader *shader = DRW_shader_draw_visibility_compute_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "resource_len", resource_len); + GPU_storagebuf_bind(bounds, GPU_shader_get_ssbo(shader, "bounds_buf")); + GPU_storagebuf_bind(visibility_buf_, GPU_shader_get_ssbo(shader, "visibility_buf")); + GPU_uniformbuf_bind((frozen_) ? data_freeze_ : data_, DRW_VIEW_UBO_SLOT); + GPU_compute_dispatch(shader, divide_ceil_u(resource_len, DRW_VISIBILITY_GROUP_SIZE), 1, 1); + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + if (frozen_) { + /* Bind back the non frozen data. */ + GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT); + } + + GPU_debug_group_end(); +} + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_view.hh b/source/blender/draw/intern/draw_view.hh new file mode 100644 index 00000000000..82e74774a5a --- /dev/null +++ b/source/blender/draw/intern/draw_view.hh @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + */ + +#include "DRW_gpu_wrapper.hh" +#include "DRW_render.h" + +#include "draw_shader_shared.h" + +namespace blender::draw { + +class Manager; + +/* TODO deduplicate. */ +using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>; +/** \note Using uint4 for declaration but bound as uint. */ +using VisibilityBuf = StorageArrayBuffer<uint4, 1, true>; + +class View { + friend Manager; + + private: + UniformBuffer<ViewInfos> data_; + /** Freezed version of data_ used for debugging culling. */ + UniformBuffer<ViewInfos> data_freeze_; + /** Result of the visibility computation. 1 bit per resource ID. 
*/ + VisibilityBuf visibility_buf_; + + const char *debug_name_; + + bool do_visibility_ = true; + bool dirty_ = true; + bool frozen_ = false; + + public: + View(const char *name) : visibility_buf_(name), debug_name_(name){}; + /* For compatibility with old system. Will be removed at some point. */ + View(const char *name, const DRWView *view) : visibility_buf_(name), debug_name_(name) + { + float4x4 view_mat, win_mat; + DRW_view_viewmat_get(view, view_mat.ptr(), false); + DRW_view_winmat_get(view, win_mat.ptr(), false); + this->sync(view_mat, win_mat); + } + + void set_clip_planes(Span<float4> planes); + + void sync(const float4x4 &view_mat, const float4x4 &win_mat); + + bool is_persp() const + { + return data_.winmat[3][3] == 0.0f; + } + + bool is_inverted() const + { + return data_.is_inverted; + } + + float far_clip() const + { + if (is_persp()) { + return -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f); + } + return -(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2]; + } + + float near_clip() const + { + if (is_persp()) { + return -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f); + } + return -(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2]; + } + + private: + /** Called from draw manager. 
*/ + void bind(); + void compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze); + + void update_view_vectors(); + void update_viewport_size(); + + void frustum_boundbox_calc(BoundBox &bbox); + void frustum_culling_planes_calc(); + void frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere); +}; + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_view_data.cc b/source/blender/draw/intern/draw_view_data.cc index 55f1ab83b3a..58d826e0218 100644 --- a/source/blender/draw/intern/draw_view_data.cc +++ b/source/blender/draw/intern/draw_view_data.cc @@ -7,6 +7,7 @@ #include "BLI_vector.hh" +#include "GPU_capabilities.h" #include "GPU_viewport.h" #include "DRW_render.h" @@ -16,6 +17,7 @@ #include "draw_manager_text.h" #include "draw_manager.h" +#include "draw_manager.hh" #include "draw_view_data.h" using namespace blender; @@ -33,6 +35,22 @@ struct DRWViewData { Vector<ViewportEngineData> engines; Vector<ViewportEngineData *> enabled_engines; + + /** New per view/viewport manager. Null if not supported by current hardware. */ + draw::Manager *manager = nullptr; + + DRWViewData() + { + /* Only for GL >= 4.3 implementation for now. 
*/ + if (GPU_shader_storage_buffer_objects_support() && GPU_compute_shader_support()) { + manager = new draw::Manager(); + } + }; + + ~DRWViewData() + { + delete manager; + }; }; DRWViewData *DRW_view_data_create(ListBase *engine_types) @@ -237,3 +255,31 @@ ViewportEngineData *DRW_view_data_enabled_engine_iter_step(DRWEngineIterator *it ViewportEngineData *engine = iterator->engines[iterator->id++]; return engine; } + +draw::Manager *DRW_manager_get() +{ + BLI_assert(DST.view_data_active->manager); + return reinterpret_cast<draw::Manager *>(DST.view_data_active->manager); +} + +draw::ObjectRef DRW_object_ref_get(Object *object) +{ + BLI_assert(DST.view_data_active->manager); + return {object, DST.dupli_source, DST.dupli_parent}; +} + +void DRW_manager_begin_sync() +{ + if (DST.view_data_active->manager == nullptr) { + return; + } + reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->begin_sync(); +} + +void DRW_manager_end_sync() +{ + if (DST.view_data_active->manager == nullptr) { + return; + } + reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->end_sync(); +} diff --git a/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl index 5f795d3abdb..3287897e73c 100644 --- a/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl +++ b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl @@ -17,8 +17,7 @@ const vec4 drw_debug_default_color = vec4(1.0, 0.0, 0.0, 1.0); uint drw_debug_start_draw(uint v_needed) { uint vertid = atomicAdd(drw_debug_draw_v_count, v_needed); - /* NOTE: Skip the header manually. 
*/ - vertid += 1; + vertid += drw_debug_draw_offset; return vertid; } diff --git a/source/blender/draw/intern/shaders/common_debug_print_lib.glsl b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl index 0c7f32bd00d..89d1729b52d 100644 --- a/source/blender/draw/intern/shaders/common_debug_print_lib.glsl +++ b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl @@ -71,8 +71,7 @@ void drw_print_char4(uint data) break; } uint cursor = atomicAdd(drw_debug_print_cursor, 1u); - /* NOTE: Skip the header manually. */ - cursor += 4; + cursor += drw_debug_print_offset; if (cursor < DRW_DEBUG_PRINT_MAX) { /* For future usage. (i.e: Color) */ uint flags = 0u; diff --git a/source/blender/draw/intern/shaders/common_intersect_lib.glsl b/source/blender/draw/intern/shaders/common_intersect_lib.glsl index 33378588553..83223f89277 100644 --- a/source/blender/draw/intern/shaders/common_intersect_lib.glsl +++ b/source/blender/draw/intern/shaders/common_intersect_lib.glsl @@ -70,6 +70,30 @@ IsectBox isect_data_setup(Box shape) return data; } +/* Construct box from 1 corner point + 3 side vectors. */ +IsectBox isect_data_setup(vec3 origin, vec3 side_x, vec3 side_y, vec3 side_z) +{ + IsectBox data; + data.corners[0] = origin; + data.corners[1] = origin + side_x; + data.corners[2] = origin + side_y + side_x; + data.corners[3] = origin + side_y; + data.corners[4] = data.corners[0] + side_z; + data.corners[5] = data.corners[1] + side_z; + data.corners[6] = data.corners[2] + side_z; + data.corners[7] = data.corners[3] + side_z; + + data.planes[0] = isect_plane_setup(data.corners[0], side_y, side_z); + data.planes[1] = isect_plane_setup(data.corners[0], side_x, side_y); + data.planes[2] = isect_plane_setup(data.corners[0], side_z, side_x); + /* Assumes that the box is actually a box! 
*/ + data.planes[3] = vec4(-data.planes[0].xyz, -dot(-data.planes[0].xyz, data.corners[6])); + data.planes[4] = vec4(-data.planes[1].xyz, -dot(-data.planes[1].xyz, data.corners[6])); + data.planes[5] = vec4(-data.planes[2].xyz, -dot(-data.planes[2].xyz, data.corners[6])); + + return data; +} + struct IsectFrustum { vec3 corners[8]; vec4 planes[6]; @@ -194,6 +218,50 @@ bool intersect_view(Box box) return intersects; } +bool intersect_view(IsectBox i_box) +{ + bool intersects = true; + + /* Do Box vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(drw_view.frustum_planes[p], vec4(i_box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_box.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + return intersects; +} + bool intersect_view(Sphere sphere) { bool intersects = true; diff --git a/source/blender/draw/intern/shaders/common_view_lib.glsl b/source/blender/draw/intern/shaders/common_view_lib.glsl index 8ab2ef10e4c..6521476c3a7 100644 --- a/source/blender/draw/intern/shaders/common_view_lib.glsl +++ b/source/blender/draw/intern/shaders/common_view_lib.glsl @@ -155,7 +155,11 @@ uniform int drw_ResourceID; # define PASS_RESOURCE_ID # elif defined(GPU_VERTEX_SHADER) -# define resource_id gpu_InstanceIndex +# if defined(UNIFORM_RESOURCE_ID_NEW) +# define resource_id 
drw_ResourceID +# else +# define resource_id gpu_InstanceIndex +# endif # define PASS_RESOURCE_ID drw_ResourceID_iface.resource_index = resource_id; # elif defined(GPU_GEOMETRY_SHADER) @@ -203,8 +207,8 @@ flat in int resourceIDFrag; # ifndef DRW_SHADER_SHARED_H struct ObjectMatrices { - mat4 drw_modelMatrix; - mat4 drw_modelMatrixInverse; + mat4 model; + mat4 model_inverse; }; # endif /* DRW_SHADER_SHARED_H */ @@ -214,8 +218,8 @@ layout(std140) uniform modelBlock ObjectMatrices drw_matrices[DRW_RESOURCE_CHUNK_LEN]; }; -# define ModelMatrix (drw_matrices[resource_id].drw_modelMatrix) -# define ModelMatrixInverse (drw_matrices[resource_id].drw_modelMatrixInverse) +# define ModelMatrix (drw_matrices[resource_id].model) +# define ModelMatrixInverse (drw_matrices[resource_id].model_inverse) # endif /* USE_GPU_SHADER_CREATE_INFO */ #else /* GPU_INTEL */ diff --git a/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl b/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl new file mode 100644 index 00000000000..70842e5bb81 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl @@ -0,0 +1,84 @@ + +/** + * Convert DrawPrototype into draw commands. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +#define atomicAddAndGet(dst, val) (atomicAdd(dst, val) + val) + +/* This is only called by the last thread executed over the group's prototype draws. */ +void write_draw_call(DrawGroup group, uint group_id) +{ + DrawCommand cmd; + cmd.vertex_len = group.vertex_len; + cmd.vertex_first = group.vertex_first; + if (group.base_index != -1) { + cmd.base_index = group.base_index; + cmd.instance_first_indexed = group.start; + } + else { + cmd._instance_first_array = group.start; + } + /* Back-facing command. */ + cmd.instance_len = group_buf[group_id].back_facing_counter; + command_buf[group_id * 2 + 0] = cmd; + /* Front-facing command. 
*/ + cmd.instance_len = group_buf[group_id].front_facing_counter; + command_buf[group_id * 2 + 1] = cmd; + + /* Reset the counters for a next command gen dispatch. Avoids resending the whole data just + * for this purpose. Only the last thread will execute this so it is threadsafe. */ + group_buf[group_id].front_facing_counter = 0u; + group_buf[group_id].back_facing_counter = 0u; + group_buf[group_id].total_counter = 0u; +} + +void main() +{ + uint proto_id = gl_GlobalInvocationID.x; + if (proto_id >= prototype_len) { + return; + } + + DrawPrototype proto = prototype_buf[proto_id]; + uint group_id = proto.group_id; + bool is_inverted = (proto.resource_handle & 0x80000000u) != 0; + uint resource_index = (proto.resource_handle & 0x7FFFFFFFu); + + /* Visibility test result. */ + bool is_visible = ((visibility_buf[resource_index / 32u] & (1u << (resource_index % 32u)))) != 0; + + DrawGroup group = group_buf[group_id]; + + if (!is_visible) { + /* Skip the draw but still count towards the completion. 
*/ + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + return; + } + + uint back_facing_len = group.len - group.front_facing_len; + uint front_facing_len = group.front_facing_len; + uint dst_index = group.start; + if (is_inverted) { + uint offset = atomicAdd(group_buf[group_id].back_facing_counter, proto.instance_len); + dst_index += offset; + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + } + else { + uint offset = atomicAdd(group_buf[group_id].front_facing_counter, proto.instance_len); + dst_index += back_facing_len + offset; + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + } + + for (uint i = dst_index; i < dst_index + proto.instance_len; i++) { + /* Fill resource_id buffer for each instance of this draw */ + resource_id_buf[i] = resource_index; + } +} diff --git a/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl b/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl index ab76df819d5..4061dda5d1c 100644 --- a/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl +++ b/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl @@ -6,7 +6,7 @@ void main() { /* Skip the first vertex containing header data. 
*/ - DRWDebugVert vert = drw_debug_verts_buf[gl_VertexID + 1]; + DRWDebugVert vert = drw_debug_verts_buf[gl_VertexID + 2]; vec3 pos = uintBitsToFloat(uvec3(vert.pos0, vert.pos1, vert.pos2)); vec4 col = vec4((uvec4(vert.color) >> uvec4(0, 8, 16, 24)) & 0xFFu) / 255.0; diff --git a/source/blender/draw/intern/shaders/draw_debug_info.hh b/source/blender/draw/intern/shaders/draw_debug_info.hh index 893a5e537d9..ce450bb1210 100644 --- a/source/blender/draw/intern/shaders/draw_debug_info.hh +++ b/source/blender/draw/intern/shaders/draw_debug_info.hh @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" /* -------------------------------------------------------------------- */ @@ -10,7 +11,7 @@ GPU_SHADER_CREATE_INFO(draw_debug_print) .typedef_source("draw_shader_shared.h") - .storage_buf(7, Qualifier::READ_WRITE, "uint", "drw_debug_print_buf[]"); + .storage_buf(DRW_DEBUG_PRINT_SLOT, Qualifier::READ_WRITE, "uint", "drw_debug_print_buf[]"); GPU_SHADER_INTERFACE_INFO(draw_debug_print_display_iface, "").flat(Type::UINT, "char_index"); @@ -34,7 +35,10 @@ GPU_SHADER_CREATE_INFO(draw_debug_print_display) GPU_SHADER_CREATE_INFO(draw_debug_draw) .typedef_source("draw_shader_shared.h") - .storage_buf(6, Qualifier::READ_WRITE, "DRWDebugVert", "drw_debug_verts_buf[]"); + .storage_buf(DRW_DEBUG_DRAW_SLOT, + Qualifier::READ_WRITE, + "DRWDebugVert", + "drw_debug_verts_buf[]"); GPU_SHADER_INTERFACE_INFO(draw_debug_draw_display_iface, "interp").flat(Type::VEC4, "color"); diff --git a/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl b/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl index f67e9d3f9e0..cb379056e2b 100644 --- a/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl +++ b/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl @@ -8,7 +8,7 @@ void main() { /* Skip first 4 chars containing header data. 
*/ - uint char_data = drw_debug_print_buf[gl_VertexID + 4]; + uint char_data = drw_debug_print_buf[gl_VertexID + 8]; char_index = (char_data & 0xFFu) - 0x20u; /* Discard invalid chars. */ diff --git a/source/blender/draw/intern/shaders/draw_object_infos_info.hh b/source/blender/draw/intern/shaders/draw_object_infos_info.hh index 8fd55ea351f..2ec40ab76e3 100644 --- a/source/blender/draw/intern/shaders/draw_object_infos_info.hh +++ b/source/blender/draw/intern/shaders/draw_object_infos_info.hh @@ -1,10 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" GPU_SHADER_CREATE_INFO(draw_object_infos) .typedef_source("draw_shader_shared.h") .define("OBINFO_LIB") + .define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)") + .define("ObjectInfo", "(drw_infos[resource_id].infos)") + .define("ObjectColor", "(drw_infos[resource_id].color)") .uniform_buf(1, "ObjectInfos", "drw_infos[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH); GPU_SHADER_CREATE_INFO(draw_volume_infos) @@ -14,3 +18,11 @@ GPU_SHADER_CREATE_INFO(draw_volume_infos) GPU_SHADER_CREATE_INFO(draw_curves_infos) .typedef_source("draw_shader_shared.h") .uniform_buf(2, "CurvesInfos", "drw_curves", Frequency::BATCH); + +GPU_SHADER_CREATE_INFO(draw_object_infos_new) + .typedef_source("draw_shader_shared.h") + .define("OBINFO_LIB") + .define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)") + .define("ObjectInfo", "(drw_infos[resource_id].infos)") + .define("ObjectColor", "(drw_infos[resource_id].color)") + .storage_buf(DRW_OBJ_INFOS_SLOT, Qualifier::READ, "ObjectInfos", "drw_infos[]");
\ No newline at end of file diff --git a/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl b/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl new file mode 100644 index 00000000000..d834435e54e --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl @@ -0,0 +1,64 @@ + +/** + * Finish computation of a few draw resource after sync. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +void main() +{ + uint resource_id = gl_GlobalInvocationID.x; + if (resource_id >= resource_len) { + return; + } + + mat4 model_mat = matrix_buf[resource_id].model; + ObjectInfos infos = infos_buf[resource_id]; + ObjectBounds bounds = bounds_buf[resource_id]; + + if (bounds.bounding_sphere.w != -1.0) { + /* Convert corners to origin + sides in world space. */ + vec3 p0 = bounds.bounding_corners[0].xyz; + vec3 p01 = bounds.bounding_corners[1].xyz - p0; + vec3 p02 = bounds.bounding_corners[2].xyz - p0; + vec3 p03 = bounds.bounding_corners[3].xyz - p0; + /* Avoid flat box. */ + p01.x = max(p01.x, 1e-4); + p02.y = max(p02.y, 1e-4); + p03.z = max(p03.z, 1e-4); + vec3 diagonal = p01 + p02 + p03; + vec3 center = p0 + diagonal * 0.5; + float min_axis = min_v3(abs(diagonal)); + bounds_buf[resource_id].bounding_sphere.xyz = transform_point(model_mat, center); + /* We have to apply scaling to the diagonal. */ + bounds_buf[resource_id].bounding_sphere.w = length(transform_direction(model_mat, diagonal)) * + 0.5; + bounds_buf[resource_id]._inner_sphere_radius = min_axis; + bounds_buf[resource_id].bounding_corners[0].xyz = transform_point(model_mat, p0); + bounds_buf[resource_id].bounding_corners[1].xyz = transform_direction(model_mat, p01); + bounds_buf[resource_id].bounding_corners[2].xyz = transform_direction(model_mat, p02); + bounds_buf[resource_id].bounding_corners[3].xyz = transform_direction(model_mat, p03); + /* Always have correct handedness in the corners vectors. 
*/ + if (flag_test(infos.flag, OBJECT_NEGATIVE_SCALE)) { + bounds_buf[resource_id].bounding_corners[0].xyz += + bounds_buf[resource_id].bounding_corners[1].xyz; + bounds_buf[resource_id].bounding_corners[1].xyz = + -bounds_buf[resource_id].bounding_corners[1].xyz; + } + + /* TODO: Bypass test for very large objects (see T67319). */ + if (bounds_buf[resource_id].bounding_sphere.w > 1e12) { + bounds_buf[resource_id].bounding_sphere.w = -1.0; + } + } + + vec3 loc = infos.orco_add; /* Box center. */ + vec3 size = infos.orco_mul; /* Box half-extent. */ + /* This is what the original computation looks like. + * Simplify to a nice MADD in shading code. */ + // orco = (pos - loc) / size; + // orco = pos * (1.0 / size) + (-loc / size); + vec3 size_inv = safe_rcp(size); + infos_buf[resource_id].orco_add = -loc * size_inv; + infos_buf[resource_id].orco_mul = size_inv; +}
\ No newline at end of file diff --git a/source/blender/draw/intern/shaders/draw_view_info.hh b/source/blender/draw/intern/shaders/draw_view_info.hh index 0400521c53d..c522c607791 100644 --- a/source/blender/draw/intern/shaders/draw_view_info.hh +++ b/source/blender/draw/intern/shaders/draw_view_info.hh @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" /* -------------------------------------------------------------------- */ @@ -44,13 +45,13 @@ GPU_SHADER_CREATE_INFO(draw_resource_handle) * \{ */ GPU_SHADER_CREATE_INFO(draw_view) - .uniform_buf(0, "ViewInfos", "drw_view", Frequency::PASS) + .uniform_buf(DRW_VIEW_UBO_SLOT, "ViewInfos", "drw_view", Frequency::PASS) .typedef_source("draw_shader_shared.h"); GPU_SHADER_CREATE_INFO(draw_modelmat) .uniform_buf(8, "ObjectMatrices", "drw_matrices[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH) - .define("ModelMatrix", "(drw_matrices[resource_id].drw_modelMatrix)") - .define("ModelMatrixInverse", "(drw_matrices[resource_id].drw_modelMatrixInverse)") + .define("ModelMatrix", "(drw_matrices[resource_id].model)") + .define("ModelMatrixInverse", "(drw_matrices[resource_id].model_inverse)") .additional_info("draw_view"); GPU_SHADER_CREATE_INFO(draw_modelmat_legacy) @@ -136,3 +137,77 @@ GPU_SHADER_CREATE_INFO(draw_gpencil) .additional_info("draw_modelmat", "draw_resource_id_uniform", "draw_object_infos"); /** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Internal Draw Manager usage + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_resource_finalize) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .define("DRAW_FINALIZE_SHADER") + .local_group_size(DRW_FINALIZE_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "ObjectMatrices", "matrix_buf[]") + .storage_buf(1, Qualifier::READ_WRITE, "ObjectBounds", "bounds_buf[]") + .storage_buf(2, Qualifier::READ_WRITE, "ObjectInfos", "infos_buf[]") + 
.push_constant(Type::INT, "resource_len") + .compute_source("draw_resource_finalize_comp.glsl"); + +GPU_SHADER_CREATE_INFO(draw_visibility_compute) + .do_static_compilation(true) + .local_group_size(DRW_VISIBILITY_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "ObjectBounds", "bounds_buf[]") + .storage_buf(1, Qualifier::READ_WRITE, "uint", "visibility_buf[]") + .push_constant(Type::INT, "resource_len") + .compute_source("draw_visibility_comp.glsl") + .additional_info("draw_view"); + +GPU_SHADER_CREATE_INFO(draw_command_generate) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .typedef_source("draw_command_shared.hh") + .local_group_size(DRW_COMMAND_GROUP_SIZE) + .storage_buf(0, Qualifier::READ_WRITE, "DrawGroup", "group_buf[]") + .storage_buf(1, Qualifier::READ, "uint", "visibility_buf[]") + .storage_buf(2, Qualifier::READ, "DrawPrototype", "prototype_buf[]") + .storage_buf(3, Qualifier::WRITE, "DrawCommand", "command_buf[]") + .storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::WRITE, "uint", "resource_id_buf[]") + .push_constant(Type::INT, "prototype_len") + .compute_source("draw_command_generate_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Resource ID + * New implementation using gl_BaseInstance and storage buffers. + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_resource_id_new) + .define("UNIFORM_RESOURCE_ID_NEW") + .storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::READ, "int", "resource_id_buf[]") + .define("drw_ResourceID", "resource_id_buf[gpu_BaseInstance + gl_InstanceID]"); + +/** + * Workaround the lack of gl_BaseInstance by binding the resource_id_buf as vertex buf. 
+ */ +GPU_SHADER_CREATE_INFO(draw_resource_id_fallback) + .define("UNIFORM_RESOURCE_ID_NEW") + .vertex_in(15, Type::INT, "drw_ResourceID"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Object Resources + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_modelmat_new) + .typedef_source("draw_shader_shared.h") + .storage_buf(DRW_OBJ_MAT_SLOT, Qualifier::READ, "ObjectMatrices", "drw_matrix_buf[]") + .define("drw_ModelMatrixInverse", "drw_matrix_buf[resource_id].model_inverse") + .define("drw_ModelMatrix", "drw_matrix_buf[resource_id].model") + /* TODO For compatibility with old shaders. To be removed. */ + .define("ModelMatrixInverse", "drw_ModelMatrixInverse") + .define("ModelMatrix", "drw_ModelMatrix") + .additional_info("draw_resource_id_new"); + +/** \} */ diff --git a/source/blender/draw/intern/shaders/draw_visibility_comp.glsl b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl new file mode 100644 index 00000000000..7ec58c8f919 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl @@ -0,0 +1,46 @@ + +/** + * Compute visibility of each resource bounds for a given view. + */ +/* TODO(fclem): This could be augmented by a 2 pass occlusion culling system. 
*/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersect_lib.glsl) + +shared uint shared_result; + +void mask_visibility_bit() +{ + uint bit = 1u << gl_LocalInvocationID.x; + atomicAnd(visibility_buf[gl_WorkGroupID.x], ~bit); +} + +void main() +{ + if (gl_GlobalInvocationID.x >= resource_len) { + return; + } + + ObjectBounds bounds = bounds_buf[gl_GlobalInvocationID.x]; + + if (bounds.bounding_sphere.w != -1.0) { + IsectBox box = isect_data_setup(bounds.bounding_corners[0].xyz, + bounds.bounding_corners[1].xyz, + bounds.bounding_corners[2].xyz, + bounds.bounding_corners[3].xyz); + Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w); + Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius); + + if (intersect_view(inscribed_sphere) == true) { + /* Visible. */ + } + else if (intersect_view(bounding_sphere) == false) { + /* Not visible. */ + mask_visibility_bit(); + } + else if (intersect_view(box) == false) { + /* Not visible. */ + mask_visibility_bit(); + } + } +}
\ No newline at end of file diff --git a/source/blender/draw/tests/draw_pass_test.cc b/source/blender/draw/tests/draw_pass_test.cc new file mode 100644 index 00000000000..f8a006d096b --- /dev/null +++ b/source/blender/draw/tests/draw_pass_test.cc @@ -0,0 +1,441 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include "testing/testing.h" + +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader.h" +#include "draw_testing.hh" + +#include <bitset> + +namespace blender::draw { + +static void test_draw_pass_all_commands() +{ + Texture tex; + tex.ensure_2d(GPU_RGBA16, int2(1)); + + UniformBuffer<uint4> ubo; + ubo.push_update(); + + StorageBuffer<uint4> ssbo; + ssbo.push_update(); + + float alpha = 0.0f; + int3 dispatch_size(1); + + PassSimple pass = {"test.all_commands"}; + pass.init(); + pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL); + pass.clear_color_depth_stencil(float4(0.25f, 0.5f, 100.0f, -2000.0f), 0.5f, 0xF0); + pass.state_stencil(0x80, 0x0F, 0x8F); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.bind_texture("image", tex); + pass.bind_texture("image", &tex); + pass.bind_image("missing_image", tex); /* Should not crash. */ + pass.bind_image("missing_image", &tex); /* Should not crash. */ + pass.bind_ubo("missing_ubo", ubo); /* Should not crash. */ + pass.bind_ubo("missing_ubo", &ubo); /* Should not crash. */ + pass.bind_ssbo("missing_ssbo", ssbo); /* Should not crash. */ + pass.bind_ssbo("missing_ssbo", &ssbo); /* Should not crash. */ + pass.push_constant("alpha", alpha); + pass.push_constant("alpha", &alpha); + pass.push_constant("ModelViewProjectionMatrix", float4x4::identity()); + pass.draw_procedural(GPU_PRIM_TRIS, 1, 3); + + /* Should not crash even if shader is not a compute. This is because we only serialize. */ + /* TODO(fclem): Use real compute shader. 
*/ + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.dispatch(dispatch_size); + pass.dispatch(&dispatch_size); + pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); + + /* Change references. */ + alpha = 1.0f; + dispatch_size = int3(2); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.all_commands" << std::endl; + expected << " .state_set(6)" << std::endl; + expected << " .clear(color=(0.25, 0.5, 100, -2000), depth=0.5, stencil=0b11110000))" + << std::endl; + expected << " .stencil_set(write_mask=0b10000000, compare_mask=0b00001111, reference=0b10001111" + << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl; + expected << " .bind_texture(0)" << std::endl; + expected << " .bind_texture_ref(0)" << std::endl; + expected << " .bind_image(-1)" << std::endl; + expected << " .bind_image_ref(-1)" << std::endl; + expected << " .bind_uniform_buf(-1)" << std::endl; + expected << " .bind_uniform_buf_ref(-1)" << std::endl; + expected << " .bind_storage_buf(-1)" << std::endl; + expected << " .bind_storage_buf_ref(-1)" << std::endl; + expected << " .push_constant(2, data=0)" << std::endl; + expected << " .push_constant(2, data=1)" << std::endl; + expected << " .push_constant(0, data=(" << std::endl; + expected << "( 1.000000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 1.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 1.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << " .draw(inst_len=1, vert_len=3, vert_first=0, res_id=0)" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl; + expected << " .dispatch(1, 1, 1)" << std::endl; + expected << " .dispatch_ref(2, 2, 2)" << std::endl; + expected << " .barrier(4)" << std::endl; + + 
EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_all_commands) + +static void test_draw_pass_sub_ordering() +{ + PassSimple pass = {"test.sub_ordering"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.push_constant("test_pass", 1); + + PassSimple::Sub &sub1 = pass.sub("Sub1"); + sub1.push_constant("test_sub1", 11); + + PassSimple::Sub &sub2 = pass.sub("Sub2"); + sub2.push_constant("test_sub2", 21); + + /* Will execute after both sub. */ + pass.push_constant("test_pass", 2); + + /* Will execute after sub1. */ + sub2.push_constant("test_sub2", 22); + + /* Will execute before sub2. */ + sub1.push_constant("test_sub1", 12); + + /* Will execute before end of pass. */ + sub2.push_constant("test_sub2", 23); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.sub_ordering" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl; + expected << " .push_constant(-1, data=1)" << std::endl; + expected << " .Sub1" << std::endl; + expected << " .push_constant(-1, data=11)" << std::endl; + expected << " .push_constant(-1, data=12)" << std::endl; + expected << " .Sub2" << std::endl; + expected << " .push_constant(-1, data=21)" << std::endl; + expected << " .push_constant(-1, data=22)" << std::endl; + expected << " .push_constant(-1, data=23)" << std::endl; + expected << " .push_constant(-1, data=2)" << std::endl; + + EXPECT_EQ(result, expected.str()); +} +DRAW_TEST(draw_pass_sub_ordering) + +static void test_draw_pass_simple_draw() +{ + PassSimple pass = {"test.simple_draw"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + /* Each draw procedural type uses a different batch. Groups are drawn in correct order. 
*/ + pass.draw_procedural(GPU_PRIM_TRIS, 1, 10, 1, {1}); + pass.draw_procedural(GPU_PRIM_POINTS, 4, 20, 2, {2}); + pass.draw_procedural(GPU_PRIM_TRIS, 2, 30, 3, {3}); + pass.draw_procedural(GPU_PRIM_POINTS, 5, 40, 4, ResourceHandle(4, true)); + pass.draw_procedural(GPU_PRIM_LINES, 1, 50, 5, {5}); + pass.draw_procedural(GPU_PRIM_POINTS, 6, 60, 6, {5}); + pass.draw_procedural(GPU_PRIM_TRIS, 3, 70, 7, {6}); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.simple_draw" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl; + expected << " .draw(inst_len=1, vert_len=10, vert_first=1, res_id=1)" << std::endl; + expected << " .draw(inst_len=4, vert_len=20, vert_first=2, res_id=2)" << std::endl; + expected << " .draw(inst_len=2, vert_len=30, vert_first=3, res_id=3)" << std::endl; + expected << " .draw(inst_len=5, vert_len=40, vert_first=4, res_id=4)" << std::endl; + expected << " .draw(inst_len=1, vert_len=50, vert_first=5, res_id=5)" << std::endl; + expected << " .draw(inst_len=6, vert_len=60, vert_first=6, res_id=5)" << std::endl; + expected << " .draw(inst_len=3, vert_len=70, vert_first=7, res_id=6)" << std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_simple_draw) + +static void test_draw_pass_multi_draw() +{ + PassMain pass = {"test.multi_draw"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + /* Each draw procedural type uses a different batch. Groups are drawn in reverse order. 
*/ + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, {1}); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, {2}); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, {3}); + pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, ResourceHandle(4, true)); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, {5}); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, {5}); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, {6}); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.multi_draw" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl; + expected << " .draw_multi(3)" << std::endl; + expected << " .group(id=2, len=1)" << std::endl; + expected << " .proto(instance_len=1, resource_id=5, front_face)" << std::endl; + expected << " .group(id=1, len=15)" << std::endl; + expected << " .proto(instance_len=5, resource_id=4, back_face)" << std::endl; + expected << " .proto(instance_len=6, resource_id=5, front_face)" << std::endl; + expected << " .proto(instance_len=4, resource_id=2, front_face)" << std::endl; + expected << " .group(id=0, len=6)" << std::endl; + expected << " .proto(instance_len=3, resource_id=6, front_face)" << std::endl; + expected << " .proto(instance_len=2, resource_id=3, front_face)" << std::endl; + expected << " .proto(instance_len=1, resource_id=1, front_face)" << std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_multi_draw) + +static void test_draw_pass_sortable() +{ + PassSortable pass = {"test.sortable"}; + pass.init(); + + pass.sub("Sub3", 3.0f); + pass.sub("Sub2", 2.0f); + pass.sub("Sub5", 4.0f); + pass.sub("Sub4", 3.0f); + pass.sub("Sub1", 1.0f); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.sortable" << std::endl; + expected << " .Sub1" << std::endl; + expected << " .Sub2" << std::endl; + expected << " .Sub3" << std::endl; + expected << " .Sub4" << std::endl; + expected << " 
.Sub5" << std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_sortable) + +static void test_draw_resource_id_gen() +{ + float4x4 win_mat; + orthographic_m4(win_mat.ptr(), -1, 1, -1, 1, -1, 1); + + View view("test_view"); + view.sync(float4x4::identity(), win_mat); + + Manager drw; + + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + drw.begin_sync(); + ResourceHandle handle1 = drw.resource_handle(obmat_1); + ResourceHandle handle2 = drw.resource_handle(obmat_1); + ResourceHandle handle3 = drw.resource_handle(obmat_2); + drw.resource_handle(obmat_2, float3(2), float3(1)); + drw.end_sync(); + + StringRefNull expected = "2 1 1 1 1 3 3 1 1 1 1 1 3 2 2 2 2 2 2 1 1 1 "; + + { + /* Computed on CPU. */ + PassSimple pass = {"test.resource_id"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, handle1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + + std::stringstream result; + for (auto val : debug.resource_id) { + result << val << " "; + } + + EXPECT_EQ(result.str(), expected); + } + { + /* Same thing with PassMain (computed on GPU) */ + PassSimple pass = {"test.resource_id"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, 
handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, handle1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + + std::stringstream result; + for (auto val : debug.resource_id) { + result << val << " "; + } + + EXPECT_EQ(result.str(), expected); + } + + DRW_shape_cache_free(); + DRW_shaders_free(); +} +DRAW_TEST(draw_resource_id_gen) + +static void test_draw_visibility() +{ + float4x4 win_mat; + orthographic_m4(win_mat.ptr(), -1, 1, -1, 1, -1, 1); + + View view("test_view"); + view.sync(float4x4::identity(), win_mat); + + Manager drw; + + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + drw.begin_sync(); /* Default {0} always visible. */ + drw.resource_handle(obmat_1); /* No bounds, always visible. */ + drw.resource_handle(obmat_1, float3(3), float3(1)); /* Out of view. */ + drw.resource_handle(obmat_2, float3(0), float3(1)); /* Inside view. 
*/ + drw.end_sync(); + + PassMain pass = {"test.visibility"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + Vector<uint32_t> expected_visibility = {0}; + + std::stringstream result; + for (auto val : debug.visibility) { + result << std::bitset<32>(val); + } + + EXPECT_EQ(result.str(), "11111111111111111111111111111011"); + + DRW_shape_cache_free(); + DRW_shaders_free(); +} +DRAW_TEST(draw_visibility) + +static void test_draw_manager_sync() +{ + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + /* TODO find a way to create a minimum object to test resource handle creation on it. */ + Manager drw; + + drw.begin_sync(); + drw.resource_handle(obmat_1); + drw.resource_handle(obmat_2, float3(2), float3(1)); + drw.end_sync(); + + Manager::DataDebugOutput debug = drw.data_debug(); + + std::stringstream result; + for (const auto &val : debug.matrices) { + result << val; + } + for (const auto &val : debug.bounds) { + result << val; + } + for (const auto &val : debug.infos) { + result << val; + } + + std::stringstream expected; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( 1.000000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 1.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 1.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( 1.000000, -0.000000, 0.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 1.000000, -0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, -0.000000, 1.000000, -0.000000)" << 
std::endl; + expected << "( -0.000000, 0.000000, -0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( -0.500000, -0.000000, -0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, -0.500000, -0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, -0.000000, -0.500000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( -2.000000, 0.000000, -0.000000, -0.000000)" << std::endl; + expected << "( 0.000000, -2.000000, 0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, 0.000000, -2.000000, 0.000000)" << std::endl; + expected << "( -0.000000, -0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( 0.500000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.500000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.500000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( 2.000000, -0.000000, 0.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 2.000000, -0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, -0.000000, 2.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 0.000000, -0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectBounds(skipped)" << std::endl; + expected << "ObjectBounds(skipped)" << std::endl; + expected << "ObjectBounds(" << std::endl; + 
expected << ".bounding_corners[0](0.5, 0.5, 0.5)" << std::endl; + expected << ".bounding_corners[1](1, 0, 0)" << std::endl; + expected << ".bounding_corners[2](0, 1, 0)" << std::endl; + expected << ".bounding_corners[3](0, 0, 1)" << std::endl; + expected << ".sphere=(pos=(1, 1, 1), rad=0.866025" << std::endl; + expected << ")" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + + EXPECT_EQ(result.str(), expected.str()); + + DRW_shaders_free(); +} +DRAW_TEST(draw_manager_sync) + +} // namespace blender::draw
\ No newline at end of file diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 7ae9eae6d44..2f16d788b9d 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -27,6 +27,7 @@ set(INC # For *_info.hh includes. ../draw/engines/eevee_next + ../draw/intern # For node muting stuff. ../nodes diff --git a/source/blender/gpu/GPU_batch.h b/source/blender/gpu/GPU_batch.h index 8f524f72fa1..4935ced7f48 100644 --- a/source/blender/gpu/GPU_batch.h +++ b/source/blender/gpu/GPU_batch.h @@ -70,6 +70,8 @@ typedef struct GPUBatch { GPUVertBuf *inst[GPU_BATCH_INST_VBO_MAX_LEN]; /** NULL if element list not needed */ GPUIndexBuf *elem; + /** Resource ID attribute workaround. */ + GPUStorageBuf *resource_id_buf; /** Bookkeeping. */ eGPUBatchFlag flag; /** Type of geometry to draw. */ @@ -126,6 +128,11 @@ bool GPU_batch_vertbuf_has(GPUBatch *, GPUVertBuf *); #define GPU_batch_vertbuf_add(batch, verts) GPU_batch_vertbuf_add_ex(batch, verts, false) +/** + * Set resource id buffer to bind as instance attribute to workaround the lack of gl_BaseInstance. + */ +void GPU_batch_resource_id_buf_set(GPUBatch *batch, GPUStorageBuf *resource_id_buf); + void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader); /** * Bind program bound to IMM to the batch. 
diff --git a/source/blender/gpu/intern/gpu_batch.cc b/source/blender/gpu/intern/gpu_batch.cc index 9092ad5110c..c871004deac 100644 --- a/source/blender/gpu/intern/gpu_batch.cc +++ b/source/blender/gpu/intern/gpu_batch.cc @@ -200,6 +200,13 @@ bool GPU_batch_vertbuf_has(GPUBatch *batch, GPUVertBuf *verts) return false; } +void GPU_batch_resource_id_buf_set(GPUBatch *batch, GPUStorageBuf *resource_id_buf) +{ + BLI_assert(resource_id_buf); + batch->flag |= GPU_BATCH_DIRTY; + batch->resource_id_buf = resource_id_buf; +} + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/gpu/intern/gpu_shader_create_info.cc b/source/blender/gpu/intern/gpu_shader_create_info.cc index 110b77f1f52..a18fdcd32df 100644 --- a/source/blender/gpu/intern/gpu_shader_create_info.cc +++ b/source/blender/gpu/intern/gpu_shader_create_info.cc @@ -300,6 +300,11 @@ void gpu_shader_create_info_init() draw_modelmat = draw_modelmat_legacy; } + /* WORKAROUND: Replace the use of gpu_BaseInstance by an instance attribute. 
*/ + if (GPU_shader_draw_parameters_support() == false) { + draw_resource_id_new = draw_resource_id_fallback; + } + for (ShaderCreateInfo *info : g_create_infos->values()) { if (info->do_static_compilation_) { info->builtins_ |= gpu_shader_dependency_get_builtins(info->vertex_source_); diff --git a/source/blender/gpu/opengl/gl_vertex_array.cc b/source/blender/gpu/opengl/gl_vertex_array.cc index d836b73f5d8..6897ac9f4a2 100644 --- a/source/blender/gpu/opengl/gl_vertex_array.cc +++ b/source/blender/gpu/opengl/gl_vertex_array.cc @@ -11,6 +11,7 @@ #include "gl_batch.hh" #include "gl_context.hh" #include "gl_index_buffer.hh" +#include "gl_storage_buffer.hh" #include "gl_vertex_buffer.hh" #include "gl_vertex_array.hh" @@ -118,6 +119,18 @@ void GLVertArray::update_bindings(const GLuint vao, } } + if (batch->resource_id_buf) { + const ShaderInput *input = interface->attr_get("drw_ResourceID"); + if (input) { + dynamic_cast<GLStorageBuf *>(unwrap(batch->resource_id_buf))->bind_as(GL_ARRAY_BUFFER); + glEnableVertexAttribArray(input->location); + glVertexAttribDivisor(input->location, 1); + glVertexAttribIPointer( + input->location, 1, to_gl(GPU_COMP_I32), sizeof(uint32_t), (GLvoid *)nullptr); + attr_mask &= ~(1 << input->location); + } + } + if (attr_mask != 0 && GLContext::vertex_attrib_binding_support) { for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) { if (attr_mask & mask) { diff --git a/source/blender/makesdna/DNA_userdef_types.h b/source/blender/makesdna/DNA_userdef_types.h index dc461502b10..39fb3690da4 100644 --- a/source/blender/makesdna/DNA_userdef_types.h +++ b/source/blender/makesdna/DNA_userdef_types.h @@ -640,8 +640,8 @@ typedef struct UserDef_Experimental { char use_cycles_debug; char show_asset_debug_info; char no_asset_indexing; + char use_viewport_debug; char SANITIZE_AFTER_HERE; - char _pad0; /* The following options are automatically sanitized (set to 0) * when the release cycle is not alpha. 
*/ char use_new_curves_tools; diff --git a/source/blender/makesdna/DNA_view3d_types.h b/source/blender/makesdna/DNA_view3d_types.h index 0d281032b7e..1ba057d9c40 100644 --- a/source/blender/makesdna/DNA_view3d_types.h +++ b/source/blender/makesdna/DNA_view3d_types.h @@ -296,7 +296,9 @@ typedef struct View3D { char _pad6[2]; int layact DNA_DEPRECATED; unsigned short local_collections_uuid; - short _pad7[3]; + short _pad7[2]; + + short debug_flag; /** Optional bool for 3d cursor to define center. */ short ob_center_cursor; @@ -489,6 +491,11 @@ enum { V3D_SHADING_COMPOSITOR = (1 << 15), }; +/** #View3D.debug_flag */ +enum { + V3D_DEBUG_FREEZE_CULLING = (1 << 0), +}; + #define V3D_USES_SCENE_LIGHTS(v3d) \ ((((v3d)->shading.type == OB_MATERIAL) && ((v3d)->shading.flag & V3D_SHADING_SCENE_LIGHTS)) || \ (((v3d)->shading.type == OB_RENDER) && \ diff --git a/source/blender/makesrna/intern/rna_space.c b/source/blender/makesrna/intern/rna_space.c index 9b08b6ef665..5f2e3c4d1a0 100644 --- a/source/blender/makesrna/intern/rna_space.c +++ b/source/blender/makesrna/intern/rna_space.c @@ -4736,6 +4736,13 @@ static void rna_def_space_view3d_overlay(BlenderRNA *brna) RNA_def_property_range(prop, 0.0f, 1.0f); RNA_def_property_ui_text(prop, "Opacity", "Vertex Paint mix factor"); RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D, "rna_GPencil_update"); + + /* Developer Debug overlay */ + + prop = RNA_def_property(srna, "use_debug_freeze_view_culling", PROP_BOOLEAN, PROP_NONE); + RNA_def_property_boolean_sdna(prop, NULL, "debug_flag", V3D_DEBUG_FREEZE_CULLING); + RNA_def_property_ui_text(prop, "Freeze Culling", "Freeze view culling bounds"); + RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D, NULL); } static void rna_def_space_view3d(BlenderRNA *brna) diff --git a/source/blender/makesrna/intern/rna_userdef.c b/source/blender/makesrna/intern/rna_userdef.c index 324c0bb9006..61d4edccb06 100644 --- a/source/blender/makesrna/intern/rna_userdef.c +++ 
b/source/blender/makesrna/intern/rna_userdef.c @@ -6372,6 +6372,14 @@ static void rna_def_userdef_experimental(BlenderRNA *brna) prop = RNA_def_property(srna, "enable_eevee_next", PROP_BOOLEAN, PROP_NONE); RNA_def_property_boolean_sdna(prop, NULL, "enable_eevee_next", 1); RNA_def_property_ui_text(prop, "EEVEE Next", "Enable the new EEVEE codebase, requires restart"); + + prop = RNA_def_property(srna, "use_viewport_debug", PROP_BOOLEAN, PROP_NONE); + RNA_def_property_boolean_sdna(prop, NULL, "use_viewport_debug", 1); + RNA_def_property_ui_text(prop, + "Viewport Debug", + "Enable viewport debugging options for developers in the overlays " + "pop-over"); + RNA_def_property_update(prop, 0, "rna_userdef_ui_update"); } static void rna_def_userdef_addon_collection(BlenderRNA *brna, PropertyRNA *cprop) |