diff options
Diffstat (limited to 'source/blender/draw/intern/draw_command.hh')
-rw-r--r-- | source/blender/draw/intern/draw_command.hh | 534 |
1 files changed, 534 insertions, 0 deletions
diff --git a/source/blender/draw/intern/draw_command.hh b/source/blender/draw/intern/draw_command.hh new file mode 100644 index 00000000000..b9117580d91 --- /dev/null +++ b/source/blender/draw/intern/draw_command.hh @@ -0,0 +1,534 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Commands stored inside draw passes. Converted into GPU commands upon pass submission. + * + * Draw calls (primitive rendering commands) are managed by either `DrawCommandBuf` or + * `DrawMultiBuf`. See implementation details at their definition. + */ + +#include "BKE_global.h" +#include "BLI_map.hh" +#include "DRW_gpu_wrapper.hh" + +#include "draw_command_shared.hh" +#include "draw_handle.hh" +#include "draw_state.h" +#include "draw_view.hh" + +namespace blender::draw::command { + +class DrawCommandBuf; +class DrawMultiBuf; + +/* -------------------------------------------------------------------- */ +/** \name Recording State + * \{ */ + +/** + * Command recording state. + * Keep track of several states and avoid redundant state changes. + */ +struct RecordingState { + GPUShader *shader = nullptr; + bool front_facing = true; + bool inverted_view = false; + DRWState pipeline_state = DRW_STATE_NO_DRAW; + int view_clip_plane_count = 0; + /** Used for gl_BaseInstance workaround. */ + GPUStorageBuf *resource_id_buf = nullptr; + + void front_facing_set(bool facing) + { + /* Facing is inverted if view is not in expected handedness. */ + facing = this->inverted_view == facing; + /* Remove redundant changes. */ + if (assign_if_different(this->front_facing, facing)) { + GPU_front_facing(!facing); + } + } + + void cleanup() + { + if (front_facing == false) { + GPU_front_facing(false); + } + + if (G.debug & G_DEBUG_GPU) { + GPU_storagebuf_unbind_all(); + GPU_texture_image_unbind_all(); + GPU_texture_unbind_all(); + GPU_uniformbuf_unbind_all(); + } + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Regular Commands + * \{ */ + +enum class Type : uint8_t { + /** + * None Type commands are either uninitialized or are repurposed as data storage. + * They are skipped during submission. + */ + None = 0, + + /** Commands stored as Undetermined in regular command buffer. */ + Barrier, + Clear, + Dispatch, + DispatchIndirect, + Draw, + DrawIndirect, + PushConstant, + ResourceBind, + ShaderBind, + StateSet, + StencilSet, + + /** Special commands stored in separate buffers. */ + SubPass, + DrawMulti, +}; + +/** + * The index of the group is implicit since it is known by the one who want to + * access it. This also allows to have an indexed object to split the command + * stream. + */ +struct Header { + /** Command type. */ + Type type; + /** Command index in command heap of this type. */ + uint index; +}; + +struct ShaderBind { + GPUShader *shader; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct ResourceBind { + eGPUSamplerState sampler; + int slot; + bool is_reference; + + enum class Type : uint8_t { + Sampler = 0, + Image, + UniformBuf, + StorageBuf, + } type; + + union { + /** TODO: Use draw::Texture|StorageBuffer|UniformBuffer as resources as they will give more + * debug info. */ + GPUUniformBuf *uniform_buf; + GPUUniformBuf **uniform_buf_ref; + GPUStorageBuf *storage_buf; + GPUStorageBuf **storage_buf_ref; + /** NOTE: Texture is used for both Sampler and Image binds. */ + GPUTexture *texture; + GPUTexture **texture_ref; + }; + + ResourceBind() = default; + + ResourceBind(int slot_, GPUUniformBuf *res) + : slot(slot_), is_reference(false), type(Type::UniformBuf), uniform_buf(res){}; + ResourceBind(int slot_, GPUUniformBuf **res) + : slot(slot_), is_reference(true), type(Type::UniformBuf), uniform_buf_ref(res){}; + ResourceBind(int slot_, GPUStorageBuf *res) + : slot(slot_), is_reference(false), type(Type::StorageBuf), storage_buf(res){}; + ResourceBind(int slot_, GPUStorageBuf **res) + : slot(slot_), is_reference(true), type(Type::StorageBuf), storage_buf_ref(res){}; + ResourceBind(int slot_, draw::Image *res) + : slot(slot_), is_reference(false), type(Type::Image), texture(draw::as_texture(res)){}; + ResourceBind(int slot_, draw::Image **res) + : slot(slot_), is_reference(true), type(Type::Image), texture_ref(draw::as_texture(res)){}; + ResourceBind(int slot_, GPUTexture *res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(false), type(Type::Sampler), texture(res){}; + ResourceBind(int slot_, GPUTexture **res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(true), type(Type::Sampler), texture_ref(res){}; + + void execute() const; + std::string serialize() const; +}; + +struct PushConstant { + int location; + uint8_t array_len; + uint8_t comp_len; + enum class Type : uint8_t { + IntValue = 0, + FloatValue, + IntReference, + FloatReference, + } type; + /** + * IMPORTANT: Data is at the end of the struct as it can span over the next commands. + * These next commands are not real commands but just memory to hold the data and are not + * referenced by any Command::Header. + * This is a hack to support float4x4 copy. + */ + union { + int int1_value; + int2 int2_value; + int3 int3_value; + int4 int4_value; + float float1_value; + float2 float2_value; + float3 float3_value; + float4 float4_value; + const int *int_ref; + const int2 *int2_ref; + const int3 *int3_ref; + const int4 *int4_ref; + const float *float_ref; + const float2 *float2_ref; + const float3 *float3_ref; + const float4 *float4_ref; + const float4x4 *float4x4_ref; + }; + + PushConstant() = default; + + PushConstant(int loc, const float &val) + : location(loc), array_len(1), comp_len(1), type(Type::FloatValue), float1_value(val){}; + PushConstant(int loc, const float2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::FloatValue), float2_value(val){}; + PushConstant(int loc, const float3 &val) + : location(loc), array_len(1), comp_len(3), type(Type::FloatValue), float3_value(val){}; + PushConstant(int loc, const float4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::FloatValue), float4_value(val){}; + + PushConstant(int loc, const int &val) + : location(loc), array_len(1), comp_len(1), type(Type::IntValue), int1_value(val){}; + PushConstant(int loc, const int2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::IntValue), int2_value(val){}; + PushConstant(int loc, const int3 &val) + : location(loc), array_len(1), comp_len(3), type(Type::IntValue), int3_value(val){}; + PushConstant(int loc, const int4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::IntValue), int4_value(val){}; + + PushConstant(int loc, const float *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::FloatReference), float_ref(val){}; + PushConstant(int loc, const float2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::FloatReference), float2_ref(val){}; + PushConstant(int loc, const float3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::FloatReference), float3_ref(val){}; + PushConstant(int loc, const float4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::FloatReference), float4_ref(val){}; + PushConstant(int loc, const float4x4 *val) + : location(loc), array_len(1), comp_len(16), type(Type::FloatReference), float4x4_ref(val){}; + + PushConstant(int loc, const int *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::IntReference), int_ref(val){}; + PushConstant(int loc, const int2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::IntReference), int2_ref(val){}; + PushConstant(int loc, const int3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::IntReference), int3_ref(val){}; + PushConstant(int loc, const int4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::IntReference), int4_ref(val){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Draw { + GPUBatch *batch; + uint instance_len; + uint vertex_len; + uint vertex_first; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DrawMulti { + GPUBatch *batch; + DrawMultiBuf *multi_draw_buf; + uint group_first; + uint uuid; + + void execute(RecordingState &state) const; + std::string serialize(std::string line_prefix) const; +}; + +struct DrawIndirect { + GPUBatch *batch; + GPUStorageBuf **indirect_buf; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Dispatch { + bool is_reference; + union { + int3 size; + int3 *size_ref; + }; + + Dispatch() = default; + + Dispatch(int3 group_len) : is_reference(false), size(group_len){}; + Dispatch(int3 *group_len) : is_reference(true), size_ref(group_len){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DispatchIndirect { + GPUStorageBuf **indirect_buf; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Barrier { + eGPUBarrier type; + + void execute() const; + std::string serialize() const; +}; + +struct Clear { + uint8_t clear_channels; /* #eGPUFrameBufferBits. But want to save some bits. */ + uint8_t stencil; + float depth; + float4 color; + + void execute() const; + std::string serialize() const; +}; + +struct StateSet { + DRWState new_state; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct StencilSet { + uint write_mask; + uint compare_mask; + uint reference; + + void execute() const; + std::string serialize() const; +}; + +union Undetermined { + ShaderBind shader_bind; + ResourceBind resource_bind; + PushConstant push_constant; + Draw draw; + DrawMulti draw_multi; + DrawIndirect draw_indirect; + Dispatch dispatch; + DispatchIndirect dispatch_indirect; + Barrier barrier; + Clear clear; + StateSet state_set; + StencilSet stencil_set; +}; + +/** Try to keep the command size as low as possible for performance. */ +BLI_STATIC_ASSERT(sizeof(Undetermined) <= 24, "One of the command type is too large.") + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Commands + * + * A draw command buffer used to issue single draw commands without instance merging or any + * other optimizations. + * + * It still uses a ResourceIdBuf to keep the same shader interface as multi draw commands. + * + * \{ */ + +class DrawCommandBuf { + friend Manager; + + private: + using ResourceIdBuf = StorageArrayBuffer<uint, 128, false>; + + /** Array of resource id. One per instance. Generated on GPU and send to GPU. */ + ResourceIdBuf resource_id_buf_; + /** Used items in the resource_id_buf_. Not it's allocated length. */ + uint resource_id_count_ = 0; + + public: + void clear(){}; + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + vertex_first = vertex_first != -1 ? vertex_first : 0; + instance_len = instance_len != -1 ? instance_len : 1; + + int64_t index = commands.append_and_get_index({}); + headers.append({Type::Draw, static_cast<uint>(index)}); + commands[index].draw = {batch, instance_len, vertex_len, vertex_first, handle}; + } + + void bind(RecordingState &state, Vector<Header, 0> &headers, Vector<Undetermined, 0> &commands); +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Multi Draw Commands + * + * For efficient rendering of large scene we strive to minimize the number of draw call and state + * changes. To this end, we group many rendering commands and sort them per render state using + * `DrawGroup` as a container. This is done automatically for any successive commands with the + * same state. + * + * A `DrawGroup` is the combination of a `GPUBatch` (VBO state) and a `command::DrawMulti` + * (Pipeline State). + * + * Inside each `DrawGroup` all instances of a same `GPUBatch` is merged into a single indirect + * command. + * + * To support this arbitrary reordering, we only need to know the offset of all the commands for a + * specific `DrawGroup`. This is done on CPU by doing a simple prefix sum. The result is pushed to + * GPU and used on CPU to issue the right command indirect. + * + * Each draw command is stored in an unsorted array of `DrawPrototype` and sent directly to the + * GPU. + * + * A command generation compute shader then go over each `DrawPrototype`. For each it adds it (or + * not depending on visibility) to the correct draw command using the offset of the `DrawGroup` + * computed on CPU. After that, it also outputs one resource ID for each instance inside a + * `DrawPrototype`. + * + * \{ */ + +class DrawMultiBuf { + friend Manager; + friend DrawMulti; + + private: + using DrawGroupBuf = StorageArrayBuffer<DrawGroup, 16>; + using DrawPrototypeBuf = StorageArrayBuffer<DrawPrototype, 16>; + using DrawCommandBuf = StorageArrayBuffer<DrawCommand, 16, true>; + using ResourceIdBuf = StorageArrayBuffer<uint, 128, true>; + + using DrawGroupKey = std::pair<uint, GPUBatch *>; + using DrawGroupMap = Map<DrawGroupKey, uint>; + /** Maps a DrawMulti command and a gpu batch to their unique DrawGroup command. */ + DrawGroupMap group_ids_; + + /** DrawGroup Command heap. Uploaded to GPU for sorting. */ + DrawGroupBuf group_buf_ = {"DrawGroupBuf"}; + /** Command Prototypes. Unsorted */ + DrawPrototypeBuf prototype_buf_ = {"DrawPrototypeBuf"}; + /** Command list generated by the sorting / compaction steps. Lives on GPU. */ + DrawCommandBuf command_buf_ = {"DrawCommandBuf"}; + /** Array of resource id. One per instance. Lives on GPU. */ + ResourceIdBuf resource_id_buf_ = {"ResourceIdBuf"}; + /** Give unique ID to each header so we can use that as hash key. */ + uint header_id_counter_ = 0; + /** Number of groups inside group_buf_. */ + uint group_count_ = 0; + /** Number of prototype command inside prototype_buf_. */ + uint prototype_count_ = 0; + /** Used items in the resource_id_buf_. Not it's allocated length. */ + uint resource_id_count_ = 0; + + public: + void clear() + { + header_id_counter_ = 0; + group_count_ = 0; + prototype_count_ = 0; + group_ids_.clear(); + } + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + /* Unsupported for now. Use PassSimple. */ + BLI_assert(vertex_first == 0 || vertex_first == -1); + BLI_assert(vertex_len == -1); + UNUSED_VARS_NDEBUG(vertex_len, vertex_first); + + instance_len = instance_len != -1 ? instance_len : 1; + + /* If there was some state changes since previous call, we have to create another command. */ + if (headers.is_empty() || headers.last().type != Type::DrawMulti) { + uint index = commands.append_and_get_index({}); + headers.append({Type::DrawMulti, index}); + commands[index].draw_multi = {batch, this, (uint)-1, header_id_counter_++}; + } + + DrawMulti &cmd = commands.last().draw_multi; + + uint &group_id = group_ids_.lookup_or_add(DrawGroupKey(cmd.uuid, batch), (uint)-1); + + bool inverted = handle.has_inverted_handedness(); + + if (group_id == (uint)-1) { + uint new_group_id = group_count_++; + + DrawGroup &group = group_buf_.get_or_resize(new_group_id); + group.next = cmd.group_first; + group.len = instance_len; + group.front_facing_len = inverted ? 0 : instance_len; + group.gpu_batch = batch; + group.front_proto_len = 0; + group.back_proto_len = 0; + /* For serialization only. */ + (inverted ? group.back_proto_len : group.front_proto_len)++; + /* Append to list. */ + cmd.group_first = new_group_id; + group_id = new_group_id; + } + else { + DrawGroup &group = group_buf_[group_id]; + group.len += instance_len; + group.front_facing_len += inverted ? 0 : instance_len; + /* For serialization only. */ + (inverted ? group.back_proto_len : group.front_proto_len)++; + } + + DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++); + draw.group_id = group_id; + draw.resource_handle = handle.raw; + draw.instance_len = instance_len; + } + + void bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + VisibilityBuf &visibility_buf); +}; + +/** \} */ + +}; // namespace blender::draw::command
\ No newline at end of file |