/* SPDX-License-Identifier: GPL-2.0-or-later * Copyright 2022 Blender Foundation. */ #pragma once /** \file * \ingroup draw * * Commands stored inside draw passes. Converted into GPU commands upon pass submission. * * Draw calls (primitive rendering commands) are managed by either `DrawCommandBuf` or * `DrawMultiBuf`. See implementation details at their definition. */ #include "BKE_global.h" #include "BLI_map.hh" #include "DRW_gpu_wrapper.hh" #include "draw_command_shared.hh" #include "draw_handle.hh" #include "draw_state.h" #include "draw_view.hh" namespace blender::draw::command { class DrawCommandBuf; class DrawMultiBuf; /* -------------------------------------------------------------------- */ /** \name Recording State * \{ */ /** * Command recording state. * Keep track of several states and avoid redundant state changes. */ struct RecordingState { GPUShader *shader = nullptr; bool front_facing = true; bool inverted_view = false; DRWState pipeline_state = DRW_STATE_NO_DRAW; int clip_plane_count = 0; /** Used for gl_BaseInstance workaround. */ GPUStorageBuf *resource_id_buf = nullptr; void front_facing_set(bool facing) { /* Facing is inverted if view is not in expected handedness. */ facing = this->inverted_view == facing; /* Remove redundant changes. */ if (assign_if_different(this->front_facing, facing)) { GPU_front_facing(!facing); } } void cleanup() { if (front_facing == false) { GPU_front_facing(false); } if (G.debug & G_DEBUG_GPU) { GPU_storagebuf_unbind_all(); GPU_texture_image_unbind_all(); GPU_texture_unbind_all(); GPU_uniformbuf_unbind_all(); } } }; /** \} */ /* -------------------------------------------------------------------- */ /** \name Regular Commands * \{ */ enum class Type : uint8_t { /** * None Type commands are either uninitialized or are repurposed as data storage. * They are skipped during submission. */ None = 0, /** Commands stored as Undetermined in regular command buffer. */ Barrier, Clear, Dispatch, DispatchIndirect, Draw, DrawIndirect, FramebufferBind, PushConstant, ResourceBind, ShaderBind, StateSet, StencilSet, /** Special commands stored in separate buffers. */ SubPass, DrawMulti, }; /** * The index of the group is implicit since it is known by the one who want to * access it. This also allows to have an indexed object to split the command * stream. */ struct Header { /** Command type. */ Type type; /** Command index in command heap of this type. */ uint index; }; struct ShaderBind { GPUShader *shader; void execute(RecordingState &state) const; std::string serialize() const; }; struct FramebufferBind { GPUFrameBuffer *framebuffer; void execute() const; std::string serialize() const; }; struct ResourceBind { eGPUSamplerState sampler; int slot; bool is_reference; enum class Type : uint8_t { Sampler = 0, Image, UniformBuf, StorageBuf, } type; union { /** TODO: Use draw::Texture|StorageBuffer|UniformBuffer as resources as they will give more * debug info. */ GPUUniformBuf *uniform_buf; GPUUniformBuf **uniform_buf_ref; GPUStorageBuf *storage_buf; GPUStorageBuf **storage_buf_ref; /** NOTE: Texture is used for both Sampler and Image binds. */ GPUTexture *texture; GPUTexture **texture_ref; }; ResourceBind() = default; ResourceBind(int slot_, GPUUniformBuf *res) : slot(slot_), is_reference(false), type(Type::UniformBuf), uniform_buf(res){}; ResourceBind(int slot_, GPUUniformBuf **res) : slot(slot_), is_reference(true), type(Type::UniformBuf), uniform_buf_ref(res){}; ResourceBind(int slot_, GPUStorageBuf *res) : slot(slot_), is_reference(false), type(Type::StorageBuf), storage_buf(res){}; ResourceBind(int slot_, GPUStorageBuf **res) : slot(slot_), is_reference(true), type(Type::StorageBuf), storage_buf_ref(res){}; ResourceBind(int slot_, draw::Image *res) : slot(slot_), is_reference(false), type(Type::Image), texture(draw::as_texture(res)){}; ResourceBind(int slot_, draw::Image **res) : slot(slot_), is_reference(true), type(Type::Image), texture_ref(draw::as_texture(res)){}; ResourceBind(int slot_, GPUTexture *res, eGPUSamplerState state) : sampler(state), slot(slot_), is_reference(false), type(Type::Sampler), texture(res){}; ResourceBind(int slot_, GPUTexture **res, eGPUSamplerState state) : sampler(state), slot(slot_), is_reference(true), type(Type::Sampler), texture_ref(res){}; void execute() const; std::string serialize() const; }; struct PushConstant { int location; uint8_t array_len; uint8_t comp_len; enum class Type : uint8_t { IntValue = 0, FloatValue, IntReference, FloatReference, } type; /** * IMPORTANT: Data is at the end of the struct as it can span over the next commands. * These next commands are not real commands but just memory to hold the data and are not * referenced by any Command::Header. * This is a hack to support float4x4 copy. */ union { int int1_value; int2 int2_value; int3 int3_value; int4 int4_value; float float1_value; float2 float2_value; float3 float3_value; float4 float4_value; const int *int_ref; const int2 *int2_ref; const int3 *int3_ref; const int4 *int4_ref; const float *float_ref; const float2 *float2_ref; const float3 *float3_ref; const float4 *float4_ref; const float4x4 *float4x4_ref; }; PushConstant() = default; PushConstant(int loc, const float &val) : location(loc), array_len(1), comp_len(1), type(Type::FloatValue), float1_value(val){}; PushConstant(int loc, const float2 &val) : location(loc), array_len(1), comp_len(2), type(Type::FloatValue), float2_value(val){}; PushConstant(int loc, const float3 &val) : location(loc), array_len(1), comp_len(3), type(Type::FloatValue), float3_value(val){}; PushConstant(int loc, const float4 &val) : location(loc), array_len(1), comp_len(4), type(Type::FloatValue), float4_value(val){}; PushConstant(int loc, const int &val) : location(loc), array_len(1), comp_len(1), type(Type::IntValue), int1_value(val){}; PushConstant(int loc, const int2 &val) : location(loc), array_len(1), comp_len(2), type(Type::IntValue), int2_value(val){}; PushConstant(int loc, const int3 &val) : location(loc), array_len(1), comp_len(3), type(Type::IntValue), int3_value(val){}; PushConstant(int loc, const int4 &val) : location(loc), array_len(1), comp_len(4), type(Type::IntValue), int4_value(val){}; PushConstant(int loc, const float *val, int arr) : location(loc), array_len(arr), comp_len(1), type(Type::FloatReference), float_ref(val){}; PushConstant(int loc, const float2 *val, int arr) : location(loc), array_len(arr), comp_len(2), type(Type::FloatReference), float2_ref(val){}; PushConstant(int loc, const float3 *val, int arr) : location(loc), array_len(arr), comp_len(3), type(Type::FloatReference), float3_ref(val){}; PushConstant(int loc, const float4 *val, int arr) : location(loc), array_len(arr), comp_len(4), type(Type::FloatReference), float4_ref(val){}; PushConstant(int loc, const float4x4 *val) : location(loc), array_len(1), comp_len(16), type(Type::FloatReference), float4x4_ref(val){}; PushConstant(int loc, const int *val, int arr) : location(loc), array_len(arr), comp_len(1), type(Type::IntReference), int_ref(val){}; PushConstant(int loc, const int2 *val, int arr) : location(loc), array_len(arr), comp_len(2), type(Type::IntReference), int2_ref(val){}; PushConstant(int loc, const int3 *val, int arr) : location(loc), array_len(arr), comp_len(3), type(Type::IntReference), int3_ref(val){}; PushConstant(int loc, const int4 *val, int arr) : location(loc), array_len(arr), comp_len(4), type(Type::IntReference), int4_ref(val){}; void execute(RecordingState &state) const; std::string serialize() const; }; struct Draw { GPUBatch *batch; uint instance_len; uint vertex_len; uint vertex_first; ResourceHandle handle; void execute(RecordingState &state) const; std::string serialize() const; }; struct DrawMulti { GPUBatch *batch; DrawMultiBuf *multi_draw_buf; uint group_first; uint uuid; void execute(RecordingState &state) const; std::string serialize(std::string line_prefix) const; }; struct DrawIndirect { GPUBatch *batch; GPUStorageBuf **indirect_buf; ResourceHandle handle; void execute(RecordingState &state) const; std::string serialize() const; }; struct Dispatch { bool is_reference; union { int3 size; int3 *size_ref; }; Dispatch() = default; Dispatch(int3 group_len) : is_reference(false), size(group_len){}; Dispatch(int3 *group_len) : is_reference(true), size_ref(group_len){}; void execute(RecordingState &state) const; std::string serialize() const; }; struct DispatchIndirect { GPUStorageBuf **indirect_buf; void execute(RecordingState &state) const; std::string serialize() const; }; struct Barrier { eGPUBarrier type; void execute() const; std::string serialize() const; }; struct Clear { uint8_t clear_channels; /* #eGPUFrameBufferBits. But want to save some bits. */ uint8_t stencil; float depth; float4 color; void execute() const; std::string serialize() const; }; struct StateSet { DRWState new_state; int clip_plane_count; void execute(RecordingState &state) const; std::string serialize() const; }; struct StencilSet { uint write_mask; uint compare_mask; uint reference; void execute() const; std::string serialize() const; }; union Undetermined { ShaderBind shader_bind; ResourceBind resource_bind; PushConstant push_constant; Draw draw; DrawMulti draw_multi; DrawIndirect draw_indirect; Dispatch dispatch; DispatchIndirect dispatch_indirect; Barrier barrier; Clear clear; StateSet state_set; StencilSet stencil_set; }; /** Try to keep the command size as low as possible for performance. */ BLI_STATIC_ASSERT(sizeof(Undetermined) <= 24, "One of the command type is too large.") /** \} */ /* -------------------------------------------------------------------- */ /** \name Draw Commands * * A draw command buffer used to issue single draw commands without instance merging or any * other optimizations. * * It still uses a ResourceIdBuf to keep the same shader interface as multi draw commands. * * \{ */ class DrawCommandBuf { friend Manager; private: using ResourceIdBuf = StorageArrayBuffer; /** Array of resource id. One per instance. Generated on GPU and send to GPU. */ ResourceIdBuf resource_id_buf_; /** Used items in the resource_id_buf_. Not it's allocated length. */ uint resource_id_count_ = 0; public: void clear(){}; void append_draw(Vector &headers, Vector &commands, GPUBatch *batch, uint instance_len, uint vertex_len, uint vertex_first, ResourceHandle handle) { vertex_first = vertex_first != -1 ? vertex_first : 0; instance_len = instance_len != -1 ? instance_len : 1; int64_t index = commands.append_and_get_index({}); headers.append({Type::Draw, uint(index)}); commands[index].draw = {batch, instance_len, vertex_len, vertex_first, handle}; } void bind(RecordingState &state, Vector &headers, Vector &commands); }; /** \} */ /* -------------------------------------------------------------------- */ /** \name Multi Draw Commands * * For efficient rendering of large scene we strive to minimize the number of draw call and state * changes. To this end, we group many rendering commands and sort them per render state using * `DrawGroup` as a container. This is done automatically for any successive commands with the * same state. * * A `DrawGroup` is the combination of a `GPUBatch` (VBO state) and a `command::DrawMulti` * (Pipeline State). * * Inside each `DrawGroup` all instances of a same `GPUBatch` is merged into a single indirect * command. * * To support this arbitrary reordering, we only need to know the offset of all the commands for a * specific `DrawGroup`. This is done on CPU by doing a simple prefix sum. The result is pushed to * GPU and used on CPU to issue the right command indirect. * * Each draw command is stored in an unsorted array of `DrawPrototype` and sent directly to the * GPU. * * A command generation compute shader then go over each `DrawPrototype`. For each it adds it (or * not depending on visibility) to the correct draw command using the offset of the `DrawGroup` * computed on CPU. After that, it also outputs one resource ID for each instance inside a * `DrawPrototype`. * * \{ */ class DrawMultiBuf { friend Manager; friend DrawMulti; private: using DrawGroupBuf = StorageArrayBuffer; using DrawPrototypeBuf = StorageArrayBuffer; using DrawCommandBuf = StorageArrayBuffer; using ResourceIdBuf = StorageArrayBuffer; using DrawGroupKey = std::pair; using DrawGroupMap = Map; /** Maps a DrawMulti command and a gpu batch to their unique DrawGroup command. */ DrawGroupMap group_ids_; /** DrawGroup Command heap. Uploaded to GPU for sorting. */ DrawGroupBuf group_buf_ = {"DrawGroupBuf"}; /** Command Prototypes. Unsorted */ DrawPrototypeBuf prototype_buf_ = {"DrawPrototypeBuf"}; /** Command list generated by the sorting / compaction steps. Lives on GPU. */ DrawCommandBuf command_buf_ = {"DrawCommandBuf"}; /** Array of resource id. One per instance. Lives on GPU. */ ResourceIdBuf resource_id_buf_ = {"ResourceIdBuf"}; /** Give unique ID to each header so we can use that as hash key. */ uint header_id_counter_ = 0; /** Number of groups inside group_buf_. */ uint group_count_ = 0; /** Number of prototype command inside prototype_buf_. */ uint prototype_count_ = 0; /** Used items in the resource_id_buf_. Not it's allocated length. */ uint resource_id_count_ = 0; public: void clear() { header_id_counter_ = 0; group_count_ = 0; prototype_count_ = 0; group_ids_.clear(); } void append_draw(Vector &headers, Vector &commands, GPUBatch *batch, uint instance_len, uint vertex_len, uint vertex_first, ResourceHandle handle) { /* Custom draw-calls cannot be batched and will produce one group per draw. */ const bool custom_group = ((vertex_first != 0 && vertex_first != -1) || vertex_len != -1); instance_len = instance_len != -1 ? instance_len : 1; /* If there was some state changes since previous call, we have to create another command. */ if (headers.is_empty() || headers.last().type != Type::DrawMulti) { uint index = commands.append_and_get_index({}); headers.append({Type::DrawMulti, index}); commands[index].draw_multi = {batch, this, (uint)-1, header_id_counter_++}; } DrawMulti &cmd = commands.last().draw_multi; uint &group_id = group_ids_.lookup_or_add(DrawGroupKey(cmd.uuid, batch), uint(-1)); bool inverted = handle.has_inverted_handedness(); DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++); draw.resource_handle = handle.raw; draw.instance_len = instance_len; draw.group_id = group_id; if (group_id == uint(-1) || custom_group) { uint new_group_id = group_count_++; draw.group_id = new_group_id; DrawGroup &group = group_buf_.get_or_resize(new_group_id); group.next = cmd.group_first; group.len = instance_len; group.front_facing_len = inverted ? 0 : instance_len; group.gpu_batch = batch; group.front_proto_len = 0; group.back_proto_len = 0; group.vertex_len = vertex_len; group.vertex_first = vertex_first; /* Custom group are not to be registered in the group_ids_. */ if (!custom_group) { group_id = new_group_id; } /* For serialization only. */ (inverted ? group.back_proto_len : group.front_proto_len)++; /* Append to list. */ cmd.group_first = new_group_id; } else { DrawGroup &group = group_buf_[group_id]; group.len += instance_len; group.front_facing_len += inverted ? 0 : instance_len; /* For serialization only. */ (inverted ? group.back_proto_len : group.front_proto_len)++; } } void bind(RecordingState &state, Vector &headers, Vector &commands, VisibilityBuf &visibility_buf); }; /** \} */ }; // namespace blender::draw::command