diff options
author | Clément Foucault <foucault.clem@gmail.com> | 2019-05-31 02:45:41 +0300 |
---|---|---|
committer | Clément Foucault <foucault.clem@gmail.com> | 2019-09-13 18:32:18 +0300 |
commit | ce34a6b0d727bbde6ae373afa8ec6c42bc8980ce (patch) | |
tree | f8cc84f7e2038f2a81ac0141d79205f1df649e4e /source/blender/draw/intern/draw_manager.h | |
parent | f7e8b580989ec70d1cf8f15a11d4f09e6b36f407 (diff) |
DRW: Refactor to support draw call batching
Reviewers: brecht
Differential Revision: D4997
Diffstat (limited to 'source/blender/draw/intern/draw_manager.h')
-rw-r--r-- | source/blender/draw/intern/draw_manager.h | 320 |
1 files changed, 243 insertions, 77 deletions
diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h index 85f6cf05e83..b55a84b2765 100644 --- a/source/blender/draw/intern/draw_manager.h +++ b/source/blender/draw/intern/draw_manager.h @@ -28,8 +28,10 @@ #include "DRW_engine.h" #include "DRW_render.h" +#include "BLI_assert.h" #include "BLI_linklist.h" #include "BLI_threads.h" +#include "BLI_memblock.h" #include "GPU_batch.h" #include "GPU_context.h" @@ -43,6 +45,9 @@ /* Use draw manager to call GPU_select, see: DRW_draw_select_loop */ #define USE_GPU_SELECT +/* Use drawcall batching using instanced rendering. */ +#define USE_BATCHING 1 + // #define DRW_DEBUG_CULLING #define DRW_DEBUG_USE_UNIFORM_NAME 0 #define DRW_UNIFORM_BUFFER_NAME 64 @@ -90,20 +95,6 @@ * > DRWUniform */ -/* Used by DRWCallState.flag */ -enum { - DRW_CALL_NEGSCALE = (1 << 1), -}; - -/* Used by DRWCallState.matflag */ -enum { - DRW_CALL_MODELINVERSE = (1 << 0), - DRW_CALL_MODELVIEWPROJECTION = (1 << 1), - DRW_CALL_ORCOTEXFAC = (1 << 2), - DRW_CALL_OBJECTINFO = (1 << 3), - DRW_CALL_OBJECTCOLOR = (1 << 4), -}; - typedef struct DRWCullingState { uint32_t mask; /* Culling: Using Bounding Sphere for now for faster culling. @@ -113,38 +104,161 @@ typedef struct DRWCullingState { void *user_data; } DRWCullingState; -typedef struct DRWCallState { - DRWCullingState *culling; - uchar flag; - uchar matflag; /* Which matrices to compute. */ - short ob_index; - /* Matrices */ +/* Minimum max UBO size is 64KiB. We take the largest + * UBO struct and alloc the max number. + * ((1 << 16) / sizeof(DRWObjectMatrix)) = 512 + * Keep in sync with common_view_lib.glsl */ +#define DRW_RESOURCE_CHUNK_LEN 512 + +/** + * Identifier used to sort similar drawcalls together. + * Also used to reference elements inside memory blocks. + * + * From MSB to LSB + * 1 bit for negative scale. + * 22 bits for chunk id. + * 9 bits for resource id inside the chunk. (can go up to 511) + * |-|----------------------|---------| + * + * Use manual bitsift and mask instead of bitfields to avoid + * compiler dependant behavior that would mess the ordering of + * the members thus changing the sorting order. + */ +typedef uint32_t DRWResourceHandle; + +BLI_INLINE uint32_t DRW_handle_negative_scale_get(const DRWResourceHandle *handle) +{ + return (*handle & 0x80000000) != 0; +} + +BLI_INLINE uint32_t DRW_handle_chunk_get(const DRWResourceHandle *handle) +{ + return (*handle & 0x7FFFFFFF) >> 9; +} + +BLI_INLINE uint32_t DRW_handle_id_get(const DRWResourceHandle *handle) +{ + return (*handle & 0x000001FF); +} + +BLI_INLINE void DRW_handle_increment(DRWResourceHandle *handle) +{ + *handle += 1; +} + +BLI_INLINE void DRW_handle_negative_scale_enable(DRWResourceHandle *handle) +{ + *handle |= 0x80000000; +} + +BLI_INLINE void *DRW_memblock_elem_from_handle(struct BLI_memblock *memblock, + const DRWResourceHandle *handle) +{ + int elem = DRW_handle_id_get(handle); + int chunk = DRW_handle_chunk_get(handle); + return BLI_memblock_elem_get(memblock, chunk, elem); +} + +typedef struct DRWObjectMatrix { float model[4][4]; float modelinverse[4][4]; - float orcotexfac[2][3]; - float ob_random; +} DRWObjectMatrix; + +typedef struct DRWObjectInfos { + float orcotexfac[2][4]; float ob_color[4]; -} DRWCallState; + float ob_index; + float pad; /* UNUSED*/ + float ob_random; + float ob_neg_scale; +} DRWObjectInfos; + +BLI_STATIC_ASSERT_ALIGN(DRWObjectMatrix, 16) +BLI_STATIC_ASSERT_ALIGN(DRWObjectInfos, 16) -typedef struct DRWCall { - struct DRWCall *next; - DRWCallState *state; +typedef enum { + /* Draw Commands */ + DRW_CMD_DRAW = 0, /* Only sortable type. Must be 0. */ + DRW_CMD_DRAW_RANGE = 1, + DRW_CMD_DRAW_INSTANCE = 2, + DRW_CMD_DRAW_PROCEDURAL = 3, + /* Other Commands */ + DRW_CMD_CLEAR = 12, + DRW_CMD_DRWSTATE = 13, + DRW_CMD_STENCIL = 14, + DRW_CMD_SELECTID = 15, + /* Needs to fit in 4bits */ +} eDRWCommandType; + +#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_PROCEDURAL + +typedef struct DRWCommandDraw { + GPUBatch *batch; + DRWResourceHandle handle; +} DRWCommandDraw; +/* Assume DRWResourceHandle to be 0. */ +typedef struct DRWCommandDrawRange { GPUBatch *batch; uint vert_first; uint vert_count; +} DRWCommandDrawRange; + +typedef struct DRWCommandDrawInstance { + GPUBatch *batch; + DRWResourceHandle handle; uint inst_count; +} DRWCommandDrawInstance; -#ifdef USE_GPU_SELECT - /* TODO(fclem) remove once we have a dedicated selection engine. */ - int select_id; - GPUVertBuf *inst_selectid; -#endif -} DRWCall; +typedef struct DRWCommandDrawProcedural { + GPUBatch *batch; + DRWResourceHandle handle; + uint vert_count; +} DRWCommandDrawProcedural; + +typedef struct DRWCommandSetMutableState { + /** State changes (or'd or and'd with the pass's state) */ + DRWState enable; + DRWState disable; +} DRWCommandSetMutableState; + +typedef struct DRWCommandSetStencil { + uint mask; +} DRWCommandSetStencil; + +typedef struct DRWCommandSetSelectID { + GPUVertBuf *select_buf; + uint select_id; +} DRWCommandSetSelectID; + +typedef struct DRWCommandClear { + eGPUFrameBufferBits clear_channels; + uchar r, g, b, a; /* [0..1] for each channels. Normalized. */ + float depth; /* [0..1] for depth. Normalized. */ + uchar stencil; /* Stencil value [0..255] */ +} DRWCommandClear; + +typedef union DRWCommand { + DRWCommandDraw draw; + DRWCommandDrawRange range; + DRWCommandDrawInstance instance; + DRWCommandDrawProcedural procedural; + DRWCommandSetMutableState state; + DRWCommandSetStencil stencil; + DRWCommandSetSelectID select_id; + DRWCommandClear clear; +} DRWCommand; + +/* Used for agregating calls into GPUVertBufs. */ +struct DRWCallBuffer { + GPUVertBuf *buf; + GPUVertBuf *buf_select; + int count; +}; /* Used by DRWUniform.type */ typedef enum { - DRW_UNIFORM_INT, + DRW_UNIFORM_INT = 0, DRW_UNIFORM_INT_COPY, DRW_UNIFORM_FLOAT, DRW_UNIFORM_FLOAT_COPY, @@ -153,55 +267,56 @@ typedef enum { DRW_UNIFORM_TEXTURE_REF, DRW_UNIFORM_BLOCK, DRW_UNIFORM_BLOCK_PERSIST, + DRW_UNIFORM_TFEEDBACK_TARGET, + /** Per drawcall uniforms/UBO */ + DRW_UNIFORM_BLOCK_OBMATS, + DRW_UNIFORM_BLOCK_OBINFOS, + DRW_UNIFORM_RESOURCE_CHUNK, + /** Legacy / Fallback */ + DRW_UNIFORM_BASE_INSTANCE, + DRW_UNIFORM_MODEL_MATRIX, + DRW_UNIFORM_MODEL_MATRIX_INVERSE, + DRW_UNIFORM_MODELVIEWPROJECTION_MATRIX, + /* WARNING: set DRWUniform->type + * bit length accordingly. */ } DRWUniformType; struct DRWUniform { - DRWUniform *next; /* single-linked list */ union { /* For reference or array/vector types. */ const void *pvalue; /* Single values. */ - float fvalue[2]; - int ivalue[2]; + float fvalue[4]; + int ivalue[4]; }; - int name_ofs; /* name offset in name buffer. */ int location; - char type; /* DRWUniformType */ - char length; /* cannot be more than 16 */ - char arraysize; /* cannot be more than 16 too */ + uint32_t type : 5; /* DRWUniformType */ + uint32_t length : 5; /* cannot be more than 16 */ + uint32_t arraysize : 5; /* cannot be more than 16 too */ + uint32_t name_ofs : 17; /* name offset in name buffer. */ }; struct DRWShadingGroup { DRWShadingGroup *next; - GPUShader *shader; /* Shader to bind */ - DRWUniform *uniforms; /* Uniforms pointers */ + GPUShader *shader; /* Shader to bind */ + struct DRWUniformChunk *uniforms; /* Uniforms pointers */ struct { - DRWCall *first, *last; /* Linked list of DRWCall */ - } calls; + /* Chunks of draw calls. */ + struct DRWCommandChunk *first, *last; + } cmd; - /** TODO Maybe remove from here */ - struct GPUVertBuf *tfeedback_target; - - /** State changes for this batch only (or'd with the pass's state) */ - DRWState state_extra; - /** State changes for this batch only (and'd with the pass's state) */ - DRWState state_extra_disable; - /** Stencil mask to use for stencil test / write operations */ - uint stencil_mask; - - /* Builtin matrices locations */ - int model; - int modelinverse; - int modelviewprojection; - int orcotexfac; - int callid; - int objectinfo; - int objectcolor; - uchar matflag; /* Matrices needed, same as DRWCall.flag */ - - DRWPass *pass_parent; /* backlink to pass we're in */ + union { + struct { + int objectinfo; /* Equal to 1 if the shader needs obinfos. */ + DRWResourceHandle pass_handle; /* Memblock key to parent pass. */ + }; + struct { + float distance; /* Distance from camera. */ + uint original_index; /* Original position inside the shgroup list. */ + } z_sorting; + }; }; #define MAX_PASS_NAME 32 @@ -213,6 +328,7 @@ struct DRWPass { DRWShadingGroup *last; } shgroups; + DRWResourceHandle handle; DRWState state; char name[MAX_PASS_NAME]; }; @@ -232,6 +348,8 @@ typedef struct DRWViewUboStorage { float viewcamtexcofac[4]; } DRWViewUboStorage; +BLI_STATIC_ASSERT_ALIGN(DRWViewUboStorage, 16) + #define MAX_CULLED_VIEWS 32 struct DRWView { @@ -253,13 +371,45 @@ struct DRWView { void *user_data; }; -/* TODO(fclem): Future awaits */ -#if 0 -typedef struct ModelUboStorage { - float model[4][4]; - float modelinverse[4][4]; -} ModelUboStorage; -#endif +/* ------------ Data Chunks --------------- */ +/** + * In order to keep a cache friendly data structure, + * we alloc most of our little data into chunks of multiple item. + * Iteration, allocation and memory usage are better. + * We loose a bit of memory by allocating more than what we need + * but it's counterbalanced by not needing the linked-list pointers + * for each item. + **/ + +typedef struct DRWUniformChunk { + struct DRWUniformChunk *next; /* single-linked list */ + uint32_t uniform_len; + uint32_t uniform_used; + DRWUniform uniforms[10]; +} DRWUniformChunk; + +typedef struct DRWCommandChunk { + struct DRWCommandChunk *next; + uint32_t command_len; + uint32_t command_used; + /* 4bits for each command. */ + uint64_t command_type[6]; + /* -- 64 bytes aligned -- */ + DRWCommand commands[96]; + /* -- 64 bytes aligned -- */ +} DRWCommandChunk; + +typedef struct DRWCommandSmallChunk { + struct DRWCommandChunk *next; + uint32_t command_len; + uint32_t command_used; + /* 4bits for each command. */ + /* TODO reduce size of command_type. */ + uint64_t command_type[6]; + DRWCommand commands[6]; +} DRWCommandSmallChunk; + +BLI_STATIC_ASSERT_ALIGN(DRWCommandChunk, 16); /* ------------- DRAW DEBUG ------------ */ @@ -280,21 +430,31 @@ typedef struct DRWDebugSphere { #define DST_MAX_SLOTS 64 /* Cannot be changed without modifying RST.bound_tex_slots */ #define MAX_CLIP_PLANES 6 /* GL_MAX_CLIP_PLANES is at least 6 */ #define STENCIL_UNDEFINED 256 +#define DRW_DRAWLIST_LEN 256 typedef struct DRWManager { /* TODO clean up this struct a bit */ /* Cache generation */ ViewportMemoryPool *vmempool; DRWInstanceDataList *idatalist; - DRWInstanceData *object_instance_data[MAX_INSTANCE_DATA_SIZE]; - /* Default Unit model matrix state without culling. */ - DRWCallState *unit_state; /* State of the object being evaluated if already allocated. */ - DRWCallState *ob_state; + DRWResourceHandle ob_handle; + /** True if current DST.ob_state has its matching DRWObjectInfos init. */ + bool ob_state_obinfo_init; + /** Handle of current object resource in object resource arrays (DRWObjectMatrices/Infos). */ + DRWResourceHandle resource_handle; + /** Handle of next DRWPass to be allocated. */ + DRWResourceHandle pass_handle; + + /** Dupli state. NULL if not dupli. */ struct DupliObject *dupli_source; struct Object *dupli_parent; struct Object *dupli_origin; + /** Ghash containing original objects. */ struct GHash *dupli_ghash; - void **dupli_datas; /* Array of dupli_data (one for each enabled engine) to handle duplis. */ + /** TODO(fclem) try to remove usage of this. */ + DRWInstanceData *object_instance_data[MAX_INSTANCE_DATA_SIZE]; + /* Array of dupli_data (one for each enabled engine) to handle duplis. */ + void **dupli_datas; /* Rendering state */ GPUShader *shader; @@ -357,6 +517,8 @@ typedef struct DRWManager { /** Mutex to lock the drw manager and avoid concurrent context usage. */ TicketMutex *gl_context_mutex; + GPUDrawList *draw_list; + /** GPU Resource State: Memory storage between drawing. */ struct { /* High end GPUs supports up to 32 binds per shader stage. @@ -397,9 +559,13 @@ void drw_state_set(DRWState state); void drw_debug_draw(void); void drw_debug_init(void); +eDRWCommandType command_type_get(uint64_t *command_type_bits, int index); + void drw_batch_cache_validate(Object *ob); void drw_batch_cache_generate_requested(struct Object *ob); +void drw_resource_buffer_finish(ViewportMemoryPool *vmempool); + /* Procedural Drawing */ GPUBatch *drw_cache_procedural_points_get(void); GPUBatch *drw_cache_procedural_lines_get(void); |