From fd5c1972cd5c8a826c0d40effb0e2d367389666a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Fri, 13 Sep 2019 23:02:45 +0200 Subject: Revert "DRW: Refactor to support draw call batching" This reverts commit ce34a6b0d727bbde6ae373afa8ec6c42bc8980ce. --- source/blender/draw/intern/draw_manager.h | 320 +++++++----------------------- 1 file changed, 77 insertions(+), 243 deletions(-) (limited to 'source/blender/draw/intern/draw_manager.h') diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h index b55a84b2765..85f6cf05e83 100644 --- a/source/blender/draw/intern/draw_manager.h +++ b/source/blender/draw/intern/draw_manager.h @@ -28,10 +28,8 @@ #include "DRW_engine.h" #include "DRW_render.h" -#include "BLI_assert.h" #include "BLI_linklist.h" #include "BLI_threads.h" -#include "BLI_memblock.h" #include "GPU_batch.h" #include "GPU_context.h" @@ -45,9 +43,6 @@ /* Use draw manager to call GPU_select, see: DRW_draw_select_loop */ #define USE_GPU_SELECT -/* Use drawcall batching using instanced rendering. */ -#define USE_BATCHING 1 - // #define DRW_DEBUG_CULLING #define DRW_DEBUG_USE_UNIFORM_NAME 0 #define DRW_UNIFORM_BUFFER_NAME 64 @@ -95,6 +90,20 @@ * > DRWUniform */ +/* Used by DRWCallState.flag */ +enum { + DRW_CALL_NEGSCALE = (1 << 1), +}; + +/* Used by DRWCallState.matflag */ +enum { + DRW_CALL_MODELINVERSE = (1 << 0), + DRW_CALL_MODELVIEWPROJECTION = (1 << 1), + DRW_CALL_ORCOTEXFAC = (1 << 2), + DRW_CALL_OBJECTINFO = (1 << 3), + DRW_CALL_OBJECTCOLOR = (1 << 4), +}; + typedef struct DRWCullingState { uint32_t mask; /* Culling: Using Bounding Sphere for now for faster culling. @@ -104,161 +113,38 @@ typedef struct DRWCullingState { void *user_data; } DRWCullingState; -/* Minimum max UBO size is 64KiB. We take the largest - * UBO struct and alloc the max number. - * ((1 << 16) / sizeof(DRWObjectMatrix)) = 512 - * Keep in sync with common_view_lib.glsl */ -#define DRW_RESOURCE_CHUNK_LEN 512 - -/** - * Identifier used to sort similar drawcalls together. - * Also used to reference elements inside memory blocks. - * - * From MSB to LSB - * 1 bit for negative scale. - * 22 bits for chunk id. - * 9 bits for resource id inside the chunk. (can go up to 511) - * |-|----------------------|---------| - * - * Use manual bitsift and mask instead of bitfields to avoid - * compiler dependant behavior that would mess the ordering of - * the members thus changing the sorting order. - */ -typedef uint32_t DRWResourceHandle; - -BLI_INLINE uint32_t DRW_handle_negative_scale_get(const DRWResourceHandle *handle) -{ - return (*handle & 0x80000000) != 0; -} - -BLI_INLINE uint32_t DRW_handle_chunk_get(const DRWResourceHandle *handle) -{ - return (*handle & 0x7FFFFFFF) >> 9; -} - -BLI_INLINE uint32_t DRW_handle_id_get(const DRWResourceHandle *handle) -{ - return (*handle & 0x000001FF); -} - -BLI_INLINE void DRW_handle_increment(DRWResourceHandle *handle) -{ - *handle += 1; -} - -BLI_INLINE void DRW_handle_negative_scale_enable(DRWResourceHandle *handle) -{ - *handle |= 0x80000000; -} - -BLI_INLINE void *DRW_memblock_elem_from_handle(struct BLI_memblock *memblock, - const DRWResourceHandle *handle) -{ - int elem = DRW_handle_id_get(handle); - int chunk = DRW_handle_chunk_get(handle); - return BLI_memblock_elem_get(memblock, chunk, elem); -} - -typedef struct DRWObjectMatrix { +typedef struct DRWCallState { + DRWCullingState *culling; + uchar flag; + uchar matflag; /* Which matrices to compute. */ + short ob_index; + /* Matrices */ float model[4][4]; float modelinverse[4][4]; -} DRWObjectMatrix; - -typedef struct DRWObjectInfos { - float orcotexfac[2][4]; - float ob_color[4]; - float ob_index; - float pad; /* UNUSED*/ + float orcotexfac[2][3]; float ob_random; - float ob_neg_scale; -} DRWObjectInfos; - -BLI_STATIC_ASSERT_ALIGN(DRWObjectMatrix, 16) -BLI_STATIC_ASSERT_ALIGN(DRWObjectInfos, 16) + float ob_color[4]; +} DRWCallState; -typedef enum { - /* Draw Commands */ - DRW_CMD_DRAW = 0, /* Only sortable type. Must be 0. */ - DRW_CMD_DRAW_RANGE = 1, - DRW_CMD_DRAW_INSTANCE = 2, - DRW_CMD_DRAW_PROCEDURAL = 3, - /* Other Commands */ - DRW_CMD_CLEAR = 12, - DRW_CMD_DRWSTATE = 13, - DRW_CMD_STENCIL = 14, - DRW_CMD_SELECTID = 15, - /* Needs to fit in 4bits */ -} eDRWCommandType; - -#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_PROCEDURAL - -typedef struct DRWCommandDraw { - GPUBatch *batch; - DRWResourceHandle handle; -} DRWCommandDraw; +typedef struct DRWCall { + struct DRWCall *next; + DRWCallState *state; -/* Assume DRWResourceHandle to be 0. */ -typedef struct DRWCommandDrawRange { GPUBatch *batch; uint vert_first; uint vert_count; -} DRWCommandDrawRange; - -typedef struct DRWCommandDrawInstance { - GPUBatch *batch; - DRWResourceHandle handle; uint inst_count; -} DRWCommandDrawInstance; - -typedef struct DRWCommandDrawProcedural { - GPUBatch *batch; - DRWResourceHandle handle; - uint vert_count; -} DRWCommandDrawProcedural; -typedef struct DRWCommandSetMutableState { - /** State changes (or'd or and'd with the pass's state) */ - DRWState enable; - DRWState disable; -} DRWCommandSetMutableState; - -typedef struct DRWCommandSetStencil { - uint mask; -} DRWCommandSetStencil; - -typedef struct DRWCommandSetSelectID { - GPUVertBuf *select_buf; - uint select_id; -} DRWCommandSetSelectID; - -typedef struct DRWCommandClear { - eGPUFrameBufferBits clear_channels; - uchar r, g, b, a; /* [0..1] for each channels. Normalized. */ - float depth; /* [0..1] for depth. Normalized. */ - uchar stencil; /* Stencil value [0..255] */ -} DRWCommandClear; - -typedef union DRWCommand { - DRWCommandDraw draw; - DRWCommandDrawRange range; - DRWCommandDrawInstance instance; - DRWCommandDrawProcedural procedural; - DRWCommandSetMutableState state; - DRWCommandSetStencil stencil; - DRWCommandSetSelectID select_id; - DRWCommandClear clear; -} DRWCommand; - -/* Used for agregating calls into GPUVertBufs. */ -struct DRWCallBuffer { - GPUVertBuf *buf; - GPUVertBuf *buf_select; - int count; -}; +#ifdef USE_GPU_SELECT + /* TODO(fclem) remove once we have a dedicated selection engine. */ + int select_id; + GPUVertBuf *inst_selectid; +#endif +} DRWCall; /* Used by DRWUniform.type */ typedef enum { - DRW_UNIFORM_INT = 0, + DRW_UNIFORM_INT, DRW_UNIFORM_INT_COPY, DRW_UNIFORM_FLOAT, DRW_UNIFORM_FLOAT_COPY, @@ -267,56 +153,55 @@ typedef enum { DRW_UNIFORM_TEXTURE_REF, DRW_UNIFORM_BLOCK, DRW_UNIFORM_BLOCK_PERSIST, - DRW_UNIFORM_TFEEDBACK_TARGET, - /** Per drawcall uniforms/UBO */ - DRW_UNIFORM_BLOCK_OBMATS, - DRW_UNIFORM_BLOCK_OBINFOS, - DRW_UNIFORM_RESOURCE_CHUNK, - /** Legacy / Fallback */ - DRW_UNIFORM_BASE_INSTANCE, - DRW_UNIFORM_MODEL_MATRIX, - DRW_UNIFORM_MODEL_MATRIX_INVERSE, - DRW_UNIFORM_MODELVIEWPROJECTION_MATRIX, - /* WARNING: set DRWUniform->type - * bit length accordingly. */ } DRWUniformType; struct DRWUniform { + DRWUniform *next; /* single-linked list */ union { /* For reference or array/vector types. */ const void *pvalue; /* Single values. */ - float fvalue[4]; - int ivalue[4]; + float fvalue[2]; + int ivalue[2]; }; + int name_ofs; /* name offset in name buffer. */ int location; - uint32_t type : 5; /* DRWUniformType */ - uint32_t length : 5; /* cannot be more than 16 */ - uint32_t arraysize : 5; /* cannot be more than 16 too */ - uint32_t name_ofs : 17; /* name offset in name buffer. */ + char type; /* DRWUniformType */ + char length; /* cannot be more than 16 */ + char arraysize; /* cannot be more than 16 too */ }; struct DRWShadingGroup { DRWShadingGroup *next; - GPUShader *shader; /* Shader to bind */ - struct DRWUniformChunk *uniforms; /* Uniforms pointers */ + GPUShader *shader; /* Shader to bind */ + DRWUniform *uniforms; /* Uniforms pointers */ struct { - /* Chunks of draw calls. */ - struct DRWCommandChunk *first, *last; - } cmd; + DRWCall *first, *last; /* Linked list of DRWCall */ + } calls; - union { - struct { - int objectinfo; /* Equal to 1 if the shader needs obinfos. */ - DRWResourceHandle pass_handle; /* Memblock key to parent pass. */ - }; - struct { - float distance; /* Distance from camera. */ - uint original_index; /* Original position inside the shgroup list. */ - } z_sorting; - }; + /** TODO Maybe remove from here */ + struct GPUVertBuf *tfeedback_target; + + /** State changes for this batch only (or'd with the pass's state) */ + DRWState state_extra; + /** State changes for this batch only (and'd with the pass's state) */ + DRWState state_extra_disable; + /** Stencil mask to use for stencil test / write operations */ + uint stencil_mask; + + /* Builtin matrices locations */ + int model; + int modelinverse; + int modelviewprojection; + int orcotexfac; + int callid; + int objectinfo; + int objectcolor; + uchar matflag; /* Matrices needed, same as DRWCall.flag */ + + DRWPass *pass_parent; /* backlink to pass we're in */ }; #define MAX_PASS_NAME 32 @@ -328,7 +213,6 @@ struct DRWPass { DRWShadingGroup *last; } shgroups; - DRWResourceHandle handle; DRWState state; char name[MAX_PASS_NAME]; }; @@ -348,8 +232,6 @@ typedef struct DRWViewUboStorage { float viewcamtexcofac[4]; } DRWViewUboStorage; -BLI_STATIC_ASSERT_ALIGN(DRWViewUboStorage, 16) - #define MAX_CULLED_VIEWS 32 struct DRWView { @@ -371,45 +253,13 @@ struct DRWView { void *user_data; }; -/* ------------ Data Chunks --------------- */ -/** - * In order to keep a cache friendly data structure, - * we alloc most of our little data into chunks of multiple item. - * Iteration, allocation and memory usage are better. - * We loose a bit of memory by allocating more than what we need - * but it's counterbalanced by not needing the linked-list pointers - * for each item. - **/ - -typedef struct DRWUniformChunk { - struct DRWUniformChunk *next; /* single-linked list */ - uint32_t uniform_len; - uint32_t uniform_used; - DRWUniform uniforms[10]; -} DRWUniformChunk; - -typedef struct DRWCommandChunk { - struct DRWCommandChunk *next; - uint32_t command_len; - uint32_t command_used; - /* 4bits for each command. */ - uint64_t command_type[6]; - /* -- 64 bytes aligned -- */ - DRWCommand commands[96]; - /* -- 64 bytes aligned -- */ -} DRWCommandChunk; - -typedef struct DRWCommandSmallChunk { - struct DRWCommandChunk *next; - uint32_t command_len; - uint32_t command_used; - /* 4bits for each command. */ - /* TODO reduce size of command_type. */ - uint64_t command_type[6]; - DRWCommand commands[6]; -} DRWCommandSmallChunk; - -BLI_STATIC_ASSERT_ALIGN(DRWCommandChunk, 16); +/* TODO(fclem): Future awaits */ +#if 0 +typedef struct ModelUboStorage { + float model[4][4]; + float modelinverse[4][4]; +} ModelUboStorage; +#endif /* ------------- DRAW DEBUG ------------ */ @@ -430,31 +280,21 @@ typedef struct DRWDebugSphere { #define DST_MAX_SLOTS 64 /* Cannot be changed without modifying RST.bound_tex_slots */ #define MAX_CLIP_PLANES 6 /* GL_MAX_CLIP_PLANES is at least 6 */ #define STENCIL_UNDEFINED 256 -#define DRW_DRAWLIST_LEN 256 typedef struct DRWManager { /* TODO clean up this struct a bit */ /* Cache generation */ ViewportMemoryPool *vmempool; DRWInstanceDataList *idatalist; + DRWInstanceData *object_instance_data[MAX_INSTANCE_DATA_SIZE]; + /* Default Unit model matrix state without culling. */ + DRWCallState *unit_state; /* State of the object being evaluated if already allocated. */ - DRWResourceHandle ob_handle; - /** True if current DST.ob_state has its matching DRWObjectInfos init. */ - bool ob_state_obinfo_init; - /** Handle of current object resource in object resource arrays (DRWObjectMatrices/Infos). */ - DRWResourceHandle resource_handle; - /** Handle of next DRWPass to be allocated. */ - DRWResourceHandle pass_handle; - - /** Dupli state. NULL if not dupli. */ + DRWCallState *ob_state; struct DupliObject *dupli_source; struct Object *dupli_parent; struct Object *dupli_origin; - /** Ghash containing original objects. */ struct GHash *dupli_ghash; - /** TODO(fclem) try to remove usage of this. */ - DRWInstanceData *object_instance_data[MAX_INSTANCE_DATA_SIZE]; - /* Array of dupli_data (one for each enabled engine) to handle duplis. */ - void **dupli_datas; + void **dupli_datas; /* Array of dupli_data (one for each enabled engine) to handle duplis. */ /* Rendering state */ GPUShader *shader; @@ -517,8 +357,6 @@ typedef struct DRWManager { /** Mutex to lock the drw manager and avoid concurrent context usage. */ TicketMutex *gl_context_mutex; - GPUDrawList *draw_list; - /** GPU Resource State: Memory storage between drawing. */ struct { /* High end GPUs supports up to 32 binds per shader stage. @@ -559,13 +397,9 @@ void drw_state_set(DRWState state); void drw_debug_draw(void); void drw_debug_init(void); -eDRWCommandType command_type_get(uint64_t *command_type_bits, int index); - void drw_batch_cache_validate(Object *ob); void drw_batch_cache_generate_requested(struct Object *ob); -void drw_resource_buffer_finish(ViewportMemoryPool *vmempool); - /* Procedural Drawing */ GPUBatch *drw_cache_procedural_points_get(void); GPUBatch *drw_cache_procedural_lines_get(void); -- cgit v1.2.3