Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClément Foucault <foucault.clem@gmail.com>2019-05-31 02:45:41 +0300
committerClément Foucault <foucault.clem@gmail.com>2019-09-13 18:32:18 +0300
commitce34a6b0d727bbde6ae373afa8ec6c42bc8980ce (patch)
treef8cc84f7e2038f2a81ac0141d79205f1df649e4e /source/blender/draw/intern/draw_manager.h
parentf7e8b580989ec70d1cf8f15a11d4f09e6b36f407 (diff)
DRW: Refactor to support draw call batching
Reviewers: brecht Differential Revision: D4997
Diffstat (limited to 'source/blender/draw/intern/draw_manager.h')
-rw-r--r--source/blender/draw/intern/draw_manager.h320
1 files changed, 243 insertions, 77 deletions
diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h
index 85f6cf05e83..b55a84b2765 100644
--- a/source/blender/draw/intern/draw_manager.h
+++ b/source/blender/draw/intern/draw_manager.h
@@ -28,8 +28,10 @@
#include "DRW_engine.h"
#include "DRW_render.h"
+#include "BLI_assert.h"
#include "BLI_linklist.h"
#include "BLI_threads.h"
+#include "BLI_memblock.h"
#include "GPU_batch.h"
#include "GPU_context.h"
@@ -43,6 +45,9 @@
/* Use draw manager to call GPU_select, see: DRW_draw_select_loop */
#define USE_GPU_SELECT
+/* Use drawcall batching using instanced rendering. */
+#define USE_BATCHING 1
+
// #define DRW_DEBUG_CULLING
#define DRW_DEBUG_USE_UNIFORM_NAME 0
#define DRW_UNIFORM_BUFFER_NAME 64
@@ -90,20 +95,6 @@
* > DRWUniform
*/
-/* Used by DRWCallState.flag */
-enum {
- DRW_CALL_NEGSCALE = (1 << 1),
-};
-
-/* Used by DRWCallState.matflag */
-enum {
- DRW_CALL_MODELINVERSE = (1 << 0),
- DRW_CALL_MODELVIEWPROJECTION = (1 << 1),
- DRW_CALL_ORCOTEXFAC = (1 << 2),
- DRW_CALL_OBJECTINFO = (1 << 3),
- DRW_CALL_OBJECTCOLOR = (1 << 4),
-};
-
typedef struct DRWCullingState {
uint32_t mask;
/* Culling: Using Bounding Sphere for now for faster culling.
@@ -113,38 +104,161 @@ typedef struct DRWCullingState {
void *user_data;
} DRWCullingState;
-typedef struct DRWCallState {
- DRWCullingState *culling;
- uchar flag;
- uchar matflag; /* Which matrices to compute. */
- short ob_index;
- /* Matrices */
+/* Minimum max UBO size is 64KiB. We take the largest
+ * UBO struct and alloc the max number.
+ * ((1 << 16) / sizeof(DRWObjectMatrix)) = 512
+ * Keep in sync with common_view_lib.glsl */
+#define DRW_RESOURCE_CHUNK_LEN 512
+
+/**
+ * Identifier used to sort similar drawcalls together.
+ * Also used to reference elements inside memory blocks.
+ *
+ * From MSB to LSB
+ * 1 bit for negative scale.
+ * 22 bits for chunk id.
+ * 9 bits for resource id inside the chunk. (can go up to 511)
+ * |-|----------------------|---------|
+ *
+ * Use manual bitshift and mask instead of bitfields to avoid
+ * compiler dependent behavior that would mess up the ordering of
+ * the members thus changing the sorting order.
+ */
+typedef uint32_t DRWResourceHandle;
+
+BLI_INLINE uint32_t DRW_handle_negative_scale_get(const DRWResourceHandle *handle)
+{
+ return (*handle & 0x80000000) != 0; /* 1 if the MSB (negative scale flag) is set, 0 otherwise. */
+}
+
+BLI_INLINE uint32_t DRW_handle_chunk_get(const DRWResourceHandle *handle)
+{
+ return (*handle & 0x7FFFFFFF) >> 9; /* Mask out the negative scale bit, then shift out the 9 id bits. */
+}
+
+BLI_INLINE uint32_t DRW_handle_id_get(const DRWResourceHandle *handle)
+{
+ return (*handle & 0x000001FF); /* Low 9 bits: resource id inside the chunk (0..511). */
+}
+
+BLI_INLINE void DRW_handle_increment(DRWResourceHandle *handle)
+{
+ *handle += 1; /* Adds 1 to the whole packed value: a carry out of the 9 id bits advances the chunk id. */
+}
+
+BLI_INLINE void DRW_handle_negative_scale_enable(DRWResourceHandle *handle)
+{
+ *handle |= 0x80000000; /* Set the MSB (negative scale flag); chunk and id bits are left untouched. */
+}
+
+BLI_INLINE void *DRW_memblock_elem_from_handle(struct BLI_memblock *memblock,
+ const DRWResourceHandle *handle)
+{
+ int elem = DRW_handle_id_get(handle); /* Index inside the chunk (low 9 bits of the handle). */
+ int chunk = DRW_handle_chunk_get(handle); /* Chunk index; the negative scale bit is masked out. */
+ return BLI_memblock_elem_get(memblock, chunk, elem); /* The actual lookup is delegated to BLI_memblock. */
+}
+
+typedef struct DRWObjectMatrix {
float model[4][4];
float modelinverse[4][4];
- float orcotexfac[2][3];
- float ob_random;
+} DRWObjectMatrix;
+
+typedef struct DRWObjectInfos {
+ float orcotexfac[2][4];
float ob_color[4];
-} DRWCallState;
+ float ob_index;
+ float pad; /* UNUSED */
+ float ob_random;
+ float ob_neg_scale;
+} DRWObjectInfos;
+
+BLI_STATIC_ASSERT_ALIGN(DRWObjectMatrix, 16)
+BLI_STATIC_ASSERT_ALIGN(DRWObjectInfos, 16)
-typedef struct DRWCall {
- struct DRWCall *next;
- DRWCallState *state;
+typedef enum {
+ /* Draw Commands */
+ DRW_CMD_DRAW = 0, /* Only sortable type. Must be 0. */
+ DRW_CMD_DRAW_RANGE = 1,
+ DRW_CMD_DRAW_INSTANCE = 2,
+ DRW_CMD_DRAW_PROCEDURAL = 3,
+ /* Other Commands */
+ DRW_CMD_CLEAR = 12,
+ DRW_CMD_DRWSTATE = 13,
+ DRW_CMD_STENCIL = 14,
+ DRW_CMD_SELECTID = 15,
+ /* Needs to fit in 4bits */
+} eDRWCommandType;
+
+#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_PROCEDURAL
+
+typedef struct DRWCommandDraw {
+ GPUBatch *batch;
+ DRWResourceHandle handle;
+} DRWCommandDraw;
+/* Assume DRWResourceHandle to be 0. */
+typedef struct DRWCommandDrawRange {
GPUBatch *batch;
uint vert_first;
uint vert_count;
+} DRWCommandDrawRange;
+
+typedef struct DRWCommandDrawInstance {
+ GPUBatch *batch;
+ DRWResourceHandle handle;
uint inst_count;
+} DRWCommandDrawInstance;
-#ifdef USE_GPU_SELECT
- /* TODO(fclem) remove once we have a dedicated selection engine. */
- int select_id;
- GPUVertBuf *inst_selectid;
-#endif
-} DRWCall;
+typedef struct DRWCommandDrawProcedural {
+ GPUBatch *batch;
+ DRWResourceHandle handle;
+ uint vert_count;
+} DRWCommandDrawProcedural;
+
+typedef struct DRWCommandSetMutableState {
+ /** State changes (or'd or and'd with the pass's state) */
+ DRWState enable;
+ DRWState disable;
+} DRWCommandSetMutableState;
+
+typedef struct DRWCommandSetStencil {
+ uint mask;
+} DRWCommandSetStencil;
+
+typedef struct DRWCommandSetSelectID {
+ GPUVertBuf *select_buf;
+ uint select_id;
+} DRWCommandSetSelectID;
+
+typedef struct DRWCommandClear {
+ eGPUFrameBufferBits clear_channels;
+ uchar r, g, b, a; /* Clear color, one uchar per channel. NOTE(review): presumably 0..255 mapped to normalized [0..1] - confirm. */
+ float depth; /* [0..1] for depth. Normalized. */
+ uchar stencil; /* Stencil value [0..255] */
+} DRWCommandClear;
+
+typedef union DRWCommand {
+ DRWCommandDraw draw;
+ DRWCommandDrawRange range;
+ DRWCommandDrawInstance instance;
+ DRWCommandDrawProcedural procedural;
+ DRWCommandSetMutableState state;
+ DRWCommandSetStencil stencil;
+ DRWCommandSetSelectID select_id;
+ DRWCommandClear clear;
+} DRWCommand;
+
+/* Used for aggregating calls into GPUVertBufs. */
+struct DRWCallBuffer {
+ GPUVertBuf *buf;
+ GPUVertBuf *buf_select;
+ int count;
+};
/* Used by DRWUniform.type */
typedef enum {
- DRW_UNIFORM_INT,
+ DRW_UNIFORM_INT = 0,
DRW_UNIFORM_INT_COPY,
DRW_UNIFORM_FLOAT,
DRW_UNIFORM_FLOAT_COPY,
@@ -153,55 +267,56 @@ typedef enum {
DRW_UNIFORM_TEXTURE_REF,
DRW_UNIFORM_BLOCK,
DRW_UNIFORM_BLOCK_PERSIST,
+ DRW_UNIFORM_TFEEDBACK_TARGET,
+ /** Per drawcall uniforms/UBO */
+ DRW_UNIFORM_BLOCK_OBMATS,
+ DRW_UNIFORM_BLOCK_OBINFOS,
+ DRW_UNIFORM_RESOURCE_CHUNK,
+ /** Legacy / Fallback */
+ DRW_UNIFORM_BASE_INSTANCE,
+ DRW_UNIFORM_MODEL_MATRIX,
+ DRW_UNIFORM_MODEL_MATRIX_INVERSE,
+ DRW_UNIFORM_MODELVIEWPROJECTION_MATRIX,
+ /* WARNING: set DRWUniform->type
+ * bit length accordingly. */
} DRWUniformType;
struct DRWUniform {
- DRWUniform *next; /* single-linked list */
union {
/* For reference or array/vector types. */
const void *pvalue;
/* Single values. */
- float fvalue[2];
- int ivalue[2];
+ float fvalue[4];
+ int ivalue[4];
};
- int name_ofs; /* name offset in name buffer. */
int location;
- char type; /* DRWUniformType */
- char length; /* cannot be more than 16 */
- char arraysize; /* cannot be more than 16 too */
+ uint32_t type : 5; /* DRWUniformType */
+ uint32_t length : 5; /* cannot be more than 16 */
+ uint32_t arraysize : 5; /* cannot be more than 16 too */
+ uint32_t name_ofs : 17; /* name offset in name buffer. */
};
struct DRWShadingGroup {
DRWShadingGroup *next;
- GPUShader *shader; /* Shader to bind */
- DRWUniform *uniforms; /* Uniforms pointers */
+ GPUShader *shader; /* Shader to bind */
+ struct DRWUniformChunk *uniforms; /* Uniforms pointers */
struct {
- DRWCall *first, *last; /* Linked list of DRWCall */
- } calls;
+ /* Chunks of draw calls. */
+ struct DRWCommandChunk *first, *last;
+ } cmd;
- /** TODO Maybe remove from here */
- struct GPUVertBuf *tfeedback_target;
-
- /** State changes for this batch only (or'd with the pass's state) */
- DRWState state_extra;
- /** State changes for this batch only (and'd with the pass's state) */
- DRWState state_extra_disable;
- /** Stencil mask to use for stencil test / write operations */
- uint stencil_mask;
-
- /* Builtin matrices locations */
- int model;
- int modelinverse;
- int modelviewprojection;
- int orcotexfac;
- int callid;
- int objectinfo;
- int objectcolor;
- uchar matflag; /* Matrices needed, same as DRWCall.flag */
-
- DRWPass *pass_parent; /* backlink to pass we're in */
+ union {
+ struct {
+ int objectinfo; /* Equal to 1 if the shader needs obinfos. */
+ DRWResourceHandle pass_handle; /* Memblock key to parent pass. */
+ };
+ struct {
+ float distance; /* Distance from camera. */
+ uint original_index; /* Original position inside the shgroup list. */
+ } z_sorting;
+ };
};
#define MAX_PASS_NAME 32
@@ -213,6 +328,7 @@ struct DRWPass {
DRWShadingGroup *last;
} shgroups;
+ DRWResourceHandle handle;
DRWState state;
char name[MAX_PASS_NAME];
};
@@ -232,6 +348,8 @@ typedef struct DRWViewUboStorage {
float viewcamtexcofac[4];
} DRWViewUboStorage;
+BLI_STATIC_ASSERT_ALIGN(DRWViewUboStorage, 16)
+
#define MAX_CULLED_VIEWS 32
struct DRWView {
@@ -253,13 +371,45 @@ struct DRWView {
void *user_data;
};
-/* TODO(fclem): Future awaits */
-#if 0
-typedef struct ModelUboStorage {
- float model[4][4];
- float modelinverse[4][4];
-} ModelUboStorage;
-#endif
+/* ------------ Data Chunks --------------- */
+/**
+ * In order to keep a cache friendly data structure,
+ * we alloc most of our little data into chunks of multiple items.
+ * Iteration, allocation and memory usage are better.
+ * We lose a bit of memory by allocating more than what we need
+ * but it's counterbalanced by not needing the linked-list pointers
+ * for each item.
+ **/
+
+typedef struct DRWUniformChunk {
+ struct DRWUniformChunk *next; /* single-linked list */
+ uint32_t uniform_len;
+ uint32_t uniform_used;
+ DRWUniform uniforms[10];
+} DRWUniformChunk;
+
+typedef struct DRWCommandChunk {
+ struct DRWCommandChunk *next;
+ uint32_t command_len;
+ uint32_t command_used;
+ /* 4 bits for each command: 6 * 64 / 4 = 96 type slots, one per entry of commands[]. */
+ uint64_t command_type[6];
+ /* -- 64 bytes aligned -- */
+ DRWCommand commands[96];
+ /* -- 64 bytes aligned -- */
+} DRWCommandChunk;
+
+typedef struct DRWCommandSmallChunk {
+ struct DRWCommandChunk *next; /* NOTE(review): typed as DRWCommandChunk, presumably so both chunk sizes can share one list - confirm. */
+ uint32_t command_len;
+ uint32_t command_used;
+ /* 4 bits for each command. */
+ /* TODO reduce size of command_type: only 6 commands are stored here, so 6 * 4 = 24 bits would be enough. */
+ uint64_t command_type[6];
+ DRWCommand commands[6];
+} DRWCommandSmallChunk;
+
+BLI_STATIC_ASSERT_ALIGN(DRWCommandChunk, 16);
/* ------------- DRAW DEBUG ------------ */
@@ -280,21 +430,31 @@ typedef struct DRWDebugSphere {
#define DST_MAX_SLOTS 64 /* Cannot be changed without modifying RST.bound_tex_slots */
#define MAX_CLIP_PLANES 6 /* GL_MAX_CLIP_PLANES is at least 6 */
#define STENCIL_UNDEFINED 256
+#define DRW_DRAWLIST_LEN 256
typedef struct DRWManager {
/* TODO clean up this struct a bit */
/* Cache generation */
ViewportMemoryPool *vmempool;
DRWInstanceDataList *idatalist;
- DRWInstanceData *object_instance_data[MAX_INSTANCE_DATA_SIZE];
- /* Default Unit model matrix state without culling. */
- DRWCallState *unit_state;
/* State of the object being evaluated if already allocated. */
- DRWCallState *ob_state;
+ DRWResourceHandle ob_handle;
+ /** True if current DST.ob_handle has its matching DRWObjectInfos init. */
+ bool ob_state_obinfo_init;
+ /** Handle of current object resource in object resource arrays (DRWObjectMatrix/DRWObjectInfos). */
+ DRWResourceHandle resource_handle;
+ /** Handle of next DRWPass to be allocated. */
+ DRWResourceHandle pass_handle;
+
+ /** Dupli state. NULL if not dupli. */
struct DupliObject *dupli_source;
struct Object *dupli_parent;
struct Object *dupli_origin;
+ /** Ghash containing original objects. */
struct GHash *dupli_ghash;
- void **dupli_datas; /* Array of dupli_data (one for each enabled engine) to handle duplis. */
+ /** TODO(fclem) try to remove usage of this. */
+ DRWInstanceData *object_instance_data[MAX_INSTANCE_DATA_SIZE];
+ /* Array of dupli_data (one for each enabled engine) to handle duplis. */
+ void **dupli_datas;
/* Rendering state */
GPUShader *shader;
@@ -357,6 +517,8 @@ typedef struct DRWManager {
/** Mutex to lock the drw manager and avoid concurrent context usage. */
TicketMutex *gl_context_mutex;
+ GPUDrawList *draw_list;
+
/** GPU Resource State: Memory storage between drawing. */
struct {
/* High end GPUs supports up to 32 binds per shader stage.
@@ -397,9 +559,13 @@ void drw_state_set(DRWState state);
void drw_debug_draw(void);
void drw_debug_init(void);
+eDRWCommandType command_type_get(uint64_t *command_type_bits, int index);
+
void drw_batch_cache_validate(Object *ob);
void drw_batch_cache_generate_requested(struct Object *ob);
+void drw_resource_buffer_finish(ViewportMemoryPool *vmempool);
+
/* Procedural Drawing */
GPUBatch *drw_cache_procedural_points_get(void);
GPUBatch *drw_cache_procedural_lines_get(void);