Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClément Foucault <foucault.clem@gmail.com>2019-06-18 23:02:52 +0300
committerClément Foucault <foucault.clem@gmail.com>2019-08-17 15:48:48 +0300
commit15a4171e2dd75059222637ccba44db59733c735d (patch)
tree6da76dc2ff856f1a03eb155896d9b7dbab2687c1
parent0e6d17edfeb9ede447b5bbe9e84fbb4c48175ac9 (diff)
DRW: Add draw call sorting
This makes rendering lots of similar objects much faster (with lower CPU overhead): 29 fps -> 38 fps (34 ms -> 26 ms) in my test case with 30K instances of 4 different meshes.
-rw-r--r--source/blender/draw/intern/draw_manager.h7
-rw-r--r--source/blender/draw/intern/draw_manager_data.c40
2 files changed, 45 insertions, 2 deletions
diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h
index 9ccc6832127..54e35c7e5c4 100644
--- a/source/blender/draw/intern/draw_manager.h
+++ b/source/blender/draw/intern/draw_manager.h
@@ -313,7 +313,7 @@ typedef struct DRWCallChunk {
struct DRWCallChunk *next; /* single-linked list */
uchar chunk_len;
uchar call_used;
- DRWCall calls[63];
+ DRWCall calls[126];
} DRWCallChunk;
typedef struct DRWCallSmallChunk {
@@ -322,9 +322,12 @@ typedef struct DRWCallSmallChunk {
uchar call_used;
/* Small chunk to avoid wasting too much memory
* on small shading groups. */
- DRWCall calls[5];
+ DRWCall calls[4];
} DRWCallSmallChunk;
+BLI_STATIC_ASSERT_ALIGN(DRWCallChunk, 16);
+BLI_STATIC_ASSERT_ALIGN(DRWCallSmallChunk, 16);
+
/* ------------- DRAW DEBUG ------------ */
typedef struct DRWDebugLine {
diff --git a/source/blender/draw/intern/draw_manager_data.c b/source/blender/draw/intern/draw_manager_data.c
index 34c67f5c727..35d33917593 100644
--- a/source/blender/draw/intern/draw_manager_data.c
+++ b/source/blender/draw/intern/draw_manager_data.c
@@ -34,6 +34,7 @@
#include "DNA_mesh_types.h"
#include "DNA_meta_types.h"
+#include "BLI_alloca.h"
#include "BLI_hash.h"
#include "BLI_link_utils.h"
#include "BLI_mempool.h"
@@ -51,6 +52,35 @@
/** \name Uniform Buffer Object (DRW_uniformbuffer)
* \{ */
+/**
+ * Group the draw calls of one chunk by batch, so that calls sharing a batch become contiguous.
+ *
+ * Implemented as a stable counting sort keyed on a hash of the batch pointer:
+ * calls that land in the same bucket keep their relative order.
+ * Hash collisions are tolerated, they only reduce how well the calls are grouped.
+ *
+ * \param array: Calls to sort, reordered in place.
+ * \param array_tmp: Scratch buffer with room for at least `array_len` calls (overwritten).
+ * \param array_len: Number of valid calls in `array`, at most the number of buckets (128).
+ */
+static void draw_call_sort(DRWCall *array, DRWCall *array_tmp, int array_len)
+{
+  /* Count unique batches. It's not really important if
+   * there are collisions. If there are a lot of different batches,
+   * the sorting benefit will be negligible. So at least
+   * sort fast! */
+  uchar idx[128] = {0};
+  /* Shift by 7 positions knowing each GPUBatch is > 64 bytes.
+   * NOTE(review): a shift of 7 is a 128 byte granularity, which does not match
+   * the 64 bytes stated here - confirm which of the two is intended. */
+#define KEY(a) ((((size_t)((a).batch)) >> 7) % ARRAY_SIZE(idx))
+  BLI_assert(array_len <= ARRAY_SIZE(idx));
+
+  /* Zero or one call is already sorted. This also keeps a bogus negative length
+   * from turning into a huge copy size below. */
+  if (array_len < 2) {
+    return;
+  }
+
+  for (int i = 0; i < array_len; i++) {
+    /* Early out if nothing to sort: every call fell in the same bucket. */
+    if (++idx[KEY(array[i])] == array_len) {
+      return;
+    }
+  }
+  /* Accumulate the bucket counts so idx[k] becomes the end offset of bucket k. */
+  for (int i = 1; i < ARRAY_SIZE(idx); i++) {
+    idx[i] += idx[i - 1];
+  }
+  /* Traverse in reverse to not change the order of the resource ids. */
+  for (int src = array_len - 1; src >= 0; src--) {
+    array_tmp[--idx[KEY(array[src])]] = array[src];
+  }
+#undef KEY
+
+  /* Only copy back the calls that were actually sorted. Copying a whole chunk worth of data
+   * would read uninitialized scratch memory and write past the valid calls of `array`. */
+  memcpy(array, array_tmp, sizeof(*array) * (size_t)array_len);
+}
+
GPUUniformBuffer *DRW_uniformbuffer_create(int size, const void *data)
{
return GPU_uniformbuffer_create(size, data, NULL);
@@ -107,6 +137,16 @@ void drw_resource_buffer_finish(ViewportMemoryPool *vmempool)
GPU_uniformbuffer_update(vmempool->obinfos_ubo[i], data_infos);
}
}
+
+ /* Aligned alloc to avoid unaligned memcpy. */
+ DRWCallChunk *chunk_tmp = MEM_mallocN_aligned(sizeof(DRWCallChunk), 16, "tmp call chunk");
+ DRWCallChunk *chunk;
+ BLI_memblock_iter iter;
+ BLI_memblock_iternew(vmempool->calls, &iter);
+ while ((chunk = BLI_memblock_iterstep(&iter))) {
+ draw_call_sort(chunk->calls, chunk_tmp->calls, chunk->call_used);
+ }
+ MEM_freeN(chunk_tmp);
}
/** \} */