From 119846a6bb366f5ae13db18d965083d7927ff11e Mon Sep 17 00:00:00 2001 From: Lukas Stockner Date: Sun, 4 Jun 2017 23:04:47 +0200 Subject: Mikktspace: Speed up the merging of identical vertices Previously, Mikktspace just bucketed the vertices based on one spatial coordinate and then ran full pairwise comparisons inside each bucket. However, since models are three-dimensional, the bucketing has a massive false-positive rate, and since pairwise comparison is O(n^2), the merging process is very slow. But, since we only care about exactly identical vertices, there is a much more efficient approach - we can just hash all values belonging to each vertex and form buckets based on the hash. Since the hash has 32 bits and considers all values, false-positives are very unlikely - and since both hashing and the radixsort that's used for bucketing are O(n), both asymptotical and real-world performance (as well as code complexity) are significantly improved. --- intern/mikktspace/mikktspace.c | 363 +++++++++++------------------------------ 1 file changed, 95 insertions(+), 268 deletions(-) (limited to 'intern/mikktspace') diff --git a/intern/mikktspace/mikktspace.c b/intern/mikktspace/mikktspace.c index f832b356ffe..ebf699c2428 100644 --- a/intern/mikktspace/mikktspace.c +++ b/intern/mikktspace/mikktspace.c @@ -447,305 +447,132 @@ typedef struct { int index; } STmpVert; -static const int g_iCells = 2048; -static const float g_iCells_fl = 2048.0f; +static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); -#ifdef _MSC_VER -# define NOINLINE __declspec(noinline) -#else -# define NOINLINE __attribute__ ((noinline)) -#endif +typedef unsigned int uint; -// it is IMPORTANT that this function is called to evaluate the hash since -// inlining could potentially reorder instructions and generate different -// results for the same effective input value fVal. -static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal) +static uint float_as_uint(const float v) { - const float fIndex = g_iCells_fl * ((fVal-fMin)/(fMax-fMin)); - const int iIndex = (int)fIndex; - return iIndex < g_iCells ? (iIndex >= 0 ? iIndex : 0) : (g_iCells - 1); + return *((uint*)(&v)); } -static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in); -static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries); -static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +#define HASH(x, y, z) (((x) * 73856093) ^ ((y) * 19349663) ^ ((z) * 83492791)) +#define HASH_F(x, y, z) HASH(float_as_uint(x), float_as_uint(y), float_as_uint(z)) -static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +/* Sort comp and data based on comp. + * comp2 and data2 are used as temporary storage. */ +static void radixsort_pair(uint *comp, int *data, uint *comp2, int *data2, int n) { + int shift = 0; + for(int pass = 0; pass < 4; pass++, shift+=8) { + int bins[257] = {0}; + /* Count number of elements per bin. */ + for(int i = 0; i < n; i++) { + bins[((comp[i] >> shift) & 0xff) + 1]++; + } + /* Compute prefix sum to find position of each bin in the sorted array. */ + for(int i = 2; i < 256; i++) { + bins[i] += bins[i-1]; + } + /* Insert the elements in their correct location based on their bin. */ + for(int i = 0; i < n; i++) { + int pos = bins[(comp[i] >> shift) & 0xff]++; + comp2[pos] = comp[i]; + data2[pos] = data[i]; + } - // Generate bounding box - int * piHashTable=NULL, * piHashCount=NULL, * piHashOffsets=NULL, * piHashCount2=NULL; - STmpVert * pTmpVert = NULL; - int i=0, iChannel=0, k=0, e=0; - int iMaxCount=0; - SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim; - float fMin, fMax; - for (i=1; i<(iNrTrianglesIn*3); i++) - { - const int index = piTriList_in_and_out[i]; - - const SVec3 vP = GetPosition(pContext, index); - if (vMin.x > vP.x) vMin.x = vP.x; - else if (vMax.x < vP.x) vMax.x = vP.x; - if (vMin.y > vP.y) vMin.y = vP.y; - else if (vMax.y < vP.y) vMax.y = vP.y; - if (vMin.z > vP.z) vMin.z = vP.z; - else if (vMax.z < vP.z) vMax.z = vP.z; + /* Swap arrays. */ + int *tmpdata = data; data = data2; data2 = tmpdata; + uint *tmpcomp = comp; comp = comp2; comp2 = tmpcomp; } +} - vDim = vsub(vMax,vMin); - iChannel = 0; - fMin = vMin.x; fMax=vMax.x; - if (vDim.y>vDim.x && vDim.y>vDim.z) - { - iChannel=1; - fMin = vMin.y; - fMax = vMax.y; - } - else if (vDim.z>vDim.x) - { - iChannel=2; - fMin = vMin.z; - fMax = vMax.z; - } +/* Merge identical vertices. + * To find vertices with identical position, normal and texcoord, we calculate a hash of the 9 values. + * Then, by sorting based on that hash, identical elements (having identical hashes) will be moved next to each other. + * Since there might be hash collisions, the elements of each block are then compared with each other and duplicates + * are merged. + */ +static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +{ + int numVertices = iNrTrianglesIn*3; - // make allocations - piHashTable = (int *) malloc(sizeof(int[3])*iNrTrianglesIn); - piHashCount = (int *) malloc(sizeof(int)*g_iCells); - piHashOffsets = (int *) malloc(sizeof(int)*g_iCells); - piHashCount2 = (int *) malloc(sizeof(int)*g_iCells); + uint *hashes = (uint*) malloc(sizeof(uint)*numVertices); + int *indices = (int*) malloc(sizeof(int)*numVertices); + uint *temp_hashes = (uint*) malloc(sizeof(uint)*numVertices); + int *temp_indices = (int*) malloc(sizeof(int)*numVertices); + + if(hashes == NULL || indices == NULL || temp_hashes == NULL || temp_indices == NULL) { + free(hashes); + free(indices); + free(temp_hashes); + free(temp_indices); - if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL) - { - if (piHashTable!=NULL) free(piHashTable); - if (piHashCount!=NULL) free(piHashCount); - if (piHashOffsets!=NULL) free(piHashOffsets); - if (piHashCount2!=NULL) free(piHashCount2); GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn); return; } - memset(piHashCount, 0, sizeof(int)*g_iCells); - memset(piHashCount2, 0, sizeof(int)*g_iCells); - // count amount of elements in each cell unit - for (i=0; i<(iNrTrianglesIn*3); i++) - { + for (int i = 0; i < numVertices; i++) { const int index = piTriList_in_and_out[i]; - const SVec3 vP = GetPosition(pContext, index); - const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z); - const int iCell = FindGridCell(fMin, fMax, fVal); - ++piHashCount[iCell]; - } - // evaluate start index of each cell. - piHashOffsets[0]=0; - for (k=1; kpTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c]; - if (fvMax[c]dx && dy>dz) channel=1; - else if (dz>dx) channel=2; - - fSep = 0.5f*(fvMax[channel]+fvMin[channel]); - // stop if all vertices are NaNs - if (!isfinite(fSep)) - return; - - // terminate recursion when the separation/average value - // is no longer strictly between fMin and fMax values. - if (fSep>=fvMax[channel] || fSep<=fvMin[channel]) - { - // complete the weld - for (l=iL_in; l<=iR_in; l++) - { - int i = pTmpVert[l].index; - const int index = piTriList_in_and_out[i]; - const SVec3 vP = GetPosition(pContext, index); - const SVec3 vN = GetNormal(pContext, index); - const SVec3 vT = GetTexCoord(pContext, index); - - tbool bNotFound = TTRUE; - int l2=iL_in, i2rec=-1; - while (l20); // at least 2 entries - - // separate (by fSep) all points between iL_in and iR_in in pTmpVert[] - while (iL < iR) - { - tbool bReadyLeftSwap = TFALSE, bReadyRightSwap = TFALSE; - while ((!bReadyLeftSwap) && iL=iL_in && iL<=iR_in); - bReadyLeftSwap = !(pTmpVert[iL].vert[channel]=iL_in && iR<=iR_in); - bReadyRightSwap = pTmpVert[iR].vert[channel]i has been identified as a duplicate, we can stop since any + * i3>i2>i that is a duplicate of i (and therefore also i2) will also be + * compared to i2 and therefore be identified there anyways. */ + break; + } } } - assert(iL==(iR+1) || (iL==iR)); - if (iL==iR) - { - const tbool bReadyRightSwap = pTmpVert[iR].vert[channel]