diff options
-rw-r--r-- | intern/mikktspace/mikktspace.c | 363 |
1 files changed, 95 insertions, 268 deletions
diff --git a/intern/mikktspace/mikktspace.c b/intern/mikktspace/mikktspace.c index f832b356ffe..ebf699c2428 100644 --- a/intern/mikktspace/mikktspace.c +++ b/intern/mikktspace/mikktspace.c @@ -447,305 +447,132 @@ typedef struct { int index; } STmpVert; -static const int g_iCells = 2048; -static const float g_iCells_fl = 2048.0f; +static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); -#ifdef _MSC_VER -# define NOINLINE __declspec(noinline) -#else -# define NOINLINE __attribute__ ((noinline)) -#endif +typedef unsigned int uint; -// it is IMPORTANT that this function is called to evaluate the hash since -// inlining could potentially reorder instructions and generate different -// results for the same effective input value fVal. -static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal) +static uint float_as_uint(const float v) { - const float fIndex = g_iCells_fl * ((fVal-fMin)/(fMax-fMin)); - const int iIndex = (int)fIndex; - return iIndex < g_iCells ? (iIndex >= 0 ? iIndex : 0) : (g_iCells - 1); + return *((uint*)(&v)); } -static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in); -static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries); -static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +#define HASH(x, y, z) (((x) * 73856093) ^ ((y) * 19349663) ^ ((z) * 83492791)) +#define HASH_F(x, y, z) HASH(float_as_uint(x), float_as_uint(y), float_as_uint(z)) -static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +/* Sort comp and data based on comp. + * comp2 and data2 are used as temporary storage. */ +static void radixsort_pair(uint *comp, int *data, uint *comp2, int *data2, int n) { + int shift = 0; + for(int pass = 0; pass < 4; pass++, shift+=8) { + int bins[257] = {0}; + /* Count number of elements per bin. */ + for(int i = 0; i < n; i++) { + bins[((comp[i] >> shift) & 0xff) + 1]++; + } + /* Compute prefix sum to find position of each bin in the sorted array. */ + for(int i = 2; i < 256; i++) { + bins[i] += bins[i-1]; + } + /* Insert the elements in their correct location based on their bin. */ + for(int i = 0; i < n; i++) { + int pos = bins[(comp[i] >> shift) & 0xff]++; + comp2[pos] = comp[i]; + data2[pos] = data[i]; + } - // Generate bounding box - int * piHashTable=NULL, * piHashCount=NULL, * piHashOffsets=NULL, * piHashCount2=NULL; - STmpVert * pTmpVert = NULL; - int i=0, iChannel=0, k=0, e=0; - int iMaxCount=0; - SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim; - float fMin, fMax; - for (i=1; i<(iNrTrianglesIn*3); i++) - { - const int index = piTriList_in_and_out[i]; - - const SVec3 vP = GetPosition(pContext, index); - if (vMin.x > vP.x) vMin.x = vP.x; - else if (vMax.x < vP.x) vMax.x = vP.x; - if (vMin.y > vP.y) vMin.y = vP.y; - else if (vMax.y < vP.y) vMax.y = vP.y; - if (vMin.z > vP.z) vMin.z = vP.z; - else if (vMax.z < vP.z) vMax.z = vP.z; + /* Swap arrays. */ + int *tmpdata = data; data = data2; data2 = tmpdata; + uint *tmpcomp = comp; comp = comp2; comp2 = tmpcomp; } +} - vDim = vsub(vMax,vMin); - iChannel = 0; - fMin = vMin.x; fMax=vMax.x; - if (vDim.y>vDim.x && vDim.y>vDim.z) - { - iChannel=1; - fMin = vMin.y; - fMax = vMax.y; - } - else if (vDim.z>vDim.x) - { - iChannel=2; - fMin = vMin.z; - fMax = vMax.z; - } +/* Merge identical vertices. + * To find vertices with identical position, normal and texcoord, we calculate a hash of the 9 values. + * Then, by sorting based on that hash, identical elements (having identical hashes) will be moved next to each other. + * Since there might be hash collisions, the elements of each block are then compared with each other and duplicates + * are merged. + */ +static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +{ + int numVertices = iNrTrianglesIn*3; - // make allocations - piHashTable = (int *) malloc(sizeof(int[3])*iNrTrianglesIn); - piHashCount = (int *) malloc(sizeof(int)*g_iCells); - piHashOffsets = (int *) malloc(sizeof(int)*g_iCells); - piHashCount2 = (int *) malloc(sizeof(int)*g_iCells); + uint *hashes = (uint*) malloc(sizeof(uint)*numVertices); + int *indices = (int*) malloc(sizeof(int)*numVertices); + uint *temp_hashes = (uint*) malloc(sizeof(uint)*numVertices); + int *temp_indices = (int*) malloc(sizeof(int)*numVertices); + + if(hashes == NULL || indices == NULL || temp_hashes == NULL || temp_indices == NULL) { + free(hashes); + free(indices); + free(temp_hashes); + free(temp_indices); - if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL) - { - if (piHashTable!=NULL) free(piHashTable); - if (piHashCount!=NULL) free(piHashCount); - if (piHashOffsets!=NULL) free(piHashOffsets); - if (piHashCount2!=NULL) free(piHashCount2); GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn); return; } - memset(piHashCount, 0, sizeof(int)*g_iCells); - memset(piHashCount2, 0, sizeof(int)*g_iCells); - // count amount of elements in each cell unit - for (i=0; i<(iNrTrianglesIn*3); i++) - { + for (int i = 0; i < numVertices; i++) { const int index = piTriList_in_and_out[i]; - const SVec3 vP = GetPosition(pContext, index); - const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z); - const int iCell = FindGridCell(fMin, fMax, fVal); - ++piHashCount[iCell]; - } - // evaluate start index of each cell. - piHashOffsets[0]=0; - for (k=1; k<g_iCells; k++) - piHashOffsets[k]=piHashOffsets[k-1]+piHashCount[k-1]; - - // insert vertices - for (i=0; i<(iNrTrianglesIn*3); i++) - { - const int index = piTriList_in_and_out[i]; const SVec3 vP = GetPosition(pContext, index); - const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z); - const int iCell = FindGridCell(fMin, fMax, fVal); - int * pTable = NULL; - - assert(piHashCount2[iCell]<piHashCount[iCell]); - pTable = &piHashTable[piHashOffsets[iCell]]; - pTable[piHashCount2[iCell]] = i; // vertex i has been inserted. - ++piHashCount2[iCell]; - } - for (k=0; k<g_iCells; k++) - assert(piHashCount2[k] == piHashCount[k]); // verify the count - free(piHashCount2); - - // find maximum amount of entries in any hash entry - iMaxCount = piHashCount[0]; - for (k=1; k<g_iCells; k++) - if (iMaxCount<piHashCount[k]) - iMaxCount=piHashCount[k]; - pTmpVert = (STmpVert *) malloc(sizeof(STmpVert)*iMaxCount); + const uint hashP = HASH_F(vP.x, vP.y, vP.z); + const SVec3 vN = GetNormal(pContext, index); + const uint hashN = HASH_F(vN.x, vN.y, vN.z); - // complete the merge - for (k=0; k<g_iCells; k++) - { - // extract table of cell k and amount of entries in it - int * pTable = &piHashTable[piHashOffsets[k]]; - const int iEntries = piHashCount[k]; - if (iEntries < 2) continue; + const SVec3 vT = GetTexCoord(pContext, index); + const uint hashT = HASH_F(vT.x, vT.y, vT.z); - if (pTmpVert!=NULL) - { - for (e=0; e<iEntries; e++) - { - int i = pTable[e]; - const SVec3 vP = GetPosition(pContext, piTriList_in_and_out[i]); - pTmpVert[e].vert[0] = vP.x; pTmpVert[e].vert[1] = vP.y; - pTmpVert[e].vert[2] = vP.z; pTmpVert[e].index = i; - } - MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, 0, iEntries-1); - } - else - MergeVertsSlow(piTriList_in_and_out, pContext, pTable, iEntries); + hashes[i] = HASH(hashP, hashN, hashT); + indices[i] = i; } - if (pTmpVert!=NULL) { free(pTmpVert); } - free(piHashTable); - free(piHashCount); - free(piHashOffsets); -} - -static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in) -{ - // make bbox - int c=0, l=0, channel=0; - float fvMin[3], fvMax[3]; - float dx=0, dy=0, dz=0, fSep=0; - for (c=0; c<3; c++) - { fvMin[c]=pTmpVert[iL_in].vert[c]; fvMax[c]=fvMin[c]; } - for (l=(iL_in+1); l<=iR_in; l++) { - for (c=0; c<3; c++) { - if (fvMin[c]>pTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c]; - if (fvMax[c]<pTmpVert[l].vert[c]) fvMax[c]=pTmpVert[l].vert[c]; + radixsort_pair(hashes, indices, temp_hashes, temp_indices, numVertices); + + free(temp_hashes); + free(temp_indices); + + /* Process blocks of vertices with the same hash. + * Vertices in the block might still be separate, but we know for sure that + * vertices in different blocks will never be identical. */ + int blockstart = 0; + while (blockstart < numVertices) { + /* Find end of this block (exclusive). */ + uint hash = hashes[blockstart]; + int blockend = blockstart+1; + for(; blockend < numVertices; blockend++) { + if(hashes[blockend] != hash) break; } - } - - dx = fvMax[0]-fvMin[0]; - dy = fvMax[1]-fvMin[1]; - dz = fvMax[2]-fvMin[2]; - - channel = 0; - if (dy>dx && dy>dz) channel=1; - else if (dz>dx) channel=2; - - fSep = 0.5f*(fvMax[channel]+fvMin[channel]); - // stop if all vertices are NaNs - if (!isfinite(fSep)) - return; - - // terminate recursion when the separation/average value - // is no longer strictly between fMin and fMax values. - if (fSep>=fvMax[channel] || fSep<=fvMin[channel]) - { - // complete the weld - for (l=iL_in; l<=iR_in; l++) - { - int i = pTmpVert[l].index; - const int index = piTriList_in_and_out[i]; - const SVec3 vP = GetPosition(pContext, index); - const SVec3 vN = GetNormal(pContext, index); - const SVec3 vT = GetTexCoord(pContext, index); - - tbool bNotFound = TTRUE; - int l2=iL_in, i2rec=-1; - while (l2<l && bNotFound) - { - const int i2 = pTmpVert[l2].index; - const int index2 = piTriList_in_and_out[i2]; - const SVec3 vP2 = GetPosition(pContext, index2); - const SVec3 vN2 = GetNormal(pContext, index2); - const SVec3 vT2 = GetTexCoord(pContext, index2); - i2rec=i2; - - //if (vP==vP2 && vN==vN2 && vT==vT2) - if (vP.x==vP2.x && vP.y==vP2.y && vP.z==vP2.z && - vN.x==vN2.x && vN.y==vN2.y && vN.z==vN2.z && - vT.x==vT2.x && vT.y==vT2.y && vT.z==vT2.z) - bNotFound = TFALSE; - else - ++l2; - } - - // merge if previously found - if (!bNotFound) - piTriList_in_and_out[i] = piTriList_in_and_out[i2rec]; - } - } - else - { - int iL=iL_in, iR=iR_in; - assert((iR_in-iL_in)>0); // at least 2 entries - - // separate (by fSep) all points between iL_in and iR_in in pTmpVert[] - while (iL < iR) - { - tbool bReadyLeftSwap = TFALSE, bReadyRightSwap = TFALSE; - while ((!bReadyLeftSwap) && iL<iR) - { - assert(iL>=iL_in && iL<=iR_in); - bReadyLeftSwap = !(pTmpVert[iL].vert[channel]<fSep); - if (!bReadyLeftSwap) ++iL; - } - while ((!bReadyRightSwap) && iL<iR) - { - assert(iR>=iL_in && iR<=iR_in); - bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep; - if (!bReadyRightSwap) --iR; - } - assert( (iL<iR) || !(bReadyLeftSwap && bReadyRightSwap) ); - - if (bReadyLeftSwap && bReadyRightSwap) - { - const STmpVert sTmp = pTmpVert[iL]; - assert(iL<iR); - pTmpVert[iL] = pTmpVert[iR]; - pTmpVert[iR] = sTmp; - ++iL; --iR; + for(int i = blockstart; i < blockend; i++) { + int index1 = piTriList_in_and_out[indices[i]]; + const SVec3 vP = GetPosition(pContext, index1); + const SVec3 vN = GetNormal(pContext, index1); + const SVec3 vT = GetTexCoord(pContext, index1); + for(int i2 = i+1; i2 < blockend; i2++) { + int index2 = piTriList_in_and_out[indices[i2]]; + if(index1 == index2) continue; + + if(veq(vP, GetPosition(pContext, index2)) && + veq(vN, GetNormal(pContext, index2)) && + veq(vT, GetTexCoord(pContext, index2))) + { + piTriList_in_and_out[indices[i2]] = index1; + /* Once i2>i has been identified as a duplicate, we can stop since any + * i3>i2>i that is a duplicate of i (and therefore also i2) will also be + * compared to i2 and therefore be identified there anyways. */ + break; + } } } - assert(iL==(iR+1) || (iL==iR)); - if (iL==iR) - { - const tbool bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep; - if (bReadyRightSwap) ++iL; - else --iR; - } - - // only need to weld when there is more than 1 instance of the (x,y,z) - if (iL_in < iR) - MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL_in, iR); // weld all left of fSep - if (iL < iR_in) - MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL, iR_in); // weld all right of (or equal to) fSep + /* Advance to next block. */ + blockstart = blockend; } -} - -static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries) -{ - // this can be optimized further using a tree structure or more hashing. - int e=0; - for (e=0; e<iEntries; e++) - { - int i = pTable[e]; - const int index = piTriList_in_and_out[i]; - const SVec3 vP = GetPosition(pContext, index); - const SVec3 vN = GetNormal(pContext, index); - const SVec3 vT = GetTexCoord(pContext, index); - tbool bNotFound = TTRUE; - int e2=0, i2rec=-1; - while (e2<e && bNotFound) - { - const int i2 = pTable[e2]; - const int index2 = piTriList_in_and_out[i2]; - const SVec3 vP2 = GetPosition(pContext, index2); - const SVec3 vN2 = GetNormal(pContext, index2); - const SVec3 vT2 = GetTexCoord(pContext, index2); - i2rec = i2; - - if (veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2)) - bNotFound = TFALSE; - else - ++e2; - } - - // merge if previously found - if (!bNotFound) - piTriList_in_and_out[i] = piTriList_in_and_out[i2rec]; - } + free(hashes); + free(indices); } static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) |