From c486da0238bd61eb304f402bd07a9ae43c3733e5 Mon Sep 17 00:00:00 2001 From: Lukas Stockner Date: Sun, 17 Apr 2022 20:27:57 +0200 Subject: Mikktspace: Reduce number of data queries to caller The current code for computing tangents is not exactly fast. This has been a long-standing issue, and recently came up again with T97378. The main bottleneck is fetching the mesh data, since it's handled through a callback system and each vertex might have its data queried dozens of times. I've tried a lot of things to optimize `mikktspace.c`, but unfortunately most weren't that useful: - Vectorizing SVec3 gives a ~5% speedup, but I'm not sure if the additional ~70 lines of code are worth it - Keeping an internal copy of the data instead of re-querying all the time helps a lot (~50-60% time reduction), but requires a lot of extra memory (~100 byte per face) - Going C++ and replacing the internal quicksort with std::sort shows no difference - Restructuring the entire file to be a header-only library so that the callbacks can be inlined gives ~10% reduction, but is a major change and deviation from the original library In the end, two simple fixes that actually help remain: - Don't re-query the number of faces in each loop iteration - Don't bother looking for identical vertices if there's only one vertex with that hash With this, time for the test case in T97378 goes from 6.64sec to 4.92sec. It's something I guess. I feel like completely refactoring this library would not be a bad idea at some point, but for now it does the job... Differential Revision: https://developer.blender.org/D14675 --- intern/mikktspace/mikktspace.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'intern') diff --git a/intern/mikktspace/mikktspace.c b/intern/mikktspace/mikktspace.c index 794590d30a4..e39ac4bb6ef 100644 --- a/intern/mikktspace/mikktspace.c +++ b/intern/mikktspace/mikktspace.c @@ -565,23 +565,26 @@ static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], break; } - for (int i = blockstart; i < blockend; i++) { - int index1 = piTriList_in_and_out[indices[i]]; - const SVec3 vP = GetPosition(pContext, index1); - const SVec3 vN = GetNormal(pContext, index1); - const SVec3 vT = GetTexCoord(pContext, index1); - for (int i2 = i + 1; i2 < blockend; i2++) { - int index2 = piTriList_in_and_out[indices[i2]]; - if (index1 == index2) - continue; - - if (veq(vP, GetPosition(pContext, index2)) && veq(vN, GetNormal(pContext, index2)) && - veq(vT, GetTexCoord(pContext, index2))) { - piTriList_in_and_out[indices[i2]] = index1; - /* Once i2>i has been identified as a duplicate, we can stop since any - * i3>i2>i that is a duplicate of i (and therefore also i2) will also be - * compared to i2 and therefore be identified there anyways. */ - break; + /* If there's only one vertex with this hash, we don't have anything to compare. */ + if (blockend > blockstart + 1) { + for (int i = blockstart; i < blockend; i++) { + int index1 = piTriList_in_and_out[indices[i]]; + const SVec3 vP = GetPosition(pContext, index1); + const SVec3 vN = GetNormal(pContext, index1); + const SVec3 vT = GetTexCoord(pContext, index1); + for (int i2 = i + 1; i2 < blockend; i2++) { + int index2 = piTriList_in_and_out[indices[i2]]; + if (index1 == index2) + continue; + + if (veq(vP, GetPosition(pContext, index2)) && veq(vN, GetNormal(pContext, index2)) && + veq(vT, GetTexCoord(pContext, index2))) { + piTriList_in_and_out[indices[i2]] = index1; + /* Once i2>i has been identified as a duplicate, we can stop since any + * i3>i2>i that is a duplicate of i (and therefore also i2) will also be + * compared to i2 and therefore be identified there anyways. */ + break; + } } } } @@ -645,7 +648,8 @@ static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], { int iTSpacesOffs = 0, f = 0, t = 0; int iDstTriIndex = 0; - for (f = 0; f < pContext->m_pInterface->m_getNumFaces(pContext); f++) { + const int iNrFaces = pContext->m_pInterface->m_getNumFaces(pContext); + for (f = 0; f < iNrFaces; f++) { const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f); if (verts != 3 && verts != 4) continue; -- cgit v1.2.3