Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/mikktspace/mikktspace.hh')
-rw-r--r--intern/mikktspace/mikktspace.hh823
1 files changed, 823 insertions, 0 deletions
diff --git a/intern/mikktspace/mikktspace.hh b/intern/mikktspace/mikktspace.hh
new file mode 100644
index 00000000000..4b45fa86e14
--- /dev/null
+++ b/intern/mikktspace/mikktspace.hh
@@ -0,0 +1,823 @@
+/* SPDX-License-Identifier: Apache-2.0
+ *
+ * Original C code:
+ * Copyright 2011 by Morten S. Mikkelsen.
+ *
+ * C++ rewrite:
+ * Copyright 2022 Blender Foundation. All rights reserved.
+ */
+
+/** \file
+ * \ingroup mikktspace
+ */
+
+#include <algorithm>
+#include <cassert>
+
+#ifdef WITH_TBB
+# include <tbb/parallel_for.h>
+#endif
+
+#include "mikk_atomic_hash_set.hh"
+#include "mikk_float3.hh"
+#include "mikk_util.hh"
+
+namespace mikk {
+
+static constexpr uint UNSET_ENTRY = 0xffffffffu;
+
+template<typename Mesh> class Mikktspace {
+ struct Triangle {
+ /* Stores neighboring triangle for group assignment. */
+ std::array<uint, 3> neighbor;
+ /* Stores assigned group of each vertex. */
+ std::array<uint, 3> group;
+ /* Stores vertex indices that make up the triangle. */
+ std::array<uint, 3> vertices;
+
+ /* Computed face tangent, will be accumulated into group. */
+ float3 tangent;
+
+ /* Index of the face that this triangle belongs to. */
+ uint faceIdx;
+ /* Index of the first of this triangle's vertices' TSpaces. */
+ uint tSpaceIdx;
+
+ /* Stores mapping from this triangle's vertices to the original
+ * face's vertices (relevant for quads). */
+ std::array<uint8_t, 3> faceVertex;
+
+ // flags
+ bool markDegenerate : 1;
+ bool quadOneDegenTri : 1;
+ bool groupWithAny : 1;
+ bool orientPreserving : 1;
+
+ Triangle(uint faceIdx_, uint tSpaceIdx_)
+ : tangent{0.0f},
+ faceIdx{faceIdx_},
+ tSpaceIdx{tSpaceIdx_},
+ markDegenerate{false},
+ quadOneDegenTri{false},
+ groupWithAny{true},
+ orientPreserving{false}
+ {
+ neighbor.fill(UNSET_ENTRY);
+ group.fill(UNSET_ENTRY);
+ }
+
+ void setVertices(uint8_t i0, uint8_t i1, uint8_t i2)
+ {
+ faceVertex[0] = i0;
+ faceVertex[1] = i1;
+ faceVertex[2] = i2;
+ vertices[0] = pack_index(faceIdx, i0);
+ vertices[1] = pack_index(faceIdx, i1);
+ vertices[2] = pack_index(faceIdx, i2);
+ }
+ };
+
+ struct Group {
+ float3 tangent;
+ uint vertexRepresentative;
+ bool orientPreserving;
+
+ Group(uint vertexRepresentative_, bool orientPreserving_)
+ : tangent{0.0f},
+ vertexRepresentative{vertexRepresentative_},
+ orientPreserving{orientPreserving_}
+ {
+ }
+
+ void normalizeTSpace()
+ {
+ tangent = tangent.normalize();
+ }
+
+ void accumulateTSpaceAtomic(float3 v_tangent)
+ {
+ float_add_atomic(&tangent.x, v_tangent.x);
+ float_add_atomic(&tangent.y, v_tangent.y);
+ float_add_atomic(&tangent.z, v_tangent.z);
+ }
+
+ void accumulateTSpace(float3 v_tangent)
+ {
+ tangent += v_tangent;
+ }
+ };
+
+ struct TSpace {
+ float3 tangent = float3(1.0f, 0.0f, 0.0f);
+ uint counter = 0;
+ bool orientPreserving = false;
+
+ void accumulateGroup(const Group &group)
+ {
+ assert(counter < 2);
+
+ if (counter == 0) {
+ tangent = group.tangent;
+ }
+ else if (tangent == group.tangent) {
+ // this if is important. Due to floating point precision
+ // averaging when ts0==ts1 will cause a slight difference
+ // which results in tangent space splits later on, so do nothing
+ }
+ else {
+ tangent = (tangent + group.tangent).normalize();
+ }
+
+ counter++;
+ orientPreserving = group.orientPreserving;
+ }
+ };
+
+ Mesh &mesh;
+
+ std::vector<Triangle> triangles;
+ std::vector<TSpace> tSpaces;
+ std::vector<Group> groups;
+
+ uint nrTSpaces, nrFaces, nrTriangles, totalTriangles;
+
+ int nrThreads;
+ bool isParallel;
+
+ public:
+ Mikktspace(Mesh &mesh_) : mesh(mesh_)
+ {
+ }
+
+ void genTangSpace()
+ {
+ nrFaces = (uint)mesh.GetNumFaces();
+
+#ifdef WITH_TBB
+ nrThreads = tbb::this_task_arena::max_concurrency();
+ isParallel = (nrThreads > 1) && (nrFaces > 10000);
+#else
+ nrThreads = 1;
+ isParallel = false;
+#endif
+
+ // make an initial triangle --> face index list
+ generateInitialVerticesIndexList();
+
+ if (nrTriangles == 0) {
+ return;
+ }
+
+ // make a welded index list of identical positions and attributes (pos, norm, texc)
+ generateSharedVerticesIndexList();
+
+ // mark all triangle pairs that belong to a quad with only one
+ // good triangle. These need special treatment in degenEpilogue().
+ // Additionally, move all good triangles to the start of
+ // triangles[] without changing order and
+ // put the degenerate triangles last.
+ degenPrologue();
+
+ // evaluate triangle level attributes and neighbor list
+ initTriangle();
+
+ // match up edge pairs
+ buildNeighbors();
+
+ // based on the 4 rules, identify groups based on connectivity
+ build4RuleGroups();
+
+ // make tspaces, each group is split up into subgroups.
+ // Finally a tangent space is made for every resulting subgroup
+ generateTSpaces();
+
+ // degenerate quads with one good triangle will be fixed by copying a space from
+ // the good triangle to the coinciding vertex.
+ // all other degenerate triangles will just copy a space from any good triangle
+ // with the same welded index in vertices[].
+ degenEpilogue();
+
+ uint index = 0;
+ for (uint f = 0; f < nrFaces; f++) {
+ const uint verts = mesh.GetNumVerticesOfFace(f);
+ if (verts != 3 && verts != 4) {
+ continue;
+ }
+
+ // set data
+ for (uint i = 0; i < verts; i++) {
+ const TSpace &tSpace = tSpaces[index++];
+ mesh.SetTangentSpace(f, i, tSpace.tangent, tSpace.orientPreserving);
+ }
+ }
+ }
+
+ protected:
+ template<typename F> void runParallel(uint start, uint end, F func)
+ {
+#ifdef WITH_TBB
+ if (isParallel) {
+ tbb::parallel_for(start, end, func);
+ }
+ else
+#endif
+ {
+ for (uint i = start; i < end; i++) {
+ func(i);
+ }
+ }
+ }
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+ float3 getPosition(uint vertexID)
+ {
+ uint f, v;
+ unpack_index(f, v, vertexID);
+ return mesh.GetPosition(f, v);
+ }
+
+ float3 getNormal(uint vertexID)
+ {
+ uint f, v;
+ unpack_index(f, v, vertexID);
+ return mesh.GetNormal(f, v);
+ }
+
+ float3 getTexCoord(uint vertexID)
+ {
+ uint f, v;
+ unpack_index(f, v, vertexID);
+ return mesh.GetTexCoord(f, v);
+ }
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+ void generateInitialVerticesIndexList()
+ {
+ nrTriangles = 0;
+ for (uint f = 0; f < nrFaces; f++) {
+ const uint verts = mesh.GetNumVerticesOfFace(f);
+ if (verts == 3) {
+ nrTriangles += 1;
+ }
+ else if (verts == 4) {
+ nrTriangles += 2;
+ }
+ }
+
+ triangles.reserve(nrTriangles);
+
+ nrTSpaces = 0;
+ for (uint f = 0; f < nrFaces; f++) {
+ const uint verts = mesh.GetNumVerticesOfFace(f);
+ if (verts != 3 && verts != 4)
+ continue;
+
+ uint tA = (uint)triangles.size();
+ triangles.emplace_back(f, nrTSpaces);
+ Triangle &triA = triangles[tA];
+
+ if (verts == 3) {
+ triA.setVertices(0, 1, 2);
+ }
+ else {
+ uint tB = (uint)triangles.size();
+ triangles.emplace_back(f, nrTSpaces);
+ Triangle &triB = triangles[tB];
+
+ // need an order independent way to evaluate
+ // tspace on quads. This is done by splitting
+ // along the shortest diagonal.
+ float distSQ_02 = (mesh.GetTexCoord(f, 2) - mesh.GetTexCoord(f, 0)).length_squared();
+ float distSQ_13 = (mesh.GetTexCoord(f, 3) - mesh.GetTexCoord(f, 1)).length_squared();
+ bool quadDiagIs_02;
+ if (distSQ_02 != distSQ_13)
+ quadDiagIs_02 = (distSQ_02 < distSQ_13);
+ else {
+ distSQ_02 = (mesh.GetPosition(f, 2) - mesh.GetPosition(f, 0)).length_squared();
+ distSQ_13 = (mesh.GetPosition(f, 3) - mesh.GetPosition(f, 1)).length_squared();
+ quadDiagIs_02 = !(distSQ_13 < distSQ_02);
+ }
+
+ if (quadDiagIs_02) {
+ triA.setVertices(0, 1, 2);
+ triB.setVertices(0, 2, 3);
+ }
+ else {
+ triA.setVertices(0, 1, 3);
+ triB.setVertices(1, 2, 3);
+ }
+ }
+
+ nrTSpaces += verts;
+ }
+ }
+
+ struct VertexHash {
+ Mikktspace<Mesh> *mikk;
+ inline uint operator()(const uint &k) const
+ {
+ return hash_float3x3(mikk->getPosition(k), mikk->getNormal(k), mikk->getTexCoord(k));
+ }
+ };
+
+ struct VertexEqual {
+ Mikktspace<Mesh> *mikk;
+ inline bool operator()(const uint &kA, const uint &kB) const
+ {
+ return mikk->getTexCoord(kA) == mikk->getTexCoord(kB) &&
+ mikk->getNormal(kA) == mikk->getNormal(kB) &&
+ mikk->getPosition(kA) == mikk->getPosition(kB);
+ }
+ };
+
+ /* Merge identical vertices.
+ * To find vertices with identical position, normal and texcoord, we calculate a hash of the 9
+ * values. Then, by sorting based on that hash, identical elements (having identical hashes) will
+ * be moved next to each other. Since there might be hash collisions, the elements of each block
+ * are then compared with each other and duplicates are merged.
+ */
+ template<bool isAtomic> void generateSharedVerticesIndexList_impl()
+ {
+ uint numVertices = nrTriangles * 3;
+ AtomicHashSet<uint, isAtomic, VertexHash, VertexEqual> set(numVertices, {this}, {this});
+ runParallel(0u, nrTriangles, [&](uint t) {
+ for (uint i = 0; i < 3; i++) {
+ auto res = set.emplace(triangles[t].vertices[i]);
+ if (!res.second) {
+ triangles[t].vertices[i] = res.first;
+ }
+ }
+ });
+ }
+ void generateSharedVerticesIndexList()
+ {
+ if (isParallel) {
+ generateSharedVerticesIndexList_impl<true>();
+ }
+ else {
+ generateSharedVerticesIndexList_impl<false>();
+ }
+ }
+
+ /////////////////////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////////// Degenerate triangles ////////////////////////////////////
+
+ void degenPrologue()
+ {
+ // Mark all degenerate triangles
+ totalTriangles = nrTriangles;
+ std::atomic<uint> degenTriangles(0);
+ runParallel(0u, totalTriangles, [&](uint t) {
+ const float3 p0 = getPosition(triangles[t].vertices[0]);
+ const float3 p1 = getPosition(triangles[t].vertices[1]);
+ const float3 p2 = getPosition(triangles[t].vertices[2]);
+ if (p0 == p1 || p0 == p2 || p1 == p2) // degenerate
+ {
+ triangles[t].markDegenerate = true;
+ degenTriangles.fetch_add(1);
+ }
+ });
+ nrTriangles -= degenTriangles.load();
+
+ if (totalTriangles == nrTriangles) {
+ return;
+ }
+
+ // locate quads with only one good triangle
+ runParallel(0u, totalTriangles - 1, [&](uint t) {
+ Triangle &triangleA = triangles[t], &triangleB = triangles[t + 1];
+ if (triangleA.faceIdx != triangleB.faceIdx) {
+ /* Individual triangle, skip. */
+ return;
+ }
+ if (triangleA.markDegenerate != triangleB.markDegenerate) {
+ triangleA.quadOneDegenTri = true;
+ triangleB.quadOneDegenTri = true;
+ }
+ });
+
+ std::stable_partition(triangles.begin(), triangles.end(), [](const Triangle &tri) {
+ return tri.markDegenerate;
+ });
+ }
+
+ void degenEpilogue()
+ {
+ if (nrTriangles == totalTriangles) {
+ return;
+ }
+
+ std::unordered_map<uint, uint> goodTriangleMap;
+ for (uint t = 0; t < nrTriangles; t++) {
+ for (uint i = 0; i < 3; i++) {
+ goodTriangleMap.emplace(triangles[t].vertices[i], pack_index(t, i));
+ }
+ }
+
+ // deal with degenerate triangles
+ // punishment for degenerate triangles is O(nrTriangles) extra memory.
+ for (uint t = nrTriangles; t < totalTriangles; t++) {
+ // degenerate triangles on a quad with one good triangle are skipped
+ // here but processed in the next loop
+ if (triangles[t].quadOneDegenTri) {
+ continue;
+ }
+
+ for (uint i = 0; i < 3; i++) {
+ const auto entry = goodTriangleMap.find(triangles[t].vertices[i]);
+ if (entry == goodTriangleMap.end()) {
+ // Matching vertex from good triangle is not found.
+ continue;
+ }
+
+ uint tSrc, iSrc;
+ unpack_index(tSrc, iSrc, entry->second);
+ const uint iSrcVert = triangles[tSrc].faceVertex[iSrc];
+ const uint iSrcOffs = triangles[tSrc].tSpaceIdx;
+ const uint iDstVert = triangles[t].faceVertex[i];
+ const uint iDstOffs = triangles[t].tSpaceIdx;
+ // copy tspace
+ tSpaces[iDstOffs + iDstVert] = tSpaces[iSrcOffs + iSrcVert];
+ }
+ }
+
+ // deal with degenerate quads with one good triangle
+ for (uint t = 0; t < nrTriangles; t++) {
+ // this triangle belongs to a quad where the
+ // other triangle is degenerate
+ if (!triangles[t].quadOneDegenTri) {
+ continue;
+ }
+ uint vertFlag = (1u << triangles[t].faceVertex[0]) | (1u << triangles[t].faceVertex[1]) |
+ (1u << triangles[t].faceVertex[2]);
+ uint missingFaceVertex = 0;
+ if ((vertFlag & 2) == 0)
+ missingFaceVertex = 1;
+ else if ((vertFlag & 4) == 0)
+ missingFaceVertex = 2;
+ else if ((vertFlag & 8) == 0)
+ missingFaceVertex = 3;
+
+ uint faceIdx = triangles[t].faceIdx;
+ float3 dstP = mesh.GetPosition(faceIdx, missingFaceVertex);
+ bool found = false;
+ for (uint i = 0; i < 3; i++) {
+ const uint faceVertex = triangles[t].faceVertex[i];
+ const float3 srcP = mesh.GetPosition(faceIdx, faceVertex);
+ if (srcP == dstP) {
+ const uint offset = triangles[t].tSpaceIdx;
+ tSpaces[offset + missingFaceVertex] = tSpaces[offset + faceVertex];
+ found = true;
+ break;
+ }
+ }
+ assert(found);
+ (void)found;
+ }
+ }
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+ // returns the texture area times 2
+ float calcTexArea(uint tri)
+ {
+ const float3 t1 = getTexCoord(triangles[tri].vertices[0]);
+ const float3 t2 = getTexCoord(triangles[tri].vertices[1]);
+ const float3 t3 = getTexCoord(triangles[tri].vertices[2]);
+
+ const float t21x = t2.x - t1.x;
+ const float t21y = t2.y - t1.y;
+ const float t31x = t3.x - t1.x;
+ const float t31y = t3.y - t1.y;
+
+ const float signedAreaSTx2 = t21x * t31y - t21y * t31x;
+ return fabsf(signedAreaSTx2);
+ }
+
+ void initTriangle()
+ {
+ // triangles[f].iFlag is cleared in generateInitialVerticesIndexList()
+ // which is called before this function.
+
+ // evaluate first order derivatives
+ runParallel(0u, nrTriangles, [&](uint t) {
+ Triangle &triangle = triangles[t];
+
+ // initial values
+ const float3 v1 = getPosition(triangle.vertices[0]);
+ const float3 v2 = getPosition(triangle.vertices[1]);
+ const float3 v3 = getPosition(triangle.vertices[2]);
+ const float3 t1 = getTexCoord(triangle.vertices[0]);
+ const float3 t2 = getTexCoord(triangle.vertices[1]);
+ const float3 t3 = getTexCoord(triangle.vertices[2]);
+
+ const float t21x = t2.x - t1.x;
+ const float t21y = t2.y - t1.y;
+ const float t31x = t3.x - t1.x;
+ const float t31y = t3.y - t1.y;
+ const float3 d1 = v2 - v1, d2 = v3 - v1;
+
+ const float signedAreaSTx2 = t21x * t31y - t21y * t31x;
+ const float3 vOs = (t31y * d1) - (t21y * d2); // eq 18
+ const float3 vOt = (-t31x * d1) + (t21x * d2); // eq 19
+
+ triangle.orientPreserving = (signedAreaSTx2 > 0);
+
+ if (not_zero(signedAreaSTx2)) {
+ const float lenOs2 = vOs.length_squared();
+ const float lenOt2 = vOt.length_squared();
+ const float fS = triangle.orientPreserving ? 1.0f : (-1.0f);
+ if (not_zero(lenOs2))
+ triangle.tangent = vOs * (fS / sqrtf(lenOs2));
+
+ // if this is a good triangle
+ if (not_zero(lenOs2) && not_zero(lenOt2))
+ triangle.groupWithAny = false;
+ }
+ });
+
+ // force otherwise healthy quads to a fixed orientation
+ runParallel(0u, nrTriangles - 1, [&](uint t) {
+ Triangle &triangleA = triangles[t], &triangleB = triangles[t + 1];
+ if (triangleA.faceIdx != triangleB.faceIdx) {
+ // this is not a quad
+ return;
+ }
+
+ // bad triangles should already have been removed by
+ // degenPrologue(), but just in case check that neither are degenerate
+ if (!(triangleA.markDegenerate || triangleB.markDegenerate)) {
+ // if this happens the quad has extremely bad mapping!!
+ if (triangleA.orientPreserving != triangleB.orientPreserving) {
+ bool chooseOrientFirstTri = false;
+ if (triangleB.groupWithAny)
+ chooseOrientFirstTri = true;
+ else if (calcTexArea(t) >= calcTexArea(t + 1))
+ chooseOrientFirstTri = true;
+
+ // force match
+ const uint t0 = chooseOrientFirstTri ? t : (t + 1);
+ const uint t1 = chooseOrientFirstTri ? (t + 1) : t;
+ triangles[t1].orientPreserving = triangles[t0].orientPreserving;
+ }
+ }
+ });
+ }
+
+ /////////////////////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////// Edges ///////////////////////////////////////////
+
+ struct NeighborShard {
+ struct Entry {
+ Entry(uint32_t key_, uint data_) : key(key_), data(data_)
+ {
+ }
+ uint key, data;
+ };
+ std::vector<Entry> entries;
+
+ NeighborShard(size_t capacity)
+ {
+ entries.reserve(capacity);
+ }
+
+ void buildNeighbors(Mikktspace<Mesh> *mikk)
+ {
+ /* Entries are added by iterating over t, so by using a stable sort,
+ * we don't have to compare based on t as well. */
+ {
+ std::vector<Entry> tempEntries(entries.size(), {0, 0});
+ radixsort(entries, tempEntries, [](const Entry &e) { return e.key; });
+ }
+
+ for (uint i = 0; i < entries.size(); i++) {
+ const Entry &a = entries[i];
+ uint tA, iA;
+ unpack_index(tA, iA, a.data);
+ Mikktspace<Mesh>::Triangle &triA = mikk->triangles[tA];
+
+ if (triA.neighbor[iA] != UNSET_ENTRY) {
+ continue;
+ }
+
+ uint i0A = triA.vertices[iA], i1A = triA.vertices[(iA != 2) ? (iA + 1) : 0];
+ for (uint j = i + 1; j < entries.size(); j++) {
+ const Entry &b = entries[j];
+ uint tB, iB;
+ unpack_index(tB, iB, b.data);
+ Mikktspace<Mesh>::Triangle &triB = mikk->triangles[tB];
+
+ if (b.key != a.key)
+ break;
+
+ if (triB.neighbor[iB] != UNSET_ENTRY) {
+ continue;
+ }
+
+ uint i1B = triB.vertices[iB], i0B = triB.vertices[(iB != 2) ? (iB + 1) : 0];
+ if (i0A == i0B && i1A == i1B) {
+ triA.neighbor[iA] = tB;
+ triB.neighbor[iB] = tA;
+ break;
+ }
+ }
+ }
+ }
+ };
+
+ void buildNeighbors()
+ {
+ /* In order to parallelize the processing, we divide the vertices into shards.
+ * Since only vertex pairs with the same key will be checked, we can process
+ * shards independently as long as we ensure that all vertices with the same
+ * key go into the same shard.
+ * This is done by hashing the key to get the shard index of each vertex.
+ */
+ // TODO: Two-step filling that first counts and then fills? Could be parallel then.
+ uint targetNrShards = isParallel ? uint(4 * nrThreads) : 1;
+ uint nrShards = 1, hashShift = 32;
+ while (nrShards < targetNrShards) {
+ nrShards *= 2;
+ hashShift -= 1;
+ }
+
+ /* Reserve 25% extra to account for variation due to hashing. */
+ size_t reserveSize = size_t(double(3 * nrTriangles) * 1.25 / nrShards);
+ std::vector<NeighborShard> shards(nrShards, {reserveSize});
+
+ for (uint t = 0; t < nrTriangles; t++) {
+ Triangle &triangle = triangles[t];
+ for (uint i = 0; i < 3; i++) {
+ const uint i0 = triangle.vertices[i];
+ const uint i1 = triangle.vertices[(i != 2) ? (i + 1) : 0];
+ const uint high = std::max(i0, i1), low = std::min(i0, i1);
+ const uint hash = hash_uint3(high, low, 0);
+ /* TODO: Reusing the hash here means less hash space inside each shard.
+ * Computing a second hash with a different seed it probably not worth it? */
+ const uint shard = isParallel ? (hash >> hashShift) : 0;
+ shards[shard].entries.emplace_back(hash, pack_index(t, i));
+ }
+ }
+
+ runParallel(0u, nrShards, [&](uint s) { shards[s].buildNeighbors(this); });
+ }
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+ void assignRecur(const uint t, uint groupId)
+ {
+ if (t == UNSET_ENTRY) {
+ return;
+ }
+
+ Triangle &triangle = triangles[t];
+ Group &group = groups[groupId];
+
+ // track down vertex
+ const uint vertRep = group.vertexRepresentative;
+ uint i = 3;
+ if (triangle.vertices[0] == vertRep)
+ i = 0;
+ else if (triangle.vertices[1] == vertRep)
+ i = 1;
+ else if (triangle.vertices[2] == vertRep)
+ i = 2;
+ assert(i < 3);
+
+ // early out
+ if (triangle.group[i] != UNSET_ENTRY)
+ return;
+
+ if (triangle.groupWithAny) {
+ // first to group with a group-with-anything triangle
+ // determines its orientation.
+ // This is the only existing order dependency in the code!!
+ if (triangle.group[0] == UNSET_ENTRY && triangle.group[1] == UNSET_ENTRY &&
+ triangle.group[2] == UNSET_ENTRY) {
+ triangle.orientPreserving = group.orientPreserving;
+ }
+ }
+
+ if (triangle.orientPreserving != group.orientPreserving)
+ return;
+
+ triangle.group[i] = groupId;
+
+ const uint t_L = triangle.neighbor[i];
+ const uint t_R = triangle.neighbor[i > 0 ? (i - 1) : 2];
+ assignRecur(t_L, groupId);
+ assignRecur(t_R, groupId);
+ }
+
+ void build4RuleGroups()
+ {
+ /* Note: This could be parallelized by grouping all [t, i] pairs into
+ * shards by hash(triangles[t].vertices[i]). This way, each shard can be processed
+ * independently and in parallel.
+ * However, the groupWithAny logic needs special handling (e.g. lock a mutex when
+ * encountering a groupWithAny triangle, then sort it out, then unlock and proceed).
+ */
+ for (uint t = 0; t < nrTriangles; t++) {
+ Triangle &triangle = triangles[t];
+ for (uint i = 0; i < 3; i++) {
+ // if not assigned to a group
+ if (triangle.groupWithAny || triangle.group[i] != UNSET_ENTRY) {
+ continue;
+ }
+
+ const uint newGroupId = (uint)groups.size();
+ triangle.group[i] = newGroupId;
+
+ groups.emplace_back(triangle.vertices[i], bool(triangle.orientPreserving));
+
+ const uint t_L = triangle.neighbor[i];
+ const uint t_R = triangle.neighbor[i > 0 ? (i - 1) : 2];
+ assignRecur(t_L, newGroupId);
+ assignRecur(t_R, newGroupId);
+ }
+ }
+ }
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+ template<bool atomic> void accumulateTSpaces(uint t)
+ {
+ const Triangle &triangle = triangles[t];
+ // only valid triangles get to add their contribution
+ if (triangle.groupWithAny) {
+ return;
+ }
+
+ /* Todo: Vectorize?
+ * Also: Could add special case for flat shading, when all normals are equal half of the fCos
+ * projections and two of the three tangent projections are unnecessary. */
+ std::array<float3, 3> n, p;
+ for (uint i = 0; i < 3; i++) {
+ n[i] = getNormal(triangle.vertices[i]);
+ p[i] = getPosition(triangle.vertices[i]);
+ }
+
+ std::array<float, 3> fCos = {dot(project(n[0], p[1] - p[0]), project(n[0], p[2] - p[0])),
+ dot(project(n[1], p[2] - p[1]), project(n[1], p[0] - p[1])),
+ dot(project(n[2], p[0] - p[2]), project(n[2], p[1] - p[2]))};
+
+ for (uint i = 0; i < 3; i++) {
+ uint groupId = triangle.group[i];
+ if (groupId != UNSET_ENTRY) {
+ float3 tangent = project(n[i], triangle.tangent) *
+ fast_acosf(std::clamp(fCos[i], -1.0f, 1.0f));
+ if constexpr (atomic) {
+ groups[groupId].accumulateTSpaceAtomic(tangent);
+ }
+ else {
+ groups[groupId].accumulateTSpace(tangent);
+ }
+ }
+ }
+ }
+
+ void generateTSpaces()
+ {
+ if (isParallel) {
+ runParallel(0u, nrTriangles, [&](uint t) { accumulateTSpaces<true>(t); });
+ }
+ else {
+ for (uint t = 0; t < nrTriangles; t++) {
+ accumulateTSpaces<false>(t);
+ }
+ }
+
+ /* TODO: Worth parallelizing? Probably not. */
+ for (Group &group : groups) {
+ group.normalizeTSpace();
+ }
+
+ tSpaces.resize(nrTSpaces);
+
+ for (uint t = 0; t < nrTriangles; t++) {
+ Triangle &triangle = triangles[t];
+ for (uint i = 0; i < 3; i++) {
+ uint groupId = triangle.group[i];
+ if (groupId == UNSET_ENTRY) {
+ continue;
+ }
+ const Group group = groups[groupId];
+ assert(triangle.orientPreserving == group.orientPreserving);
+
+ // output tspace
+ const uint offset = triangle.tSpaceIdx;
+ const uint faceVertex = triangle.faceVertex[i];
+ tSpaces[offset + faceVertex].accumulateGroup(group);
+ }
+ }
+ }
+};
+
+} // namespace mikk