Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'extern/quadriflow/src/hierarchy.cpp')
-rw-r--r--extern/quadriflow/src/hierarchy.cpp1344
1 files changed, 1344 insertions, 0 deletions
diff --git a/extern/quadriflow/src/hierarchy.cpp b/extern/quadriflow/src/hierarchy.cpp
new file mode 100644
index 00000000000..8cc41da23d0
--- /dev/null
+++ b/extern/quadriflow/src/hierarchy.cpp
@@ -0,0 +1,1344 @@
+#include "hierarchy.hpp"
+#include <fstream>
+#include <algorithm>
+#include <unordered_map>
+#include "config.hpp"
+#include "field-math.hpp"
+#include <queue>
+#include "localsat.hpp"
+#include "pcg32/pcg32.h"
+#ifdef WITH_TBB
+# include "tbb/tbb.h"
+# include "pss/parallel_stable_sort.h"
+#endif
+
+namespace qflow {
+
+Hierarchy::Hierarchy() {
+ mAdj.resize(MAX_DEPTH + 1);
+ mV.resize(MAX_DEPTH + 1);
+ mN.resize(MAX_DEPTH + 1);
+ mA.resize(MAX_DEPTH + 1);
+ mPhases.resize(MAX_DEPTH + 1);
+ mToLower.resize(MAX_DEPTH);
+ mToUpper.resize(MAX_DEPTH);
+ rng_seed = 0;
+
+ mCQ.reserve(MAX_DEPTH + 1);
+ mCQw.reserve(MAX_DEPTH + 1);
+ mCO.reserve(MAX_DEPTH + 1);
+ mCOw.reserve(MAX_DEPTH + 1);
+}
+
+#undef max
+
+void Hierarchy::Initialize(double scale, int with_scale) {
+ this->with_scale = with_scale;
+ generate_graph_coloring_deterministic(mAdj[0], mV[0].cols(), mPhases[0]);
+
+ for (int i = 0; i < MAX_DEPTH; ++i) {
+ DownsampleGraph(mAdj[i], mV[i], mN[i], mA[i], mV[i + 1], mN[i + 1], mA[i + 1], mToUpper[i],
+ mToLower[i], mAdj[i + 1]);
+ generate_graph_coloring_deterministic(mAdj[i + 1], mV[i + 1].cols(), mPhases[i + 1]);
+ if (mV[i + 1].cols() == 1) {
+ mAdj.resize(i + 2);
+ mV.resize(i + 2);
+ mN.resize(i + 2);
+ mA.resize(i + 2);
+ mToUpper.resize(i + 1);
+ mToLower.resize(i + 1);
+ break;
+ }
+ }
+ mQ.resize(mV.size());
+ mO.resize(mV.size());
+ mS.resize(mV.size());
+ mK.resize(mV.size());
+
+ mCO.resize(mV.size());
+ mCOw.resize(mV.size());
+ mCQ.resize(mV.size());
+ mCQw.resize(mV.size());
+
+ //Set random seed
+ srand(rng_seed);
+
+ mScale = scale;
+ for (int i = 0; i < mV.size(); ++i) {
+ mQ[i].resize(mN[i].rows(), mN[i].cols());
+ mO[i].resize(mN[i].rows(), mN[i].cols());
+ mS[i].resize(2, mN[i].cols());
+ mK[i].resize(2, mN[i].cols());
+ for (int j = 0; j < mN[i].cols(); ++j) {
+ Vector3d s, t;
+ coordinate_system(mN[i].col(j), s, t);
+ //rand() is not thread safe!
+ double angle = ((double)rand()) / RAND_MAX * 2 * M_PI;
+ double x = ((double)rand()) / RAND_MAX * 2 - 1.f;
+ double y = ((double)rand()) / RAND_MAX * 2 - 1.f;
+ mQ[i].col(j) = s * std::cos(angle) + t * std::sin(angle);
+ mO[i].col(j) = mV[i].col(j) + (s * x + t * y) * scale;
+ if (with_scale) {
+ mS[i].col(j) = Vector2d(1.0f, 1.0f);
+ mK[i].col(j) = Vector2d(0.0, 0.0);
+ }
+ }
+ }
+#ifdef WITH_CUDA
+ printf("copy to device...\n");
+ CopyToDevice();
+ printf("copy to device finish...\n");
+#endif
+}
+
+#ifdef WITH_TBB
+void Hierarchy::generate_graph_coloring_deterministic(const AdjacentMatrix& adj, int size,
+ std::vector<std::vector<int>>& phases) {
+ struct ColorData {
+ uint8_t nColors;
+ uint32_t nNodes[256];
+ ColorData() : nColors(0) {}
+ };
+
+ const uint8_t INVALID_COLOR = 0xFF;
+ phases.clear();
+
+ /* Generate a permutation */
+ std::vector<uint32_t> perm(size);
+ std::vector<tbb::spin_mutex> mutex(size);
+ for (uint32_t i = 0; i < size; ++i) perm[i] = i;
+
+ tbb::parallel_for(tbb::blocked_range<uint32_t>(0u, size, GRAIN_SIZE),
+ [&](const tbb::blocked_range<uint32_t>& range) {
+ pcg32 rng;
+ rng.advance(range.begin());
+ for (uint32_t i = range.begin(); i != range.end(); ++i) {
+ uint32_t j = i, k = rng.nextUInt(size - i) + i;
+ if (j == k) continue;
+ if (j > k) std::swap(j, k);
+ tbb::spin_mutex::scoped_lock l0(mutex[j]);
+ tbb::spin_mutex::scoped_lock l1(mutex[k]);
+ std::swap(perm[j], perm[k]);
+ }
+ });
+
+ std::vector<uint8_t> color(size, INVALID_COLOR);
+ ColorData colorData = tbb::parallel_reduce(
+ tbb::blocked_range<uint32_t>(0u, size, GRAIN_SIZE), ColorData(),
+ [&](const tbb::blocked_range<uint32_t>& range, ColorData colorData) -> ColorData {
+ std::vector<uint32_t> neighborhood;
+ bool possible_colors[256];
+
+ for (uint32_t pidx = range.begin(); pidx != range.end(); ++pidx) {
+ uint32_t i = perm[pidx];
+
+ neighborhood.clear();
+ neighborhood.push_back(i);
+ // for (const Link *link = adj[i]; link != adj[i + 1]; ++link)
+ for (auto& link : adj[i]) neighborhood.push_back(link.id);
+ std::sort(neighborhood.begin(), neighborhood.end());
+ for (uint32_t j : neighborhood) mutex[j].lock();
+
+ std::fill(possible_colors, possible_colors + colorData.nColors, true);
+
+ // for (const Link *link = adj[i]; link != adj[i + 1]; ++link) {
+ for (auto& link : adj[i]) {
+ uint8_t c = color[link.id];
+ if (c != INVALID_COLOR) {
+ while (c >= colorData.nColors) {
+ possible_colors[colorData.nColors] = true;
+ colorData.nNodes[colorData.nColors] = 0;
+ colorData.nColors++;
+ }
+ possible_colors[c] = false;
+ }
+ }
+
+ uint8_t chosen_color = INVALID_COLOR;
+ for (uint8_t j = 0; j < colorData.nColors; ++j) {
+ if (possible_colors[j]) {
+ chosen_color = j;
+ break;
+ }
+ }
+ if (chosen_color == INVALID_COLOR) {
+ if (colorData.nColors == INVALID_COLOR - 1)
+ throw std::runtime_error(
+ "Ran out of colors during graph coloring! "
+ "The input mesh is very likely corrupt.");
+ colorData.nNodes[colorData.nColors] = 1;
+ color[i] = colorData.nColors++;
+ } else {
+ colorData.nNodes[chosen_color]++;
+ color[i] = chosen_color;
+ }
+
+ for (uint32_t j : neighborhood) mutex[j].unlock();
+ }
+ return colorData;
+ },
+ [](ColorData c1, ColorData c2) -> ColorData {
+ ColorData result;
+ result.nColors = std::max(c1.nColors, c2.nColors);
+ memset(result.nNodes, 0, sizeof(uint32_t) * result.nColors);
+ for (uint8_t i = 0; i < c1.nColors; ++i) result.nNodes[i] += c1.nNodes[i];
+ for (uint8_t i = 0; i < c2.nColors; ++i) result.nNodes[i] += c2.nNodes[i];
+ return result;
+ });
+
+ phases.resize(colorData.nColors);
+ for (int i = 0; i < colorData.nColors; ++i) phases[i].reserve(colorData.nNodes[i]);
+
+ for (uint32_t i = 0; i < size; ++i) phases[color[i]].push_back(i);
+}
+#else
+void Hierarchy::generate_graph_coloring_deterministic(const AdjacentMatrix& adj, int size,
+ std::vector<std::vector<int>>& phases) {
+ phases.clear();
+
+ std::vector<uint32_t> perm(size);
+ for (uint32_t i = 0; i < size; ++i) perm[i] = i;
+ pcg32 rng;
+ rng.shuffle(perm.begin(), perm.end());
+
+ std::vector<int> color(size, -1);
+ std::vector<uint8_t> possible_colors;
+ std::vector<int> size_per_color;
+ int ncolors = 0;
+
+ for (uint32_t i = 0; i < size; ++i) {
+ uint32_t ip = perm[i];
+
+ std::fill(possible_colors.begin(), possible_colors.end(), 1);
+
+ for (auto& link : adj[ip]) {
+ int c = color[link.id];
+ if (c >= 0) possible_colors[c] = 0;
+ }
+
+ int chosen_color = -1;
+ for (uint32_t j = 0; j < possible_colors.size(); ++j) {
+ if (possible_colors[j]) {
+ chosen_color = j;
+ break;
+ }
+ }
+
+ if (chosen_color < 0) {
+ chosen_color = ncolors++;
+ possible_colors.resize(ncolors);
+ size_per_color.push_back(0);
+ }
+
+ color[ip] = chosen_color;
+ size_per_color[chosen_color]++;
+ }
+ phases.resize(ncolors);
+ for (int i = 0; i < ncolors; ++i) phases[i].reserve(size_per_color[i]);
+ for (uint32_t i = 0; i < size; ++i) phases[color[i]].push_back(i);
+}
+#endif
+
+void Hierarchy::DownsampleGraph(const AdjacentMatrix adj, const MatrixXd& V, const MatrixXd& N,
+ const VectorXd& A, MatrixXd& V_p, MatrixXd& N_p, VectorXd& A_p,
+ MatrixXi& to_upper, VectorXi& to_lower, AdjacentMatrix& adj_p) {
+ struct Entry {
+ int i, j;
+ double order;
+ inline Entry() { i = j = -1; };
+ inline Entry(int i, int j, double order) : i(i), j(j), order(order) {}
+ inline bool operator<(const Entry& e) const { return order > e.order; }
+ inline bool operator==(const Entry& e) const { return order == e.order; }
+ };
+
+ int nLinks = 0;
+ for (auto& adj_i : adj) nLinks += adj_i.size();
+ std::vector<Entry> entries(nLinks);
+ std::vector<int> bases(adj.size());
+ for (int i = 1; i < bases.size(); ++i) {
+ bases[i] = bases[i - 1] + adj[i - 1].size();
+ }
+
+#ifdef WITH_OMP
+#pragma omp parallel for
+#endif
+ for (int i = 0; i < V.cols(); ++i) {
+ int base = bases[i];
+ auto& ad = adj[i];
+ auto entry_it = entries.begin() + base;
+ for (auto it = ad.begin(); it != ad.end(); ++it, ++entry_it) {
+ int k = it->id;
+ double dp = N.col(i).dot(N.col(k));
+ double ratio = A[i] > A[k] ? (A[i] / A[k]) : (A[k] / A[i]);
+ *entry_it = Entry(i, k, dp * ratio);
+ }
+ }
+
+#ifdef WITH_TBB
+ pss::parallel_stable_sort(entries.begin(), entries.end(), std::less<Entry>());
+#else
+ std::stable_sort(entries.begin(), entries.end(), std::less<Entry>());
+#endif
+
+ std::vector<bool> mergeFlag(V.cols(), false);
+
+ int nCollapsed = 0;
+ for (int i = 0; i < nLinks; ++i) {
+ const Entry& e = entries[i];
+ if (mergeFlag[e.i] || mergeFlag[e.j]) continue;
+ mergeFlag[e.i] = mergeFlag[e.j] = true;
+ entries[nCollapsed++] = entries[i];
+ }
+ int vertexCount = V.cols() - nCollapsed;
+
+ // Allocate memory for coarsened graph
+ V_p.resize(3, vertexCount);
+ N_p.resize(3, vertexCount);
+ A_p.resize(vertexCount);
+ to_upper.resize(2, vertexCount);
+ to_lower.resize(V.cols());
+
+#ifdef WITH_OMP
+#pragma omp parallel for
+#endif
+ for (int i = 0; i < nCollapsed; ++i) {
+ const Entry& e = entries[i];
+ const double area1 = A[e.i], area2 = A[e.j], surfaceArea = area1 + area2;
+ if (surfaceArea > RCPOVERFLOW)
+ V_p.col(i) = (V.col(e.i) * area1 + V.col(e.j) * area2) / surfaceArea;
+ else
+ V_p.col(i) = (V.col(e.i) + V.col(e.j)) * 0.5f;
+ Vector3d normal = N.col(e.i) * area1 + N.col(e.j) * area2;
+ double norm = normal.norm();
+ N_p.col(i) = norm > RCPOVERFLOW ? Vector3d(normal / norm) : Vector3d::UnitX();
+ A_p[i] = surfaceArea;
+ to_upper.col(i) << e.i, e.j;
+ to_lower[e.i] = i;
+ to_lower[e.j] = i;
+ }
+
+ int offset = nCollapsed;
+
+ for (int i = 0; i < V.cols(); ++i) {
+ if (!mergeFlag[i]) {
+ int idx = offset++;
+ V_p.col(idx) = V.col(i);
+ N_p.col(idx) = N.col(i);
+ A_p[idx] = A[i];
+ to_upper.col(idx) << i, -1;
+ to_lower[i] = idx;
+ }
+ }
+
+ adj_p.resize(V_p.cols());
+ std::vector<int> capacity(V_p.cols());
+ std::vector<std::vector<Link>> scratches(V_p.cols());
+#ifdef WITH_OMP
+#pragma omp parallel for
+#endif
+ for (int i = 0; i < V_p.cols(); ++i) {
+ int t = 0;
+ for (int j = 0; j < 2; ++j) {
+ int upper = to_upper(j, i);
+ if (upper == -1) continue;
+ t += adj[upper].size();
+ }
+ scratches[i].reserve(t);
+ adj_p[i].reserve(t);
+ }
+#ifdef WITH_OMP
+#pragma omp parallel for
+#endif
+ for (int i = 0; i < V_p.cols(); ++i) {
+ auto& scratch = scratches[i];
+ for (int j = 0; j < 2; ++j) {
+ int upper = to_upper(j, i);
+ if (upper == -1) continue;
+ auto& ad = adj[upper];
+ for (auto& link : ad) scratch.push_back(Link(to_lower[link.id], link.weight));
+ }
+ std::sort(scratch.begin(), scratch.end());
+ int id = -1;
+ auto& ad = adj_p[i];
+ for (auto& link : scratch) {
+ if (link.id != i) {
+ if (id != link.id) {
+ ad.push_back(link);
+ id = link.id;
+ } else {
+ ad.back().weight += link.weight;
+ }
+ }
+ }
+ }
+}
+
+void Hierarchy::SaveToFile(FILE* fp) {
+ Save(fp, mScale);
+ Save(fp, mF);
+ Save(fp, mE2E);
+ Save(fp, mAdj);
+ Save(fp, mV);
+ Save(fp, mN);
+ Save(fp, mA);
+ Save(fp, mToLower);
+ Save(fp, mToUpper);
+ Save(fp, mQ);
+ Save(fp, mO);
+ Save(fp, mS);
+ Save(fp, mK);
+ Save(fp, this->mPhases);
+}
+
+void Hierarchy::LoadFromFile(FILE* fp) {
+ Read(fp, mScale);
+ Read(fp, mF);
+ Read(fp, mE2E);
+ Read(fp, mAdj);
+ Read(fp, mV);
+ Read(fp, mN);
+ Read(fp, mA);
+ Read(fp, mToLower);
+ Read(fp, mToUpper);
+ Read(fp, mQ);
+ Read(fp, mO);
+ Read(fp, mS);
+ Read(fp, mK);
+ Read(fp, this->mPhases);
+}
+
+void Hierarchy::UpdateGraphValue(std::vector<Vector3i>& FQ, std::vector<Vector3i>& F2E,
+ std::vector<Vector2i>& edge_diff) {
+ FQ = std::move(mFQ[0]);
+ F2E = std::move(mF2E[0]);
+ edge_diff = std::move(mEdgeDiff[0]);
+}
+
+void Hierarchy::DownsampleEdgeGraph(std::vector<Vector3i>& FQ, std::vector<Vector3i>& F2E,
+ std::vector<Vector2i>& edge_diff,
+ std::vector<int>& allow_changes, int level) {
+ std::vector<Vector2i> E2F(edge_diff.size(), Vector2i(-1, -1));
+ for (int i = 0; i < F2E.size(); ++i) {
+ for (int j = 0; j < 3; ++j) {
+ int e = F2E[i][j];
+ if (E2F[e][0] == -1)
+ E2F[e][0] = i;
+ else
+ E2F[e][1] = i;
+ }
+ }
+ int levels = (level == -1) ? 100 : level;
+ mFQ.resize(levels);
+ mF2E.resize(levels);
+ mE2F.resize(levels);
+ mEdgeDiff.resize(levels);
+ mAllowChanges.resize(levels);
+ mSing.resize(levels);
+ mToUpperEdges.resize(levels - 1);
+ mToUpperOrients.resize(levels - 1);
+ for (int i = 0; i < FQ.size(); ++i) {
+ Vector2i diff(0, 0);
+ for (int j = 0; j < 3; ++j) {
+ diff += rshift90(edge_diff[F2E[i][j]], FQ[i][j]);
+ }
+ if (diff != Vector2i::Zero()) {
+ mSing[0].push_back(i);
+ }
+ }
+ mAllowChanges[0] = allow_changes;
+ mFQ[0] = std::move(FQ);
+ mF2E[0] = std::move(F2E);
+ mE2F[0] = std::move(E2F);
+ mEdgeDiff[0] = std::move(edge_diff);
+ for (int l = 0; l < levels - 1; ++l) {
+ auto& FQ = mFQ[l];
+ auto& E2F = mE2F[l];
+ auto& F2E = mF2E[l];
+ auto& Allow = mAllowChanges[l];
+ auto& EdgeDiff = mEdgeDiff[l];
+ auto& Sing = mSing[l];
+ std::vector<int> fixed_faces(F2E.size(), 0);
+ for (auto& s : Sing) {
+ fixed_faces[s] = 1;
+ }
+
+ auto& toUpper = mToUpperEdges[l];
+ auto& toUpperOrients = mToUpperOrients[l];
+ toUpper.resize(E2F.size(), -1);
+ toUpperOrients.resize(E2F.size(), 0);
+
+ auto& nFQ = mFQ[l + 1];
+ auto& nE2F = mE2F[l + 1];
+ auto& nF2E = mF2E[l + 1];
+ auto& nAllow = mAllowChanges[l + 1];
+ auto& nEdgeDiff = mEdgeDiff[l + 1];
+ auto& nSing = mSing[l + 1];
+
+ for (int i = 0; i < E2F.size(); ++i) {
+ if (EdgeDiff[i] != Vector2i::Zero()) continue;
+ if ((E2F[i][0] >= 0 && fixed_faces[E2F[i][0]]) ||
+ (E2F[i][1] >= 0 && fixed_faces[E2F[i][1]])) {
+ continue;
+ }
+ for (int j = 0; j < 2; ++j) {
+ int f = E2F[i][j];
+ if (f < 0)
+ continue;
+ for (int k = 0; k < 3; ++k) {
+ int neighbor_e = F2E[f][k];
+ for (int m = 0; m < 2; ++m) {
+ int neighbor_f = E2F[neighbor_e][m];
+ if (neighbor_f < 0)
+ continue;
+ if (fixed_faces[neighbor_f] == 0) fixed_faces[neighbor_f] = 1;
+ }
+ }
+ }
+ if (E2F[i][0] >= 0)
+ fixed_faces[E2F[i][0]] = 2;
+ if (E2F[i][1] >= 0)
+ fixed_faces[E2F[i][1]] = 2;
+ toUpper[i] = -2;
+ }
+ for (int i = 0; i < E2F.size(); ++i) {
+ if (toUpper[i] == -2) continue;
+ if ((E2F[i][0] < 0 || fixed_faces[E2F[i][0]] == 2) && (E2F[i][1] < 0 || fixed_faces[E2F[i][1]] == 2)) {
+ toUpper[i] = -3;
+ continue;
+ }
+ }
+ int numE = 0;
+ for (int i = 0; i < toUpper.size(); ++i) {
+ if (toUpper[i] == -1) {
+ if ((E2F[i][0] < 0 || fixed_faces[E2F[i][0]] < 2) && (E2F[i][1] < 0 || fixed_faces[E2F[i][1]] < 2)) {
+ nE2F.push_back(E2F[i]);
+ toUpperOrients[i] = 0;
+ toUpper[i] = numE++;
+ continue;
+ }
+ int f0 = (E2F[i][1] < 0 || fixed_faces[E2F[i][0]] < 2) ? E2F[i][0] : E2F[i][1];
+ int e = i;
+ int f = f0;
+ std::vector<std::pair<int, int>> paths;
+ paths.push_back(std::make_pair(i, 0));
+ while (true) {
+ if (E2F[e][0] == f)
+ f = E2F[e][1];
+ else if (E2F[e][1] == f)
+ f = E2F[e][0];
+ if (f < 0 || fixed_faces[f] < 2) {
+ for (int j = 0; j < paths.size(); ++j) {
+ auto& p = paths[j];
+ toUpper[p.first] = numE;
+ int orient = p.second;
+ if (j > 0) orient = (orient + toUpperOrients[paths[j - 1].first]) % 4;
+ toUpperOrients[p.first] = orient;
+ }
+ nE2F.push_back(Vector2i(f0, f));
+ numE += 1;
+ break;
+ }
+ int ind0 = -1, ind1 = -1;
+ int e0 = e;
+ for (int j = 0; j < 3; ++j) {
+ if (F2E[f][j] == e) {
+ ind0 = j;
+ break;
+ }
+ }
+ for (int j = 0; j < 3; ++j) {
+ int e1 = F2E[f][j];
+ if (e1 != e && toUpper[e1] != -2) {
+ e = e1;
+ ind1 = j;
+ break;
+ }
+ }
+
+ if (ind1 != -1) {
+ paths.push_back(std::make_pair(e, (FQ[f][ind1] - FQ[f][ind0] + 6) % 4));
+ } else {
+ if (EdgeDiff[e] != Vector2i::Zero()) {
+ printf("Unsatisfied !!!...\n");
+ printf("%d %d %d: %d %d\n", F2E[f][0], F2E[f][1], F2E[f][2], e0, e);
+ exit(0);
+ }
+ for (auto& p : paths) {
+ toUpper[p.first] = numE;
+ toUpperOrients[p.first] = 0;
+ }
+ numE += 1;
+ nE2F.push_back(Vector2i(f0, f0));
+ break;
+ }
+ }
+ }
+ }
+ nEdgeDiff.resize(numE);
+ nAllow.resize(numE * 2, 1);
+ for (int i = 0; i < toUpper.size(); ++i) {
+ if (toUpper[i] >= 0 && toUpperOrients[i] == 0) {
+ nEdgeDiff[toUpper[i]] = EdgeDiff[i];
+ }
+ if (toUpper[i] >= 0) {
+ int dimension = toUpperOrients[i] % 2;
+ if (Allow[i * 2 + dimension] == 0)
+ nAllow[toUpper[i] * 2] = 0;
+ else if (Allow[i * 2 + dimension] == 2)
+ nAllow[toUpper[i] * 2] = 2;
+ if (Allow[i * 2 + 1 - dimension] == 0)
+ nAllow[toUpper[i] * 2 + 1] = 0;
+ else if (Allow[i * 2 + 1 - dimension] == 2)
+ nAllow[toUpper[i] * 2 + 1] = 2;
+ }
+ }
+ std::vector<int> upperface(F2E.size(), -1);
+
+ for (int i = 0; i < F2E.size(); ++i) {
+ Vector3i eid;
+ for (int j = 0; j < 3; ++j) {
+ eid[j] = toUpper[F2E[i][j]];
+ }
+ if (eid[0] >= 0 && eid[1] >= 0 && eid[2] >= 0) {
+ Vector3i eid_orient;
+ for (int j = 0; j < 3; ++j) {
+ eid_orient[j] = (FQ[i][j] + 4 - toUpperOrients[F2E[i][j]]) % 4;
+ }
+ upperface[i] = nF2E.size();
+ nF2E.push_back(eid);
+ nFQ.push_back(eid_orient);
+ }
+ }
+ for (int i = 0; i < nE2F.size(); ++i) {
+ for (int j = 0; j < 2; ++j) {
+ if (nE2F[i][j] >= 0)
+ nE2F[i][j] = upperface[nE2F[i][j]];
+ }
+ }
+
+ for (auto& s : Sing) {
+ if (upperface[s] >= 0) nSing.push_back(upperface[s]);
+ }
+ mToUpperFaces.push_back(std::move(upperface));
+
+ if (nEdgeDiff.size() == EdgeDiff.size()) {
+ levels = l + 1;
+ break;
+ }
+ }
+
+ mFQ.resize(levels);
+ mF2E.resize(levels);
+ mAllowChanges.resize(levels);
+ mE2F.resize(levels);
+ mEdgeDiff.resize(levels);
+ mSing.resize(levels);
+ mToUpperEdges.resize(levels - 1);
+ mToUpperOrients.resize(levels - 1);
+}
+
+int Hierarchy::FixFlipSat(int depth, int threshold) {
+ if (system("which minisat > /dev/null 2>&1")) {
+ printf("minisat not found, \"-sat\" will not be used!\n");
+ return 0;
+ }
+ if (system("which timeout > /dev/null 2>&1")) {
+ printf("timeout not found, \"-sat\" will not be used!\n");
+ return 0;
+ }
+
+ auto& F2E = mF2E[depth];
+ auto& E2F = mE2F[depth];
+ auto& FQ = mFQ[depth];
+ auto& EdgeDiff = mEdgeDiff[depth];
+ auto& AllowChanges = mAllowChanges[depth];
+
+ // build E2E
+ std::vector<int> E2E(F2E.size() * 3, -1);
+ for (int i = 0; i < E2F.size(); ++i) {
+ int f1 = E2F[i][0];
+ int f2 = E2F[i][1];
+ int t1 = 0;
+ int t2 = 2;
+ if (f1 != -1) while (F2E[f1][t1] != i) t1 += 1;
+ if (f2 != -1) while (F2E[f2][t2] != i) t2 -= 1;
+ t1 += f1 * 3;
+ t2 += f2 * 3;
+ if (f1 != -1) E2E[t1] = (f2 == -1) ? -1 : t2;
+ if (f2 != -1) E2E[t2] = (f1 == -1) ? -1 : t1;
+ }
+
+ auto IntegerArea = [&](int f) {
+ Vector2i diff1 = rshift90(EdgeDiff[F2E[f][0]], FQ[f][0]);
+ Vector2i diff2 = rshift90(EdgeDiff[F2E[f][1]], FQ[f][1]);
+ return diff1[0] * diff2[1] - diff1[1] * diff2[0];
+ };
+
+ std::deque<std::pair<int, int>> Q;
+ std::vector<bool> mark_dedges(F2E.size() * 3, false);
+ for (int f = 0; f < F2E.size(); ++f) {
+ if (IntegerArea(f) < 0) {
+ for (int j = 0; j < 3; ++j) {
+ if (mark_dedges[f * 3 + j]) continue;
+ Q.push_back(std::make_pair(f * 3 + j, 0));
+ mark_dedges[f * 3 + j] = true;
+ }
+ }
+ }
+
+ int mark_count = 0;
+ while (!Q.empty()) {
+ int e0 = Q.front().first;
+ int depth = Q.front().second;
+ Q.pop_front();
+ mark_count++;
+
+ int e = e0, e1;
+ do {
+ e1 = E2E[e];
+ if (e1 == -1) break;
+ int length = EdgeDiff[F2E[e1 / 3][e1 % 3]].array().abs().sum();
+ if (length == 0 && !mark_dedges[e1]) {
+ mark_dedges[e1] = true;
+ Q.push_front(std::make_pair(e1, depth));
+ }
+ e = (e1 / 3) * 3 + (e1 + 1) % 3;
+ mark_dedges[e] = true;
+ } while (e != e0);
+ if (e1 == -1) {
+ do {
+ e1 = E2E[e];
+ if (e1 == -1) break;
+ int length = EdgeDiff[F2E[e1 / 3][e1 % 3]].array().abs().sum();
+ if (length == 0 && !mark_dedges[e1]) {
+ mark_dedges[e1] = true;
+ Q.push_front(std::make_pair(e1, depth));
+ }
+ e = (e1 / 3) * 3 + (e1 + 2) % 3;
+ mark_dedges[e] = true;
+ } while (e != e0);
+ }
+
+ do {
+ e1 = E2E[e];
+ if (e1 == -1) break;
+ int length = EdgeDiff[F2E[e1 / 3][e1 % 3]].array().abs().sum();
+ if (length > 0 && depth + length <= threshold && !mark_dedges[e1]) {
+ mark_dedges[e1] = true;
+ Q.push_back(std::make_pair(e1, depth + length));
+ }
+ e = e1 / 3 * 3 + (e1 + 1) % 3;
+ mark_dedges[e] = true;
+ } while (e != e0);
+ if (e1 == -1) {
+ do {
+ e1 = E2E[e];
+ if (e1 == -1) break;
+ int length = EdgeDiff[F2E[e1 / 3][e1 % 3]].array().abs().sum();
+ if (length > 0 && depth + length <= threshold && !mark_dedges[e1]) {
+ mark_dedges[e1] = true;
+ Q.push_back(std::make_pair(e1, depth + length));
+ }
+ e = e1 / 3 * 3 + (e1 + 2) % 3;
+ mark_dedges[e] = true;
+ } while (e != e0);
+ }
+ }
+ lprintf("[FlipH] Depth %2d: marked = %d\n", depth, mark_count);
+
+ std::vector<bool> flexible(EdgeDiff.size(), false);
+ for (int i = 0; i < F2E.size(); ++i) {
+ for (int j = 0; j < 3; ++j) {
+ int dedge = i * 3 + j;
+ int edgeid = F2E[i][j];
+ if (mark_dedges[dedge]) {
+ flexible[edgeid] = true;
+ }
+ }
+ }
+ for (int i = 0; i < flexible.size(); ++i) {
+ if (E2F[i][0] == E2F[i][1]) flexible[i] = false;
+ if (AllowChanges[i] == 0) flexible[i] = false;
+ }
+
+ // Reindexing and solve
+ int num_group = 0;
+ std::vector<int> groups(EdgeDiff.size(), -1);
+ std::vector<int> indices(EdgeDiff.size(), -1);
+ for (int i = 0; i < EdgeDiff.size(); ++i) {
+ if (groups[i] == -1 && flexible[i]) {
+ // group it
+ std::queue<int> q;
+ q.push(i);
+ groups[i] = num_group;
+ while (!q.empty()) {
+ int e = q.front();
+ q.pop();
+ int f[] = {E2F[e][0], E2F[e][1]};
+ for (int j = 0; j < 2; ++j) {
+ if (f[j] == -1) continue;
+ for (int k = 0; k < 3; ++k) {
+ int e1 = F2E[f[j]][k];
+ if (flexible[e1] && groups[e1] == -1) {
+ groups[e1] = num_group;
+ q.push(e1);
+ }
+ }
+ }
+ }
+ num_group += 1;
+ }
+ }
+
+ std::vector<int> num_edges(num_group);
+ std::vector<int> num_flips(num_group);
+ std::vector<std::vector<int>> values(num_group);
+ std::vector<std::vector<Vector3i>> variable_eq(num_group);
+ std::vector<std::vector<Vector3i>> constant_eq(num_group);
+ std::vector<std::vector<Vector4i>> variable_ge(num_group);
+ std::vector<std::vector<Vector2i>> constant_ge(num_group);
+ for (int i = 0; i < groups.size(); ++i) {
+ if (groups[i] != -1) {
+ indices[i] = num_edges[groups[i]]++;
+ values[groups[i]].push_back(EdgeDiff[i][0]);
+ values[groups[i]].push_back(EdgeDiff[i][1]);
+ }
+ }
+ std::vector<int> num_edges_flexible = num_edges;
+ std::map<std::pair<int, int>, int> fixed_variables;
+ for (int i = 0; i < F2E.size(); ++i) {
+ Vector2i var[3];
+ Vector2i cst[3];
+ int gind = 0;
+ while (gind < 3 && groups[F2E[i][gind]] == -1) gind += 1;
+ if (gind == 3) continue;
+ int group = groups[F2E[i][gind]];
+ int ind[3] = {-1, -1, -1};
+ for (int j = 0; j < 3; ++j) {
+ int g = groups[F2E[i][j]];
+ if (g != group) {
+ if (g == -1) {
+ auto key = std::make_pair(F2E[i][j], group);
+ auto it = fixed_variables.find(key);
+ if (it == fixed_variables.end()) {
+ ind[j] = num_edges[group];
+ values[group].push_back(EdgeDiff[F2E[i][j]][0]);
+ values[group].push_back(EdgeDiff[F2E[i][j]][1]);
+ fixed_variables[key] = num_edges[group]++;
+ } else {
+ ind[j] = it->second;
+ }
+ }
+ } else {
+ ind[j] = indices[F2E[i][j]];
+ }
+ }
+ for (int j = 0; j < 3; ++j) assert(ind[j] != -1);
+ for (int j = 0; j < 3; ++j) {
+ var[j] = rshift90(Vector2i(ind[j] * 2 + 1, ind[j] * 2 + 2), FQ[i][j]);
+ cst[j] = var[j].array().sign();
+ var[j] = var[j].array().abs() - 1;
+ }
+
+ num_flips[group] += IntegerArea(i) < 0;
+ variable_eq[group].push_back(Vector3i(var[0][0], var[1][0], var[2][0]));
+ constant_eq[group].push_back(Vector3i(cst[0][0], cst[1][0], cst[2][0]));
+ variable_eq[group].push_back(Vector3i(var[0][1], var[1][1], var[2][1]));
+ constant_eq[group].push_back(Vector3i(cst[0][1], cst[1][1], cst[2][1]));
+
+ variable_ge[group].push_back(Vector4i(var[0][0], var[1][1], var[0][1], var[1][0]));
+ constant_ge[group].push_back(Vector2i(cst[0][0] * cst[1][1], cst[0][1] * cst[1][0]));
+ }
+ int flip_before = 0, flip_after = 0;
+ for (int i = 0; i < F2E.size(); ++i) {
+ int area = IntegerArea(i);
+ if (area < 0) flip_before++;
+ }
+
+ for (int i = 0; i < num_group; ++i) {
+ std::vector<bool> flexible(values[i].size(), true);
+ for (int j = num_edges_flexible[i] * 2; j < flexible.size(); ++j) {
+ flexible[j] = false;
+ }
+ SolveSatProblem(values[i].size(), values[i], flexible, variable_eq[i], constant_eq[i],
+ variable_ge[i], constant_ge[i]);
+ }
+
+ for (int i = 0; i < EdgeDiff.size(); ++i) {
+ int group = groups[i];
+ if (group == -1) continue;
+ EdgeDiff[i][0] = values[group][2 * indices[i] + 0];
+ EdgeDiff[i][1] = values[group][2 * indices[i] + 1];
+ }
+ for (int i = 0; i < F2E.size(); ++i) {
+ Vector2i diff(0, 0);
+ for (int j = 0; j < 3; ++j) {
+ diff += rshift90(EdgeDiff[F2E[i][j]], FQ[i][j]);
+ }
+ assert(diff == Vector2i::Zero());
+
+ int area = IntegerArea(i);
+ if (area < 0) flip_after++;
+ }
+
+ lprintf("[FlipH] FlipArea, Before: %d After %d\n", flip_before, flip_after);
+ return flip_after;
+}
+
+void Hierarchy::PushDownwardFlip(int depth) {
+ auto& EdgeDiff = mEdgeDiff[depth];
+ auto& nEdgeDiff = mEdgeDiff[depth - 1];
+ auto& toUpper = mToUpperEdges[depth - 1];
+ auto& toUpperOrients = mToUpperOrients[depth - 1];
+ auto& toUpperFaces = mToUpperFaces[depth - 1];
+ for (int i = 0; i < toUpper.size(); ++i) {
+ if (toUpper[i] >= 0) {
+ int orient = (4 - toUpperOrients[i]) % 4;
+ nEdgeDiff[i] = rshift90(EdgeDiff[toUpper[i]], orient);
+ } else {
+ nEdgeDiff[i] = Vector2i(0, 0);
+ }
+ }
+ auto& nF2E = mF2E[depth - 1];
+ auto& nFQ = mFQ[depth - 1];
+ for (int i = 0; i < nF2E.size(); ++i) {
+ Vector2i diff(0, 0);
+ for (int j = 0; j < 3; ++j) {
+ diff += rshift90(nEdgeDiff[nF2E[i][j]], nFQ[i][j]);
+ }
+ if (diff != Vector2i::Zero()) {
+ printf("Fail!!!!!!! %d\n", i);
+ for (int j = 0; j < 3; ++j) {
+ Vector2i d = rshift90(nEdgeDiff[nF2E[i][j]], nFQ[i][j]);
+ printf("<%d %d %d>\n", nF2E[i][j], nFQ[i][j], toUpperOrients[nF2E[i][j]]);
+ printf("%d %d\n", d[0], d[1]);
+ printf("%d -> %d\n", nF2E[i][j], toUpper[nF2E[i][j]]);
+ }
+ printf("%d -> %d\n", i, toUpperFaces[i]);
+ exit(1);
+ }
+ }
+}
+
+void Hierarchy::FixFlip() {
+ int l = mF2E.size() - 1;
+ auto& F2E = mF2E[l];
+ auto& E2F = mE2F[l];
+ auto& FQ = mFQ[l];
+ auto& EdgeDiff = mEdgeDiff[l];
+ auto& AllowChange = mAllowChanges[l];
+
+ // build E2E
+ std::vector<int> E2E(F2E.size() * 3, -1);
+ for (int i = 0; i < E2F.size(); ++i) {
+ int v1 = E2F[i][0];
+ int v2 = E2F[i][1];
+ int t1 = 0;
+ int t2 = 2;
+ if (v1 != -1)
+ while (F2E[v1][t1] != i) t1 += 1;
+ if (v2 != -1)
+ while (F2E[v2][t2] != i) t2 -= 1;
+ t1 += v1 * 3;
+ t2 += v2 * 3;
+ if (v1 != -1)
+ E2E[t1] = (v2 == -1) ? -1 : t2;
+ if (v2 != -1)
+ E2E[t2] = (v1 == -1) ? -1 : t1;
+ }
+
+ auto Area = [&](int f) {
+ Vector2i diff1 = rshift90(EdgeDiff[F2E[f][0]], FQ[f][0]);
+ Vector2i diff2 = rshift90(EdgeDiff[F2E[f][1]], FQ[f][1]);
+ return diff1[0] * diff2[1] - diff1[1] * diff2[0];
+ };
+ std::vector<int> valences(F2E.size() * 3, -10000); // comment this line
+ auto CheckShrink = [&](int deid, int allowed_edge_length) {
+ // Check if we want shrink direct edge deid so that all edge length is smaller than
+ // allowed_edge_length
+ if (deid == -1) {
+ return false;
+ }
+ std::vector<int> corresponding_faces;
+ std::vector<int> corresponding_edges;
+ std::vector<Vector2i> corresponding_diff;
+ int deid0 = deid;
+ while (deid != -1) {
+ deid = deid / 3 * 3 + (deid + 2) % 3;
+ if (E2E[deid] == -1)
+ break;
+ deid = E2E[deid];
+ if (deid == deid0)
+ break;
+ }
+ Vector2i diff = EdgeDiff[F2E[deid / 3][deid % 3]];
+ do {
+ corresponding_diff.push_back(diff);
+ corresponding_edges.push_back(deid);
+ corresponding_faces.push_back(deid / 3);
+
+ // transform to the next face
+ deid = E2E[deid];
+ if (deid == -1) {
+ return false;
+ }
+ // transform for the target incremental diff
+ diff = -rshift90(diff, FQ[deid / 3][deid % 3]);
+ deid = deid / 3 * 3 + (deid + 1) % 3;
+ // transform to local
+ diff = rshift90(diff, (4 - FQ[deid / 3][deid % 3]) % 4);
+ } while (deid != corresponding_edges.front());
+ // check diff
+ if (deid != -1 && diff != corresponding_diff.front()) {
+ return false;
+ }
+ std::unordered_map<int, Vector2i> new_values;
+ for (int i = 0; i < corresponding_diff.size(); ++i) {
+ int deid = corresponding_edges[i];
+ int eid = F2E[deid / 3][deid % 3];
+ new_values[eid] = EdgeDiff[eid];
+ }
+ for (int i = 0; i < corresponding_diff.size(); ++i) {
+ int deid = corresponding_edges[i];
+ int eid = F2E[deid / 3][deid % 3];
+ for (int j = 0; j < 2; ++j) {
+ if (corresponding_diff[i][j] != 0 && AllowChange[eid * 2 + j] == 0) return false;
+ }
+ auto& res = new_values[eid];
+ res -= corresponding_diff[i];
+ int edge_thres = allowed_edge_length;
+ if (abs(res[0]) > edge_thres || abs(res[1]) > edge_thres) {
+ return false;
+ }
+ if ((abs(res[0]) > 1 && abs(res[1]) != 0) || (abs(res[1]) > 1 && abs(res[0]) != 0))
+ return false;
+ }
+ int prev_area = 0, current_area = 0;
+ for (int f = 0; f < corresponding_faces.size(); ++f) {
+ int area = Area(corresponding_faces[f]);
+ if (area < 0) prev_area += 1;
+ }
+ for (auto& p : new_values) {
+ std::swap(EdgeDiff[p.first], p.second);
+ }
+ for (int f = 0; f < corresponding_faces.size(); ++f) {
+ int area = Area(corresponding_faces[f]);
+ if (area < 0) {
+ current_area += 1;
+ }
+ }
+ if (current_area < prev_area) {
+ return true;
+ }
+ for (auto& p : new_values) {
+ std::swap(EdgeDiff[p.first], p.second);
+ }
+ return false;
+ };
+
+ std::queue<int> flipped;
+ for (int i = 0; i < F2E.size(); ++i) {
+ int area = Area(i);
+ if (area < 0) {
+ flipped.push(i);
+ }
+ }
+
+ bool update = false;
+ int max_len = 1;
+ while (!update && max_len <= 2) {
+ while (!flipped.empty()) {
+ int f = flipped.front();
+ if (Area(f) >= 0) {
+ flipped.pop();
+ continue;
+ }
+ for (int i = 0; i < 3; ++i) {
+ if (CheckShrink(f * 3 + i, max_len) || CheckShrink(E2E[f * 3 + i], max_len)) {
+ update = true;
+ break;
+ }
+ }
+ flipped.pop();
+ }
+ max_len += 1;
+ }
+ if (update) {
+ Hierarchy flip_hierarchy;
+ flip_hierarchy.DownsampleEdgeGraph(mFQ.back(), mF2E.back(), mEdgeDiff.back(),
+ mAllowChanges.back(), -1);
+ flip_hierarchy.FixFlip();
+ flip_hierarchy.UpdateGraphValue(mFQ.back(), mF2E.back(), mEdgeDiff.back());
+ }
+ PropagateEdge();
+}
+
+void Hierarchy::PropagateEdge() {
+ for (int level = mToUpperEdges.size(); level > 0; --level) {
+ auto& EdgeDiff = mEdgeDiff[level];
+ auto& nEdgeDiff = mEdgeDiff[level - 1];
+ auto& FQ = mFQ[level];
+ auto& nFQ = mFQ[level - 1];
+ auto& F2E = mF2E[level - 1];
+ auto& toUpper = mToUpperEdges[level - 1];
+ auto& toUpperFace = mToUpperFaces[level - 1];
+ auto& toUpperOrients = mToUpperOrients[level - 1];
+ for (int i = 0; i < toUpper.size(); ++i) {
+ if (toUpper[i] >= 0) {
+ int orient = (4 - toUpperOrients[i]) % 4;
+ nEdgeDiff[i] = rshift90(EdgeDiff[toUpper[i]], orient);
+ } else {
+ nEdgeDiff[i] = Vector2i(0, 0);
+ }
+ }
+ for (int i = 0; i < toUpperFace.size(); ++i) {
+ if (toUpperFace[i] == -1) continue;
+ Vector3i eid_orient = FQ[toUpperFace[i]];
+ for (int j = 0; j < 3; ++j) {
+ nFQ[i][j] = (eid_orient[j] + toUpperOrients[F2E[i][j]]) % 4;
+ }
+ }
+ }
+}
+
+void Hierarchy::clearConstraints() {
+ int levels = mV.size();
+ if (levels == 0) return;
+ for (int i = 0; i < levels; ++i) {
+ int size = mV[i].cols();
+ mCQ[i].resize(3, size);
+ mCO[i].resize(3, size);
+ mCQw[i].resize(size);
+ mCOw[i].resize(size);
+ mCQw[i].setZero();
+ mCOw[i].setZero();
+ }
+}
+
+void Hierarchy::propagateConstraints() {
+ int levels = mV.size();
+ if (levels == 0) return;
+
+ for (int l = 0; l < levels - 1; ++l) {
+ auto& N = mN[l];
+ auto& N_next = mN[l + 1];
+ auto& V = mV[l];
+ auto& V_next = mV[l + 1];
+ auto& CQ = mCQ[l];
+ auto& CQ_next = mCQ[l + 1];
+ auto& CQw = mCQw[l];
+ auto& CQw_next = mCQw[l + 1];
+ auto& CO = mCO[l];
+ auto& CO_next = mCO[l + 1];
+ auto& COw = mCOw[l];
+ auto& COw_next = mCOw[l + 1];
+ auto& toUpper = mToUpper[l];
+ // FIXME
+ // MatrixXd& S = mS[l];
+
+ for (uint32_t i = 0; i != mV[l + 1].cols(); ++i) {
+ Vector2i upper = toUpper.col(i);
+ Vector3d cq = Vector3d::Zero(), co = Vector3d::Zero();
+ float cqw = 0.0f, cow = 0.0f;
+
+ bool has_cq0 = CQw[upper[0]] != 0;
+ bool has_cq1 = upper[1] != -1 && CQw[upper[1]] != 0;
+ bool has_co0 = COw[upper[0]] != 0;
+ bool has_co1 = upper[1] != -1 && COw[upper[1]] != 0;
+
+ if (has_cq0 && !has_cq1) {
+ cq = CQ.col(upper[0]);
+ cqw = CQw[upper[0]];
+ } else if (has_cq1 && !has_cq0) {
+ cq = CQ.col(upper[1]);
+ cqw = CQw[upper[1]];
+ } else if (has_cq1 && has_cq0) {
+ Vector3d q_i = CQ.col(upper[0]);
+ Vector3d n_i = CQ.col(upper[0]);
+ Vector3d q_j = CQ.col(upper[1]);
+ Vector3d n_j = CQ.col(upper[1]);
+ auto result = compat_orientation_extrinsic_4(q_i, n_i, q_j, n_j);
+ cq = result.first * CQw[upper[0]] + result.second * CQw[upper[1]];
+ cqw = (CQw[upper[0]] + CQw[upper[1]]);
+ }
+ if (cq != Vector3d::Zero()) {
+ Vector3d n = N_next.col(i);
+ cq -= n.dot(cq) * n;
+ if (cq.squaredNorm() > RCPOVERFLOW) cq.normalize();
+ }
+
+ if (has_co0 && !has_co1) {
+ co = CO.col(upper[0]);
+ cow = COw[upper[0]];
+ } else if (has_co1 && !has_co0) {
+ co = CO.col(upper[1]);
+ cow = COw[upper[1]];
+ } else if (has_co1 && has_co0) {
+ double scale_x = mScale;
+ double scale_y = mScale;
+ if (with_scale) {
+ // FIXME
+ // scale_x *= S(0, i);
+ // scale_y *= S(1, i);
+ }
+ double inv_scale_x = 1.0f / scale_x;
+ double inv_scale_y = 1.0f / scale_y;
+
+ double scale_x_1 = mScale;
+ double scale_y_1 = mScale;
+ if (with_scale) {
+ // FIXME
+ // scale_x_1 *= S(0, j);
+ // scale_y_1 *= S(1, j);
+ }
+ double inv_scale_x_1 = 1.0f / scale_x_1;
+ double inv_scale_y_1 = 1.0f / scale_y_1;
+ auto result = compat_position_extrinsic_4(
+ V.col(upper[0]), N.col(upper[0]), CQ.col(upper[0]), CO.col(upper[0]),
+ V.col(upper[1]), N.col(upper[1]), CQ.col(upper[1]), CO.col(upper[1]), scale_x,
+ scale_y, inv_scale_x, inv_scale_y, scale_x_1, scale_y_1, inv_scale_x_1,
+ inv_scale_y_1);
+ cow = COw[upper[0]] + COw[upper[1]];
+ co = (result.first * COw[upper[0]] + result.second * COw[upper[1]]) / cow;
+ }
+ if (co != Vector3d::Zero()) {
+ Vector3d n = N_next.col(i), v = V_next.col(i);
+ co -= n.dot(cq - v) * n;
+ }
+#if 0
+ cqw *= 0.5f;
+ cow *= 0.5f;
+#else
+ if (cqw > 0) cqw = 1;
+ if (cow > 0) cow = 1;
+#endif
+
+ CQw_next[i] = cqw;
+ COw_next[i] = cow;
+ CQ_next.col(i) = cq;
+ CO_next.col(i) = co;
+ }
+ }
+}
+#ifdef WITH_CUDA
+#include <cuda_runtime.h>
+
+void Hierarchy::CopyToDevice() {
+ if (cudaAdj.empty()) {
+ cudaAdj.resize(mAdj.size());
+ cudaAdjOffset.resize(mAdj.size());
+ for (int i = 0; i < mAdj.size(); ++i) {
+ std::vector<int> offset(mAdj[i].size() + 1, 0);
+ for (int j = 0; j < mAdj[i].size(); ++j) {
+ offset[j + 1] = offset[j] + mAdj[i][j].size();
+ }
+ cudaMalloc(&cudaAdjOffset[i], sizeof(int) * (mAdj[i].size() + 1));
+ cudaMemcpy(cudaAdjOffset[i], offset.data(), sizeof(int) * (mAdj[i].size() + 1),
+ cudaMemcpyHostToDevice);
+ // cudaAdjOffset[i] = (int*)malloc(sizeof(int) * (mAdj[i].size() + 1));
+ // memcpy(cudaAdjOffset[i], offset.data(), sizeof(int) * (mAdj[i].size() +
+ // 1));
+
+ cudaMalloc(&cudaAdj[i], sizeof(Link) * offset.back());
+ // cudaAdj[i] = (Link*)malloc(sizeof(Link) * offset.back());
+ std::vector<Link> plainlink(offset.back());
+ for (int j = 0; j < mAdj[i].size(); ++j) {
+ memcpy(plainlink.data() + offset[j], mAdj[i][j].data(),
+ mAdj[i][j].size() * sizeof(Link));
+ }
+ cudaMemcpy(cudaAdj[i], plainlink.data(), plainlink.size() * sizeof(Link),
+ cudaMemcpyHostToDevice);
+ }
+ }
+
+ if (cudaN.empty()) {
+ cudaN.resize(mN.size());
+ for (int i = 0; i < mN.size(); ++i) {
+ cudaMalloc(&cudaN[i], sizeof(glm::dvec3) * mN[i].cols());
+ // cudaN[i] = (glm::dvec3*)malloc(sizeof(glm::dvec3) * mN[i].cols());
+ }
+ }
+ for (int i = 0; i < mN.size(); ++i) {
+ cudaMemcpy(cudaN[i], mN[i].data(), sizeof(glm::dvec3) * mN[i].cols(),
+ cudaMemcpyHostToDevice);
+ // memcpy(cudaN[i], mN[i].data(), sizeof(glm::dvec3) * mN[i].cols());
+ }
+
+ if (cudaV.empty()) {
+ cudaV.resize(mV.size());
+ for (int i = 0; i < mV.size(); ++i) {
+ cudaMalloc(&cudaV[i], sizeof(glm::dvec3) * mV[i].cols());
+ // cudaV[i] = (glm::dvec3*)malloc(sizeof(glm::dvec3) * mV[i].cols());
+ }
+ }
+ for (int i = 0; i < mV.size(); ++i) {
+ cudaMemcpy(cudaV[i], mV[i].data(), sizeof(glm::dvec3) * mV[i].cols(),
+ cudaMemcpyHostToDevice);
+ // memcpy(cudaV[i], mV[i].data(), sizeof(glm::dvec3) * mV[i].cols());
+ }
+
+ if (cudaQ.empty()) {
+ cudaQ.resize(mQ.size());
+ for (int i = 0; i < mQ.size(); ++i) {
+ cudaMalloc(&cudaQ[i], sizeof(glm::dvec3) * mQ[i].cols());
+ // cudaQ[i] = (glm::dvec3*)malloc(sizeof(glm::dvec3) * mQ[i].cols());
+ }
+ }
+ for (int i = 0; i < mQ.size(); ++i) {
+ cudaMemcpy(cudaQ[i], mQ[i].data(), sizeof(glm::dvec3) * mQ[i].cols(),
+ cudaMemcpyHostToDevice);
+ // memcpy(cudaQ[i], mQ[i].data(), sizeof(glm::dvec3) * mQ[i].cols());
+ }
+ if (cudaO.empty()) {
+ cudaO.resize(mO.size());
+ for (int i = 0; i < mO.size(); ++i) {
+ cudaMalloc(&cudaO[i], sizeof(glm::dvec3) * mO[i].cols());
+ // cudaO[i] = (glm::dvec3*)malloc(sizeof(glm::dvec3) * mO[i].cols());
+ }
+ }
+ for (int i = 0; i < mO.size(); ++i) {
+ cudaMemcpy(cudaO[i], mO[i].data(), sizeof(glm::dvec3) * mO[i].cols(),
+ cudaMemcpyHostToDevice);
+ // memcpy(cudaO[i], mO[i].data(), sizeof(glm::dvec3) * mO[i].cols());
+ }
+ if (cudaPhases.empty()) {
+ cudaPhases.resize(mPhases.size());
+ for (int i = 0; i < mPhases.size(); ++i) {
+ cudaPhases[i].resize(mPhases[i].size());
+ for (int j = 0; j < mPhases[i].size(); ++j) {
+ cudaMalloc(&cudaPhases[i][j], sizeof(int) * mPhases[i][j].size());
+ // cudaPhases[i][j] = (int*)malloc(sizeof(int) *
+ // mPhases[i][j].size());
+ }
+ }
+ }
+ for (int i = 0; i < mPhases.size(); ++i) {
+ for (int j = 0; j < mPhases[i].size(); ++j) {
+ cudaMemcpy(cudaPhases[i][j], mPhases[i][j].data(), sizeof(int) * mPhases[i][j].size(),
+ cudaMemcpyHostToDevice);
+ // memcpy(cudaPhases[i][j], mPhases[i][j].data(), sizeof(int) *
+ // mPhases[i][j].size());
+ }
+ }
+ if (cudaToUpper.empty()) {
+ cudaToUpper.resize(mToUpper.size());
+ for (int i = 0; i < mToUpper.size(); ++i) {
+ cudaMalloc(&cudaToUpper[i], mToUpper[i].cols() * sizeof(glm::ivec2));
+ // cudaToUpper[i] = (glm::ivec2*)malloc(mToUpper[i].cols() *
+ // sizeof(glm::ivec2));
+ }
+ }
+ for (int i = 0; i < mToUpper.size(); ++i) {
+ cudaMemcpy(cudaToUpper[i], mToUpper[i].data(), sizeof(glm::ivec2) * mToUpper[i].cols(),
+ cudaMemcpyHostToDevice);
+ // memcpy(cudaToUpper[i], mToUpper[i].data(), sizeof(glm::ivec2) *
+ // mToUpper[i].cols());
+ }
+ cudaDeviceSynchronize();
+}
+
+void Hierarchy::CopyToHost() {}
+
+#endif
+
+} // namespace qflow