diff options
Diffstat (limited to 'extern/quadriflow/src/optimizer.cpp')
-rw-r--r-- | extern/quadriflow/src/optimizer.cpp | 1419 |
1 files changed, 1419 insertions, 0 deletions
diff --git a/extern/quadriflow/src/optimizer.cpp b/extern/quadriflow/src/optimizer.cpp new file mode 100644 index 00000000000..1c59ad0f70c --- /dev/null +++ b/extern/quadriflow/src/optimizer.cpp @@ -0,0 +1,1419 @@ +#include "optimizer.hpp" + +#include <Eigen/Sparse> +#include <cmath> +#include <fstream> +#include <iostream> +#include <memory> +#include <queue> +#include <unordered_map> + +#include "config.hpp" +#include "field-math.hpp" +#include "flow.hpp" +#include "parametrizer.hpp" + +namespace qflow { + +#ifdef WITH_CUDA +# include <cuda_runtime.h> +#endif + +#ifndef EIGEN_MPL2_ONLY +template<class T> +using LinearSolver = Eigen::SimplicialLLT<T>; +#else +template<class T> +using LinearSolver = Eigen::SparseLU<T>; +#endif + +Optimizer::Optimizer() {} + +void Optimizer::optimize_orientations(Hierarchy& mRes) { +#ifdef WITH_CUDA + optimize_orientations_cuda(mRes); + printf("%s\n", cudaGetErrorString(cudaDeviceSynchronize())); + cudaMemcpy(mRes.mQ[0].data(), mRes.cudaQ[0], sizeof(glm::dvec3) * mRes.mQ[0].cols(), + cudaMemcpyDeviceToHost); + +#else + + int levelIterations = 6; + for (int level = mRes.mN.size() - 1; level >= 0; --level) { + AdjacentMatrix& adj = mRes.mAdj[level]; + const MatrixXd& N = mRes.mN[level]; + const MatrixXd& CQ = mRes.mCQ[level]; + const VectorXd& CQw = mRes.mCQw[level]; + MatrixXd& Q = mRes.mQ[level]; + auto& phases = mRes.mPhases[level]; + for (int iter = 0; iter < levelIterations; ++iter) { + for (int phase = 0; phase < phases.size(); ++phase) { + auto& p = phases[phase]; +#ifdef WITH_OMP +#pragma omp parallel for +#endif + for (int pi = 0; pi < p.size(); ++pi) { + int i = p[pi]; + const Vector3d n_i = N.col(i); + double weight_sum = 0.0f; + Vector3d sum = Q.col(i); + for (auto& link : adj[i]) { + const int j = link.id; + const double weight = link.weight; + if (weight == 0) continue; + const Vector3d n_j = N.col(j); + Vector3d q_j = Q.col(j); + std::pair<Vector3d, Vector3d> value = + compat_orientation_extrinsic_4(sum, n_i, q_j, n_j); + sum = value.first * weight_sum + value.second * weight; + sum -= n_i * n_i.dot(sum); + weight_sum += weight; + double norm = sum.norm(); + if (norm > RCPOVERFLOW) sum /= norm; + } + + if (CQw.size() > 0) { + float cw = CQw[i]; + if (cw != 0) { + std::pair<Vector3d, Vector3d> value = + compat_orientation_extrinsic_4(sum, n_i, CQ.col(i), n_i); + sum = value.first * (1 - cw) + value.second * cw; + sum -= n_i * n_i.dot(sum); + + float norm = sum.norm(); + if (norm > RCPOVERFLOW) sum /= norm; + } + } + + if (weight_sum > 0) { + Q.col(i) = sum; + } + } + } + } + if (level > 0) { + const MatrixXd& srcField = mRes.mQ[level]; + const MatrixXi& toUpper = mRes.mToUpper[level - 1]; + MatrixXd& destField = mRes.mQ[level - 1]; + const MatrixXd& N = mRes.mN[level - 1]; +#ifdef WITH_OMP +#pragma omp parallel for +#endif + for (int i = 0; i < srcField.cols(); ++i) { + for (int k = 0; k < 2; ++k) { + int dest = toUpper(k, i); + if (dest == -1) continue; + Vector3d q = srcField.col(i), n = N.col(dest); + destField.col(dest) = q - n * n.dot(q); + } + } + } + } + + for (int l = 0; l < mRes.mN.size() - 1; ++l) { + const MatrixXd& N = mRes.mN[l]; + const MatrixXd& N_next = mRes.mN[l + 1]; + const MatrixXd& Q = mRes.mQ[l]; + MatrixXd& Q_next = mRes.mQ[l + 1]; + auto& toUpper = mRes.mToUpper[l]; +#ifdef WITH_OMP +#pragma omp parallel for +#endif + for (int i = 0; i < toUpper.cols(); ++i) { + Vector2i upper = toUpper.col(i); + Vector3d q0 = Q.col(upper[0]); + Vector3d n0 = N.col(upper[0]); + Vector3d q; + + if (upper[1] != -1) { + Vector3d q1 = Q.col(upper[1]); + Vector3d n1 = N.col(upper[1]); + auto result = compat_orientation_extrinsic_4(q0, n0, q1, n1); + q = result.first + result.second; + } else { + q = q0; + } + Vector3d n = N_next.col(i); + q -= n.dot(q) * n; + if (q.squaredNorm() > RCPOVERFLOW) q.normalize(); + + Q_next.col(i) = q; + } + } + +#endif +} + +void Optimizer::optimize_scale(Hierarchy& mRes, VectorXd& rho, int adaptive) { + const MatrixXd& N = mRes.mN[0]; + MatrixXd& Q = mRes.mQ[0]; + MatrixXd& V = mRes.mV[0]; + MatrixXd& S = mRes.mS[0]; + MatrixXd& K = mRes.mK[0]; + MatrixXi& F = mRes.mF; + + if (adaptive) { + std::vector<Eigen::Triplet<double>> lhsTriplets; + + lhsTriplets.reserve(F.cols() * 6); + for (int i = 0; i < V.cols(); ++i) { + for (int j = 0; j < 2; ++j) { + S(j, i) = 1.0; + double sc1 = std::max(0.75 * S(j, i), rho[i] * 1.0 / mRes.mScale); + S(j, i) = std::min(S(j, i), sc1); + } + } + + std::vector<std::map<int, double>> entries(V.cols() * 2); + double lambda = 1; + for (int i = 0; i < entries.size(); ++i) { + entries[i][i] = lambda; + } + for (int i = 0; i < F.cols(); ++i) { + for (int j = 0; j < 3; ++j) { + int v1 = F(j, i); + int v2 = F((j + 1) % 3, i); + Vector3d diff = V.col(v2) - V.col(v1); + Vector3d q_1 = Q.col(v1); + Vector3d q_2 = Q.col(v2); + Vector3d n_1 = N.col(v1); + Vector3d n_2 = N.col(v2); + Vector3d q_1_y = n_1.cross(q_1); + auto index = compat_orientation_extrinsic_index_4(q_1, n_1, q_2, n_2); + int v1_x = v1 * 2, v1_y = v1 * 2 + 1, v2_x = v2 * 2, v2_y = v2 * 2 + 1; + + double dx = diff.dot(q_1); + double dy = diff.dot(q_1_y); + + double kx_g = K(0, v1); + double ky_g = K(1, v1); + + if (index.first % 2 != index.second % 2) { + std::swap(v2_x, v2_y); + } + double scale_x = (fmin(fmax(1 + kx_g * dy, 0.3), 3)); + double scale_y = (fmin(fmax(1 + ky_g * dx, 0.3), 3)); + // (v2_x - scale_x * v1_x)^2 = 0 + // x^2 - 2s xy + s^2 y^2 + entries[v2_x][v2_x] += 1; + entries[v1_x][v1_x] += scale_x * scale_x; + entries[v2_y][v2_y] += 1; + entries[v1_y][v1_y] += scale_y * scale_y; + auto it = entries[v1_x].find(v2_x); + if (it == entries[v1_x].end()) { + entries[v1_x][v2_x] = -scale_x; + entries[v2_x][v1_x] = -scale_x; + entries[v1_y][v2_y] = -scale_y; + entries[v2_y][v1_y] = -scale_y; + } else { + it->second -= scale_x; + entries[v2_x][v1_x] -= scale_x; + entries[v1_y][v2_y] -= scale_y; + entries[v2_y][v1_y] -= scale_y; + } + } + } + + Eigen::SparseMatrix<double> A(V.cols() * 2, V.cols() * 2); + VectorXd rhs(V.cols() * 2); + rhs.setZero(); + for (int i = 0; i < entries.size(); ++i) { + rhs(i) = lambda * S(i % 2, i / 2); + for (auto& rec : entries[i]) { + lhsTriplets.push_back(Eigen::Triplet<double>(i, rec.first, rec.second)); + } + } + A.setFromTriplets(lhsTriplets.begin(), lhsTriplets.end()); + LinearSolver<Eigen::SparseMatrix<double>> solver; + solver.analyzePattern(A); + + solver.factorize(A); + + VectorXd result = solver.solve(rhs); + + double total_area = 0; + for (int i = 0; i < V.cols(); ++i) { + S(0, i) = (result(i * 2)); + S(1, i) = (result(i * 2 + 1)); + total_area += S(0, i) * S(1, i); + } + total_area = sqrt(V.cols() / total_area); + for (int i = 0; i < V.cols(); ++i) { + // S(0, i) *= total_area; + // S(1, i) *= total_area; + } + } else { + for (int i = 0; i < V.cols(); ++i) { + S(0, i) = 1; + S(1, i) = 1; + } + } + + for (int l = 0; l < mRes.mS.size() - 1; ++l) { + const MatrixXd& S = mRes.mS[l]; + MatrixXd& S_next = mRes.mS[l + 1]; + auto& toUpper = mRes.mToUpper[l]; + for (int i = 0; i < toUpper.cols(); ++i) { + Vector2i upper = toUpper.col(i); + Vector2d q0 = S.col(upper[0]); + + if (upper[1] != -1) { + q0 = (q0 + S.col(upper[1])) * 0.5; + } + S_next.col(i) = q0; + } + } +} + +void Optimizer::optimize_positions(Hierarchy& mRes, int with_scale) { + int levelIterations = 6; +#ifdef WITH_CUDA + optimize_positions_cuda(mRes); + cudaMemcpy(mRes.mO[0].data(), mRes.cudaO[0], sizeof(glm::dvec3) * mRes.mO[0].cols(), + cudaMemcpyDeviceToHost); +#else + for (int level = mRes.mAdj.size() - 1; level >= 0; --level) { + for (int iter = 0; iter < levelIterations; ++iter) { + AdjacentMatrix& adj = mRes.mAdj[level]; + const MatrixXd &N = mRes.mN[level], &Q = mRes.mQ[level], &V = mRes.mV[level]; + const MatrixXd& CQ = mRes.mCQ[level]; + const MatrixXd& CO = mRes.mCO[level]; + const VectorXd& COw = mRes.mCOw[level]; + MatrixXd& O = mRes.mO[level]; + MatrixXd& S = mRes.mS[level]; + auto& phases = mRes.mPhases[level]; + for (int phase = 0; phase < phases.size(); ++phase) { + auto& p = phases[phase]; +#ifdef WITH_OMP +#pragma omp parallel for +#endif + for (int pi = 0; pi < p.size(); ++pi) { + int i = p[pi]; + double scale_x = mRes.mScale; + double scale_y = mRes.mScale; + if (with_scale) { + scale_x *= S(0, i); + scale_y *= S(1, i); + } + double inv_scale_x = 1.0f / scale_x; + double inv_scale_y = 1.0f / scale_y; + const Vector3d n_i = N.col(i), v_i = V.col(i); + Vector3d q_i = Q.col(i); + + Vector3d sum = O.col(i); + double weight_sum = 0.0f; + + q_i.normalize(); + for (auto& link : adj[i]) { + const int j = link.id; + const double weight = link.weight; + if (weight == 0) continue; + double scale_x_1 = mRes.mScale; + double scale_y_1 = mRes.mScale; + if (with_scale) { + scale_x_1 *= S(0, j); + scale_y_1 *= S(1, j); + } + double inv_scale_x_1 = 1.0f / scale_x_1; + double inv_scale_y_1 = 1.0f / scale_y_1; + + const Vector3d n_j = N.col(j), v_j = V.col(j); + Vector3d q_j = Q.col(j), o_j = O.col(j); + + q_j.normalize(); + + std::pair<Vector3d, Vector3d> value = compat_position_extrinsic_4( + v_i, n_i, q_i, sum, v_j, n_j, q_j, o_j, scale_x, scale_y, inv_scale_x, + inv_scale_y, scale_x_1, scale_y_1, inv_scale_x_1, inv_scale_y_1); + + sum = value.first * weight_sum + value.second * weight; + weight_sum += weight; + if (weight_sum > RCPOVERFLOW) sum /= weight_sum; + sum -= n_i.dot(sum - v_i) * n_i; + } + + if (COw.size() > 0) { + float cw = COw[i]; + if (cw != 0) { + Vector3d co = CO.col(i), cq = CQ.col(i); + Vector3d d = co - sum; + d -= cq.dot(d) * cq; + sum += cw * d; + sum -= n_i.dot(sum - v_i) * n_i; + } + } + + if (weight_sum > 0) { + O.col(i) = position_round_4(sum, q_i, n_i, v_i, scale_x, scale_y, + inv_scale_x, inv_scale_y); + } + } + } + } + if (level > 0) { + const MatrixXd& srcField = mRes.mO[level]; + const MatrixXi& toUpper = mRes.mToUpper[level - 1]; + MatrixXd& destField = mRes.mO[level - 1]; + const MatrixXd& N = mRes.mN[level - 1]; + const MatrixXd& V = mRes.mV[level - 1]; +#ifdef WITH_OMP +#pragma omp parallel for +#endif + for (int i = 0; i < srcField.cols(); ++i) { + for (int k = 0; k < 2; ++k) { + int dest = toUpper(k, i); + if (dest == -1) continue; + Vector3d o = srcField.col(i), n = N.col(dest), v = V.col(dest); + o -= n * n.dot(o - v); + destField.col(dest) = o; + } + } + } + } +#endif +} + +void Optimizer::optimize_positions_dynamic( + MatrixXi& F, MatrixXd& V, MatrixXd& N, MatrixXd& Q, std::vector<std::vector<int>>& Vset, + std::vector<Vector3d>& O_compact, std::vector<Vector4i>& F_compact, + std::vector<int>& V2E_compact, std::vector<int>& E2E_compact, double mScale, + std::vector<Vector3d>& diffs, std::vector<int>& diff_count, + std::map<std::pair<int, int>, int>& o2e, std::vector<int>& sharp_o, + std::map<int, std::pair<Vector3d, Vector3d>>& compact_sharp_constraints, int with_scale) { + std::set<int> uncertain; + for (auto& info : o2e) { + if (diff_count[info.second] == 0) { + uncertain.insert(info.first.first); + uncertain.insert(info.first.second); + } + } + std::vector<int> Vind(O_compact.size(), -1); + std::vector<std::list<int>> links(O_compact.size()); + std::vector<std::list<int>> dedges(O_compact.size()); + std::vector<std::vector<int>> adj(V.cols()); + for (int i = 0; i < F.cols(); ++i) { + for (int j = 0; j < 3; ++j) { + int v1 = F(j, i); + int v2 = F((j + 1) % 3, i); + adj[v1].push_back(v2); + } + } + auto FindNearest = [&]() { + for (int i = 0; i < O_compact.size(); ++i) { + if (Vind[i] == -1) { + double min_dis = 1e30; + int min_ind = -1; + for (auto v : Vset[i]) { + double dis = (V.col(v) - O_compact[i]).squaredNorm(); + if (dis < min_dis) { + min_dis = dis; + min_ind = v; + } + } + if (min_ind > -1) { + Vind[i] = min_ind; + double x = (O_compact[i] - V.col(min_ind)).dot(N.col(min_ind)); + O_compact[i] -= x * N.col(min_ind); + } + } else { + int current_v = Vind[i]; + Vector3d n = N.col(current_v); + double current_dis = (O_compact[i] - V.col(current_v)).squaredNorm(); + while (true) { + int next_v = -1; + for (auto& v : adj[current_v]) { + if (N.col(v).dot(n) < cos(10.0 / 180.0 * 3.141592654)) continue; + double dis = (O_compact[i] - V.col(v)).squaredNorm(); + if (dis < current_dis) { + current_dis = dis; + next_v = v; + } + } + if (next_v == -1) break; + // rotate ideal distance + Vector3d n1 = N.col(current_v); + Vector3d n2 = N.col(next_v); + Vector3d axis = n1.cross(n2); + double len = axis.norm(); + double angle = atan2(len, n1.dot(n2)); + axis.normalized(); + Matrix3d m = AngleAxisd(angle, axis).toRotationMatrix(); + for (auto e : dedges[i]) { + Vector3d& d = diffs[e]; + d = m * d; + } + current_v = next_v; + } + Vind[i] = current_v; + } + } + }; + + auto BuildConnection = [&]() { + for (int i = 0; i < links.size(); ++i) { + int deid0 = V2E_compact[i]; + if (deid0 != -1) { + std::list<int>& connection = links[i]; + std::list<int>& dedge = dedges[i]; + int deid = deid0; + do { + connection.push_back(F_compact[deid / 4][(deid + 1) % 4]); + dedge.push_back(deid); + deid = E2E_compact[deid / 4 * 4 + (deid + 3) % 4]; + } while (deid != -1 && deid != deid0); + if (deid == -1) { + deid = deid0; + do { + deid = E2E_compact[deid]; + if (deid == -1) break; + deid = deid / 4 * 4 + (deid + 1) % 4; + connection.push_front(F_compact[deid / 4][(deid + 1) % 4]); + dedge.push_front(deid); + } while (true); + } + } + } + }; + + std::vector<Vector3d> lines; + auto ComputeDistance = [&]() { + std::set<int> unobserved; + for (auto& info : o2e) { + if (diff_count[info.second] == 0) { + unobserved.insert(info.first.first); + } + } + while (true) { + bool update = false; + std::set<int> observed; + for (auto& p : unobserved) { + std::vector<int> observations, edges; + int count = 0; + for (auto& e : dedges[p]) { + edges.push_back(e); + if (diff_count[e]) { + count += 1; + observations.push_back(1); + } else { + observations.push_back(0); + } + } + if (count <= 1) continue; + update = true; + observed.insert(p); + for (int i = 0; i < observations.size(); ++i) { + if (observations[i] == 1) continue; + int j = i; + std::list<int> interp; + while (observations[j] == 0) { + interp.push_front(j); + j -= 1; + if (j < 0) j = edges.size() - 1; + } + j = (i + 1) % edges.size(); + while (observations[j] == 0) { + interp.push_back(j); + j += 1; + if (j == edges.size()) j = 0; + } + Vector3d dl = diffs[edges[(interp.front() + edges.size() - 1) % edges.size()]]; + double lenl = dl.norm(); + Vector3d dr = diffs[edges[(interp.back() + 1) % edges.size()]]; + double lenr = dr.norm(); + dl /= lenl; + dr /= lenr; + Vector3d n = dl.cross(dr).normalized(); + double angle = atan2(dl.cross(dr).norm(), dl.dot(dr)); + if (angle < 0) angle += 2 * 3.141592654; + Vector3d nc = N.col(Vind[p]); + if (n.dot(nc) < 0) { + n = -n; + angle = 2 * 3.141592654 - angle; + } + double step = (lenr - lenl) / (interp.size() + 1); + angle /= interp.size() + 1; + Vector3d dlp = nc.cross(dl).normalized(); + int t = 0; + for (auto q : interp) { + t += 1; + observations[q] = 1; + double ad = angle * t; + int e = edges[q]; + int re = E2E_compact[e]; + diff_count[e] = 2; + diffs[e] = (cos(ad) * dl + sin(ad) * dlp) * (lenl + step * t); + if (re != -1) { + diff_count[re] = 2; + diffs[re] = -diffs[e]; + } + } + for (int i = 0; i < edges.size(); ++i) { + lines.push_back(O_compact[p]); + lines.push_back(O_compact[p] + diffs[edges[i]]); + } + } + } + if (!update) break; + for (auto& p : observed) unobserved.erase(p); + } + }; + + BuildConnection(); + int max_iter = 10; + for (int iter = 0; iter < max_iter; ++iter) { + FindNearest(); + ComputeDistance(); + + std::vector<std::unordered_map<int, double>> entries(O_compact.size() * 2); + std::vector<int> fixed_dim(O_compact.size() * 2, 0); + for (auto& info : compact_sharp_constraints) { + fixed_dim[info.first * 2 + 1] = 1; + if (info.second.second.norm() < 0.5) fixed_dim[info.first * 2] = 1; + } + std::vector<double> b(O_compact.size() * 2); + std::vector<double> x(O_compact.size() * 2); + std::vector<Vector3d> Q_compact(O_compact.size()); + std::vector<Vector3d> N_compact(O_compact.size()); + std::vector<Vector3d> V_compact(O_compact.size()); +#ifdef WITH_OMP +#pragma omp parallel for +#endif + for (int i = 0; i < O_compact.size(); ++i) { + Q_compact[i] = Q.col(Vind[i]); + N_compact[i] = N.col(Vind[i]); + V_compact[i] = V.col(Vind[i]); + if (fixed_dim[i * 2 + 1] && !fixed_dim[i * 2]) { + Q_compact[i] = compact_sharp_constraints[i].second; + V_compact[i] = compact_sharp_constraints[i].first; + } + } + for (int i = 0; i < O_compact.size(); ++i) { + Vector3d q = Q_compact[i]; + Vector3d n = N_compact[i]; + Vector3d q_y = n.cross(q); + auto Vi = V_compact[i]; + x[i * 2] = (O_compact[i] - Vi).dot(q); + x[i * 2 + 1] = (O_compact[i] - Vi).dot(q_y); + } + for (int i = 0; i < O_compact.size(); ++i) { + Vector3d qx = Q_compact[i]; + Vector3d qy = N_compact[i]; + qy = qy.cross(qx); + auto dedge_it = dedges[i].begin(); + for (auto it = links[i].begin(); it != links[i].end(); ++it, ++dedge_it) { + int j = *it; + Vector3d qx2 = Q_compact[j]; + Vector3d qy2 = N_compact[j]; + qy2 = qy2.cross(qx2); + + int de = o2e[std::make_pair(i, j)]; + double lambda = (diff_count[de] == 1) ? 1 : 1; + Vector3d target_offset = diffs[de]; + + auto Vi = V_compact[i]; + auto Vj = V_compact[j]; + + Vector3d offset = Vj - Vi; + + // target_offset.normalize(); + // target_offset *= mScale; + Vector3d C = target_offset - offset; + int vid[] = {j * 2, j * 2 + 1, i * 2, i * 2 + 1}; + Vector3d weights[] = {qx2, qy2, -qx, -qy}; + for (int ii = 0; ii < 4; ++ii) { + for (int jj = 0; jj < 4; ++jj) { + auto it = entries[vid[ii]].find(vid[jj]); + if (it == entries[vid[ii]].end()) { + entries[vid[ii]][vid[jj]] = lambda * weights[ii].dot(weights[jj]); + } else { + entries[vid[ii]][vid[jj]] += lambda * weights[ii].dot(weights[jj]); + } + } + b[vid[ii]] += lambda * weights[ii].dot(C); + } + } + } + + // fix sharp edges + for (int i = 0; i < entries.size(); ++i) { + if (entries[i].size() == 0) { + entries[i][i] = 1; + b[i] = x[i]; + } + if (fixed_dim[i]) { + b[i] = x[i]; + entries[i].clear(); + entries[i][i] = 1; + } else { + std::unordered_map<int, double> newmap; + for (auto& rec : entries[i]) { + if (fixed_dim[rec.first]) { + b[i] -= rec.second * x[rec.first]; + } else { + newmap[rec.first] = rec.second; + } + } + std::swap(entries[i], newmap); + } + } + std::vector<Eigen::Triplet<double>> lhsTriplets; + lhsTriplets.reserve(F_compact.size() * 8); + Eigen::SparseMatrix<double> A(O_compact.size() * 2, O_compact.size() * 2); + VectorXd rhs(O_compact.size() * 2); + rhs.setZero(); + for (int i = 0; i < entries.size(); ++i) { + rhs(i) = b[i]; + for (auto& rec : entries[i]) { + lhsTriplets.push_back(Eigen::Triplet<double>(i, rec.first, rec.second)); + } + } + + A.setFromTriplets(lhsTriplets.begin(), lhsTriplets.end()); + +#ifdef LOG_OUTPUT + int t1 = GetCurrentTime64(); +#endif + + // FIXME: IncompleteCholesky Preconditioner will fail here so I fallback to Diagonal one. + // I suspected either there is a implementation bug in IncompleteCholesky Preconditioner + // or there is a memory corruption somewhere. However, g++'s address sanitizer does not + // report anything useful. + LinearSolver<Eigen::SparseMatrix<double>> solver; + solver.analyzePattern(A); + solver.factorize(A); + // Eigen::setNbThreads(1); + // ConjugateGradient<SparseMatrix<double>, Lower | Upper> solver; + // VectorXd x0 = VectorXd::Map(x.data(), x.size()); + // solver.setMaxIterations(40); + + // solver.compute(A); + VectorXd x_new = solver.solve(rhs); // solver.solveWithGuess(rhs, x0); + +#ifdef LOG_OUTPUT + // std::cout << "[LSQ] n_iteration:" << solver.iterations() << std::endl; + // std::cout << "[LSQ] estimated error:" << solver.error() << std::endl; + int t2 = GetCurrentTime64(); + printf("[LSQ] Linear solver uses %lf seconds.\n", (t2 - t1) * 1e-3); +#endif + for (int i = 0; i < O_compact.size(); ++i) { + // Vector3d q = Q.col(Vind[i]); + Vector3d q = Q_compact[i]; + // Vector3d n = N.col(Vind[i]); + Vector3d n = N_compact[i]; + Vector3d q_y = n.cross(q); + auto Vi = V_compact[i]; + O_compact[i] = Vi + q * x_new[i * 2] + q_y * x_new[i * 2 + 1]; + } + + // forgive my hack... + if (iter + 1 == max_iter) { + for (int iter = 0; iter < 5; ++iter) { + for (int i = 0; i < O_compact.size(); ++i) { + if (sharp_o[i]) continue; + if (dedges[i].size() != 4 || uncertain.count(i)) { + Vector3d n(0, 0, 0), v(0, 0, 0); + Vector3d v0 = O_compact[i]; + for (auto e : dedges[i]) { + Vector3d v1 = O_compact[F_compact[e / 4][(e + 1) % 4]]; + Vector3d v2 = O_compact[F_compact[e / 4][(e + 3) % 4]]; + n += (v1 - v0).cross(v2 - v0); + v += v1; + } + n.normalize(); + Vector3d offset = v / dedges[i].size() - v0; + offset -= offset.dot(n) * n; + O_compact[i] += offset; + } + } + } + } + } +} + +void Optimizer::optimize_positions_sharp( + Hierarchy& mRes, std::vector<DEdge>& edge_values, std::vector<Vector2i>& edge_diff, + std::vector<int>& sharp_edges, std::set<int>& sharp_vertices, + std::map<int, std::pair<Vector3d, Vector3d>>& sharp_constraints, int with_scale) { + auto& V = mRes.mV[0]; + auto& F = mRes.mF; + auto& Q = mRes.mQ[0]; + auto& N = mRes.mN[0]; + auto& O = mRes.mO[0]; + auto& S = mRes.mS[0]; + + DisajointTree tree(V.cols()); + for (int i = 0; i < edge_diff.size(); ++i) { + if (edge_diff[i].array().abs().sum() == 0) { + tree.Merge(edge_values[i].x, edge_values[i].y); + } + } + tree.BuildCompactParent(); + std::map<int, int> compact_sharp_indices; + std::set<DEdge> compact_sharp_edges; + for (int i = 0; i < sharp_edges.size(); ++i) { + if (sharp_edges[i] == 1) { + int v1 = tree.Index(F(i % 3, i / 3)); + int v2 = tree.Index(F((i + 1) % 3, i / 3)); + compact_sharp_edges.insert(DEdge(v1, v2)); + } + } + for (auto& v : sharp_vertices) { + int p = tree.Index(v); + if (compact_sharp_indices.count(p) == 0) { + int s = compact_sharp_indices.size(); + compact_sharp_indices[p] = s; + } + } + std::map<int, std::set<int>> sharp_vertices_links; + std::set<DEdge> sharp_dedges; + for (int i = 0; i < sharp_edges.size(); ++i) { + if (sharp_edges[i]) { + int v1 = F(i % 3, i / 3); + int v2 = F((i + 1) % 3, i / 3); + if (sharp_vertices_links.count(v1) == 0) sharp_vertices_links[v1] = std::set<int>(); + sharp_vertices_links[v1].insert(v2); + sharp_dedges.insert(DEdge(v1, v2)); + } + } + std::vector<std::vector<int>> sharp_to_original_indices(compact_sharp_indices.size()); + for (auto& v : sharp_vertices_links) { + if (v.second.size() == 2) continue; + int p = tree.Index(v.first); + sharp_to_original_indices[compact_sharp_indices[p]].push_back(v.first); + } + for (auto& v : sharp_vertices_links) { + if (v.second.size() != 2) continue; + int p = tree.Index(v.first); + sharp_to_original_indices[compact_sharp_indices[p]].push_back(v.first); + } + + for (int i = 0; i < V.cols(); ++i) { + if (sharp_vertices.count(i)) continue; + int p = tree.Index(i); + if (compact_sharp_indices.count(p)) + sharp_to_original_indices[compact_sharp_indices[p]].push_back(i); + } + + int num = sharp_to_original_indices.size(); + std::vector<std::set<int>> links(sharp_to_original_indices.size()); + for (int e = 0; e < edge_diff.size(); ++e) { + int v1 = edge_values[e].x; + int v2 = edge_values[e].y; + int p1 = tree.Index(v1); + int p2 = tree.Index(v2); + if (p1 == p2 || compact_sharp_edges.count(DEdge(p1, p2)) == 0) continue; + p1 = compact_sharp_indices[p1]; + p2 = compact_sharp_indices[p2]; + + links[p1].insert(p2); + links[p2].insert(p1); + } + + std::vector<int> hash(links.size(), 0); + std::vector<std::vector<Vector3d>> loops; + for (int i = 0; i < num; ++i) { + if (hash[i] == 1) continue; + if (links[i].size() == 2) { + std::vector<int> q; + q.push_back(i); + hash[i] = 1; + int v = i; + int prev_v = -1; + bool is_loop = false; + while (links[v].size() == 2) { + int next_v = -1; + for (auto nv : links[v]) + if (nv != prev_v) next_v = nv; + if (hash[next_v]) { + is_loop = true; + break; + } + if (links[next_v].size() == 2) hash[next_v] = true; + q.push_back(next_v); + prev_v = v; + v = next_v; + } + if (!is_loop && q.size() >= 2) { + std::vector<int> q1; + int v = i; + int prev_v = q[1]; + while (links[v].size() == 2) { + int next_v = -1; + for (auto nv : links[v]) + if (nv != prev_v) next_v = nv; + if (hash[next_v]) { + is_loop = true; + break; + } + if (links[next_v].size() == 2) hash[next_v] = true; + q1.push_back(next_v); + prev_v = v; + v = next_v; + } + std::reverse(q1.begin(), q1.end()); + q1.insert(q1.end(), q.begin(), q.end()); + std::swap(q1, q); + } + if (q.size() < 3) continue; + if (is_loop) q.push_back(q.front()); + double len = 0, scale = 0; + std::vector<Vector3d> o(q.size()), new_o(q.size()); + std::vector<double> sc(q.size()); + + for (int i = 0; i < q.size() - 1; ++i) { + int v1 = q[i]; + int v2 = q[i + 1]; + auto it = links[v1].find(v2); + if (it == links[v1].end()) { + printf("Non exist!\n"); + exit(0); + } + } + + for (int i = 0; i < q.size(); ++i) { + if (sharp_to_original_indices[q[i]].size() == 0) { + continue; + } + o[i] = O.col(sharp_to_original_indices[q[i]][0]); + Vector3d qx = Q.col(sharp_to_original_indices[q[i]][0]); + Vector3d qy = Vector3d(N.col(sharp_to_original_indices[q[i]][0])).cross(qx); + int fst = sharp_to_original_indices[q[1]][0]; + Vector3d dis = (i == 0) ? (Vector3d(O.col(fst)) - o[i]) : o[i] - o[i - 1]; + if (with_scale) + sc[i] = (abs(qx.dot(dis)) > abs(qy.dot(dis))) + ? S(0, sharp_to_original_indices[q[i]][0]) + : S(1, sharp_to_original_indices[q[i]][0]); + else + sc[i] = 1; + new_o[i] = o[i]; + } + + if (is_loop) { + for (int i = 0; i < q.size(); ++i) { + Vector3d dir = + (o[(i + 1) % q.size()] - o[(i + q.size() - 1) % q.size()]).normalized(); + for (auto& ind : sharp_to_original_indices[q[i]]) { + sharp_constraints[ind] = std::make_pair(o[i], dir); + } + } + } else { + for (int i = 0; i < q.size(); ++i) { + Vector3d dir(0, 0, 0); + if (i != 0 && i + 1 != q.size()) + dir = (o[i + 1] - o[i - 1]).normalized(); + else if (links[q[i]].size() == 1) { + if (i == 0) + dir = (o[i + 1] - o[i]).normalized(); + else + dir = (o[i] - o[i - 1]).normalized(); + } + for (auto& ind : sharp_to_original_indices[q[i]]) { + sharp_constraints[ind] = std::make_pair(o[i], dir); + } + } + } + + for (int i = 0; i < q.size() - 1; ++i) { + len += (o[i + 1] - o[i]).norm(); + scale += sc[i]; + } + + int next_m = q.size() - 1; + + double left_norm = len * sc[0] / scale; + int current_v = 0; + double current_norm = (o[1] - o[0]).norm(); + for (int i = 1; i < next_m; ++i) { + while (left_norm >= current_norm) { + left_norm -= current_norm; + current_v += 1; + current_norm = (o[current_v + 1] - o[current_v]).norm(); + } + new_o[i] = + (o[current_v + 1] * left_norm + o[current_v] * (current_norm - left_norm)) / + current_norm; + o[current_v] = new_o[i]; + current_norm -= left_norm; + left_norm = len * sc[current_v] / scale; + } + + for (int i = 0; i < q.size(); ++i) { + for (auto v : sharp_to_original_indices[q[i]]) { + O.col(v) = new_o[i]; + } + } + + loops.push_back(new_o); + } + } + return; + std::ofstream os("/Users/jingwei/Desktop/sharp.obj"); + for (int i = 0; i < loops.size(); ++i) { + for (auto& v : loops[i]) { + os << "v " << v[0] << " " << v[1] << " " << v[2] << "\n"; + } + } + int offset = 1; + for (int i = 0; i < loops.size(); ++i) { + for (int j = 0; j < loops[i].size() - 1; ++j) { + os << "l " << offset + j << " " << offset + j + 1 << "\n"; + } + offset += loops[i].size(); + } + os.close(); + exit(0); +} + +void Optimizer::optimize_positions_fixed( + Hierarchy& mRes, std::vector<DEdge>& edge_values, std::vector<Vector2i>& edge_diff, + std::set<int>& sharp_vertices, std::map<int, std::pair<Vector3d, Vector3d>>& sharp_constraints, + int with_scale) { + auto& V = mRes.mV[0]; + auto& F = mRes.mF; + auto& Q = mRes.mQ[0]; + auto& N = mRes.mN[0]; + auto& O = mRes.mO[0]; + auto& S = mRes.mS[0]; + + DisajointTree tree(V.cols()); + for (int i = 0; i < edge_diff.size(); ++i) { + if (edge_diff[i].array().abs().sum() == 0) { + tree.Merge(edge_values[i].x, edge_values[i].y); + } + } + tree.BuildCompactParent(); + int num = tree.CompactNum(); + + // Find the most descriptive vertex + std::vector<Vector3d> v_positions(num, Vector3d(0, 0, 0)); + std::vector<int> v_count(num); + std::vector<double> v_distance(num, 1e30); + std::vector<int> v_index(num, -1); + + for (int i = 0; i < V.cols(); ++i) { + v_positions[tree.Index(i)] += O.col(i); + v_count[tree.Index(i)] += 1; + } + for (int i = 0; i < num; ++i) { + if (v_count[i] > 0) v_positions[i] /= v_count[i]; + } + for (int i = 0; i < V.cols(); ++i) { + int p = tree.Index(i); + double dis = (v_positions[p] - V.col(i)).squaredNorm(); + if (dis < v_distance[p]) { + v_distance[p] = dis; + v_index[p] = i; + } + } + + std::set<int> compact_sharp_vertices; + for (auto& v : sharp_vertices) { + v_positions[tree.Index(v)] = O.col(v); + v_index[tree.Index(v)] = v; + V.col(v) = O.col(v); + compact_sharp_vertices.insert(tree.Index(v)); + } + std::vector<std::map<int, std::pair<int, Vector3d>>> ideal_distances(tree.CompactNum()); + for (int e = 0; e < edge_diff.size(); ++e) { + int v1 = edge_values[e].x; + int v2 = edge_values[e].y; + + int p1 = tree.Index(v1); + int p2 = tree.Index(v2); + int q1 = v_index[p1]; + int q2 = v_index[p2]; + + Vector3d q_1 = Q.col(v1); + Vector3d q_2 = Q.col(v2); + + Vector3d n_1 = N.col(v1); + Vector3d n_2 = N.col(v2); + Vector3d q_1_y = n_1.cross(q_1); + Vector3d q_2_y = n_2.cross(q_2); + auto index = compat_orientation_extrinsic_index_4(q_1, n_1, q_2, n_2); + double s_x1 = S(0, v1), s_y1 = S(1, v1); + double s_x2 = S(0, v2), s_y2 = S(1, v2); + int rank_diff = (index.second + 4 - index.first) % 4; + if (rank_diff % 2 == 1) std::swap(s_x2, s_y2); + Vector3d qd_x = 0.5 * (rotate90_by(q_2, n_2, rank_diff) + q_1); + Vector3d qd_y = 0.5 * (rotate90_by(q_2_y, n_2, rank_diff) + q_1_y); + double scale_x = (with_scale ? 0.5 * (s_x1 + s_x2) : 1) * mRes.mScale; + double scale_y = (with_scale ? 0.5 * (s_y1 + s_y2) : 1) * mRes.mScale; + Vector2i diff = edge_diff[e]; + + Vector3d origin1 = + /*(sharp_constraints.count(q1)) ? sharp_constraints[q1].first : */ V.col(q1); + Vector3d origin2 = + /*(sharp_constraints.count(q2)) ? sharp_constraints[q2].first : */ V.col(q2); + Vector3d C = diff[0] * scale_x * qd_x + diff[1] * scale_y * qd_y + origin1 - origin2; + auto it = ideal_distances[p1].find(p2); + if (it == ideal_distances[p1].end()) { + ideal_distances[p1][p2] = std::make_pair(1, C); + } else { + it->second.first += 1; + it->second.second += C; + } + } + + std::vector<std::unordered_map<int, double>> entries(num * 2); + std::vector<double> b(num * 2); + + for (int m = 0; m < num; ++m) { + int v1 = v_index[m]; + for (auto& info : ideal_distances[m]) { + int v2 = v_index[info.first]; + Vector3d q_1 = Q.col(v1); + Vector3d q_2 = Q.col(v2); + if (sharp_constraints.count(v1)) { + Vector3d d = sharp_constraints[v1].second; + if (d != Vector3d::Zero()) q_1 = d; + } + if (sharp_constraints.count(v2)) { + Vector3d d = sharp_constraints[v2].second; + if (d != Vector3d::Zero()) q_2 = d; + } + + Vector3d n_1 = N.col(v1); + Vector3d n_2 = N.col(v2); + Vector3d q_1_y = n_1.cross(q_1); + Vector3d q_2_y = n_2.cross(q_2); + Vector3d weights[] = {q_2, q_2_y, -q_1, -q_1_y}; + int vid[] = {info.first * 2, info.first * 2 + 1, m * 2, m * 2 + 1}; + Vector3d dis = info.second.second / info.second.first; + double lambda = 1; + if (sharp_vertices.count(v1) && sharp_vertices.count(v2)) lambda = 1; + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + auto it = entries[vid[i]].find(vid[j]); + if (it == entries[vid[i]].end()) { + entries[vid[i]][vid[j]] = weights[i].dot(weights[j]) * lambda; + } else { + entries[vid[i]][vid[j]] += weights[i].dot(weights[j]) * lambda; + } + } + b[vid[i]] += weights[i].dot(dis) * lambda; + } + } + } + + std::vector<int> fixed_dim(num * 2, 0); + std::vector<double> x(num * 2); +#ifdef WITH_OMP +#pragma omp parallel for +#endif + for (int i = 0; i < num; ++i) { + int p = v_index[i]; + Vector3d q = Q.col(p); + + if (sharp_constraints.count(p)) { + Vector3d dir = sharp_constraints[p].second; + fixed_dim[i * 2 + 1] = 1; + if (dir != Vector3d::Zero()) { + q = dir; + } else + fixed_dim[i * 2] = 1; + } + Vector3d n = N.col(p); + Vector3d q_y = n.cross(q); + x[i * 2] = (v_positions[i] - V.col(p)).dot(q); + x[i * 2 + 1] = (v_positions[i] - V.col(p)).dot(q_y); + } + + // fix sharp edges + for (int i = 0; i < entries.size(); ++i) { + if (fixed_dim[i]) { + b[i] = x[i]; + entries[i].clear(); + entries[i][i] = 1; + } else { + std::unordered_map<int, double> newmap; + for (auto& rec : entries[i]) { + if (fixed_dim[rec.first]) { + b[i] -= rec.second * x[rec.first]; + } else { + newmap[rec.first] = rec.second; + } + } + std::swap(entries[i], newmap); + } + } + for (int i = 0; i < entries.size(); ++i) { + if (entries[i].size() == 0) { + entries[i][i] = 1; + } + } + + std::vector<Eigen::Triplet<double>> lhsTriplets; + lhsTriplets.reserve(F.cols() * 6); + Eigen::SparseMatrix<double> A(num * 2, num * 2); + VectorXd rhs(num * 2); + rhs.setZero(); + for (int i = 0; i < entries.size(); ++i) { + rhs(i) = b[i]; + if (std::isnan(b[i])) { + printf("Equation has nan!\n"); + exit(0); + } + for (auto& rec : entries[i]) { + lhsTriplets.push_back(Eigen::Triplet<double>(i, rec.first, rec.second)); + if (std::isnan(rec.second)) { + printf("Equation has nan!\n"); + exit(0); + } + } + } + A.setFromTriplets(lhsTriplets.begin(), lhsTriplets.end()); + +#ifdef LOG_OUTPUT + int t1 = GetCurrentTime64(); +#endif + /* + Eigen::setNbThreads(1); + ConjugateGradient<SparseMatrix<double>, Lower | Upper> solver; + VectorXd x0 = VectorXd::Map(x.data(), x.size()); + solver.setMaxIterations(40); + + solver.compute(A); + */ + LinearSolver<Eigen::SparseMatrix<double>> solver; + solver.analyzePattern(A); + solver.factorize(A); + + VectorXd x_new = solver.solve(rhs); +#ifdef LOG_OUTPUT + // std::cout << "[LSQ] n_iteration:" << solver.iterations() << std::endl; + // std::cout << "[LSQ] estimated error:" << solver.error() << std::endl; + int t2 = GetCurrentTime64(); + printf("[LSQ] Linear solver uses %lf seconds.\n", (t2 - t1) * 1e-3); +#endif + + for (int i = 0; i < x.size(); ++i) { + if (!std::isnan(x_new[i])) { + if (!fixed_dim[i / 2 * 2 + 1]) { + double total = 0; + for (auto& res : entries[i]) { + double t = x_new[res.first]; + if (std::isnan(t)) t = 0; + total += t * res.second; + } + } + x[i] = x_new[i]; + } + } + + for (int i = 0; i < O.cols(); ++i) { + int p = tree.Index(i); + int c = v_index[p]; + Vector3d q = Q.col(c); + if (fixed_dim[p * 2 + 1]) { + Vector3d dir = sharp_constraints[c].second; + if (dir != Vector3d::Zero()) q = dir; + } + Vector3d n = N.col(c); + Vector3d q_y = n.cross(q); + O.col(i) = V.col(c) + q * x[p * 2] + q_y * x[p * 2 + 1]; + } +} + +void Optimizer::optimize_integer_constraints(Hierarchy& mRes, std::map<int, int>& singularities, + bool use_minimum_cost_flow) { + int edge_capacity = 2; + bool fullFlow = false; + std::vector<std::vector<int>>& AllowChange = mRes.mAllowChanges; + for (int level = mRes.mToUpperEdges.size(); level >= 0; --level) { + auto& EdgeDiff = mRes.mEdgeDiff[level]; + auto& FQ = mRes.mFQ[level]; + auto& F2E = mRes.mF2E[level]; + auto& E2F = mRes.mE2F[level]; + + int iter = 0; + while (!fullFlow) { + std::vector<Vector4i> edge_to_constraints(E2F.size() * 2, Vector4i(-1, 0, -1, 0)); + std::vector<int> initial(F2E.size() * 2, 0); + for (int i = 0; i < F2E.size(); ++i) { + for (int j = 0; j < 3; ++j) { + int e = F2E[i][j]; + Vector2i index = rshift90(Vector2i(e * 2 + 1, e * 2 + 2), FQ[i][j]); + for (int k = 0; k < 2; ++k) { + int l = abs(index[k]); + int s = index[k] / l; + int ind = l - 1; + int equationID = i * 2 + k; + if (edge_to_constraints[ind][0] == -1) { + edge_to_constraints[ind][0] = equationID; + edge_to_constraints[ind][1] = s; + } else { + edge_to_constraints[ind][2] = equationID; + edge_to_constraints[ind][3] = s; + } + initial[equationID] += s * EdgeDiff[ind / 2][ind % 2]; + } + } + } + std::vector<std::pair<Vector2i, int>> arcs; + std::vector<int> arc_ids; + for (int i = 0; i < edge_to_constraints.size(); ++i) { + if (AllowChange[level][i] == 0) continue; + if (edge_to_constraints[i][0] == -1 || edge_to_constraints[i][2] == -1) continue; + if (edge_to_constraints[i][1] == -edge_to_constraints[i][3]) { + int v1 = edge_to_constraints[i][0]; + int v2 = edge_to_constraints[i][2]; + if (edge_to_constraints[i][1] < 0) std::swap(v1, v2); + int current_v = EdgeDiff[i / 2][i % 2]; + arcs.push_back(std::make_pair(Vector2i(v1, v2), current_v)); + if (AllowChange[level][i] == 1) + arc_ids.push_back(i + 1); + else { + arc_ids.push_back(-(i + 1)); + } + } + } + int supply = 0; + int demand = 0; + for (int i = 0; i < initial.size(); ++i) { + int init_val = initial[i]; + if (init_val > 0) { + arcs.push_back(std::make_pair(Vector2i(-1, i), initial[i])); + supply += init_val; + } else if (init_val < 0) { + demand -= init_val; + arcs.push_back(std::make_pair(Vector2i(i, initial.size()), -init_val)); + } + } + + std::unique_ptr<MaxFlowHelper> solver = nullptr; + if (use_minimum_cost_flow && level == mRes.mToUpperEdges.size()) { + lprintf("network simplex MCF is used\n"); + solver = std::make_unique<NetworkSimplexFlowHelper>(); + } else if (supply < 20) { + solver = std::make_unique<ECMaxFlowHelper>(); + } else { + solver = std::make_unique<BoykovMaxFlowHelper>(); + } + +#ifdef WITH_GUROBI + if (use_minimum_cost_flow && level == mRes.mToUpperEdges.size()) { + solver = std::make_unique<GurobiFlowHelper>(); + } +#endif + solver->resize(initial.size() + 2, arc_ids.size()); + + std::set<int> ids; + for (int i = 0; i < arcs.size(); ++i) { + int v1 = arcs[i].first[0] + 1; + int v2 = arcs[i].first[1] + 1; + int c = arcs[i].second; + if (v1 == 0 || v2 == initial.size() + 1) { + solver->addEdge(v1, v2, c, 0, -1); + } else { + if (arc_ids[i] > 0) + solver->addEdge(v1, v2, std::max(0, c + edge_capacity), + std::max(0, -c + edge_capacity), arc_ids[i] - 1); + else { + if (c > 0) + solver->addEdge(v1, v2, std::max(0, c - 1), + std::max(0, -c + edge_capacity), -1 - arc_ids[i]); + else + solver->addEdge(v1, v2, std::max(0, c + edge_capacity), + std::max(0, -c - 1), -1 - arc_ids[i]); + } + } + } + int flow_count = solver->compute(); + + solver->applyTo(EdgeDiff); + + lprintf("flow_count = %d, supply = %d\n", flow_count, supply); + if (flow_count == supply) fullFlow = true; + if (level != 0 || fullFlow) break; + edge_capacity += 1; + iter++; + if (iter == 10) { + /* Probably won't converge. */ + break; + } + lprintf("Not full flow, edge_capacity += 1\n"); + } + + if (level != 0) { + auto& nEdgeDiff = mRes.mEdgeDiff[level - 1]; + auto& toUpper = mRes.mToUpperEdges[level - 1]; + auto& toUpperOrients = mRes.mToUpperOrients[level - 1]; + for (int i = 0; i < toUpper.size(); ++i) { + if (toUpper[i] >= 0) { + int orient = (4 - toUpperOrients[i]) % 4; + nEdgeDiff[i] = rshift90(EdgeDiff[toUpper[i]], orient); + } + } + } + } +} + +#ifdef WITH_CUDA + +void Optimizer::optimize_orientations_cuda(Hierarchy& mRes) { + int levelIterations = 6; + for (int level = mRes.mN.size() - 1; level >= 0; --level) { + Link* adj = mRes.cudaAdj[level]; + int* adjOffset = mRes.cudaAdjOffset[level]; + glm::dvec3* N = mRes.cudaN[level]; + glm::dvec3* Q = mRes.cudaQ[level]; + auto& phases = mRes.cudaPhases[level]; + for (int iter = 0; iter < levelIterations; ++iter) { + for (int phase = 0; phase < phases.size(); ++phase) { + int* p = phases[phase]; + UpdateOrientation(p, mRes.mPhases[level][phase].size(), N, Q, adj, adjOffset, + mRes.mAdj[level][phase].size()); + } + } + if (level > 0) { + glm::dvec3* srcField = mRes.cudaQ[level]; + glm::ivec2* toUpper = mRes.cudaToUpper[level - 1]; + glm::dvec3* destField = mRes.cudaQ[level - 1]; + glm::dvec3* N = mRes.cudaN[level - 1]; + PropagateOrientationUpper(srcField, mRes.mQ[level].cols(), toUpper, N, destField); + } + } + + for (int l = 0; l < mRes.mN.size() - 1; ++l) { + glm::dvec3* N = mRes.cudaN[l]; + glm::dvec3* N_next = mRes.cudaN[l + 1]; + glm::dvec3* Q = mRes.cudaQ[l]; + glm::dvec3* Q_next = mRes.cudaQ[l + 1]; + glm::ivec2* toUpper = mRes.cudaToUpper[l]; + + PropagateOrientationLower(toUpper, Q, N, Q_next, N_next, mRes.mToUpper[l].cols()); + } +} + +void Optimizer::optimize_positions_cuda(Hierarchy& mRes) { + int levelIterations = 6; + for (int level = mRes.mAdj.size() - 1; level >= 0; --level) { + Link* adj = mRes.cudaAdj[level]; + int* adjOffset = mRes.cudaAdjOffset[level]; + glm::dvec3* N = mRes.cudaN[level]; + glm::dvec3* Q = mRes.cudaQ[level]; + glm::dvec3* V = mRes.cudaV[level]; + glm::dvec3* O = mRes.cudaO[level]; + std::vector<int*> phases = mRes.cudaPhases[level]; + for (int iter = 0; iter < levelIterations; ++iter) { + for (int phase = 0; phase < phases.size(); ++phase) { + int* p = phases[phase]; + UpdatePosition(p, mRes.mPhases[level][phase].size(), N, Q, adj, adjOffset, + mRes.mAdj[level][phase].size(), V, O, mRes.mScale); + } + } + if (level > 0) { + glm::dvec3* srcField = mRes.cudaO[level]; + glm::ivec2* toUpper = mRes.cudaToUpper[level - 1]; + glm::dvec3* destField = mRes.cudaO[level - 1]; + glm::dvec3* N = mRes.cudaN[level - 1]; + glm::dvec3* V = mRes.cudaV[level - 1]; + PropagatePositionUpper(srcField, mRes.mO[level].cols(), toUpper, N, V, destField); + } + } +} + +#endif + +} // namespace qflow |