diff options
Diffstat (limited to 'extern/mantaflow/preprocessed/multigrid.cpp')
-rw-r--r-- | extern/mantaflow/preprocessed/multigrid.cpp | 1857 |
1 files changed, 1857 insertions, 0 deletions
diff --git a/extern/mantaflow/preprocessed/multigrid.cpp b/extern/mantaflow/preprocessed/multigrid.cpp new file mode 100644 index 00000000000..9e35c6f9368 --- /dev/null +++ b/extern/mantaflow/preprocessed/multigrid.cpp @@ -0,0 +1,1857 @@ + + +// DO NOT EDIT ! +// This file is generated using the MantaFlow preprocessor (prep generate). + +/****************************************************************************** + * + * MantaFlow fluid solver framework + * Multigrid solver + * + * This program is free software, distributed under the terms of the + * Apache License, Version 2.0 + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Copyright 2016, by Florian Ferstl (florian.ferstl.ff@gmail.com) + * + * This is an implementation of the solver developed by Dick et al. [1] + * without topology awareness (= vertex duplication on coarser levels). This + * simplification allows us to use regular grids for all levels of the multigrid + * hierarchy and works well for moderately complex domains. + * + * [1] Solving the Fluid Pressure Poisson Equation Using Multigrid-Evaluation + * and Improvements, C. Dick, M. Rogowsky, R. Westermann, IEEE TVCG 2015 + * + ******************************************************************************/ + +#include "multigrid.h" + +#define FOR_LVL(IDX, LVL) for (int IDX = 0; IDX < mb[LVL].size(); IDX++) + +#define FOR_VEC_MINMAX(VEC, MIN, MAX) \ + Vec3i VEC; \ + const Vec3i VEC##__min = (MIN), VEC##__max = (MAX); \ + for (VEC.z = VEC##__min.z; VEC.z <= VEC##__max.z; VEC.z++) \ + for (VEC.y = VEC##__min.y; VEC.y <= VEC##__max.y; VEC.y++) \ + for (VEC.x = VEC##__min.x; VEC.x <= VEC##__max.x; VEC.x++) + +#define FOR_VECLIN_MINMAX(VEC, LIN, MIN, MAX) \ + Vec3i VEC; \ + int LIN = 0; \ + const Vec3i VEC##__min = (MIN), VEC##__max = (MAX); \ + for (VEC.z = VEC##__min.z; VEC.z <= VEC##__max.z; VEC.z++) \ + for (VEC.y = VEC##__min.y; VEC.y <= VEC##__max.y; VEC.y++) \ + for (VEC.x = VEC##__min.x; VEC.x <= VEC##__max.x; VEC.x++, LIN++) + +#define MG_TIMINGS(X) +//#define MG_TIMINGS(X) X + +using namespace std; +namespace Manta { + +// Helper class for calling mantaflow kernels with a specific number of threads +class ThreadSize { + IndexInt s; + + public: + ThreadSize(IndexInt _s) + { + s = _s; + } + IndexInt size() + { + return s; + } +}; + +// ---------------------------------------------------------------------------- +// Efficient min heap for <ID, key> pairs with 0<=ID<N and 0<=key<K +// (elements are stored in K buckets, where each bucket is a doubly linked list). +// - if K<<N, all ops are O(1) on avg (worst case O(K)). +// - memory usage O(K+N): (K+N) * 3 * sizeof(int). +class NKMinHeap { + private: + struct Entry { + int key, prev, next; + Entry() : key(-1), prev(-1), next(-1) + { + } + }; + + int mN, mK, mSize, mMinKey; + + // Double linked lists of IDs, one for each bucket/key. + // The first K entries are the buckets' head pointers, + // and the last N entries correspond to the IDs. + std::vector<Entry> mEntries; + + public: + NKMinHeap(int N, int K) : mN(N), mK(K), mSize(0), mMinKey(-1), mEntries(N + K) + { + } + + int size() + { + return mSize; + } + int getKey(int ID) + { + return mEntries[mK + ID].key; + } + + // Insert, decrease or increase key (or delete by setting key to -1) + void setKey(int ID, int key); + + // peek min key (returns ID/key pair) + std::pair<int, int> peekMin(); + + // pop min key (returns ID/key pair) + std::pair<int, int> popMin(); + + void print(); // for debugging +}; + +void NKMinHeap::setKey(int ID, int key) +{ + assertMsg(0 <= ID && ID < mN, "NKMinHeap::setKey: ID out of range"); + assertMsg(-1 <= key && key < mK, "NKMinHeap::setKey: key out of range"); + + const int kid = mK + ID; + + if (mEntries[kid].key == key) + return; // nothing changes + + // remove from old key-list if ID existed previously + if (mEntries[kid].key != -1) { + int pred = mEntries[kid].prev; + int succ = mEntries[kid].next; // can be -1 + + mEntries[pred].next = succ; + if (succ != -1) + mEntries[succ].prev = pred; + + // if removed key was minimum key, mMinKey may need to be updated + int removedKey = mEntries[kid].key; + if (removedKey == mMinKey) { + if (mSize == 1) { + mMinKey = -1; + } + else { + for (; mMinKey < mK; mMinKey++) { + if (mEntries[mMinKey].next != -1) + break; + } + } + } + + mSize--; + } + + // set new key of ID + mEntries[kid].key = key; + + if (key == -1) { + // finished if key was set to -1 + mEntries[kid].next = mEntries[kid].prev = -1; + return; + } + + // add key + mSize++; + if (mMinKey == -1) + mMinKey = key; + else + mMinKey = std::min(mMinKey, key); + + // insert into new key-list (headed by mEntries[key]) + int tmp = mEntries[key].next; + + mEntries[key].next = kid; + mEntries[kid].prev = key; + + mEntries[kid].next = tmp; + if (tmp != -1) + mEntries[tmp].prev = kid; +} + +std::pair<int, int> NKMinHeap::peekMin() +{ + if (mSize == 0) + return std::pair<int, int>(-1, -1); // error + + const int ID = mEntries[mMinKey].next - mK; + return std::pair<int, int>(ID, mMinKey); +} + +std::pair<int, int> NKMinHeap::popMin() +{ + if (mSize == 0) + return std::pair<int, int>(-1, -1); // error + + const int kid = mEntries[mMinKey].next; + const int ID = kid - mK; + const int key = mMinKey; + + // remove from key-list + int pred = mEntries[kid].prev; + int succ = mEntries[kid].next; // can be -1 + + mEntries[pred].next = succ; + if (succ != -1) + mEntries[succ].prev = pred; + + // remove entry + mEntries[kid] = Entry(); + mSize--; + + // update mMinKey + if (mSize == 0) { + mMinKey = -1; + } + else { + for (; mMinKey < mK; mMinKey++) { + if (mEntries[mMinKey].next != -1) + break; + } + } + + // return result + return std::pair<int, int>(ID, key); +} + +void NKMinHeap::print() +{ + std::cout << "Size: " << mSize << ", MinKey: " << mMinKey << std::endl; + for (int key = 0; key < mK; key++) { + if (mEntries[key].next != -1) { + std::cout << "Key " << key << ": "; + int kid = mEntries[key].next; + while (kid != -1) { + std::cout << kid - mK << " "; + kid = mEntries[kid].next; + } + std::cout << std::endl; + } + } + std::cout << std::endl; +} + +// ---------------------------------------------------------------------------- +// GridMg methods +// +// Illustration of 27-point stencil indices +// y | z = -1 z = 0 z = 1 +// ^ | 6 7 8, 15 16 17, 24 25 26 +// | | 3 4 5, 12 13 14, 21 22 23 +// o-> x | 0 1 2, 9 10 11, 18 19 20 +// +// Symmetric storage with only 14 entries per vertex +// y | z = -1 z = 0 z = 1 +// ^ | - - -, 2 3 4, 11 12 13 +// | | - - -, - 0 1, 8 9 10 +// o-> x | - - -, - - -, 5 6 7 + +GridMg::GridMg(const Vec3i &gridSize) + : mNumPreSmooth(1), + mNumPostSmooth(1), + mCoarsestLevelAccuracy(Real(1E-8)), + mTrivialEquationScale(Real(1E-6)), + mIsASet(false), + mIsRhsSet(false) +{ + MG_TIMINGS(MuTime time;) + + // 2D or 3D mode + mIs3D = (gridSize.z > 1); + mDim = mIs3D ? 3 : 2; + mStencilSize = mIs3D ? 14 : 5; // A has a full 27-point stencil on levels > 0 + mStencilSize0 = mIs3D ? 4 : 3; // A has a 7-point stencil on level 0 + mStencilMin = Vec3i(-1, -1, mIs3D ? -1 : 0); + mStencilMax = Vec3i(1, 1, mIs3D ? 1 : 0); + + // Create level 0 (=original grid) + mSize.push_back(gridSize); + mPitch.push_back(Vec3i(1, mSize.back().x, mSize.back().x * mSize.back().y)); + int n = mSize.back().x * mSize.back().y * mSize.back().z; + + mA.push_back(std::vector<Real>(n * mStencilSize0)); + mx.push_back(std::vector<Real>(n)); + mb.push_back(std::vector<Real>(n)); + mr.push_back(std::vector<Real>(n)); + mType.push_back(std::vector<VertexType>(n)); + mCGtmp1.push_back(std::vector<double>()); + mCGtmp2.push_back(std::vector<double>()); + mCGtmp3.push_back(std::vector<double>()); + mCGtmp4.push_back(std::vector<double>()); + + debMsg("GridMg::GridMg level 0: " << mSize[0].x << " x " << mSize[0].y << " x " << mSize[0].z + << " x ", + 2); + + // Create coarse levels >0 + for (int l = 1; l <= 100; l++) { + if (mSize[l - 1].x <= 5 && mSize[l - 1].y <= 5 && mSize[l - 1].z <= 5) + break; + if (n <= 1000) + break; + + mSize.push_back((mSize[l - 1] + 2) / 2); + mPitch.push_back(Vec3i(1, mSize.back().x, mSize.back().x * mSize.back().y)); + n = mSize.back().x * mSize.back().y * mSize.back().z; + + mA.push_back(std::vector<Real>(n * mStencilSize)); + mx.push_back(std::vector<Real>(n)); + mb.push_back(std::vector<Real>(n)); + mr.push_back(std::vector<Real>(n)); + mType.push_back(std::vector<VertexType>(n)); + mCGtmp1.push_back(std::vector<double>()); + mCGtmp2.push_back(std::vector<double>()); + mCGtmp3.push_back(std::vector<double>()); + mCGtmp4.push_back(std::vector<double>()); + + debMsg("GridMg::GridMg level " << l << ": " << mSize[l].x << " x " << mSize[l].y << " x " + << mSize[l].z << " x ", + 2); + } + + // Additional memory for CG on coarsest level + mCGtmp1.back() = std::vector<double>(n); + mCGtmp2.back() = std::vector<double>(n); + mCGtmp3.back() = std::vector<double>(n); + mCGtmp4.back() = std::vector<double>(n); + + MG_TIMINGS(debMsg("GridMg: Allocation done in " << time.update(), 1);) + + // Precalculate coarsening paths: + // (V) <--restriction-- (U) <--A_{l-1}-- (W) <--interpolation-- (N) + Vec3i p7stencil[7] = {Vec3i(0, 0, 0), + Vec3i(-1, 0, 0), + Vec3i(1, 0, 0), + Vec3i(0, -1, 0), + Vec3i(0, 1, 0), + Vec3i(0, 0, -1), + Vec3i(0, 0, 1)}; + Vec3i V(1, 1, 1); // reference coarse grid vertex at (1,1,1) + FOR_VEC_MINMAX(U, V * 2 + mStencilMin, V * 2 + mStencilMax) + { + for (int i = 0; i < 1 + 2 * mDim; i++) { + Vec3i W = U + p7stencil[i]; + FOR_VEC_MINMAX(N, W / 2, (W + 1) / 2) + { + int s = dot(N, Vec3i(1, 3, 9)); + + if (s >= 13) { + CoarseningPath path; + path.N = N - 1; // offset of N on coarse grid + path.U = U - V * 2; // offset of U on fine grid + path.W = W - V * 2; // offset of W on fine grid + path.sc = s - 13; // stencil index corresponding to V<-N on coarse grid + path.sf = (i + 1) / 2; // stencil index corresponding to U<-W on coarse grid + path.inUStencil = (i % 2 == 0); // fine grid stencil entry stored at U or W? + path.rw = Real(1) / + Real(1 << ((U.x % 2) + (U.y % 2) + (U.z % 2))); // restriction weight V<-U + path.iw = Real(1) / + Real(1 << ((W.x % 2) + (W.y % 2) + (W.z % 2))); // interpolation weight W<-N + mCoarseningPaths0.push_back(path); + } + } + } + } + + auto pathLess = [](const GridMg::CoarseningPath &p1, const GridMg::CoarseningPath &p2) { + if (p1.sc == p2.sc) + return dot(p1.U + 1, Vec3i(1, 3, 9)) < dot(p2.U + 1, Vec3i(1, 3, 9)); + return p1.sc < p2.sc; + }; + std::sort(mCoarseningPaths0.begin(), mCoarseningPaths0.end(), pathLess); +} + +void GridMg::analyzeStencil(int v, + bool is3D, + bool &isStencilSumNonZero, + bool &isEquationTrivial) const +{ + Vec3i V = vecIdx(v, 0); + + // collect stencil entries + Real A[7]; + A[0] = mA[0][v * mStencilSize0 + 0]; + A[1] = mA[0][v * mStencilSize0 + 1]; + A[2] = mA[0][v * mStencilSize0 + 2]; + A[3] = is3D ? mA[0][v * mStencilSize0 + 3] : Real(0); + A[4] = V.x != 0 ? mA[0][(v - mPitch[0].x) * mStencilSize0 + 1] : Real(0); + A[5] = V.y != 0 ? mA[0][(v - mPitch[0].y) * mStencilSize0 + 2] : Real(0); + A[6] = V.z != 0 && is3D ? mA[0][(v - mPitch[0].z) * mStencilSize0 + 3] : Real(0); + + // compute sum of stencil entries + Real stencilMax = Real(0), stencilSum = Real(0); + for (int i = 0; i < 7; i++) { + stencilSum += A[i]; + stencilMax = max(stencilMax, std::abs(A[i])); + } + + // check if sum is numerically zero + isStencilSumNonZero = std::abs(stencilSum / stencilMax) > Real(1E-6); + + // check for trivial equation (exact comparisons) + isEquationTrivial = A[0] == Real(1) && A[1] == Real(0) && A[2] == Real(0) && A[3] == Real(0) && + A[4] == Real(0) && A[5] == Real(0) && A[6] == Real(0); +} + +struct knCopyA : public KernelBase { + knCopyA(std::vector<Real> &sizeRef, + std::vector<Real> &A0, + int stencilSize0, + bool is3D, + const Grid<Real> *pA0, + const Grid<Real> *pAi, + const Grid<Real> *pAj, + const Grid<Real> *pAk) + : KernelBase(sizeRef.size()), + sizeRef(sizeRef), + A0(A0), + stencilSize0(stencilSize0), + is3D(is3D), + pA0(pA0), + pAi(pAi), + pAj(pAj), + pAk(pAk) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, + std::vector<Real> &sizeRef, + std::vector<Real> &A0, + int stencilSize0, + bool is3D, + const Grid<Real> *pA0, + const Grid<Real> *pAi, + const Grid<Real> *pAj, + const Grid<Real> *pAk) const + { + A0[idx * stencilSize0 + 0] = (*pA0)[idx]; + A0[idx * stencilSize0 + 1] = (*pAi)[idx]; + A0[idx * stencilSize0 + 2] = (*pAj)[idx]; + if (is3D) + A0[idx * stencilSize0 + 3] = (*pAk)[idx]; + } + inline std::vector<Real> &getArg0() + { + return sizeRef; + } + typedef std::vector<Real> type0; + inline std::vector<Real> &getArg1() + { + return A0; + } + typedef std::vector<Real> type1; + inline int &getArg2() + { + return stencilSize0; + } + typedef int type2; + inline bool &getArg3() + { + return is3D; + } + typedef bool type3; + inline const Grid<Real> *getArg4() + { + return pA0; + } + typedef Grid<Real> type4; + inline const Grid<Real> *getArg5() + { + return pAi; + } + typedef Grid<Real> type5; + inline const Grid<Real> *getArg6() + { + return pAj; + } + typedef Grid<Real> type6; + inline const Grid<Real> *getArg7() + { + return pAk; + } + typedef Grid<Real> type7; + void runMessage() + { + debMsg("Executing kernel knCopyA ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, sizeRef, A0, stencilSize0, is3D, pA0, pAi, pAj, pAk); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + std::vector<Real> &sizeRef; + std::vector<Real> &A0; + int stencilSize0; + bool is3D; + const Grid<Real> *pA0; + const Grid<Real> *pAi; + const Grid<Real> *pAj; + const Grid<Real> *pAk; +}; + +struct knActivateVertices : public KernelBase { + knActivateVertices(std::vector<GridMg::VertexType> &type_0, + std::vector<Real> &A0, + bool &nonZeroStencilSumFound, + bool &trivialEquationsFound, + const GridMg &mg) + : KernelBase(type_0.size()), + type_0(type_0), + A0(A0), + nonZeroStencilSumFound(nonZeroStencilSumFound), + trivialEquationsFound(trivialEquationsFound), + mg(mg) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, + std::vector<GridMg::VertexType> &type_0, + std::vector<Real> &A0, + bool &nonZeroStencilSumFound, + bool &trivialEquationsFound, + const GridMg &mg) const + { + // active vertices on level 0 are vertices with non-zero diagonal entry in A + type_0[idx] = GridMg::vtInactive; + + if (mg.mA[0][idx * mg.mStencilSize0 + 0] != Real(0)) { + type_0[idx] = GridMg::vtActive; + + bool isStencilSumNonZero = false, isEquationTrivial = false; + mg.analyzeStencil(int(idx), mg.mIs3D, isStencilSumNonZero, isEquationTrivial); + + // Note: nonZeroStencilSumFound and trivialEquationsFound are only + // changed from false to true, and hence there are no race conditions. + if (isStencilSumNonZero) + nonZeroStencilSumFound = true; + + // scale down trivial equations + if (isEquationTrivial) { + type_0[idx] = GridMg::vtActiveTrivial; + A0[idx * mg.mStencilSize0 + 0] *= mg.mTrivialEquationScale; + trivialEquationsFound = true; + }; + } + } + inline std::vector<GridMg::VertexType> &getArg0() + { + return type_0; + } + typedef std::vector<GridMg::VertexType> type0; + inline std::vector<Real> &getArg1() + { + return A0; + } + typedef std::vector<Real> type1; + inline bool &getArg2() + { + return nonZeroStencilSumFound; + } + typedef bool type2; + inline bool &getArg3() + { + return trivialEquationsFound; + } + typedef bool type3; + inline const GridMg &getArg4() + { + return mg; + } + typedef GridMg type4; + void runMessage() + { + debMsg("Executing kernel knActivateVertices ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, type_0, A0, nonZeroStencilSumFound, trivialEquationsFound, mg); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + std::vector<GridMg::VertexType> &type_0; + std::vector<Real> &A0; + bool &nonZeroStencilSumFound; + bool &trivialEquationsFound; + const GridMg &mg; +}; + +void GridMg::setA(const Grid<Real> *pA0, + const Grid<Real> *pAi, + const Grid<Real> *pAj, + const Grid<Real> *pAk) +{ + MG_TIMINGS(MuTime time;) + + // Copy level 0 + knCopyA(mx[0], mA[0], mStencilSize0, mIs3D, pA0, pAi, pAj, pAk); + + // Determine active vertices and scale trivial equations + bool nonZeroStencilSumFound = false; + bool trivialEquationsFound = false; + + knActivateVertices(mType[0], mA[0], nonZeroStencilSumFound, trivialEquationsFound, *this); + + if (trivialEquationsFound) + debMsg("GridMg::setA: Found at least one trivial equation", 2); + + // Sanity check: if all rows of A sum up to 0 --> A doesn't have full rank (opposite direction + // isn't necessarily true) + if (!nonZeroStencilSumFound) + debMsg( + "GridMg::setA: Found constant mode: A*1=0! A does not have full rank and multigrid may " + "not converge. (forgot to fix a pressure value?)", + 1); + + // Create coarse grids and operators on levels >0 + for (int l = 1; l < mA.size(); l++) { + MG_TIMINGS(time.get();) + genCoarseGrid(l); + MG_TIMINGS(debMsg("GridMg: Generated level " << l << " in " << time.update(), 1);) + genCoraseGridOperator(l); + MG_TIMINGS(debMsg("GridMg: Generated operator " << l << " in " << time.update(), 1);) + } + + mIsASet = true; + mIsRhsSet = false; // invalidate rhs +} + +struct knSetRhs : public KernelBase { + knSetRhs(std::vector<Real> &b, const Grid<Real> &rhs, const GridMg &mg) + : KernelBase(b.size()), b(b), rhs(rhs), mg(mg) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, std::vector<Real> &b, const Grid<Real> &rhs, const GridMg &mg) const + { + b[idx] = rhs[idx]; + + // scale down trivial equations + if (mg.mType[0][idx] == GridMg::vtActiveTrivial) { + b[idx] *= mg.mTrivialEquationScale; + }; + } + inline std::vector<Real> &getArg0() + { + return b; + } + typedef std::vector<Real> type0; + inline const Grid<Real> &getArg1() + { + return rhs; + } + typedef Grid<Real> type1; + inline const GridMg &getArg2() + { + return mg; + } + typedef GridMg type2; + void runMessage() + { + debMsg("Executing kernel knSetRhs ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, b, rhs, mg); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + std::vector<Real> &b; + const Grid<Real> &rhs; + const GridMg &mg; +}; + +void GridMg::setRhs(const Grid<Real> &rhs) +{ + assertMsg(mIsASet, "GridMg::setRhs Error: A has not been set."); + + knSetRhs(mb[0], rhs, *this); + + mIsRhsSet = true; +} + +template<class T> struct knSet : public KernelBase { + knSet(std::vector<T> &data, T value) : KernelBase(data.size()), data(data), value(value) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, std::vector<T> &data, T value) const + { + data[idx] = value; + } + inline std::vector<T> &getArg0() + { + return data; + } + typedef std::vector<T> type0; + inline T &getArg1() + { + return value; + } + typedef T type1; + void runMessage() + { + debMsg("Executing kernel knSet ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, data, value); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + std::vector<T> &data; + T value; +}; + +template<class T> struct knCopyToVector : public KernelBase { + knCopyToVector(std::vector<T> &dst, const Grid<T> &src) + : KernelBase(dst.size()), dst(dst), src(src) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, std::vector<T> &dst, const Grid<T> &src) const + { + dst[idx] = src[idx]; + } + inline std::vector<T> &getArg0() + { + return dst; + } + typedef std::vector<T> type0; + inline const Grid<T> &getArg1() + { + return src; + } + typedef Grid<T> type1; + void runMessage() + { + debMsg("Executing kernel knCopyToVector ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, dst, src); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + std::vector<T> &dst; + const Grid<T> &src; +}; + +template<class T> struct knCopyToGrid : public KernelBase { + knCopyToGrid(const std::vector<T> &src, Grid<T> &dst) + : KernelBase(src.size()), src(src), dst(dst) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, const std::vector<T> &src, Grid<T> &dst) const + { + dst[idx] = src[idx]; + } + inline const std::vector<T> &getArg0() + { + return src; + } + typedef std::vector<T> type0; + inline Grid<T> &getArg1() + { + return dst; + } + typedef Grid<T> type1; + void runMessage() + { + debMsg("Executing kernel knCopyToGrid ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, src, dst); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + const std::vector<T> &src; + Grid<T> &dst; +}; + +template<class T> struct knAddAssign : public KernelBase { + knAddAssign(std::vector<T> &dst, const std::vector<T> &src) + : KernelBase(dst.size()), dst(dst), src(src) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, std::vector<T> &dst, const std::vector<T> &src) const + { + dst[idx] += src[idx]; + } + inline std::vector<T> &getArg0() + { + return dst; + } + typedef std::vector<T> type0; + inline const std::vector<T> &getArg1() + { + return src; + } + typedef std::vector<T> type1; + void runMessage() + { + debMsg("Executing kernel knAddAssign ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, dst, src); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + std::vector<T> &dst; + const std::vector<T> &src; +}; + +Real GridMg::doVCycle(Grid<Real> &dst, const Grid<Real> *src) +{ + MG_TIMINGS(MuTime timeSmooth; MuTime timeCG; MuTime timeI; MuTime timeR; MuTime timeTotal; + MuTime time;) + MG_TIMINGS(timeSmooth.clear(); timeCG.clear(); timeI.clear(); timeR.clear();) + + assertMsg(mIsASet && mIsRhsSet, "GridMg::doVCycle Error: A and/or rhs have not been set."); + + const int maxLevel = int(mA.size()) - 1; + + if (src) { + knCopyToVector<Real>(mx[0], *src); + } + else { + knSet<Real>(mx[0], Real(0)); + } + + for (int l = 0; l < maxLevel; l++) { + MG_TIMINGS(time.update();) + for (int i = 0; i < mNumPreSmooth; i++) { + smoothGS(l, false); + } + + MG_TIMINGS(timeSmooth += time.update();) + + calcResidual(l); + restrict(l + 1, mr[l], mb[l + 1]); + + knSet<Real>(mx[l + 1], Real(0)); + + MG_TIMINGS(timeR += time.update();) + } + + MG_TIMINGS(time.update();) + solveCG(maxLevel); + MG_TIMINGS(timeCG += time.update();) + + for (int l = maxLevel - 1; l >= 0; l--) { + MG_TIMINGS(time.update();) + interpolate(l, mx[l + 1], mr[l]); + + knAddAssign<Real>(mx[l], mr[l]); + + MG_TIMINGS(timeI += time.update();) + + for (int i = 0; i < mNumPostSmooth; i++) { + smoothGS(l, true); + } + MG_TIMINGS(timeSmooth += time.update();) + } + + calcResidual(0); + Real res = calcResidualNorm(0); + + knCopyToGrid<Real>(mx[0], dst); + + MG_TIMINGS(debMsg("GridMg: Finished VCycle in " + << timeTotal.update() << " (smoothing: " << timeSmooth + << ", CG: " << timeCG << ", R: " << timeR << ", I: " << timeI << ")", + 1);) + + return res; +} + +struct knActivateCoarseVertices : public KernelBase { + knActivateCoarseVertices(std::vector<GridMg::VertexType> &type, int unused) + : KernelBase(type.size()), type(type), unused(unused) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, std::vector<GridMg::VertexType> &type, int unused) const + { + // set all remaining 'free' vertices to 'removed', + if (type[idx] == GridMg::vtFree) + type[idx] = GridMg::vtRemoved; + + // then convert 'zero' vertices to 'active' and 'removed' vertices to 'inactive' + if (type[idx] == GridMg::vtZero) + type[idx] = GridMg::vtActive; + if (type[idx] == GridMg::vtRemoved) + type[idx] = GridMg::vtInactive; + } + inline std::vector<GridMg::VertexType> &getArg0() + { + return type; + } + typedef std::vector<GridMg::VertexType> type0; + inline int &getArg1() + { + return unused; + } + typedef int type1; + void runMessage() + { + debMsg("Executing kernel knActivateCoarseVertices ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, type, unused); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + std::vector<GridMg::VertexType> &type; + int unused; +}; + +// Determine active cells on coarse level l from active cells on fine level l-1 +// while ensuring a full-rank interpolation operator (see Section 3.3 in [1]). +void GridMg::genCoarseGrid(int l) +{ + // AF_Free: unused/untouched vertices + // AF_Zero: vertices selected for coarser level + // AF_Removed: vertices removed from coarser level + enum activeFlags : char { AF_Removed = 0, AF_Zero = 1, AF_Free = 2 }; + + // initialize all coarse vertices with 'free' + knSet<VertexType>(mType[l], vtFree); + + // initialize min heap of (ID: fine grid vertex, key: #free interpolation vertices) pairs + NKMinHeap heap(int(mb[l - 1].size()), + mIs3D ? 9 : 5); // max 8 (or 4 in 2D) free interpolation vertices + + FOR_LVL(v, l - 1) + { + if (mType[l - 1][v] != vtInactive) { + Vec3i V = vecIdx(v, l - 1); + int fiv = 1 << ((V.x % 2) + (V.y % 2) + (V.z % 2)); + heap.setKey(v, fiv); + } + } + + // process fine vertices in heap consecutively, always choosing the vertex with + // the currently smallest number of free interpolation vertices + while (heap.size() > 0) { + int v = heap.popMin().first; + Vec3i V = vecIdx(v, l - 1); + + // loop over associated interpolation vertices of V on coarse level l: + // the first encountered 'free' vertex is set to 'zero', + // all remaining 'free' vertices are set to 'removed'. + bool vdone = false; + + FOR_VEC_MINMAX(I, V / 2, (V + 1) / 2) + { + int i = linIdx(I, l); + + if (mType[l][i] == vtFree) { + if (vdone) { + mType[l][i] = vtRemoved; + } + else { + mType[l][i] = vtZero; + vdone = true; + } + + // update #free interpolation vertices in heap: + // loop over all associated restriction vertices of I on fine level l-1 + FOR_VEC_MINMAX(R, vmax(0, I * 2 - 1), vmin(mSize[l - 1] - 1, I * 2 + 1)) + { + int r = linIdx(R, l - 1); + int key = heap.getKey(r); + + if (key > 1) { + heap.setKey(r, key - 1); + } // decrease key of r + else if (key > -1) { + heap.setKey(r, -1); + } // removes r from heap + } + } + } + } + + knActivateCoarseVertices(mType[l], 0); +} + +struct knGenCoarseGridOperator : public KernelBase { + knGenCoarseGridOperator(std::vector<Real> &sizeRef, + std::vector<Real> &A, + int l, + const GridMg &mg) + : KernelBase(sizeRef.size()), sizeRef(sizeRef), A(A), l(l), mg(mg) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, + std::vector<Real> &sizeRef, + std::vector<Real> &A, + int l, + const GridMg &mg) const + { + if (mg.mType[l][idx] == GridMg::vtInactive) + return; + + for (int i = 0; i < mg.mStencilSize; i++) { + A[idx * mg.mStencilSize + i] = Real(0); + } // clear stencil + + Vec3i V = mg.vecIdx(int(idx), l); + + // Calculate the stencil of A_l at V by considering all vertex paths of the form: + // (V) <--restriction-- (U) <--A_{l-1}-- (W) <--interpolation-- (N) + // V and N are vertices on the coarse grid level l, + // U and W are vertices on the fine grid level l-1. + + if (l == 1) { + // loop over precomputed paths + for (auto it = mg.mCoarseningPaths0.begin(); it != mg.mCoarseningPaths0.end(); it++) { + Vec3i N = V + it->N; + int n = mg.linIdx(N, l); + if (!mg.inGrid(N, l) || mg.mType[l][n] == GridMg::vtInactive) + continue; + + Vec3i U = V * 2 + it->U; + int u = mg.linIdx(U, l - 1); + if (!mg.inGrid(U, l - 1) || mg.mType[l - 1][u] == GridMg::vtInactive) + continue; + + Vec3i W = V * 2 + it->W; + int w = mg.linIdx(W, l - 1); + if (!mg.inGrid(W, l - 1) || mg.mType[l - 1][w] == GridMg::vtInactive) + continue; + + if (it->inUStencil) { + A[idx * mg.mStencilSize + it->sc] += it->rw * + mg.mA[l - 1][u * mg.mStencilSize0 + it->sf] * + it->iw; + } + else { + A[idx * mg.mStencilSize + it->sc] += it->rw * + mg.mA[l - 1][w * mg.mStencilSize0 + it->sf] * + it->iw; + } + } + } + else { + // l > 1: + // loop over restriction vertices U on level l-1 associated with V + FOR_VEC_MINMAX(U, vmax(0, V * 2 - 1), vmin(mg.mSize[l - 1] - 1, V * 2 + 1)) + { + int u = mg.linIdx(U, l - 1); + if (mg.mType[l - 1][u] == GridMg::vtInactive) + continue; + + // restriction weight + Real rw = Real(1) / Real(1 << ((U.x % 2) + (U.y % 2) + (U.z % 2))); + + // loop over all stencil neighbors N of V on level l that can be reached via restriction to + // U + FOR_VEC_MINMAX(N, (U - 1) / 2, vmin(mg.mSize[l] - 1, (U + 2) / 2)) + { + int n = mg.linIdx(N, l); + if (mg.mType[l][n] == GridMg::vtInactive) + continue; + + // stencil entry at V associated to N (coarse grid level l) + Vec3i SC = N - V + mg.mStencilMax; + int sc = SC.x + 3 * SC.y + 9 * SC.z; + if (sc < mg.mStencilSize - 1) + continue; + + // loop over all vertices W which are in the stencil of A_{l-1} at U + // and which interpolate from N + FOR_VEC_MINMAX(W, + vmax(0, vmax(U - 1, N * 2 - 1)), + vmin(mg.mSize[l - 1] - 1, vmin(U + 1, N * 2 + 1))) + { + int w = mg.linIdx(W, l - 1); + if (mg.mType[l - 1][w] == GridMg::vtInactive) + continue; + + // stencil entry at U associated to W (fine grid level l-1) + Vec3i SF = W - U + mg.mStencilMax; + int sf = SF.x + 3 * SF.y + 9 * SF.z; + + Real iw = Real(1) / + Real(1 << ((W.x % 2) + (W.y % 2) + (W.z % 2))); // interpolation weight + + if (sf < mg.mStencilSize) { + A[idx * mg.mStencilSize + sc - mg.mStencilSize + 1] += + rw * mg.mA[l - 1][w * mg.mStencilSize + mg.mStencilSize - 1 - sf] * iw; + } + else { + A[idx * mg.mStencilSize + sc - mg.mStencilSize + 1] += + rw * mg.mA[l - 1][u * mg.mStencilSize + sf - mg.mStencilSize + 1] * iw; + } + } + } + } + } + } + inline std::vector<Real> &getArg0() + { + return sizeRef; + } + typedef std::vector<Real> type0; + inline std::vector<Real> &getArg1() + { + return A; + } + typedef std::vector<Real> type1; + inline int &getArg2() + { + return l; + } + typedef int type2; + inline const GridMg &getArg3() + { + return mg; + } + typedef GridMg type3; + void runMessage() + { + debMsg("Executing kernel knGenCoarseGridOperator ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, sizeRef, A, l, mg); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + std::vector<Real> &sizeRef; + std::vector<Real> &A; + int l; + const GridMg &mg; +}; + +// Calculate A_l on coarse level l from A_{l-1} on fine level l-1 using +// Galerkin-based coarsening, i.e., compute A_l = R * A_{l-1} * I. +void GridMg::genCoraseGridOperator(int l) +{ + // for each coarse grid vertex V + knGenCoarseGridOperator(mx[l], mA[l], l, *this); +} + +struct knSmoothColor : public KernelBase { + knSmoothColor(ThreadSize &numBlocks, + std::vector<Real> &x, + const Vec3i &blockSize, + const std::vector<Vec3i> &colorOffs, + int l, + const GridMg &mg) + : KernelBase(numBlocks.size()), + numBlocks(numBlocks), + x(x), + blockSize(blockSize), + colorOffs(colorOffs), + l(l), + mg(mg) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, + ThreadSize &numBlocks, + std::vector<Real> &x, + const Vec3i &blockSize, + const std::vector<Vec3i> &colorOffs, + int l, + const GridMg &mg) const + { + Vec3i blockOff(int(idx) % blockSize.x, + (int(idx) % (blockSize.x * blockSize.y)) / blockSize.x, + int(idx) / (blockSize.x * blockSize.y)); + + for (int off = 0; off < colorOffs.size(); off++) { + + Vec3i V = blockOff * 2 + colorOffs[off]; + if (!mg.inGrid(V, l)) + continue; + + const int v = mg.linIdx(V, l); + if (mg.mType[l][v] == GridMg::vtInactive) + continue; + + Real sum = mg.mb[l][v]; + + if (l == 0) { + int n; + for (int d = 0; d < mg.mDim; d++) { + if (V[d] > 0) { + n = v - mg.mPitch[0][d]; + sum -= mg.mA[0][n * mg.mStencilSize0 + d + 1] * mg.mx[0][n]; + } + if (V[d] < mg.mSize[0][d] - 1) { + n = v + mg.mPitch[0][d]; + sum -= mg.mA[0][v * mg.mStencilSize0 + d + 1] * mg.mx[0][n]; + } + } + + x[v] = sum / mg.mA[0][v * mg.mStencilSize0 + 0]; + } + else { + FOR_VECLIN_MINMAX(S, s, mg.mStencilMin, mg.mStencilMax) + { + if (s == mg.mStencilSize - 1) + continue; + + Vec3i N = V + S; + int n = mg.linIdx(N, l); + + if (mg.inGrid(N, l) && mg.mType[l][n] != GridMg::vtInactive) { + if (s < mg.mStencilSize) { + sum -= mg.mA[l][n * mg.mStencilSize + mg.mStencilSize - 1 - s] * mg.mx[l][n]; + } + else { + sum -= mg.mA[l][v * mg.mStencilSize + s - mg.mStencilSize + 1] * mg.mx[l][n]; + } + } + } + + x[v] = sum / mg.mA[l][v * mg.mStencilSize + 0]; + } + } + } + inline ThreadSize &getArg0() + { + return numBlocks; + } + typedef ThreadSize type0; + inline std::vector<Real> &getArg1() + { + return x; + } + typedef std::vector<Real> type1; + inline const Vec3i &getArg2() + { + return blockSize; + } + typedef Vec3i type2; + inline const std::vector<Vec3i> &getArg3() + { + return colorOffs; + } + typedef std::vector<Vec3i> type3; + inline int &getArg4() + { + return l; + } + typedef int type4; + inline const GridMg &getArg5() + { + return mg; + } + typedef GridMg type5; + void runMessage() + { + debMsg("Executing kernel knSmoothColor ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, numBlocks, x, blockSize, colorOffs, l, mg); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + ThreadSize &numBlocks; + std::vector<Real> &x; + const Vec3i &blockSize; + const std::vector<Vec3i> &colorOffs; + int l; + const GridMg &mg; +}; + +void GridMg::smoothGS(int l, bool reversedOrder) +{ + // Multicolor Gauss-Seidel with two colors for the 5/7-point stencil on level 0 + // and with four/eight colors for the 9/27-point stencil on levels > 0 + std::vector<std::vector<Vec3i>> colorOffs; + const Vec3i a[8] = {Vec3i(0, 0, 0), + Vec3i(1, 0, 0), + Vec3i(0, 1, 0), + Vec3i(1, 1, 0), + Vec3i(0, 0, 1), + Vec3i(1, 0, 1), + Vec3i(0, 1, 1), + Vec3i(1, 1, 1)}; + if (mIs3D) { + if (l == 0) + colorOffs = {{a[0], a[3], a[5], a[6]}, {a[1], a[2], a[4], a[7]}}; + else + colorOffs = {{a[0]}, {a[1]}, {a[2]}, {a[3]}, {a[4]}, {a[5]}, {a[6]}, {a[7]}}; + } + else { + if (l == 0) + colorOffs = {{a[0], a[3]}, {a[1], a[2]}}; + else + colorOffs = {{a[0]}, {a[1]}, {a[2]}, {a[3]}}; + } + + // Divide grid into 2x2 blocks for parallelization + Vec3i blockSize = (mSize[l] + 1) / 2; + ThreadSize numBlocks(blockSize.x * blockSize.y * blockSize.z); + + for (int c = 0; c < colorOffs.size(); c++) { + int color = reversedOrder ? int(colorOffs.size()) - 1 - c : c; + + knSmoothColor(numBlocks, mx[l], blockSize, colorOffs[color], l, *this); + } +} + +struct knCalcResidual : public KernelBase { + knCalcResidual(std::vector<Real> &r, int l, const GridMg &mg) + : KernelBase(r.size()), r(r), l(l), mg(mg) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, std::vector<Real> &r, int l, const GridMg &mg) const + { + if (mg.mType[l][idx] == GridMg::vtInactive) + return; + + Vec3i V = mg.vecIdx(int(idx), l); + + Real sum = mg.mb[l][idx]; + + if (l == 0) { + int n; + for (int d = 0; d < mg.mDim; d++) { + if (V[d] > 0) { + n = int(idx) - mg.mPitch[0][d]; + sum -= mg.mA[0][n * mg.mStencilSize0 + d + 1] * mg.mx[0][n]; + } + if (V[d] < mg.mSize[0][d] - 1) { + n = int(idx) + mg.mPitch[0][d]; + sum -= mg.mA[0][idx * mg.mStencilSize0 + d + 1] * mg.mx[0][n]; + } + } + sum -= mg.mA[0][idx * mg.mStencilSize0 + 0] * mg.mx[0][idx]; + } + else { + FOR_VECLIN_MINMAX(S, s, mg.mStencilMin, mg.mStencilMax) + { + Vec3i N = V + S; + int n = mg.linIdx(N, l); + + if (mg.inGrid(N, l) && mg.mType[l][n] != GridMg::vtInactive) { + if (s < mg.mStencilSize) { + sum -= mg.mA[l][n * mg.mStencilSize + mg.mStencilSize - 1 - s] * mg.mx[l][n]; + } + else { + sum -= mg.mA[l][idx * mg.mStencilSize + s - mg.mStencilSize + 1] * mg.mx[l][n]; + } + } + } + } + + r[idx] = sum; + } + inline std::vector<Real> &getArg0() + { + return r; + } + typedef std::vector<Real> type0; + inline int &getArg1() + { + return l; + } + typedef int type1; + inline const GridMg &getArg2() + { + return mg; + } + typedef GridMg type2; + void runMessage() + { + debMsg("Executing kernel knCalcResidual ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, r, l, mg); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + std::vector<Real> &r; + int l; + const GridMg &mg; +}; + +void GridMg::calcResidual(int l) +{ + knCalcResidual(mr[l], l, *this); +} + +struct knResidualNormSumSqr : public KernelBase { + knResidualNormSumSqr(const vector<Real> &r, int l, const GridMg &mg) + : KernelBase(r.size()), r(r), l(l), mg(mg), result(Real(0)) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, const vector<Real> &r, int l, const GridMg &mg, Real &result) + { + if (mg.mType[l][idx] == GridMg::vtInactive) + return; + + result += r[idx] * r[idx]; + } + inline operator Real() + { + return result; + } + inline Real &getRet() + { + return result; + } + inline const vector<Real> &getArg0() + { + return r; + } + typedef vector<Real> type0; + inline int &getArg1() + { + return l; + } + typedef int type1; + inline const GridMg &getArg2() + { + return mg; + } + typedef GridMg type2; + void runMessage() + { + debMsg("Executing kernel knResidualNormSumSqr ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, r, l, mg, result); + } + void run() + { + tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); + } + knResidualNormSumSqr(knResidualNormSumSqr &o, tbb::split) + : KernelBase(o), r(o.r), l(o.l), mg(o.mg), result(Real(0)) + { + } + void join(const knResidualNormSumSqr &o) + { + result += o.result; + } + const vector<Real> &r; + int l; + const GridMg &mg; + Real result; +}; +; + +Real GridMg::calcResidualNorm(int l) +{ + Real res = knResidualNormSumSqr(mr[l], l, *this); + + return std::sqrt(res); +} + +// Standard conjugate gradients with Jacobi preconditioner +// Notes: Always run at double precision. Not parallelized since +// coarsest level is assumed to be small. +void GridMg::solveCG(int l) +{ + auto applyAStencil = [this](int v, int l, const std::vector<double> &vec) -> double { + Vec3i V = vecIdx(v, l); + + double sum = 0; + + if (l == 0) { + int n; + for (int d = 0; d < mDim; d++) { + if (V[d] > 0) { + n = v - mPitch[0][d]; + sum += mA[0][n * mStencilSize0 + d + 1] * vec[n]; + } + if (V[d] < mSize[0][d] - 1) { + n = v + mPitch[0][d]; + sum += mA[0][v * mStencilSize0 + d + 1] * vec[n]; + } + } + sum += mA[0][v * mStencilSize0 + 0] * vec[v]; + } + else { + FOR_VECLIN_MINMAX(S, s, mStencilMin, mStencilMax) + { + Vec3i N = V + S; + int n = linIdx(N, l); + + if (inGrid(N, l) && mType[l][n] != vtInactive) { + if (s < mStencilSize) { + sum += mA[l][n * mStencilSize + mStencilSize - 1 - s] * vec[n]; + } + else { + sum += mA[l][v * mStencilSize + s - mStencilSize + 1] * vec[n]; + } + } + } + } + + return sum; + }; + + std::vector<double> &z = mCGtmp1[l]; + std::vector<double> &p = mCGtmp2[l]; + std::vector<double> &x = mCGtmp3[l]; + std::vector<double> &r = mCGtmp4[l]; + + // Initialization: + double alphaTop = 0; + double initialResidual = 0; + + FOR_LVL(v, l) + { + x[v] = mx[l][v]; + } + + FOR_LVL(v, l) + { + if (mType[l][v] == vtInactive) + continue; + + r[v] = mb[l][v] - applyAStencil(v, l, x); + if (l == 0) { + z[v] = r[v] / mA[0][v * mStencilSize0 + 0]; + } + else { + z[v] = r[v] / mA[l][v * mStencilSize + 0]; + } + + initialResidual += r[v] * r[v]; + p[v] = z[v]; + alphaTop += r[v] * z[v]; + } + + initialResidual = std::sqrt(initialResidual); + + int iter = 0; + const int maxIter = 10000; + double residual = -1; + + // CG iterations + for (; iter < maxIter && initialResidual > 1E-12; iter++) { + double alphaBot = 0; + + FOR_LVL(v, l) + { + if (mType[l][v] == vtInactive) + continue; + + z[v] = applyAStencil(v, l, p); + alphaBot += p[v] * z[v]; + } + + double alpha = alphaTop / alphaBot; + + double alphaTopNew = 0; + residual = 0; + + FOR_LVL(v, l) + { + if (mType[l][v] == vtInactive) + continue; + + x[v] += alpha * p[v]; + r[v] -= alpha * z[v]; + residual += r[v] * r[v]; + if (l == 0) + z[v] = r[v] / mA[0][v * mStencilSize0 + 0]; + else + z[v] = r[v] / mA[l][v * mStencilSize + 0]; + alphaTopNew += r[v] * z[v]; + } + + residual = std::sqrt(residual); + + if (residual / initialResidual < mCoarsestLevelAccuracy) + break; + + double beta = alphaTopNew / alphaTop; + alphaTop = alphaTopNew; + + FOR_LVL(v, l) + { + p[v] = z[v] + beta * p[v]; + } + debMsg("GridMg::solveCG i=" << iter << " rel-residual=" << (residual / initialResidual), 5); + } + + FOR_LVL(v, l) + { + mx[l][v] = Real(x[v]); + } + + if (iter == maxIter) { + debMsg("GridMg::solveCG Warning: Reached maximum number of CG iterations", 1); + } + else { + debMsg("GridMg::solveCG Info: Reached residual " << residual << " in " << iter + << " iterations", + 2); + } +} + +struct knRestrict : public KernelBase { + knRestrict(std::vector<Real> &dst, const std::vector<Real> &src, int l_dst, const GridMg &mg) + : KernelBase(dst.size()), dst(dst), src(src), l_dst(l_dst), mg(mg) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, + std::vector<Real> &dst, + const std::vector<Real> &src, + int l_dst, + const GridMg &mg) const + { + if (mg.mType[l_dst][idx] == GridMg::vtInactive) + return; + + const int l_src = l_dst - 1; + + // Coarse grid vertex + Vec3i V = mg.vecIdx(int(idx), l_dst); + + Real sum = Real(0); + + FOR_VEC_MINMAX(R, vmax(0, V * 2 - 1), vmin(mg.mSize[l_src] - 1, V * 2 + 1)) + { + int r = mg.linIdx(R, l_src); + if (mg.mType[l_src][r] == GridMg::vtInactive) + continue; + + // restriction weight + Real rw = Real(1) / Real(1 << ((R.x % 2) + (R.y % 2) + (R.z % 2))); + + sum += rw * src[r]; + } + + dst[idx] = sum; + } + inline std::vector<Real> &getArg0() + { + return dst; + } + typedef std::vector<Real> type0; + inline const std::vector<Real> &getArg1() + { + return src; + } + typedef std::vector<Real> type1; + inline int &getArg2() + { + return l_dst; + } + typedef int type2; + inline const GridMg &getArg3() + { + return mg; + } + typedef GridMg type3; + void runMessage() + { + debMsg("Executing kernel knRestrict ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, dst, src, l_dst, mg); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + std::vector<Real> &dst; + const std::vector<Real> &src; + int l_dst; + const GridMg &mg; +}; + +void GridMg::restrict(int l_dst, const std::vector<Real> &src, std::vector<Real> &dst) const +{ + knRestrict(dst, src, l_dst, *this); +} + +struct knInterpolate : public KernelBase { + knInterpolate(std::vector<Real> &dst, const std::vector<Real> &src, int l_dst, const GridMg &mg) + : KernelBase(dst.size()), dst(dst), src(src), l_dst(l_dst), mg(mg) + { + runMessage(); + run(); + } + inline void op(IndexInt idx, + std::vector<Real> &dst, + const std::vector<Real> &src, + int l_dst, + const GridMg &mg) const + { + if (mg.mType[l_dst][idx] == GridMg::vtInactive) + return; + + const int l_src = l_dst + 1; + + Vec3i V = mg.vecIdx(int(idx), l_dst); + + Real sum = Real(0); + + FOR_VEC_MINMAX(I, V / 2, (V + 1) / 2) + { + int i = mg.linIdx(I, l_src); + if (mg.mType[l_src][i] != GridMg::vtInactive) + sum += src[i]; + } + + // interpolation weight + Real iw = Real(1) / Real(1 << ((V.x % 2) + (V.y % 2) + (V.z % 2))); + + dst[idx] = iw * sum; + } + inline std::vector<Real> &getArg0() + { + return dst; + } + typedef std::vector<Real> type0; + inline const std::vector<Real> &getArg1() + { + return src; + } + typedef std::vector<Real> type1; + inline int &getArg2() + { + return l_dst; + } + typedef int type2; + inline const GridMg &getArg3() + { + return mg; + } + typedef GridMg type3; + void runMessage() + { + debMsg("Executing kernel knInterpolate ", 3); + debMsg("Kernel range" + << " size " << size << " ", + 4); + }; + void operator()(const tbb::blocked_range<IndexInt> &__r) const + { + for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) + op(idx, dst, src, l_dst, mg); + } + void run() + { + tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + } + std::vector<Real> &dst; + const std::vector<Real> &src; + int l_dst; + const GridMg &mg; +}; + +void GridMg::interpolate(int l_dst, const std::vector<Real> &src, std::vector<Real> &dst) const +{ + knInterpolate(dst, src, l_dst, *this); +} + +}; // namespace Manta |