// DO NOT EDIT ! // This file is generated using the MantaFlow preprocessor (prep generate). /****************************************************************************** * * MantaFlow fluid solver framework * Multigrid solver * * This program is free software, distributed under the terms of the * Apache License, Version 2.0 * http://www.apache.org/licenses/LICENSE-2.0 * * Copyright 2016, by Florian Ferstl (florian.ferstl.ff@gmail.com) * * This is an implementation of the solver developed by Dick et al. [1] * without topology awareness (= vertex duplication on coarser levels). This * simplification allows us to use regular grids for all levels of the multigrid * hierarchy and works well for moderately complex domains. * * [1] Solving the Fluid Pressure Poisson Equation Using Multigrid-Evaluation * and Improvements, C. Dick, M. Rogowsky, R. Westermann, IEEE TVCG 2015 * ******************************************************************************/ #include "multigrid.h" #define FOR_LVL(IDX, LVL) for (int IDX = 0; IDX < mb[LVL].size(); IDX++) #define FOR_VEC_MINMAX(VEC, MIN, MAX) \ Vec3i VEC; \ const Vec3i VEC##__min = (MIN), VEC##__max = (MAX); \ for (VEC.z = VEC##__min.z; VEC.z <= VEC##__max.z; VEC.z++) \ for (VEC.y = VEC##__min.y; VEC.y <= VEC##__max.y; VEC.y++) \ for (VEC.x = VEC##__min.x; VEC.x <= VEC##__max.x; VEC.x++) #define FOR_VECLIN_MINMAX(VEC, LIN, MIN, MAX) \ Vec3i VEC; \ int LIN = 0; \ const Vec3i VEC##__min = (MIN), VEC##__max = (MAX); \ for (VEC.z = VEC##__min.z; VEC.z <= VEC##__max.z; VEC.z++) \ for (VEC.y = VEC##__min.y; VEC.y <= VEC##__max.y; VEC.y++) \ for (VEC.x = VEC##__min.x; VEC.x <= VEC##__max.x; VEC.x++, LIN++) #define MG_TIMINGS(X) //#define MG_TIMINGS(X) X using namespace std; namespace Manta { // Helper class for calling mantaflow kernels with a specific number of threads class ThreadSize { IndexInt s; public: ThreadSize(IndexInt _s) { s = _s; } IndexInt size() { return s; } }; // ---------------------------------------------------------------------------- // Efficient min heap for pairs with 0<=ID mEntries; public: NKMinHeap(int N, int K) : mN(N), mK(K), mSize(0), mMinKey(-1), mEntries(N + K) { } int size() { return mSize; } int getKey(int ID) { return mEntries[mK + ID].key; } // Insert, decrease or increase key (or delete by setting key to -1) void setKey(int ID, int key); // peek min key (returns ID/key pair) std::pair peekMin(); // pop min key (returns ID/key pair) std::pair popMin(); void print(); // for debugging }; void NKMinHeap::setKey(int ID, int key) { assertMsg(0 <= ID && ID < mN, "NKMinHeap::setKey: ID out of range"); assertMsg(-1 <= key && key < mK, "NKMinHeap::setKey: key out of range"); const int kid = mK + ID; if (mEntries[kid].key == key) return; // nothing changes // remove from old key-list if ID existed previously if (mEntries[kid].key != -1) { int pred = mEntries[kid].prev; int succ = mEntries[kid].next; // can be -1 mEntries[pred].next = succ; if (succ != -1) mEntries[succ].prev = pred; // if removed key was minimum key, mMinKey may need to be updated int removedKey = mEntries[kid].key; if (removedKey == mMinKey) { if (mSize == 1) { mMinKey = -1; } else { for (; mMinKey < mK; mMinKey++) { if (mEntries[mMinKey].next != -1) break; } } } mSize--; } // set new key of ID mEntries[kid].key = key; if (key == -1) { // finished if key was set to -1 mEntries[kid].next = mEntries[kid].prev = -1; return; } // add key mSize++; if (mMinKey == -1) mMinKey = key; else mMinKey = std::min(mMinKey, key); // insert into new key-list (headed by mEntries[key]) int tmp = mEntries[key].next; mEntries[key].next = kid; mEntries[kid].prev = key; mEntries[kid].next = tmp; if (tmp != -1) mEntries[tmp].prev = kid; } std::pair NKMinHeap::peekMin() { if (mSize == 0) return std::pair(-1, -1); // error const int ID = mEntries[mMinKey].next - mK; return std::pair(ID, mMinKey); } std::pair NKMinHeap::popMin() { if (mSize == 0) return std::pair(-1, -1); // error const int kid = mEntries[mMinKey].next; const int ID = kid - mK; const int key = mMinKey; // remove from key-list int pred = mEntries[kid].prev; int succ = mEntries[kid].next; // can be -1 mEntries[pred].next = succ; if (succ != -1) mEntries[succ].prev = pred; // remove entry mEntries[kid] = Entry(); mSize--; // update mMinKey if (mSize == 0) { mMinKey = -1; } else { for (; mMinKey < mK; mMinKey++) { if (mEntries[mMinKey].next != -1) break; } } // return result return std::pair(ID, key); } void NKMinHeap::print() { std::cout << "Size: " << mSize << ", MinKey: " << mMinKey << std::endl; for (int key = 0; key < mK; key++) { if (mEntries[key].next != -1) { std::cout << "Key " << key << ": "; int kid = mEntries[key].next; while (kid != -1) { std::cout << kid - mK << " "; kid = mEntries[kid].next; } std::cout << std::endl; } } std::cout << std::endl; } // ---------------------------------------------------------------------------- // GridMg methods // // Illustration of 27-point stencil indices // y | z = -1 z = 0 z = 1 // ^ | 6 7 8, 15 16 17, 24 25 26 // | | 3 4 5, 12 13 14, 21 22 23 // o-> x | 0 1 2, 9 10 11, 18 19 20 // // Symmetric storage with only 14 entries per vertex // y | z = -1 z = 0 z = 1 // ^ | - - -, 2 3 4, 11 12 13 // | | - - -, - 0 1, 8 9 10 // o-> x | - - -, - - -, 5 6 7 GridMg::GridMg(const Vec3i &gridSize) : mNumPreSmooth(1), mNumPostSmooth(1), mCoarsestLevelAccuracy(Real(1E-8)), mTrivialEquationScale(Real(1E-6)), mIsASet(false), mIsRhsSet(false) { MG_TIMINGS(MuTime time;) // 2D or 3D mode mIs3D = (gridSize.z > 1); mDim = mIs3D ? 3 : 2; mStencilSize = mIs3D ? 14 : 5; // A has a full 27-point stencil on levels > 0 mStencilSize0 = mIs3D ? 4 : 3; // A has a 7-point stencil on level 0 mStencilMin = Vec3i(-1, -1, mIs3D ? -1 : 0); mStencilMax = Vec3i(1, 1, mIs3D ? 1 : 0); // Create level 0 (=original grid) mSize.push_back(gridSize); mPitch.push_back(Vec3i(1, mSize.back().x, mSize.back().x * mSize.back().y)); int n = mSize.back().x * mSize.back().y * mSize.back().z; mA.push_back(std::vector(n * mStencilSize0)); mx.push_back(std::vector(n)); mb.push_back(std::vector(n)); mr.push_back(std::vector(n)); mType.push_back(std::vector(n)); mCGtmp1.push_back(std::vector()); mCGtmp2.push_back(std::vector()); mCGtmp3.push_back(std::vector()); mCGtmp4.push_back(std::vector()); debMsg("GridMg::GridMg level 0: " << mSize[0].x << " x " << mSize[0].y << " x " << mSize[0].z << " x ", 2); // Create coarse levels >0 for (int l = 1; l <= 100; l++) { if (mSize[l - 1].x <= 5 && mSize[l - 1].y <= 5 && mSize[l - 1].z <= 5) break; if (n <= 1000) break; mSize.push_back((mSize[l - 1] + 2) / 2); mPitch.push_back(Vec3i(1, mSize.back().x, mSize.back().x * mSize.back().y)); n = mSize.back().x * mSize.back().y * mSize.back().z; mA.push_back(std::vector(n * mStencilSize)); mx.push_back(std::vector(n)); mb.push_back(std::vector(n)); mr.push_back(std::vector(n)); mType.push_back(std::vector(n)); mCGtmp1.push_back(std::vector()); mCGtmp2.push_back(std::vector()); mCGtmp3.push_back(std::vector()); mCGtmp4.push_back(std::vector()); debMsg("GridMg::GridMg level " << l << ": " << mSize[l].x << " x " << mSize[l].y << " x " << mSize[l].z << " x ", 2); } // Additional memory for CG on coarsest level mCGtmp1.back() = std::vector(n); mCGtmp2.back() = std::vector(n); mCGtmp3.back() = std::vector(n); mCGtmp4.back() = std::vector(n); MG_TIMINGS(debMsg("GridMg: Allocation done in " << time.update(), 1);) // Precalculate coarsening paths: // (V) <--restriction-- (U) <--A_{l-1}-- (W) <--interpolation-- (N) Vec3i p7stencil[7] = {Vec3i(0, 0, 0), Vec3i(-1, 0, 0), Vec3i(1, 0, 0), Vec3i(0, -1, 0), Vec3i(0, 1, 0), Vec3i(0, 0, -1), Vec3i(0, 0, 1)}; Vec3i V(1, 1, 1); // reference coarse grid vertex at (1,1,1) FOR_VEC_MINMAX(U, V * 2 + mStencilMin, V * 2 + mStencilMax) { for (int i = 0; i < 1 + 2 * mDim; i++) { Vec3i W = U + p7stencil[i]; FOR_VEC_MINMAX(N, W / 2, (W + 1) / 2) { int s = dot(N, Vec3i(1, 3, 9)); if (s >= 13) { CoarseningPath path; path.N = N - 1; // offset of N on coarse grid path.U = U - V * 2; // offset of U on fine grid path.W = W - V * 2; // offset of W on fine grid path.sc = s - 13; // stencil index corresponding to V<-N on coarse grid path.sf = (i + 1) / 2; // stencil index corresponding to U<-W on coarse grid path.inUStencil = (i % 2 == 0); // fine grid stencil entry stored at U or W? path.rw = Real(1) / Real(1 << ((U.x % 2) + (U.y % 2) + (U.z % 2))); // restriction weight V<-U path.iw = Real(1) / Real(1 << ((W.x % 2) + (W.y % 2) + (W.z % 2))); // interpolation weight W<-N mCoarseningPaths0.push_back(path); } } } } auto pathLess = [](const GridMg::CoarseningPath &p1, const GridMg::CoarseningPath &p2) { if (p1.sc == p2.sc) return dot(p1.U + 1, Vec3i(1, 3, 9)) < dot(p2.U + 1, Vec3i(1, 3, 9)); return p1.sc < p2.sc; }; std::sort(mCoarseningPaths0.begin(), mCoarseningPaths0.end(), pathLess); } void GridMg::analyzeStencil(int v, bool is3D, bool &isStencilSumNonZero, bool &isEquationTrivial) const { Vec3i V = vecIdx(v, 0); // collect stencil entries Real A[7]; A[0] = mA[0][v * mStencilSize0 + 0]; A[1] = mA[0][v * mStencilSize0 + 1]; A[2] = mA[0][v * mStencilSize0 + 2]; A[3] = is3D ? mA[0][v * mStencilSize0 + 3] : Real(0); A[4] = V.x != 0 ? mA[0][(v - mPitch[0].x) * mStencilSize0 + 1] : Real(0); A[5] = V.y != 0 ? mA[0][(v - mPitch[0].y) * mStencilSize0 + 2] : Real(0); A[6] = V.z != 0 && is3D ? mA[0][(v - mPitch[0].z) * mStencilSize0 + 3] : Real(0); // compute sum of stencil entries Real stencilMax = Real(0), stencilSum = Real(0); for (int i = 0; i < 7; i++) { stencilSum += A[i]; stencilMax = max(stencilMax, std::abs(A[i])); } // check if sum is numerically zero isStencilSumNonZero = std::abs(stencilSum / stencilMax) > Real(1E-6); // check for trivial equation (exact comparisons) isEquationTrivial = A[0] == Real(1) && A[1] == Real(0) && A[2] == Real(0) && A[3] == Real(0) && A[4] == Real(0) && A[5] == Real(0) && A[6] == Real(0); } struct knCopyA : public KernelBase { knCopyA(std::vector &sizeRef, std::vector &A0, int stencilSize0, bool is3D, const Grid *pA0, const Grid *pAi, const Grid *pAj, const Grid *pAk) : KernelBase(sizeRef.size()), sizeRef(sizeRef), A0(A0), stencilSize0(stencilSize0), is3D(is3D), pA0(pA0), pAi(pAi), pAj(pAj), pAk(pAk) { runMessage(); run(); } inline void op(IndexInt idx, std::vector &sizeRef, std::vector &A0, int stencilSize0, bool is3D, const Grid *pA0, const Grid *pAi, const Grid *pAj, const Grid *pAk) const { A0[idx * stencilSize0 + 0] = (*pA0)[idx]; A0[idx * stencilSize0 + 1] = (*pAi)[idx]; A0[idx * stencilSize0 + 2] = (*pAj)[idx]; if (is3D) A0[idx * stencilSize0 + 3] = (*pAk)[idx]; } inline std::vector &getArg0() { return sizeRef; } typedef std::vector type0; inline std::vector &getArg1() { return A0; } typedef std::vector type1; inline int &getArg2() { return stencilSize0; } typedef int type2; inline bool &getArg3() { return is3D; } typedef bool type3; inline const Grid *getArg4() { return pA0; } typedef Grid type4; inline const Grid *getArg5() { return pAi; } typedef Grid type5; inline const Grid *getArg6() { return pAj; } typedef Grid type6; inline const Grid *getArg7() { return pAk; } typedef Grid type7; void runMessage() { debMsg("Executing kernel knCopyA ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, sizeRef, A0, stencilSize0, is3D, pA0, pAi, pAj, pAk); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } std::vector &sizeRef; std::vector &A0; int stencilSize0; bool is3D; const Grid *pA0; const Grid *pAi; const Grid *pAj; const Grid *pAk; }; struct knActivateVertices : public KernelBase { knActivateVertices(std::vector &type_0, std::vector &A0, bool &nonZeroStencilSumFound, bool &trivialEquationsFound, const GridMg &mg) : KernelBase(type_0.size()), type_0(type_0), A0(A0), nonZeroStencilSumFound(nonZeroStencilSumFound), trivialEquationsFound(trivialEquationsFound), mg(mg) { runMessage(); run(); } inline void op(IndexInt idx, std::vector &type_0, std::vector &A0, bool &nonZeroStencilSumFound, bool &trivialEquationsFound, const GridMg &mg) const { // active vertices on level 0 are vertices with non-zero diagonal entry in A type_0[idx] = GridMg::vtInactive; if (mg.mA[0][idx * mg.mStencilSize0 + 0] != Real(0)) { type_0[idx] = GridMg::vtActive; bool isStencilSumNonZero = false, isEquationTrivial = false; mg.analyzeStencil(int(idx), mg.mIs3D, isStencilSumNonZero, isEquationTrivial); // Note: nonZeroStencilSumFound and trivialEquationsFound are only // changed from false to true, and hence there are no race conditions. if (isStencilSumNonZero) nonZeroStencilSumFound = true; // scale down trivial equations if (isEquationTrivial) { type_0[idx] = GridMg::vtActiveTrivial; A0[idx * mg.mStencilSize0 + 0] *= mg.mTrivialEquationScale; trivialEquationsFound = true; }; } } inline std::vector &getArg0() { return type_0; } typedef std::vector type0; inline std::vector &getArg1() { return A0; } typedef std::vector type1; inline bool &getArg2() { return nonZeroStencilSumFound; } typedef bool type2; inline bool &getArg3() { return trivialEquationsFound; } typedef bool type3; inline const GridMg &getArg4() { return mg; } typedef GridMg type4; void runMessage() { debMsg("Executing kernel knActivateVertices ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, type_0, A0, nonZeroStencilSumFound, trivialEquationsFound, mg); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } std::vector &type_0; std::vector &A0; bool &nonZeroStencilSumFound; bool &trivialEquationsFound; const GridMg &mg; }; void GridMg::setA(const Grid *pA0, const Grid *pAi, const Grid *pAj, const Grid *pAk) { MG_TIMINGS(MuTime time;) // Copy level 0 knCopyA(mx[0], mA[0], mStencilSize0, mIs3D, pA0, pAi, pAj, pAk); // Determine active vertices and scale trivial equations bool nonZeroStencilSumFound = false; bool trivialEquationsFound = false; knActivateVertices(mType[0], mA[0], nonZeroStencilSumFound, trivialEquationsFound, *this); if (trivialEquationsFound) debMsg("GridMg::setA: Found at least one trivial equation", 2); // Sanity check: if all rows of A sum up to 0 --> A doesn't have full rank (opposite direction // isn't necessarily true) if (!nonZeroStencilSumFound) debMsg( "GridMg::setA: Found constant mode: A*1=0! A does not have full rank and multigrid may " "not converge. (forgot to fix a pressure value?)", 1); // Create coarse grids and operators on levels >0 for (int l = 1; l < mA.size(); l++) { MG_TIMINGS(time.get();) genCoarseGrid(l); MG_TIMINGS(debMsg("GridMg: Generated level " << l << " in " << time.update(), 1);) genCoraseGridOperator(l); MG_TIMINGS(debMsg("GridMg: Generated operator " << l << " in " << time.update(), 1);) } mIsASet = true; mIsRhsSet = false; // invalidate rhs } struct knSetRhs : public KernelBase { knSetRhs(std::vector &b, const Grid &rhs, const GridMg &mg) : KernelBase(b.size()), b(b), rhs(rhs), mg(mg) { runMessage(); run(); } inline void op(IndexInt idx, std::vector &b, const Grid &rhs, const GridMg &mg) const { b[idx] = rhs[idx]; // scale down trivial equations if (mg.mType[0][idx] == GridMg::vtActiveTrivial) { b[idx] *= mg.mTrivialEquationScale; }; } inline std::vector &getArg0() { return b; } typedef std::vector type0; inline const Grid &getArg1() { return rhs; } typedef Grid type1; inline const GridMg &getArg2() { return mg; } typedef GridMg type2; void runMessage() { debMsg("Executing kernel knSetRhs ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, b, rhs, mg); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } std::vector &b; const Grid &rhs; const GridMg &mg; }; void GridMg::setRhs(const Grid &rhs) { assertMsg(mIsASet, "GridMg::setRhs Error: A has not been set."); knSetRhs(mb[0], rhs, *this); mIsRhsSet = true; } template struct knSet : public KernelBase { knSet(std::vector &data, T value) : KernelBase(data.size()), data(data), value(value) { runMessage(); run(); } inline void op(IndexInt idx, std::vector &data, T value) const { data[idx] = value; } inline std::vector &getArg0() { return data; } typedef std::vector type0; inline T &getArg1() { return value; } typedef T type1; void runMessage() { debMsg("Executing kernel knSet ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, data, value); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } std::vector &data; T value; }; template struct knCopyToVector : public KernelBase { knCopyToVector(std::vector &dst, const Grid &src) : KernelBase(dst.size()), dst(dst), src(src) { runMessage(); run(); } inline void op(IndexInt idx, std::vector &dst, const Grid &src) const { dst[idx] = src[idx]; } inline std::vector &getArg0() { return dst; } typedef std::vector type0; inline const Grid &getArg1() { return src; } typedef Grid type1; void runMessage() { debMsg("Executing kernel knCopyToVector ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, dst, src); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } std::vector &dst; const Grid &src; }; template struct knCopyToGrid : public KernelBase { knCopyToGrid(const std::vector &src, Grid &dst) : KernelBase(src.size()), src(src), dst(dst) { runMessage(); run(); } inline void op(IndexInt idx, const std::vector &src, Grid &dst) const { dst[idx] = src[idx]; } inline const std::vector &getArg0() { return src; } typedef std::vector type0; inline Grid &getArg1() { return dst; } typedef Grid type1; void runMessage() { debMsg("Executing kernel knCopyToGrid ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, src, dst); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } const std::vector &src; Grid &dst; }; template struct knAddAssign : public KernelBase { knAddAssign(std::vector &dst, const std::vector &src) : KernelBase(dst.size()), dst(dst), src(src) { runMessage(); run(); } inline void op(IndexInt idx, std::vector &dst, const std::vector &src) const { dst[idx] += src[idx]; } inline std::vector &getArg0() { return dst; } typedef std::vector type0; inline const std::vector &getArg1() { return src; } typedef std::vector type1; void runMessage() { debMsg("Executing kernel knAddAssign ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, dst, src); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } std::vector &dst; const std::vector &src; }; Real GridMg::doVCycle(Grid &dst, const Grid *src) { MG_TIMINGS(MuTime timeSmooth; MuTime timeCG; MuTime timeI; MuTime timeR; MuTime timeTotal; MuTime time;) MG_TIMINGS(timeSmooth.clear(); timeCG.clear(); timeI.clear(); timeR.clear();) assertMsg(mIsASet && mIsRhsSet, "GridMg::doVCycle Error: A and/or rhs have not been set."); const int maxLevel = int(mA.size()) - 1; if (src) { knCopyToVector(mx[0], *src); } else { knSet(mx[0], Real(0)); } for (int l = 0; l < maxLevel; l++) { MG_TIMINGS(time.update();) for (int i = 0; i < mNumPreSmooth; i++) { smoothGS(l, false); } MG_TIMINGS(timeSmooth += time.update();) calcResidual(l); restrict(l + 1, mr[l], mb[l + 1]); knSet(mx[l + 1], Real(0)); MG_TIMINGS(timeR += time.update();) } MG_TIMINGS(time.update();) solveCG(maxLevel); MG_TIMINGS(timeCG += time.update();) for (int l = maxLevel - 1; l >= 0; l--) { MG_TIMINGS(time.update();) interpolate(l, mx[l + 1], mr[l]); knAddAssign(mx[l], mr[l]); MG_TIMINGS(timeI += time.update();) for (int i = 0; i < mNumPostSmooth; i++) { smoothGS(l, true); } MG_TIMINGS(timeSmooth += time.update();) } calcResidual(0); Real res = calcResidualNorm(0); knCopyToGrid(mx[0], dst); MG_TIMINGS(debMsg("GridMg: Finished VCycle in " << timeTotal.update() << " (smoothing: " << timeSmooth << ", CG: " << timeCG << ", R: " << timeR << ", I: " << timeI << ")", 1);) return res; } struct knActivateCoarseVertices : public KernelBase { knActivateCoarseVertices(std::vector &type, int unused) : KernelBase(type.size()), type(type), unused(unused) { runMessage(); run(); } inline void op(IndexInt idx, std::vector &type, int unused) const { // set all remaining 'free' vertices to 'removed', if (type[idx] == GridMg::vtFree) type[idx] = GridMg::vtRemoved; // then convert 'zero' vertices to 'active' and 'removed' vertices to 'inactive' if (type[idx] == GridMg::vtZero) type[idx] = GridMg::vtActive; if (type[idx] == GridMg::vtRemoved) type[idx] = GridMg::vtInactive; } inline std::vector &getArg0() { return type; } typedef std::vector type0; inline int &getArg1() { return unused; } typedef int type1; void runMessage() { debMsg("Executing kernel knActivateCoarseVertices ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, type, unused); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } std::vector &type; int unused; }; // Determine active cells on coarse level l from active cells on fine level l-1 // while ensuring a full-rank interpolation operator (see Section 3.3 in [1]). void GridMg::genCoarseGrid(int l) { // AF_Free: unused/untouched vertices // AF_Zero: vertices selected for coarser level // AF_Removed: vertices removed from coarser level enum activeFlags : char { AF_Removed = 0, AF_Zero = 1, AF_Free = 2 }; // initialize all coarse vertices with 'free' knSet(mType[l], vtFree); // initialize min heap of (ID: fine grid vertex, key: #free interpolation vertices) pairs NKMinHeap heap(int(mb[l - 1].size()), mIs3D ? 9 : 5); // max 8 (or 4 in 2D) free interpolation vertices FOR_LVL(v, l - 1) { if (mType[l - 1][v] != vtInactive) { Vec3i V = vecIdx(v, l - 1); int fiv = 1 << ((V.x % 2) + (V.y % 2) + (V.z % 2)); heap.setKey(v, fiv); } } // process fine vertices in heap consecutively, always choosing the vertex with // the currently smallest number of free interpolation vertices while (heap.size() > 0) { int v = heap.popMin().first; Vec3i V = vecIdx(v, l - 1); // loop over associated interpolation vertices of V on coarse level l: // the first encountered 'free' vertex is set to 'zero', // all remaining 'free' vertices are set to 'removed'. bool vdone = false; FOR_VEC_MINMAX(I, V / 2, (V + 1) / 2) { int i = linIdx(I, l); if (mType[l][i] == vtFree) { if (vdone) { mType[l][i] = vtRemoved; } else { mType[l][i] = vtZero; vdone = true; } // update #free interpolation vertices in heap: // loop over all associated restriction vertices of I on fine level l-1 FOR_VEC_MINMAX(R, vmax(0, I * 2 - 1), vmin(mSize[l - 1] - 1, I * 2 + 1)) { int r = linIdx(R, l - 1); int key = heap.getKey(r); if (key > 1) { heap.setKey(r, key - 1); } // decrease key of r else if (key > -1) { heap.setKey(r, -1); } // removes r from heap } } } } knActivateCoarseVertices(mType[l], 0); } struct knGenCoarseGridOperator : public KernelBase { knGenCoarseGridOperator(std::vector &sizeRef, std::vector &A, int l, const GridMg &mg) : KernelBase(sizeRef.size()), sizeRef(sizeRef), A(A), l(l), mg(mg) { runMessage(); run(); } inline void op(IndexInt idx, std::vector &sizeRef, std::vector &A, int l, const GridMg &mg) const { if (mg.mType[l][idx] == GridMg::vtInactive) return; for (int i = 0; i < mg.mStencilSize; i++) { A[idx * mg.mStencilSize + i] = Real(0); } // clear stencil Vec3i V = mg.vecIdx(int(idx), l); // Calculate the stencil of A_l at V by considering all vertex paths of the form: // (V) <--restriction-- (U) <--A_{l-1}-- (W) <--interpolation-- (N) // V and N are vertices on the coarse grid level l, // U and W are vertices on the fine grid level l-1. if (l == 1) { // loop over precomputed paths for (auto it = mg.mCoarseningPaths0.begin(); it != mg.mCoarseningPaths0.end(); it++) { Vec3i N = V + it->N; int n = mg.linIdx(N, l); if (!mg.inGrid(N, l) || mg.mType[l][n] == GridMg::vtInactive) continue; Vec3i U = V * 2 + it->U; int u = mg.linIdx(U, l - 1); if (!mg.inGrid(U, l - 1) || mg.mType[l - 1][u] == GridMg::vtInactive) continue; Vec3i W = V * 2 + it->W; int w = mg.linIdx(W, l - 1); if (!mg.inGrid(W, l - 1) || mg.mType[l - 1][w] == GridMg::vtInactive) continue; if (it->inUStencil) { A[idx * mg.mStencilSize + it->sc] += it->rw * mg.mA[l - 1][u * mg.mStencilSize0 + it->sf] * it->iw; } else { A[idx * mg.mStencilSize + it->sc] += it->rw * mg.mA[l - 1][w * mg.mStencilSize0 + it->sf] * it->iw; } } } else { // l > 1: // loop over restriction vertices U on level l-1 associated with V FOR_VEC_MINMAX(U, vmax(0, V * 2 - 1), vmin(mg.mSize[l - 1] - 1, V * 2 + 1)) { int u = mg.linIdx(U, l - 1); if (mg.mType[l - 1][u] == GridMg::vtInactive) continue; // restriction weight Real rw = Real(1) / Real(1 << ((U.x % 2) + (U.y % 2) + (U.z % 2))); // loop over all stencil neighbors N of V on level l that can be reached via restriction to // U FOR_VEC_MINMAX(N, (U - 1) / 2, vmin(mg.mSize[l] - 1, (U + 2) / 2)) { int n = mg.linIdx(N, l); if (mg.mType[l][n] == GridMg::vtInactive) continue; // stencil entry at V associated to N (coarse grid level l) Vec3i SC = N - V + mg.mStencilMax; int sc = SC.x + 3 * SC.y + 9 * SC.z; if (sc < mg.mStencilSize - 1) continue; // loop over all vertices W which are in the stencil of A_{l-1} at U // and which interpolate from N FOR_VEC_MINMAX(W, vmax(0, vmax(U - 1, N * 2 - 1)), vmin(mg.mSize[l - 1] - 1, vmin(U + 1, N * 2 + 1))) { int w = mg.linIdx(W, l - 1); if (mg.mType[l - 1][w] == GridMg::vtInactive) continue; // stencil entry at U associated to W (fine grid level l-1) Vec3i SF = W - U + mg.mStencilMax; int sf = SF.x + 3 * SF.y + 9 * SF.z; Real iw = Real(1) / Real(1 << ((W.x % 2) + (W.y % 2) + (W.z % 2))); // interpolation weight if (sf < mg.mStencilSize) { A[idx * mg.mStencilSize + sc - mg.mStencilSize + 1] += rw * mg.mA[l - 1][w * mg.mStencilSize + mg.mStencilSize - 1 - sf] * iw; } else { A[idx * mg.mStencilSize + sc - mg.mStencilSize + 1] += rw * mg.mA[l - 1][u * mg.mStencilSize + sf - mg.mStencilSize + 1] * iw; } } } } } } inline std::vector &getArg0() { return sizeRef; } typedef std::vector type0; inline std::vector &getArg1() { return A; } typedef std::vector type1; inline int &getArg2() { return l; } typedef int type2; inline const GridMg &getArg3() { return mg; } typedef GridMg type3; void runMessage() { debMsg("Executing kernel knGenCoarseGridOperator ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, sizeRef, A, l, mg); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } std::vector &sizeRef; std::vector &A; int l; const GridMg &mg; }; // Calculate A_l on coarse level l from A_{l-1} on fine level l-1 using // Galerkin-based coarsening, i.e., compute A_l = R * A_{l-1} * I. void GridMg::genCoraseGridOperator(int l) { // for each coarse grid vertex V knGenCoarseGridOperator(mx[l], mA[l], l, *this); } struct knSmoothColor : public KernelBase { knSmoothColor(ThreadSize &numBlocks, std::vector &x, const Vec3i &blockSize, const std::vector &colorOffs, int l, const GridMg &mg) : KernelBase(numBlocks.size()), numBlocks(numBlocks), x(x), blockSize(blockSize), colorOffs(colorOffs), l(l), mg(mg) { runMessage(); run(); } inline void op(IndexInt idx, ThreadSize &numBlocks, std::vector &x, const Vec3i &blockSize, const std::vector &colorOffs, int l, const GridMg &mg) const { Vec3i blockOff(int(idx) % blockSize.x, (int(idx) % (blockSize.x * blockSize.y)) / blockSize.x, int(idx) / (blockSize.x * blockSize.y)); for (int off = 0; off < colorOffs.size(); off++) { Vec3i V = blockOff * 2 + colorOffs[off]; if (!mg.inGrid(V, l)) continue; const int v = mg.linIdx(V, l); if (mg.mType[l][v] == GridMg::vtInactive) continue; Real sum = mg.mb[l][v]; if (l == 0) { int n; for (int d = 0; d < mg.mDim; d++) { if (V[d] > 0) { n = v - mg.mPitch[0][d]; sum -= mg.mA[0][n * mg.mStencilSize0 + d + 1] * mg.mx[0][n]; } if (V[d] < mg.mSize[0][d] - 1) { n = v + mg.mPitch[0][d]; sum -= mg.mA[0][v * mg.mStencilSize0 + d + 1] * mg.mx[0][n]; } } x[v] = sum / mg.mA[0][v * mg.mStencilSize0 + 0]; } else { FOR_VECLIN_MINMAX(S, s, mg.mStencilMin, mg.mStencilMax) { if (s == mg.mStencilSize - 1) continue; Vec3i N = V + S; int n = mg.linIdx(N, l); if (mg.inGrid(N, l) && mg.mType[l][n] != GridMg::vtInactive) { if (s < mg.mStencilSize) { sum -= mg.mA[l][n * mg.mStencilSize + mg.mStencilSize - 1 - s] * mg.mx[l][n]; } else { sum -= mg.mA[l][v * mg.mStencilSize + s - mg.mStencilSize + 1] * mg.mx[l][n]; } } } x[v] = sum / mg.mA[l][v * mg.mStencilSize + 0]; } } } inline ThreadSize &getArg0() { return numBlocks; } typedef ThreadSize type0; inline std::vector &getArg1() { return x; } typedef std::vector type1; inline const Vec3i &getArg2() { return blockSize; } typedef Vec3i type2; inline const std::vector &getArg3() { return colorOffs; } typedef std::vector type3; inline int &getArg4() { return l; } typedef int type4; inline const GridMg &getArg5() { return mg; } typedef GridMg type5; void runMessage() { debMsg("Executing kernel knSmoothColor ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, numBlocks, x, blockSize, colorOffs, l, mg); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } ThreadSize &numBlocks; std::vector &x; const Vec3i &blockSize; const std::vector &colorOffs; int l; const GridMg &mg; }; void GridMg::smoothGS(int l, bool reversedOrder) { // Multicolor Gauss-Seidel with two colors for the 5/7-point stencil on level 0 // and with four/eight colors for the 9/27-point stencil on levels > 0 std::vector> colorOffs; const Vec3i a[8] = {Vec3i(0, 0, 0), Vec3i(1, 0, 0), Vec3i(0, 1, 0), Vec3i(1, 1, 0), Vec3i(0, 0, 1), Vec3i(1, 0, 1), Vec3i(0, 1, 1), Vec3i(1, 1, 1)}; if (mIs3D) { if (l == 0) colorOffs = {{a[0], a[3], a[5], a[6]}, {a[1], a[2], a[4], a[7]}}; else colorOffs = {{a[0]}, {a[1]}, {a[2]}, {a[3]}, {a[4]}, {a[5]}, {a[6]}, {a[7]}}; } else { if (l == 0) colorOffs = {{a[0], a[3]}, {a[1], a[2]}}; else colorOffs = {{a[0]}, {a[1]}, {a[2]}, {a[3]}}; } // Divide grid into 2x2 blocks for parallelization Vec3i blockSize = (mSize[l] + 1) / 2; ThreadSize numBlocks(blockSize.x * blockSize.y * blockSize.z); for (int c = 0; c < colorOffs.size(); c++) { int color = reversedOrder ? int(colorOffs.size()) - 1 - c : c; knSmoothColor(numBlocks, mx[l], blockSize, colorOffs[color], l, *this); } } struct knCalcResidual : public KernelBase { knCalcResidual(std::vector &r, int l, const GridMg &mg) : KernelBase(r.size()), r(r), l(l), mg(mg) { runMessage(); run(); } inline void op(IndexInt idx, std::vector &r, int l, const GridMg &mg) const { if (mg.mType[l][idx] == GridMg::vtInactive) return; Vec3i V = mg.vecIdx(int(idx), l); Real sum = mg.mb[l][idx]; if (l == 0) { int n; for (int d = 0; d < mg.mDim; d++) { if (V[d] > 0) { n = int(idx) - mg.mPitch[0][d]; sum -= mg.mA[0][n * mg.mStencilSize0 + d + 1] * mg.mx[0][n]; } if (V[d] < mg.mSize[0][d] - 1) { n = int(idx) + mg.mPitch[0][d]; sum -= mg.mA[0][idx * mg.mStencilSize0 + d + 1] * mg.mx[0][n]; } } sum -= mg.mA[0][idx * mg.mStencilSize0 + 0] * mg.mx[0][idx]; } else { FOR_VECLIN_MINMAX(S, s, mg.mStencilMin, mg.mStencilMax) { Vec3i N = V + S; int n = mg.linIdx(N, l); if (mg.inGrid(N, l) && mg.mType[l][n] != GridMg::vtInactive) { if (s < mg.mStencilSize) { sum -= mg.mA[l][n * mg.mStencilSize + mg.mStencilSize - 1 - s] * mg.mx[l][n]; } else { sum -= mg.mA[l][idx * mg.mStencilSize + s - mg.mStencilSize + 1] * mg.mx[l][n]; } } } } r[idx] = sum; } inline std::vector &getArg0() { return r; } typedef std::vector type0; inline int &getArg1() { return l; } typedef int type1; inline const GridMg &getArg2() { return mg; } typedef GridMg type2; void runMessage() { debMsg("Executing kernel knCalcResidual ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, r, l, mg); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } std::vector &r; int l; const GridMg &mg; }; void GridMg::calcResidual(int l) { knCalcResidual(mr[l], l, *this); } struct knResidualNormSumSqr : public KernelBase { knResidualNormSumSqr(const vector &r, int l, const GridMg &mg) : KernelBase(r.size()), r(r), l(l), mg(mg), result(Real(0)) { runMessage(); run(); } inline void op(IndexInt idx, const vector &r, int l, const GridMg &mg, Real &result) { if (mg.mType[l][idx] == GridMg::vtInactive) return; result += r[idx] * r[idx]; } inline operator Real() { return result; } inline Real &getRet() { return result; } inline const vector &getArg0() { return r; } typedef vector type0; inline int &getArg1() { return l; } typedef int type1; inline const GridMg &getArg2() { return mg; } typedef GridMg type2; void runMessage() { debMsg("Executing kernel knResidualNormSumSqr ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, r, l, mg, result); } void run() { tbb::parallel_reduce(tbb::blocked_range(0, size), *this); } knResidualNormSumSqr(knResidualNormSumSqr &o, tbb::split) : KernelBase(o), r(o.r), l(o.l), mg(o.mg), result(Real(0)) { } void join(const knResidualNormSumSqr &o) { result += o.result; } const vector &r; int l; const GridMg &mg; Real result; }; ; Real GridMg::calcResidualNorm(int l) { Real res = knResidualNormSumSqr(mr[l], l, *this); return std::sqrt(res); } // Standard conjugate gradients with Jacobi preconditioner // Notes: Always run at double precision. Not parallelized since // coarsest level is assumed to be small. void GridMg::solveCG(int l) { auto applyAStencil = [this](int v, int l, const std::vector &vec) -> double { Vec3i V = vecIdx(v, l); double sum = 0; if (l == 0) { int n; for (int d = 0; d < mDim; d++) { if (V[d] > 0) { n = v - mPitch[0][d]; sum += mA[0][n * mStencilSize0 + d + 1] * vec[n]; } if (V[d] < mSize[0][d] - 1) { n = v + mPitch[0][d]; sum += mA[0][v * mStencilSize0 + d + 1] * vec[n]; } } sum += mA[0][v * mStencilSize0 + 0] * vec[v]; } else { FOR_VECLIN_MINMAX(S, s, mStencilMin, mStencilMax) { Vec3i N = V + S; int n = linIdx(N, l); if (inGrid(N, l) && mType[l][n] != vtInactive) { if (s < mStencilSize) { sum += mA[l][n * mStencilSize + mStencilSize - 1 - s] * vec[n]; } else { sum += mA[l][v * mStencilSize + s - mStencilSize + 1] * vec[n]; } } } } return sum; }; std::vector &z = mCGtmp1[l]; std::vector &p = mCGtmp2[l]; std::vector &x = mCGtmp3[l]; std::vector &r = mCGtmp4[l]; // Initialization: double alphaTop = 0; double initialResidual = 0; FOR_LVL(v, l) { x[v] = mx[l][v]; } FOR_LVL(v, l) { if (mType[l][v] == vtInactive) continue; r[v] = mb[l][v] - applyAStencil(v, l, x); if (l == 0) { z[v] = r[v] / mA[0][v * mStencilSize0 + 0]; } else { z[v] = r[v] / mA[l][v * mStencilSize + 0]; } initialResidual += r[v] * r[v]; p[v] = z[v]; alphaTop += r[v] * z[v]; } initialResidual = std::sqrt(initialResidual); int iter = 0; const int maxIter = 10000; double residual = -1; // CG iterations for (; iter < maxIter && initialResidual > 1E-12; iter++) { double alphaBot = 0; FOR_LVL(v, l) { if (mType[l][v] == vtInactive) continue; z[v] = applyAStencil(v, l, p); alphaBot += p[v] * z[v]; } double alpha = alphaTop / alphaBot; double alphaTopNew = 0; residual = 0; FOR_LVL(v, l) { if (mType[l][v] == vtInactive) continue; x[v] += alpha * p[v]; r[v] -= alpha * z[v]; residual += r[v] * r[v]; if (l == 0) z[v] = r[v] / mA[0][v * mStencilSize0 + 0]; else z[v] = r[v] / mA[l][v * mStencilSize + 0]; alphaTopNew += r[v] * z[v]; } residual = std::sqrt(residual); if (residual / initialResidual < mCoarsestLevelAccuracy) break; double beta = alphaTopNew / alphaTop; alphaTop = alphaTopNew; FOR_LVL(v, l) { p[v] = z[v] + beta * p[v]; } debMsg("GridMg::solveCG i=" << iter << " rel-residual=" << (residual / initialResidual), 5); } FOR_LVL(v, l) { mx[l][v] = Real(x[v]); } if (iter == maxIter) { debMsg("GridMg::solveCG Warning: Reached maximum number of CG iterations", 1); } else { debMsg("GridMg::solveCG Info: Reached residual " << residual << " in " << iter << " iterations", 2); } } struct knRestrict : public KernelBase { knRestrict(std::vector &dst, const std::vector &src, int l_dst, const GridMg &mg) : KernelBase(dst.size()), dst(dst), src(src), l_dst(l_dst), mg(mg) { runMessage(); run(); } inline void op(IndexInt idx, std::vector &dst, const std::vector &src, int l_dst, const GridMg &mg) const { if (mg.mType[l_dst][idx] == GridMg::vtInactive) return; const int l_src = l_dst - 1; // Coarse grid vertex Vec3i V = mg.vecIdx(int(idx), l_dst); Real sum = Real(0); FOR_VEC_MINMAX(R, vmax(0, V * 2 - 1), vmin(mg.mSize[l_src] - 1, V * 2 + 1)) { int r = mg.linIdx(R, l_src); if (mg.mType[l_src][r] == GridMg::vtInactive) continue; // restriction weight Real rw = Real(1) / Real(1 << ((R.x % 2) + (R.y % 2) + (R.z % 2))); sum += rw * src[r]; } dst[idx] = sum; } inline std::vector &getArg0() { return dst; } typedef std::vector type0; inline const std::vector &getArg1() { return src; } typedef std::vector type1; inline int &getArg2() { return l_dst; } typedef int type2; inline const GridMg &getArg3() { return mg; } typedef GridMg type3; void runMessage() { debMsg("Executing kernel knRestrict ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, dst, src, l_dst, mg); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } std::vector &dst; const std::vector &src; int l_dst; const GridMg &mg; }; void GridMg::restrict(int l_dst, const std::vector &src, std::vector &dst) const { knRestrict(dst, src, l_dst, *this); } struct knInterpolate : public KernelBase { knInterpolate(std::vector &dst, const std::vector &src, int l_dst, const GridMg &mg) : KernelBase(dst.size()), dst(dst), src(src), l_dst(l_dst), mg(mg) { runMessage(); run(); } inline void op(IndexInt idx, std::vector &dst, const std::vector &src, int l_dst, const GridMg &mg) const { if (mg.mType[l_dst][idx] == GridMg::vtInactive) return; const int l_src = l_dst + 1; Vec3i V = mg.vecIdx(int(idx), l_dst); Real sum = Real(0); FOR_VEC_MINMAX(I, V / 2, (V + 1) / 2) { int i = mg.linIdx(I, l_src); if (mg.mType[l_src][i] != GridMg::vtInactive) sum += src[i]; } // interpolation weight Real iw = Real(1) / Real(1 << ((V.x % 2) + (V.y % 2) + (V.z % 2))); dst[idx] = iw * sum; } inline std::vector &getArg0() { return dst; } typedef std::vector type0; inline const std::vector &getArg1() { return src; } typedef std::vector type1; inline int &getArg2() { return l_dst; } typedef int type2; inline const GridMg &getArg3() { return mg; } typedef GridMg type3; void runMessage() { debMsg("Executing kernel knInterpolate ", 3); debMsg("Kernel range" << " size " << size << " ", 4); }; void operator()(const tbb::blocked_range &__r) const { for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) op(idx, dst, src, l_dst, mg); } void run() { tbb::parallel_for(tbb::blocked_range(0, size), *this); } std::vector &dst; const std::vector &src; int l_dst; const GridMg &mg; }; void GridMg::interpolate(int l_dst, const std::vector &src, std::vector &dst) const { knInterpolate(dst, src, l_dst, *this); } }; // namespace Manta