// DO NOT EDIT ! // This file is generated using the MantaFlow preprocessor (prep generate). /****************************************************************************** * * MantaFlow fluid solver framework * Copyright 2011 Tobias Pfaff, Nils Thuerey * * This program is free software, distributed under the terms of the * Apache License, Version 2.0 * http://www.apache.org/licenses/LICENSE-2.0 * * Conjugate gradient solver, for pressure and viscosity * ******************************************************************************/ #include "conjugategrad.h" #include "commonkernels.h" #include using namespace std::chrono; using namespace std; namespace Manta { const int CG_DEBUGLEVEL = 3; //***************************************************************************** // Precondition helpers //! Preconditioning a la Wavelet Turbulence (needs 4 add. grids) void InitPreconditionIncompCholesky(const FlagGrid &flags, Grid &A0, Grid &Ai, Grid &Aj, Grid &Ak, Grid &orgA0, Grid &orgAi, Grid &orgAj, Grid &orgAk) { // compute IC according to Golub and Van Loan A0.copyFrom(orgA0); Ai.copyFrom(orgAi); Aj.copyFrom(orgAj); Ak.copyFrom(orgAk); FOR_IJK(A0) { if (flags.isFluid(i, j, k)) { const IndexInt idx = A0.index(i, j, k); A0[idx] = sqrt(A0[idx]); // correct left and top stencil in other entries // for i = k+1:n // if (A(i,k) != 0) // A(i,k) = A(i,k) / A(k,k) Real invDiagonal = 1.0f / A0[idx]; Ai[idx] *= invDiagonal; Aj[idx] *= invDiagonal; Ak[idx] *= invDiagonal; // correct the right and bottom stencil in other entries // for j = k+1:n // for i = j:n // if (A(i,j) != 0) // A(i,j) = A(i,j) - A(i,k) * A(j,k) A0(i + 1, j, k) -= square(Ai[idx]); A0(i, j + 1, k) -= square(Aj[idx]); A0(i, j, k + 1) -= square(Ak[idx]); } } // invert A0 for faster computation later InvertCheckFluid(flags, A0); }; //! Preconditioning using modified IC ala Bridson (needs 1 add. grid) void InitPreconditionModifiedIncompCholesky2(const FlagGrid &flags, Grid &Aprecond, Grid &A0, Grid &Ai, Grid &Aj, Grid &Ak) { // compute IC according to Golub and Van Loan Aprecond.clear(); FOR_IJK(flags) { if (!flags.isFluid(i, j, k)) continue; const Real tau = 0.97; const Real sigma = 0.25; // compute modified incomplete cholesky Real e = 0.; e = A0(i, j, k) - square(Ai(i - 1, j, k) * Aprecond(i - 1, j, k)) - square(Aj(i, j - 1, k) * Aprecond(i, j - 1, k)) - square(Ak(i, j, k - 1) * Aprecond(i, j, k - 1)); e -= tau * (Ai(i - 1, j, k) * (Aj(i - 1, j, k) + Ak(i - 1, j, k)) * square(Aprecond(i - 1, j, k)) + Aj(i, j - 1, k) * (Ai(i, j - 1, k) + Ak(i, j - 1, k)) * square(Aprecond(i, j - 1, k)) + Ak(i, j, k - 1) * (Ai(i, j, k - 1) + Aj(i, j, k - 1)) * square(Aprecond(i, j, k - 1)) + 0.); // stability cutoff if (e < sigma * A0(i, j, k)) e = A0(i, j, k); Aprecond(i, j, k) = 1. / sqrt(e); } }; //! Preconditioning using multigrid ala Dick et al. void InitPreconditionMultigrid( GridMg *MG, Grid &A0, Grid &Ai, Grid &Aj, Grid &Ak, Real mAccuracy) { // build multigrid hierarchy if necessary if (!MG->isASet()) MG->setA(&A0, &Ai, &Aj, &Ak); MG->setCoarsestLevelAccuracy(mAccuracy * 1E-4); MG->setSmoothing(1, 1); }; //! Apply WT-style ICP void ApplyPreconditionIncompCholesky(Grid &dst, Grid &Var1, const FlagGrid &flags, Grid &A0, Grid &Ai, Grid &Aj, Grid &Ak, Grid &orgA0, Grid &orgAi, Grid &orgAj, Grid &orgAk) { // forward substitution FOR_IJK(dst) { if (!flags.isFluid(i, j, k)) continue; dst(i, j, k) = A0(i, j, k) * (Var1(i, j, k) - dst(i - 1, j, k) * Ai(i - 1, j, k) - dst(i, j - 1, k) * Aj(i, j - 1, k) - dst(i, j, k - 1) * Ak(i, j, k - 1)); } // backward substitution FOR_IJK_REVERSE(dst) { const IndexInt idx = A0.index(i, j, k); if (!flags.isFluid(idx)) continue; dst[idx] = A0[idx] * (dst[idx] - dst(i + 1, j, k) * Ai[idx] - dst(i, j + 1, k) * Aj[idx] - dst(i, j, k + 1) * Ak[idx]); } } //! Apply Bridson-style mICP void ApplyPreconditionModifiedIncompCholesky2(Grid &dst, Grid &Var1, const FlagGrid &flags, Grid &Aprecond, Grid &A0, Grid &Ai, Grid &Aj, Grid &Ak) { // forward substitution FOR_IJK(dst) { if (!flags.isFluid(i, j, k)) continue; const Real p = Aprecond(i, j, k); dst(i, j, k) = p * (Var1(i, j, k) - dst(i - 1, j, k) * Ai(i - 1, j, k) * Aprecond(i - 1, j, k) - dst(i, j - 1, k) * Aj(i, j - 1, k) * Aprecond(i, j - 1, k) - dst(i, j, k - 1) * Ak(i, j, k - 1) * Aprecond(i, j, k - 1)); } // backward substitution FOR_IJK_REVERSE(dst) { const IndexInt idx = A0.index(i, j, k); if (!flags.isFluid(idx)) continue; const Real p = Aprecond[idx]; dst[idx] = p * (dst[idx] - dst(i + 1, j, k) * Ai[idx] * p - dst(i, j + 1, k) * Aj[idx] * p - dst(i, j, k + 1) * Ak[idx] * p); } } //! Perform one Multigrid VCycle void ApplyPreconditionMultigrid(GridMg *pMG, Grid &dst, Grid &Var1) { // one VCycle on "A*dst = Var1" with initial guess dst=0 pMG->setRhs(Var1); pMG->doVCycle(dst); } //***************************************************************************** // Kernels //! Kernel: Compute the dot product between two Real grids /*! Uses double precision internally */ struct GridDotProduct : public KernelBase { GridDotProduct(const Grid &a, const Grid &b) : KernelBase(&a, 0), a(a), b(b), result(0.0) { runMessage(); run(); } inline void op(int i, int j, int k, const Grid &a, const Grid &b, double &result) { result += (a(i, j, k) * b(i, j, k)); } inline operator double() { return result; } inline double &getRet() { return result; } inline const Grid &getArg0() { return a; } typedef Grid type0; inline const Grid &getArg1() { return b; } typedef Grid type1; void runMessage(){}; void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { const Grid &a = getArg0(); const Grid &b = getArg1(); #pragma omp target teams distribute parallel for reduction(+:result) collapse(2) schedule(static,1) { for (int k = minZ; k < maxZ; k++) for (int j = 0; j < _maxY; j++) for (int i = 0; i < _maxX; i++) op(i, j, k, a, b, result); } { this->result = result; } } else { const int k = 0; const Grid &a = getArg0(); const Grid &b = getArg1(); #pragma omp target teams distribute parallel for reduction(+:result) collapse(1) schedule(static,1) { for (int j = 0; j < _maxY; j++) for (int i = 0; i < _maxX; i++) op(i, j, k, a, b, result); } { this->result = result; } } } const Grid &a; const Grid &b; double result; }; ; //! Kernel: compute residual (init) and add to sigma struct InitSigma : public KernelBase { InitSigma(const FlagGrid &flags, Grid &dst, Grid &rhs, Grid &temp) : KernelBase(&flags, 0), flags(flags), dst(dst), rhs(rhs), temp(temp), sigma(0) { runMessage(); run(); } inline void op(IndexInt idx, const FlagGrid &flags, Grid &dst, Grid &rhs, Grid &temp, double &sigma) { const double res = rhs[idx] - temp[idx]; dst[idx] = (Real)res; // only compute residual in fluid region if (flags.isFluid(idx)) sigma += res * res; } inline operator double() { return sigma; } inline double &getRet() { return sigma; } inline const FlagGrid &getArg0() { return flags; } typedef FlagGrid type0; inline Grid &getArg1() { return dst; } typedef Grid type1; inline Grid &getArg2() { return rhs; } typedef Grid type2; inline Grid &getArg3() { return temp; } typedef Grid type3; void runMessage(){}; void run() { const IndexInt _sz = size; #pragma omp parallel { double sigma = 0; #pragma omp for nowait for (IndexInt i = 0; i < _sz; i++) op(i, flags, dst, rhs, temp, sigma); #pragma omp critical { this->sigma += sigma; } } } const FlagGrid &flags; Grid &dst; Grid &rhs; Grid &temp; double sigma; }; ; //! Kernel: update search vector struct UpdateSearchVec : public KernelBase { UpdateSearchVec(Grid &dst, Grid &src, Real factor) : KernelBase(&dst, 0), dst(dst), src(src), factor(factor) { runMessage(); run(); } inline void op(int i, int j, int k, Grid &dst, Grid &src, Real factor) { const IndexInt idx = dst.index(i, j, k); dst[idx] = src[idx] + factor * dst[idx]; } inline Grid &getArg0() { return dst; } typedef Grid type0; inline Grid &getArg1() { return src; } typedef Grid type1; inline Real &getArg2() { return factor; } typedef Real type2; void runMessage(){}; void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { Grid &dst = getArg0(); Grid &src = getArg1(); Real &factor = getArg2(); #pragma omp target teams distribute parallel for collapse(3) schedule(static, 1) { for (int k = minZ; k < maxZ; k++) for (int j = 0; j < _maxY; j++) for (int i = 0; i < _maxX; i++) op(i, j, k, dst, src, factor); } } else { const int k = 0; Grid &dst = getArg0(); Grid &src = getArg1(); Real &factor = getArg2(); #pragma omp target teams distribute parallel for collapse(2) schedule(static, 1) { for (int j = 0; j < _maxY; j++) for (int i = 0; i < _maxX; i++) op(i, j, k, dst, src, factor); } } } Grid &dst; Grid &src; Real factor; }; //***************************************************************************** // CG class template GridCg::GridCg(Grid &dst, Grid &rhs, Grid &residual, Grid &search, const FlagGrid &flags, Grid &tmp, Grid *pA0, Grid *pAi, Grid *pAj, Grid *pAk) : GridCgInterface(), mInited(false), mIterations(0), mDst(dst), mRhs(rhs), mResidual(residual), mSearch(search), mFlags(flags), mTmp(tmp), mpA0(pA0), mpAi(pAi), mpAj(pAj), mpAk(pAk), mPcMethod(PC_None), mpPCA0(nullptr), mpPCAi(nullptr), mpPCAj(nullptr), mpPCAk(nullptr), mMG(nullptr), mSigma(0.), mAccuracy(VECTOR_EPSILON), mResNorm(1e20) { } template void GridCg::doInit() { mInited = true; mIterations = 0; mDst.clear(1); mResidual.copyFrom(mRhs, true, 1); // p=0, residual = b if (mPcMethod == PC_ICP) { // assertMsg(mDst.is3D(), "ICP only supports 3D grids so far"); InitPreconditionIncompCholesky( mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk); ApplyPreconditionIncompCholesky( mTmp, mResidual, mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk); } else if (mPcMethod == PC_mICP) { // assertMsg(mDst.is3D(), "mICP only supports 3D grids so far"); InitPreconditionModifiedIncompCholesky2(mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk); ApplyPreconditionModifiedIncompCholesky2( mTmp, mResidual, mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk); } else if (mPcMethod == PC_MGP) { InitPreconditionMultigrid(mMG, *mpA0, *mpAi, *mpAj, *mpAk, mAccuracy); ApplyPreconditionMultigrid(mMG, mTmp, mResidual); } else { mTmp.copyFrom(mResidual, true, 1); } mSearch.copyFrom(mTmp, true, 1); mSigma = GridDotProduct(mTmp, mResidual); } template bool GridCg::iterate(Real &time) { auto start = high_resolution_clock::now(); if (!mInited) doInit(); mIterations++; // create matrix application operator passed as template argument, // this could reinterpret the mpA pointers (not so clean right now) // tmp = applyMat(search) APPLYMAT(mFlags, mTmp, mSearch, *mpA0, *mpAi, *mpAj, *mpAk); auto stop = high_resolution_clock::now(); auto duration = duration_cast(stop - start); time += duration.count(); // std::cout << "APPLYMAT Time taken: " << duration.count() << std::endl; start = high_resolution_clock::now(); // alpha = sigma/dot(tmp, search) Real dp = GridDotProduct(mTmp, mSearch); Real alpha = 0.; if (fabs(dp) > 0.) alpha = mSigma / (Real)dp; stop = high_resolution_clock::now(); duration = duration_cast(stop - start); time += duration.count(); // std::cout << "GridDotProduct Time taken: " << duration.count() << std::endl; start = high_resolution_clock::now(); gridScaledAdd(mDst, mSearch, alpha); // dst += search * alpha gridScaledAdd(mResidual, mTmp, -alpha); // residual += tmp * -alpha stop = high_resolution_clock::now(); duration = duration_cast(stop - start); time += duration.count(); // std::cout << "gridScaledAdd Time taken: " << duration.count() << std::endl; start = high_resolution_clock::now(); if (mPcMethod == PC_ICP) ApplyPreconditionIncompCholesky( mTmp, mResidual, mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk); else if (mPcMethod == PC_mICP) ApplyPreconditionModifiedIncompCholesky2( mTmp, mResidual, mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk); else if (mPcMethod == PC_MGP) ApplyPreconditionMultigrid(mMG, mTmp, mResidual); else mTmp.copyFrom(mResidual, true, 1); stop = high_resolution_clock::now(); duration = duration_cast(stop - start); time += duration.count(); // std::cout << "copyFrom Time taken: " << duration.count() << std::endl; start = high_resolution_clock::now(); // use the l2 norm of the residual for convergence check? (usually max norm is recommended // instead) if (this->mUseL2Norm) { // std::cout << "USING L2" << std::endl; mResNorm = GridSumSqr(mResidual).sum; } else { // std::cout << "NOT USING L2" << std::endl; mResNorm = mResidual.getMaxAbs(); } // abort here to safe some work... if (mResNorm < mAccuracy) { mSigma = mResNorm; // this will be returned later on to the caller... return false; } stop = high_resolution_clock::now(); duration = duration_cast(stop - start); time += duration.count(); // std::cout << "GridSumSqr Time taken: " << duration.count() << std::endl; start = high_resolution_clock::now(); Real sigmaNew = GridDotProduct(mTmp, mResidual); Real beta = sigmaNew / mSigma; stop = high_resolution_clock::now(); duration = duration_cast(stop - start); time += duration.count(); // std::cout << "GridDotProduct Time taken: " << duration.count() << std::endl; start = high_resolution_clock::now(); // search = tmp + beta * search UpdateSearchVec(mSearch, mTmp, beta); stop = high_resolution_clock::now(); duration = duration_cast(stop - start); time += duration.count(); // std::cout << "UpdateSearchVec Time taken: " << duration.count() << std::endl; // debMsg("GridCg::iterate i="< 1e30, stopping."); } // debMsg("PB-CG-Norms::p"< void GridCg::solve(int maxIter) { Real time = 0; for (int iter = 0; iter < maxIter; iter++) { if (!iterate(time)) iter = maxIter; } return; } static bool gPrint2dWarning = true; template void GridCg::setICPreconditioner( PreconditionType method, Grid *A0, Grid *Ai, Grid *Aj, Grid *Ak) { // assertMsg(method==PC_ICP || method==PC_mICP, "GridCg::setICPreconditioner: Invalid // method specified."); mPcMethod = method; if ((!A0->is3D())) { if (gPrint2dWarning) { // debMsg("ICP/mICP pre-conditioning only supported in 3D for now, disabling it.", 1); gPrint2dWarning = false; } mPcMethod = PC_None; } mpPCA0 = A0; mpPCAi = Ai; mpPCAj = Aj; mpPCAk = Ak; } template void GridCg::setMGPreconditioner(PreconditionType method, GridMg *MG) { // assertMsg(method==PC_MGP, "GridCg::setMGPreconditioner: Invalid method specified."); mPcMethod = method; mMG = MG; } // explicit instantiation template class GridCg; template class GridCg; //***************************************************************************** // diffusion for real and vec grids, e.g. for viscosity //! do a CG solve for diffusion; note: diffusion coefficient alpha given in grid space, // rescale in python file for discretization independence (or physical correspondence) // see lidDrivenCavity.py for an example void cgSolveDiffusion(const FlagGrid &flags, GridBase &grid, Real alpha = 0.25, Real cgMaxIterFac = 1.0, Real cgAccuracy = 1e-4) { // reserve temp grids FluidSolver *parent = flags.getParent(); Grid rhs(parent); Grid residual(parent), search(parent), tmp(parent); Grid A0(parent), Ai(parent), Aj(parent), Ak(parent); // setup matrix and boundaries FlagGrid flagsDummy(parent); flagsDummy.setConst(FlagGrid::TypeFluid); MakeLaplaceMatrix(flagsDummy, A0, Ai, Aj, Ak); FOR_IJK(flags) { if (flags.isObstacle(i, j, k)) { Ai(i, j, k) = Aj(i, j, k) = Ak(i, j, k) = 0.0; A0(i, j, k) = 1.0; } else { Ai(i, j, k) *= alpha; Aj(i, j, k) *= alpha; Ak(i, j, k) *= alpha; A0(i, j, k) *= alpha; A0(i, j, k) += 1.; } } GridCgInterface *gcg = nullptr; // note , no preconditioning for now... const int maxIter = (int)(cgMaxIterFac * flags.getSize().max()) * (flags.is3D() ? 1 : 4); if (grid.getType() & GridBase::TypeReal) { Grid &u = ((Grid &)grid); rhs.copyFrom(u); if (flags.is3D()) gcg = new GridCg(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak); else gcg = new GridCg(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak); gcg->setAccuracy(cgAccuracy); gcg->solve(maxIter); // debMsg("FluidSolver::solveDiffusion iterations:"<getIterations()<<", // res:"<getSigma(), CG_DEBUGLEVEL); } else if ((grid.getType() & GridBase::TypeVec3) || (grid.getType() & GridBase::TypeMAC)) { Grid &vec = ((Grid &)grid); Grid u(parent); // core solve is same as for a regular real grid if (flags.is3D()) gcg = new GridCg(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak); else gcg = new GridCg(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak); gcg->setAccuracy(cgAccuracy); // diffuse every component separately for (int component = 0; component < (grid.is3D() ? 3 : 2); ++component) { getComponent(vec, u, component); gcg->forceReinit(); rhs.copyFrom(u); gcg->solve(maxIter); // debMsg("FluidSolver::solveDiffusion vec3, iterations:"<getIterations()<<", // res:"<getSigma(), CG_DEBUGLEVEL); setComponent(u, vec, component); } } else { // errMsg("cgSolveDiffusion: Grid Type is not supported (only Real, Vec3, MAC, or Levelset)"); } delete gcg; } static PyObject *_W_0(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { try { PbArgs _args(_linargs, _kwds); FluidSolver *parent = _args.obtainParent(); bool noTiming = _args.getOpt("notiming", -1, 0); pbPreparePlugin(parent, "cgSolveDiffusion", !noTiming); PyObject *_retval = nullptr; { ArgLocker _lock; const FlagGrid &flags = *_args.getPtr("flags", 0, &_lock); GridBase &grid = *_args.getPtr("grid", 1, &_lock); Real alpha = _args.getOpt("alpha", 2, 0.25, &_lock); Real cgMaxIterFac = _args.getOpt("cgMaxIterFac", 3, 1.0, &_lock); Real cgAccuracy = _args.getOpt("cgAccuracy", 4, 1e-4, &_lock); _retval = getPyNone(); cgSolveDiffusion(flags, grid, alpha, cgMaxIterFac, cgAccuracy); _args.check(); } pbFinalizePlugin(parent, "cgSolveDiffusion", !noTiming); return _retval; } catch (std::exception &e) { pbSetError("cgSolveDiffusion", e.what()); return 0; } } static const Pb::Register _RP_cgSolveDiffusion("", "cgSolveDiffusion", _W_0); extern "C" { void PbRegister_cgSolveDiffusion() { KEEP_UNUSED(_RP_cgSolveDiffusion); } } }; // namespace Manta