diff options
author | Sebastián Barschkis <sebbas@sebbas.org> | 2021-09-13 16:03:52 +0300 |
---|---|---|
committer | Sebastián Barschkis <sebbas@sebbas.org> | 2021-09-13 16:03:52 +0300 |
commit | 063ce7f550f1612ab0e34c4ecb4b57f8401b84b4 (patch) | |
tree | 53584b6c514510b0bab33a480b3ec85274b48a6b | |
parent | 4b06420e65040c642d2b0a7a1c9bf7515d3cec0c (diff) |
Fluid: Initial changes for OpenMP GPU supportfluid-mantaflow-gpu
Contains basic support for OpenMP GPU offloading.
That is, offloading of fluid KERNEL loops to the GPU.
This branch offloads pressure and advection calls only - the 2 most
expensive operation per step. In theory though, any function can be
offloaded.
For now, this branch needs to be build with a compiler that supports
Nvidia GPU offloading. Exact GPU models need to be specified via CMake.
65 files changed, 6167 insertions, 8354 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 47712f0ac1e..a257068801a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -747,7 +747,7 @@ set_and_warn_dependency(WITH_TBB WITH_CYCLES OFF) set_and_warn_dependency(WITH_TBB WITH_USD OFF) set_and_warn_dependency(WITH_TBB WITH_OPENIMAGEDENOISE OFF) set_and_warn_dependency(WITH_TBB WITH_OPENVDB OFF) -set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID OFF) +#set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID OFF) # NanoVDB requires OpenVDB to convert the data structure set_and_warn_dependency(WITH_OPENVDB WITH_NANOVDB OFF) @@ -1239,15 +1239,29 @@ endif() if(WITH_OPENMP) if(NOT OPENMP_CUSTOM) find_package(OpenMP) + + list(APPEND CMAKE_MODULE_PATH "${LLVM_LIBPATH}/cmake/openmp") + find_package(OpenMPTarget REQUIRED NVPTX) endif() if(OPENMP_FOUND) if(NOT WITH_OPENMP_STATIC) + message(STATUS "============ No Static OpenMP") + message(${OpenMPTarget_NVPTX_FLAGS}) + string(APPEND CMAKE_C_FLAGS " ${OpenMP_C_FLAGS}") string(APPEND CMAKE_CXX_FLAGS " ${OpenMP_CXX_FLAGS}") string(APPEND CMAKE_EXE_LINKER_FLAGS " ${OpenMP_LINKER_FLAGS}") string(APPEND CMAKE_MODULE_LINKER_FLAGS " ${OpenMP_LINKER_FLAGS}") + + string(APPEND CMAKE_C_FLAGS " -gline-tables-only -fopenmp-targets=nvptx64-nvidia-cuda") + string(APPEND CMAKE_CXX_FLAGS " -gline-tables-only -fopenmp-targets=nvptx64-nvidia-cuda") + string(APPEND CMAKE_EXE_LINKER_FLAGS " -fopenmp-targets=nvptx64-nvidia-cuda") + string(APPEND CMAKE_MODULE_LINKER_FLAGS " -fopenmp-targets=nvptx64-nvidia-cuda") + + set(PLATFORM_LINKLIBS "${PLATFORM_LINKLIBS};-fopenmp=libomp;-fopenmp-targets=nvptx64-nvidia-cuda") else() + message(STATUS "============= Static OpenMP") # Typically avoid adding flags as defines but we can't # pass OpenMP flags to the linker for static builds, meaning # we can't add any OpenMP related flags to CFLAGS variables diff --git a/build_files/build_environment/cmake/llvm.cmake b/build_files/build_environment/cmake/llvm.cmake index 7a8ce2ddfec..7f54e83c92e 100644 --- a/build_files/build_environment/cmake/llvm.cmake +++ b/build_files/build_environment/cmake/llvm.cmake @@ -30,6 +30,10 @@ if(APPLE) set(BUILD_CLANG_TOOLS ON) endif() +if(UNIX AND NOT APPLE) + set(LLVM_BUILD_OPENMP ^^openmp) + set(LLVM_TARGETS ${LLVM_TARGETS} ^^NVPTX) +endif() set(LLVM_EXTRA_ARGS -DLLVM_USE_CRT_RELEASE=MD @@ -40,10 +44,18 @@ set(LLVM_EXTRA_ARGS -DLLVM_ENABLE_TERMINFO=OFF -DLLVM_BUILD_LLVM_C_DYLIB=OFF -DLLVM_ENABLE_UNWIND_TABLES=OFF - -DLLVM_ENABLE_PROJECTS=clang${LLVM_BUILD_CLANG_TOOLS_EXTRA} + -DLLVM_ENABLE_PROJECTS=clang${LLVM_BUILD_CLANG_TOOLS_EXTRA}${LLVM_BUILD_OPENMP} ${LLVM_XML2_ARGS} ) +if(UNIX AND NOT APPLE) + list(APPEND LLVM_EXTRA_ARGS + -DCLANG_OPENMP_NVPTX_DEFAULT_ARCH=sm_61 + -DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES=61 + ) +endif() + + if(WIN32) set(LLVM_GENERATOR "Ninja") else() diff --git a/build_files/cmake/platform/platform_unix.cmake b/build_files/cmake/platform/platform_unix.cmake index fc0c37e4c8b..065c0b48416 100644 --- a/build_files/cmake/platform/platform_unix.cmake +++ b/build_files/cmake/platform/platform_unix.cmake @@ -63,7 +63,8 @@ if(EXISTS ${LIBDIR}) # OpenMP usually can't be statically linked into shared libraries, # due to not being compiled with position independent code. if(NOT WITH_PYTHON_MODULE) - set(WITH_OPENMP_STATIC ON) + #message(STATUS "============= ENABLING Static OpenMP") + #set(WITH_OPENMP_STATIC ON) endif() set(Boost_NO_BOOST_CMAKE ON) set(BOOST_ROOT ${LIBDIR}/boost) diff --git a/extern/curve_fit_nd/intern/curve_fit_corners_detect.c b/extern/curve_fit_nd/intern/curve_fit_corners_detect.c index 415ef40fdd5..b600a41f9cd 100644 --- a/extern/curve_fit_nd/intern/curve_fit_corners_detect.c +++ b/extern/curve_fit_nd/intern/curve_fit_corners_detect.c @@ -81,7 +81,7 @@ static double cos_vnvnvn( normalize_vn_vnvn(dvec1, v1, v2, dims); double d = dot_vnvn(dvec0, dvec1, dims); /* sanity check */ - d = max(-1.0, min(1.0, d)); + d = maxV(-1.0, minV(1.0, d)); return d; } diff --git a/extern/curve_fit_nd/intern/curve_fit_cubic.c b/extern/curve_fit_nd/intern/curve_fit_cubic.c index 47c5344c821..65042d7dbef 100644 --- a/extern/curve_fit_nd/intern/curve_fit_cubic.c +++ b/extern/curve_fit_nd/intern/curve_fit_cubic.c @@ -29,9 +29,9 @@ * \ingroup curve_fit */ -#ifdef _MSC_VER +//#ifdef _MSC_VER # define _USE_MATH_DEFINES -#endif +//#endif #include <math.h> #include <float.h> @@ -456,7 +456,7 @@ static double points_calc_circumference_factor( const double len_tangent = dot < 0.0 ? len_vnvn(tan_l, tan_r, dims) : len_negated_vnvn(tan_l, tan_r, dims); if (len_tangent > DBL_EPSILON) { /* only clamp to avoid precision error */ - double angle = acos(max(-fabs(dot), -1.0)); + double angle = acos(maxV(-fabs(dot), -1.0)); /* Angle may be less than the length when the tangents define >180 degrees of the circle, * (tangents that point away from each other). * We could try support this but will likely cause extreme >1 scales which could cause other issues. */ @@ -607,7 +607,7 @@ static void cubic_from_points_offset_fallback( for (uint k = 0; k < 2; k++) { sub_vn_vnvn(tmp, p0, pt, dims); project_vn_vnvn_normalized(tmp, tmp, a[k], dims); - dists[k] = max(dists[k], dot_vnvn(tmp, a[k], dims)); + dists[k] = maxV(dists[k], dot_vnvn(tmp, a[k], dims)); } } @@ -796,7 +796,7 @@ static void cubic_from_points( dist_sq_test += sq((pt[j] - center[j]) * clamp_scale); } #endif - dist_sq_max = max(dist_sq_max, dist_sq_test); + dist_sq_max = maxV(dist_sq_max, dist_sq_test); } } diff --git a/extern/curve_fit_nd/intern/curve_fit_inline.h b/extern/curve_fit_nd/intern/curve_fit_inline.h index f9eaa4c647c..6b47d3c12b0 100644 --- a/extern/curve_fit_nd/intern/curve_fit_inline.h +++ b/extern/curve_fit_nd/intern/curve_fit_inline.h @@ -45,12 +45,12 @@ MINLINE double sq(const double d) } #ifndef _MSC_VER -MINLINE double min(const double a, const double b) +MINLINE double minV(const double a, const double b) { return b < a ? b : a; } -MINLINE double max(const double a, const double b) +MINLINE double maxV(const double a, const double b) { return a < b ? b : a; } diff --git a/extern/draco/draco/src/draco/animation/keyframe_animation.cc b/extern/draco/draco/src/draco/animation/keyframe_animation.cc index eaf94a3305d..e602140ea0f 100644 --- a/extern/draco/draco/src/draco/animation/keyframe_animation.cc +++ b/extern/draco/draco/src/draco/animation/keyframe_animation.cc @@ -13,6 +13,7 @@ // limitations under the License. // #include "draco/animation/keyframe_animation.h" +#include <iostream> namespace draco { diff --git a/extern/mantaflow/CMakeLists.txt b/extern/mantaflow/CMakeLists.txt index 9b047eb1a3e..cdb3afcff4a 100644 --- a/extern/mantaflow/CMakeLists.txt +++ b/extern/mantaflow/CMakeLists.txt @@ -55,26 +55,30 @@ if(NOT WITH_MANTA_DEPENDENCIES) add_definitions(-DNO_CNPY=1) endif() -set(MANTA_HLP - helper -) -set(MANTA_PP - preprocessed -) if(WITH_MANTA_DEPENDENCIES) set(MANTA_DEP dependencies ) endif() -if(WITH_TBB) - add_definitions(-DTBB=1) +if(WITH_OPENMP) + add_definitions(-DOPENMP=1) + + if(WITH_OPENMP_STATIC) + list(APPEND LIB + ${OpenMP_LIBRARIES} + ) + endif() +endif() + +if(WITH_OPENMP) + add_definitions(-DOPENMP_OFFLOAD=1) endif() if(WITH_OPENVDB) add_definitions(-DOPENVDB=1) # OpenVDB headers use deprecated TBB headers, silence warning. - add_definitions(-DTBB_SUPPRESS_DEPRECATED_MESSAGES=1) + #add_definitions(-DTBB_SUPPRESS_DEPRECATED_MESSAGES=1) endif() if(WITH_OPENVDB_BLOSC) @@ -90,12 +94,12 @@ if(WITH_MANTA_NUMPY AND WITH_PYTHON_NUMPY) endif() set(INC - ${MANTA_PP} - ${MANTA_PP}/fileio - ${MANTA_PP}/python - ${MANTA_PP}/plugin - ${MANTA_HLP}/pwrapper - ${MANTA_HLP}/util + preprocessed + preprocessed/fileio + preprocessed/python + preprocessed/plugin + helper/pwrapper + helper/util ) if(WITH_MANTA_DEPENDENCIES) @@ -115,15 +119,6 @@ if(WITH_MANTA_NUMPY AND WITH_PYTHON_NUMPY) ) endif() -if(WITH_TBB) - list(APPEND INC_SYS - ${TBB_INCLUDE_DIRS} - ) - list(APPEND LIB - ${TBB_LIBRARIES} - ) -endif() - if(WITH_OPENVDB) list(APPEND INC_SYS ${OPENVDB_INCLUDE_DIRS} @@ -142,120 +137,120 @@ if(WITH_OPENVDB) endif() set(SRC - ${MANTA_PP}/commonkernels.h - ${MANTA_PP}/commonkernels.h.reg.cpp - ${MANTA_PP}/conjugategrad.cpp - ${MANTA_PP}/conjugategrad.h - ${MANTA_PP}/conjugategrad.h.reg.cpp - ${MANTA_PP}/edgecollapse.cpp - ${MANTA_PP}/edgecollapse.h - ${MANTA_PP}/edgecollapse.h.reg.cpp - ${MANTA_PP}/fastmarch.cpp - ${MANTA_PP}/fastmarch.h - ${MANTA_PP}/fastmarch.h.reg.cpp - ${MANTA_PP}/fileio/iogrids.cpp - ${MANTA_PP}/fileio/iomeshes.cpp - ${MANTA_PP}/fileio/ioparticles.cpp - ${MANTA_PP}/fileio/ioutil.cpp - ${MANTA_PP}/fileio/iovdb.cpp - ${MANTA_PP}/fileio/mantaio.cpp - ${MANTA_PP}/fileio/mantaio.h - ${MANTA_PP}/fileio/mantaio.h.reg.cpp - ${MANTA_PP}/fluidsolver.cpp - ${MANTA_PP}/fluidsolver.h - ${MANTA_PP}/fluidsolver.h.reg.cpp - ${MANTA_PP}/general.cpp - ${MANTA_PP}/general.h - ${MANTA_PP}/general.h.reg.cpp - ${MANTA_PP}/gitinfo.h - ${MANTA_PP}/grid.cpp - ${MANTA_PP}/grid.h - ${MANTA_PP}/grid.h.reg.cpp - ${MANTA_PP}/grid4d.cpp - ${MANTA_PP}/grid4d.h - ${MANTA_PP}/grid4d.h.reg.cpp - ${MANTA_PP}/kernel.cpp - ${MANTA_PP}/kernel.h - ${MANTA_PP}/kernel.h.reg.cpp - ${MANTA_PP}/levelset.cpp - ${MANTA_PP}/levelset.h - ${MANTA_PP}/levelset.h.reg.cpp - ${MANTA_PP}/mesh.cpp - ${MANTA_PP}/mesh.h - ${MANTA_PP}/mesh.h.reg.cpp - ${MANTA_PP}/movingobs.cpp - ${MANTA_PP}/movingobs.h - ${MANTA_PP}/movingobs.h.reg.cpp - ${MANTA_PP}/multigrid.cpp - ${MANTA_PP}/multigrid.h - ${MANTA_PP}/multigrid.h.reg.cpp - ${MANTA_PP}/noisefield.cpp - ${MANTA_PP}/noisefield.h - ${MANTA_PP}/noisefield.h.reg.cpp - ${MANTA_PP}/particle.cpp - ${MANTA_PP}/particle.h - ${MANTA_PP}/particle.h.reg.cpp - ${MANTA_PP}/plugin/advection.cpp - ${MANTA_PP}/plugin/apic.cpp - ${MANTA_PP}/plugin/extforces.cpp - ${MANTA_PP}/plugin/fire.cpp - ${MANTA_PP}/plugin/flip.cpp - ${MANTA_PP}/plugin/fluidguiding.cpp - ${MANTA_PP}/plugin/initplugins.cpp - ${MANTA_PP}/plugin/kepsilon.cpp - ${MANTA_PP}/plugin/meshplugins.cpp - ${MANTA_PP}/plugin/pressure.cpp - ${MANTA_PP}/plugin/ptsplugins.cpp - ${MANTA_PP}/plugin/secondaryparticles.cpp - ${MANTA_PP}/plugin/surfaceturbulence.cpp - ${MANTA_PP}/plugin/viscosity.cpp - ${MANTA_PP}/plugin/vortexplugins.cpp - ${MANTA_PP}/plugin/waveletturbulence.cpp - ${MANTA_PP}/plugin/waves.cpp - ${MANTA_PP}/python/defines.py - ${MANTA_PP}/python/defines.py.reg.cpp - ${MANTA_PP}/registration.cpp - ${MANTA_PP}/shapes.cpp - ${MANTA_PP}/shapes.h - ${MANTA_PP}/shapes.h.reg.cpp - ${MANTA_PP}/test.cpp - ${MANTA_PP}/timing.cpp - ${MANTA_PP}/timing.h - ${MANTA_PP}/timing.h.reg.cpp - ${MANTA_PP}/turbulencepart.cpp - ${MANTA_PP}/turbulencepart.h - ${MANTA_PP}/turbulencepart.h.reg.cpp - ${MANTA_PP}/vortexpart.cpp - ${MANTA_PP}/vortexpart.h - ${MANTA_PP}/vortexpart.h.reg.cpp - ${MANTA_PP}/vortexsheet.cpp - ${MANTA_PP}/vortexsheet.h - ${MANTA_PP}/vortexsheet.h.reg.cpp + preprocessed/commonkernels.h + preprocessed/commonkernels.h.reg.cpp + preprocessed/conjugategrad.cpp + preprocessed/conjugategrad.h + preprocessed/conjugategrad.h.reg.cpp + preprocessed/edgecollapse.cpp + preprocessed/edgecollapse.h + preprocessed/edgecollapse.h.reg.cpp + preprocessed/fastmarch.cpp + preprocessed/fastmarch.h + preprocessed/fastmarch.h.reg.cpp + preprocessed/fileio/iogrids.cpp + preprocessed/fileio/iomeshes.cpp + preprocessed/fileio/ioparticles.cpp + preprocessed/fileio/ioutil.cpp + preprocessed/fileio/iovdb.cpp + preprocessed/fileio/mantaio.cpp + preprocessed/fileio/mantaio.h + preprocessed/fileio/mantaio.h.reg.cpp + preprocessed/fluidsolver.cpp + preprocessed/fluidsolver.h + preprocessed/fluidsolver.h.reg.cpp + preprocessed/general.cpp + preprocessed/general.h + preprocessed/general.h.reg.cpp + preprocessed/gitinfo.h + preprocessed/grid.cpp + preprocessed/grid.h + preprocessed/grid.h.reg.cpp + preprocessed/grid4d.cpp + preprocessed/grid4d.h + preprocessed/grid4d.h.reg.cpp + preprocessed/kernel.cpp + preprocessed/kernel.h + preprocessed/kernel.h.reg.cpp + preprocessed/levelset.cpp + preprocessed/levelset.h + preprocessed/levelset.h.reg.cpp + preprocessed/mesh.cpp + preprocessed/mesh.h + preprocessed/mesh.h.reg.cpp + preprocessed/movingobs.cpp + preprocessed/movingobs.h + preprocessed/movingobs.h.reg.cpp + preprocessed/multigrid.cpp + preprocessed/multigrid.h + preprocessed/multigrid.h.reg.cpp + preprocessed/noisefield.cpp + preprocessed/noisefield.h + preprocessed/noisefield.h.reg.cpp + preprocessed/particle.cpp + preprocessed/particle.h + preprocessed/particle.h.reg.cpp + preprocessed/plugin/advection.cpp + preprocessed/plugin/apic.cpp + preprocessed/plugin/extforces.cpp + preprocessed/plugin/fire.cpp + preprocessed/plugin/flip.cpp + preprocessed/plugin/fluidguiding.cpp + preprocessed/plugin/initplugins.cpp + preprocessed/plugin/kepsilon.cpp + preprocessed/plugin/meshplugins.cpp + preprocessed/plugin/pressure.cpp + preprocessed/plugin/ptsplugins.cpp + preprocessed/plugin/secondaryparticles.cpp + preprocessed/plugin/surfaceturbulence.cpp +# preprocessed/plugin/viscosity.cpp + preprocessed/plugin/vortexplugins.cpp + preprocessed/plugin/waveletturbulence.cpp + preprocessed/plugin/waves.cpp + preprocessed/python/defines.py + preprocessed/python/defines.py.reg.cpp + preprocessed/registration.cpp + preprocessed/shapes.cpp + preprocessed/shapes.h + preprocessed/shapes.h.reg.cpp + preprocessed/test.cpp + preprocessed/timing.cpp + preprocessed/timing.h + preprocessed/timing.h.reg.cpp + preprocessed/turbulencepart.cpp + preprocessed/turbulencepart.h + preprocessed/turbulencepart.h.reg.cpp + preprocessed/vortexpart.cpp + preprocessed/vortexpart.h + preprocessed/vortexpart.h.reg.cpp + preprocessed/vortexsheet.cpp + preprocessed/vortexsheet.h + preprocessed/vortexsheet.h.reg.cpp - ${MANTA_HLP}/pwrapper/manta.h - ${MANTA_HLP}/pwrapper/pclass.cpp - ${MANTA_HLP}/pwrapper/pclass.h - ${MANTA_HLP}/pwrapper/pconvert.cpp - ${MANTA_HLP}/pwrapper/pconvert.h - ${MANTA_HLP}/pwrapper/pvec3.cpp - ${MANTA_HLP}/pwrapper/pythonInclude.h - ${MANTA_HLP}/pwrapper/registry.cpp - ${MANTA_HLP}/pwrapper/registry.h - ${MANTA_HLP}/util/integrator.h - ${MANTA_HLP}/util/interpol.h - ${MANTA_HLP}/util/interpolHigh.h - ${MANTA_HLP}/util/matrixbase.h - ${MANTA_HLP}/util/mcubes.h - ${MANTA_HLP}/util/quaternion.h - ${MANTA_HLP}/util/randomstream.h - ${MANTA_HLP}/util/rcmatrix.h - ${MANTA_HLP}/util/simpleimage.cpp - ${MANTA_HLP}/util/simpleimage.h - ${MANTA_HLP}/util/solvana.h - ${MANTA_HLP}/util/vector4d.cpp - ${MANTA_HLP}/util/vector4d.h - ${MANTA_HLP}/util/vectorbase.cpp - ${MANTA_HLP}/util/vectorbase.h + helper/pwrapper/manta.h + helper/pwrapper/pclass.cpp + helper/pwrapper/pclass.h + helper/pwrapper/pconvert.cpp + helper/pwrapper/pconvert.h + helper/pwrapper/pvec3.cpp + helper/pwrapper/pythonInclude.h + helper/pwrapper/registry.cpp + helper/pwrapper/registry.h + helper/util/integrator.h + helper/util/interpol.h + helper/util/interpolHigh.h + helper/util/matrixbase.h + helper/util/mcubes.h + helper/util/quaternion.h + helper/util/randomstream.h + helper/util/rcmatrix.h + helper/util/simpleimage.cpp + helper/util/simpleimage.h + helper/util/solvana.h + helper/util/vector4d.cpp + helper/util/vector4d.h + helper/util/vectorbase.cpp + helper/util/vectorbase.h ) if(WITH_MANTA_DEPENDENCIES) @@ -266,16 +261,34 @@ if(WITH_MANTA_DEPENDENCIES) endif() if(WITH_MANTA_NUMPY AND WITH_PYTHON_NUMPY) list(APPEND SRC - ${MANTA_PP}/plugin/numpyconvert.cpp - ${MANTA_PP}/plugin/tfplugins.cpp - ${MANTA_HLP}/pwrapper/numpyWrap.cpp - ${MANTA_HLP}/pwrapper/numpyWrap.h + preprocessed/plugin/numpyconvert.cpp + preprocessed/plugin/tfplugins.cpp + helper/pwrapper/numpyWrap.cpp + helper/pwrapper/numpyWrap.h ) endif() set(LIB ${PYTHON_LINKFLAGS} ${PYTHON_LIBRARIES} + ${OPENVDB_LIBRARIES} ) -blender_add_lib(extern_mantaflow "${SRC}" "${INC}" "${INC_SYS}" "${LIB}") +#blender_add_lib(extern_mantaflow "${SRC}" "${INC}" "${INC_SYS}" "${LIB}") + +add_library(extern_mantaflow SHARED "${SRC}") + +include_directories(preprocessed) +include_directories(preprocessed/fileio) +include_directories(preprocessed/python) +include_directories(preprocessed/plugin) +include_directories(helper/pwrapper) +include_directories(helper/util) +include_directories(${PYTHON_INCLUDE_DIRS}) +include_directories(${ZLIB_INCLUDE_DIRS}) +include_directories(${OPENVDB_INCLUDE_DIRS}) + +#target_include_directories(extern_mantaflow PRIVATE "${INC}") +target_link_libraries(extern_mantaflow PRIVATE "${LIB}") + +blender_source_group(extern_mantaflow "${SRC}") diff --git a/extern/mantaflow/UPDATE.sh b/extern/mantaflow/UPDATE.sh index 1158ff13455..83895d8c84d 100644 --- a/extern/mantaflow/UPDATE.sh +++ b/extern/mantaflow/UPDATE.sh @@ -7,8 +7,11 @@ # ==================== 1) ENVIRONMENT SETUP ============================================= # YOUR INSTALLATION PATHS GO HERE: -MANTA_INSTALLATION=/Users/sebbas/Developer/Mantaflow/mantaflowDevelop -BLENDER_INSTALLATION=/Users/sebbas/Developer/Blender +MANTA_INSTALLATION=/home/sebbas/Developer/Mantaflow +BLENDER_INSTALLATION=/home/sebbas/Developer/Blender + +CC=/home/sebbas/Developer/LLVM-Project/install/bin/clang +CXX=/home/sebbas/Developer/LLVM-Project/install/bin/clang++ # Try to check out Mantaflow repository before building? CLEAN_REPOSITORY=0 @@ -20,8 +23,13 @@ WITH_DEPENDENCIES=0 USE_NUMPY=0 # Choose which multithreading platform to use for Mantaflow preprocessing -USE_OMP=0 -USE_TBB=1 +USE_OMP=1 +USE_TBB=0 + +# Use OpenMP offloading too? +if [[ "$USE_OMP" -eq "1" ]]; then + USE_OMP_OFFLOAD=1 +fi if [[ "$USE_OMP" -eq "1" && "$USE_TBB" -eq "1" ]]; then echo "Cannot build Mantaflow for OpenMP and TBB at the same time" @@ -56,7 +64,7 @@ fi MANTA_BUILD_PATH=$MANTA_INSTALLATION/build_blender/ mkdir -p $MANTA_BUILD_PATH cd $MANTA_BUILD_PATH -cmake ../mantaflowgit -DGUI=0 -DOPENMP=$USE_OMP -DTBB=$USE_TBB -DBLENDER=1 -DPREPDEBUG=1 -DNUMPY=$USE_NUMPY && make -j8 +cmake ../mantaflowgit -DGUI=0 -DOPENMP=$USE_OMP -DTBB=$USE_TBB -DOPENMP_OFFLOAD=$USE_OMP_OFFLOAD -DBLENDER=1 -DPREPDEBUG=1 -DNUMPY=$USE_NUMPY -DPYTHON_VERSION=3 -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX && make -j8 # ==================== 3) COPY MANTAFLOW FILES TO BLENDER ROOT =========================== diff --git a/extern/mantaflow/helper/util/rcmatrix.h b/extern/mantaflow/helper/util/rcmatrix.h index 330fd1f64f7..f1f0efe6416 100644 --- a/extern/mantaflow/helper/util/rcmatrix.h +++ b/extern/mantaflow/helper/util/rcmatrix.h @@ -1035,7 +1035,7 @@ template<class N, class T> struct RCFixedMatrix { typedef RCMatrix<int, Real> Matrix; typedef RCFixedMatrix<int, Real> FixedMatrix; -} +} // namespace Manta #undef parallel_for #undef parallel_end diff --git a/extern/mantaflow/preprocessed/commonkernels.h b/extern/mantaflow/preprocessed/commonkernels.h index 7fa6f185146..7ac13fbdfc4 100644 --- a/extern/mantaflow/preprocessed/commonkernels.h +++ b/extern/mantaflow/preprocessed/commonkernels.h @@ -34,7 +34,7 @@ struct InvertCheckFluid : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, const FlagGrid &flags, Grid<Real> &grid) const + inline void op(IndexInt idx, const FlagGrid &flags, Grid<Real> &grid) { if (flags.isFluid(idx) && grid[idx] > 0) grid[idx] = 1.0 / grid[idx]; @@ -49,21 +49,17 @@ struct InvertCheckFluid : public KernelBase { return grid; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel InvertCheckFluid ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, flags, grid); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, flags, grid); + } } const FlagGrid &flags; Grid<Real> &grid; @@ -77,9 +73,9 @@ struct GridSumSqr : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, const Grid<Real> &grid, double &sum) + inline void op(int i, int j, int k, const Grid<Real> &grid, double &sum) { - sum += square((double)grid[idx]); + sum += square((double)grid(i, j, k)); } inline operator double() { @@ -94,28 +90,37 @@ struct GridSumSqr : public KernelBase { return grid; } typedef Grid<Real> type0; - void runMessage() - { - debMsg("Executing kernel GridSumSqr ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, grid, sum); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - GridSumSqr(GridSumSqr &o, tbb::split) : KernelBase(o), grid(o.grid), sum(0) - { - } - void join(const GridSumSqr &o) - { - sum += o.sum; + const int _maxX = maxX; + const int _maxY = maxY; + if (maxZ > 1) { + const Grid<Real> &grid = getArg0(); +#pragma omp target teams distribute parallel for reduction(+ : sum) collapse(2) schedule(static, 1) + { + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, sum); + } + { + this->sum = sum; + } + } + else { + const int k = 0; + const Grid<Real> &grid = getArg0(); +#pragma omp target teams distribute parallel for reduction(+ : sum) collapse(1) schedule(static, 1) + { + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, sum); + } + { + this->sum = sum; + } + } } const Grid<Real> &grid; double sum; @@ -129,7 +134,7 @@ struct CurlOp : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const Grid<Vec3> &grid, Grid<Vec3> &dst) const + inline void op(int i, int j, int k, const Grid<Vec3> &grid, Grid<Vec3> &dst) { Vec3 v = Vec3(0., 0., @@ -153,37 +158,35 @@ struct CurlOp : public KernelBase { return dst; } typedef Grid<Vec3> type1; - void runMessage() - { - debMsg("Executing kernel CurlOp ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, grid, dst); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, grid, dst); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, grid, dst); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, grid, dst); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const Grid<Vec3> &grid; Grid<Vec3> &dst; }; @@ -197,7 +200,7 @@ struct DivergenceOpMAC : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Real> &div, const MACGrid &grid) const + inline void op(int i, int j, int k, Grid<Real> &div, const MACGrid &grid) { Vec3 del = Vec3(grid(i + 1, j, k).x, grid(i, j + 1, k).y, 0.) - grid(i, j, k); if (grid.is3D()) @@ -216,37 +219,35 @@ struct DivergenceOpMAC : public KernelBase { return grid; } typedef MACGrid type1; - void runMessage() - { - debMsg("Executing kernel DivergenceOpMAC ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, div, grid); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, div, grid); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, div, grid); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, div, grid); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<Real> ÷ const MACGrid &grid; }; @@ -259,7 +260,7 @@ struct GradientOpMAC : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, MACGrid &gradient, const Grid<Real> &grid) const + inline void op(int i, int j, int k, MACGrid &gradient, const Grid<Real> &grid) { Vec3 grad = (Vec3(grid(i, j, k)) - Vec3(grid(i - 1, j, k), grid(i, j - 1, k), 0.)); if (grid.is3D()) @@ -278,37 +279,35 @@ struct GradientOpMAC : public KernelBase { return grid; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel GradientOpMAC ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, gradient, grid); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, gradient, grid); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, gradient, grid); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, gradient, grid); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } MACGrid &gradient; const Grid<Real> &grid; }; @@ -321,7 +320,7 @@ struct GradientOp : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Vec3> &gradient, const Grid<Real> &grid) const + inline void op(int i, int j, int k, Grid<Vec3> &gradient, const Grid<Real> &grid) { Vec3 grad = 0.5 * Vec3(grid(i + 1, j, k) - grid(i - 1, j, k), grid(i, j + 1, k) - grid(i, j - 1, k), @@ -340,37 +339,35 @@ struct GradientOp : public KernelBase { return grid; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel GradientOp ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, gradient, grid); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, gradient, grid); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, gradient, grid); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, gradient, grid); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<Vec3> &gradient; const Grid<Real> &grid; }; @@ -383,7 +380,7 @@ struct LaplaceOp : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Real> &laplace, const Grid<Real> &grid) const + inline void op(int i, int j, int k, Grid<Real> &laplace, const Grid<Real> &grid) { laplace(i, j, k) = grid(i + 1, j, k) - 2.0 * grid(i, j, k) + grid(i - 1, j, k); laplace(i, j, k) += grid(i, j + 1, k) - 2.0 * grid(i, j, k) + grid(i, j - 1, k); @@ -401,37 +398,35 @@ struct LaplaceOp : public KernelBase { return grid; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel LaplaceOp ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, laplace, grid); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, laplace, grid); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, laplace, grid); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, laplace, grid); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<Real> &laplace; const Grid<Real> &grid; }; @@ -444,7 +439,7 @@ struct CurvatureOp : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Real> &curv, const Grid<Real> &grid, const Real h) const + inline void op(int i, int j, int k, Grid<Real> &curv, const Grid<Real> &grid, const Real h) { const Real over_h = 1.0 / h; const Real x = 0.5 * (grid(i + 1, j, k) - grid(i - 1, j, k)) * over_h; @@ -492,37 +487,35 @@ struct CurvatureOp : public KernelBase { return h; } typedef Real type2; - void runMessage() - { - debMsg("Executing kernel CurvatureOp ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, curv, grid, h); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, curv, grid, h); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, curv, grid, h); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, curv, grid, h); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<Real> &curv; const Grid<Real> &grid; const Real h; @@ -536,7 +529,7 @@ struct GetShiftedComponent : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const Grid<Vec3> &grid, Grid<Real> &comp, int dim) const + inline void op(int i, int j, int k, const Grid<Vec3> &grid, Grid<Real> &comp, int dim) { Vec3i ishift(i, j, k); ishift[dim]--; @@ -557,37 +550,35 @@ struct GetShiftedComponent : public KernelBase { return dim; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel GetShiftedComponent ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, grid, comp, dim); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, grid, comp, dim); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, grid, comp, dim); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, grid, comp, dim); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const Grid<Vec3> &grid; Grid<Real> ∁ int dim; @@ -602,7 +593,7 @@ struct GetComponent : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, const Grid<Vec3> &grid, Grid<Real> &comp, int dim) const + inline void op(IndexInt idx, const Grid<Vec3> &grid, Grid<Real> &comp, int dim) { comp[idx] = grid[idx][dim]; } @@ -621,21 +612,17 @@ struct GetComponent : public KernelBase { return dim; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel GetComponent ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, grid, comp, dim); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, grid, comp, dim); + } } const Grid<Vec3> &grid; Grid<Real> ∁ @@ -650,7 +637,7 @@ struct GridNorm : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<Real> &n, const Grid<Vec3> &grid) const + inline void op(IndexInt idx, Grid<Real> &n, const Grid<Vec3> &grid) { n[idx] = norm(grid[idx]); } @@ -664,21 +651,17 @@ struct GridNorm : public KernelBase { return grid; } typedef Grid<Vec3> type1; - void runMessage() - { - debMsg("Executing kernel GridNorm ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, n, grid); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, n, grid); + } } Grid<Real> &n; const Grid<Vec3> &grid; @@ -693,7 +676,7 @@ struct SetComponent : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<Vec3> &grid, const Grid<Real> &comp, int dim) const + inline void op(IndexInt idx, Grid<Vec3> &grid, const Grid<Real> &comp, int dim) { grid[idx][dim] = comp[idx]; } @@ -712,21 +695,17 @@ struct SetComponent : public KernelBase { return dim; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel SetComponent ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, grid, comp, dim); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, grid, comp, dim); + } } Grid<Vec3> &grid; const Grid<Real> ∁ @@ -742,7 +721,7 @@ struct GetCentered : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Vec3> ¢er, const MACGrid &vel) const + inline void op(int i, int j, int k, Grid<Vec3> ¢er, const MACGrid &vel) { Vec3 v = 0.5 * (vel(i, j, k) + Vec3(vel(i + 1, j, k).x, vel(i, j + 1, k).y, 0.)); if (vel.is3D()) @@ -761,37 +740,35 @@ struct GetCentered : public KernelBase { return vel; } typedef MACGrid type1; - void runMessage() - { - debMsg("Executing kernel GetCentered ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, center, vel); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, center, vel); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, center, vel); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, center, vel); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<Vec3> ¢er; const MACGrid &vel; }; @@ -804,7 +781,7 @@ struct GetMAC : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, MACGrid &vel, const Grid<Vec3> ¢er) const + inline void op(int i, int j, int k, MACGrid &vel, const Grid<Vec3> ¢er) { Vec3 v = 0.5 * (center(i, j, k) + Vec3(center(i - 1, j, k).x, center(i, j - 1, k).y, 0.)); if (vel.is3D()) @@ -823,37 +800,35 @@ struct GetMAC : public KernelBase { return center; } typedef Grid<Vec3> type1; - void runMessage() - { - debMsg("Executing kernel GetMAC ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, center); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, center); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, center); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, center); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } MACGrid &vel; const Grid<Vec3> ¢er; }; @@ -866,7 +841,7 @@ struct FillInBoundary : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Vec3> &grid, int g) const + inline void op(int i, int j, int k, Grid<Vec3> &grid, int g) { if (i == 0) grid(i, j, k) = grid(i + 1, j, k); @@ -891,37 +866,35 @@ struct FillInBoundary : public KernelBase { return g; } typedef int type1; - void runMessage() - { - debMsg("Executing kernel FillInBoundary ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, g); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, g); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, g); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, g); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<Vec3> &grid; int g; }; @@ -939,7 +912,7 @@ struct kn_conv_mex_in_to_MAC : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const double *p_lin_array, MACGrid *p_result) const + inline void op(int i, int j, int k, const double *p_lin_array, MACGrid *p_result) { int ijk = i + j * p_result->getSizeX() + k * p_result->getSizeX() * p_result->getSizeY(); const int n = p_result->getSizeX() * p_result->getSizeY() * p_result->getSizeZ(); @@ -958,37 +931,35 @@ struct kn_conv_mex_in_to_MAC : public KernelBase { return p_result; } typedef MACGrid type1; - void runMessage() - { - debMsg("Executing kernel kn_conv_mex_in_to_MAC ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_lin_array, p_result); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_lin_array, p_result); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_lin_array, p_result); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_lin_array, p_result); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const double *p_lin_array; MACGrid *p_result; }; @@ -1000,7 +971,7 @@ struct kn_conv_MAC_to_mex_out : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const MACGrid *p_mac, double *p_result) const + inline void op(int i, int j, int k, const MACGrid *p_mac, double *p_result) { int ijk = i + j * p_mac->getSizeX() + k * p_mac->getSizeX() * p_mac->getSizeY(); const int n = p_mac->getSizeX() * p_mac->getSizeY() * p_mac->getSizeZ(); @@ -1019,37 +990,35 @@ struct kn_conv_MAC_to_mex_out : public KernelBase { return p_result; } typedef double type1; - void runMessage() - { - debMsg("Executing kernel kn_conv_MAC_to_mex_out ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_mac, p_result); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_mac, p_result); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_mac, p_result); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_mac, p_result); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const MACGrid *p_mac; double *p_result; }; @@ -1063,7 +1032,7 @@ struct kn_conv_mex_in_to_Vec3 : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const double *p_lin_array, Grid<Vec3> *p_result) const + inline void op(int i, int j, int k, const double *p_lin_array, Grid<Vec3> *p_result) { int ijk = i + j * p_result->getSizeX() + k * p_result->getSizeX() * p_result->getSizeY(); const int n = p_result->getSizeX() * p_result->getSizeY() * p_result->getSizeZ(); @@ -1082,37 +1051,35 @@ struct kn_conv_mex_in_to_Vec3 : public KernelBase { return p_result; } typedef Grid<Vec3> type1; - void runMessage() - { - debMsg("Executing kernel kn_conv_mex_in_to_Vec3 ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_lin_array, p_result); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_lin_array, p_result); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_lin_array, p_result); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_lin_array, p_result); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const double *p_lin_array; Grid<Vec3> *p_result; }; @@ -1124,7 +1091,7 @@ struct kn_conv_Vec3_to_mex_out : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const Grid<Vec3> *p_Vec3, double *p_result) const + inline void op(int i, int j, int k, const Grid<Vec3> *p_Vec3, double *p_result) { int ijk = i + j * p_Vec3->getSizeX() + k * p_Vec3->getSizeX() * p_Vec3->getSizeY(); const int n = p_Vec3->getSizeX() * p_Vec3->getSizeY() * p_Vec3->getSizeZ(); @@ -1143,37 +1110,35 @@ struct kn_conv_Vec3_to_mex_out : public KernelBase { return p_result; } typedef double type1; - void runMessage() - { - debMsg("Executing kernel kn_conv_Vec3_to_mex_out ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_Vec3, p_result); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_Vec3, p_result); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_Vec3, p_result); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_Vec3, p_result); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const Grid<Vec3> *p_Vec3; double *p_result; }; @@ -1187,7 +1152,7 @@ struct kn_conv_mex_in_to_Real : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const double *p_lin_array, Grid<Real> *p_result) const + inline void op(int i, int j, int k, const double *p_lin_array, Grid<Real> *p_result) { int ijk = i + j * p_result->getSizeX() + k * p_result->getSizeX() * p_result->getSizeY(); @@ -1203,37 +1168,35 @@ struct kn_conv_mex_in_to_Real : public KernelBase { return p_result; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel kn_conv_mex_in_to_Real ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_lin_array, p_result); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_lin_array, p_result); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_lin_array, p_result); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_lin_array, p_result); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const double *p_lin_array; Grid<Real> *p_result; }; @@ -1245,7 +1208,7 @@ struct kn_conv_Real_to_mex_out : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const Grid<Real> *p_grid, double *p_result) const + inline void op(int i, int j, int k, const Grid<Real> *p_grid, double *p_result) { int ijk = i + j * p_grid->getSizeX() + k * p_grid->getSizeX() * p_grid->getSizeY(); @@ -1261,37 +1224,35 @@ struct kn_conv_Real_to_mex_out : public KernelBase { return p_result; } typedef double type1; - void runMessage() - { - debMsg("Executing kernel kn_conv_Real_to_mex_out ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_grid, p_result); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_grid, p_result); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, p_grid, p_result); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, p_grid, p_result); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const Grid<Real> *p_grid; double *p_result; }; diff --git a/extern/mantaflow/preprocessed/conjugategrad.cpp b/extern/mantaflow/preprocessed/conjugategrad.cpp index bdcceb29520..df184f654b6 100644 --- a/extern/mantaflow/preprocessed/conjugategrad.cpp +++ b/extern/mantaflow/preprocessed/conjugategrad.cpp @@ -18,6 +18,8 @@ #include "conjugategrad.h" #include "commonkernels.h" +#include <chrono> +using namespace std::chrono; using namespace std; namespace Manta { @@ -213,9 +215,9 @@ struct GridDotProduct : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, const Grid<Real> &a, const Grid<Real> &b, double &result) + inline void op(int i, int j, int k, const Grid<Real> &a, const Grid<Real> &b, double &result) { - result += (a[idx] * b[idx]); + result += (a(i, j, k) * b(i, j, k)); } inline operator double() { @@ -235,28 +237,39 @@ struct GridDotProduct : public KernelBase { return b; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel GridDotProduct ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, a, b, result); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - GridDotProduct(GridDotProduct &o, tbb::split) : KernelBase(o), a(o.a), b(o.b), result(0.0) - { - } - void join(const GridDotProduct &o) - { - result += o.result; + const int _maxX = maxX; + const int _maxY = maxY; + if (maxZ > 1) { + const Grid<Real> &a = getArg0(); + const Grid<Real> &b = getArg1(); +#pragma omp target teams distribute parallel for reduction(+:result) collapse(2) schedule(static,1) + { + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, a, b, result); + } + { + this->result = result; + } + } + else { + const int k = 0; + const Grid<Real> &a = getArg0(); + const Grid<Real> &b = getArg1(); +#pragma omp target teams distribute parallel for reduction(+:result) collapse(1) schedule(static,1) + { + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, a, b, result); + } + { + this->result = result; + } + } } const Grid<Real> &a; const Grid<Real> &b; @@ -315,29 +328,21 @@ struct InitSigma : public KernelBase { return temp; } typedef Grid<Real> type3; - void runMessage() - { - debMsg("Executing kernel InitSigma ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, flags, dst, rhs, temp, sigma); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - InitSigma(InitSigma &o, tbb::split) - : KernelBase(o), flags(o.flags), dst(o.dst), rhs(o.rhs), temp(o.temp), sigma(0) - { - } - void join(const InitSigma &o) - { - sigma += o.sigma; + const IndexInt _sz = size; +#pragma omp parallel + { + double sigma = 0; +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, flags, dst, rhs, temp, sigma); +#pragma omp critical + { + this->sigma += sigma; + } + } } const FlagGrid &flags; Grid<Real> &dst; @@ -356,8 +361,9 @@ struct UpdateSearchVec : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<Real> &dst, Grid<Real> &src, Real factor) const + inline void op(int i, int j, int k, Grid<Real> &dst, Grid<Real> &src, Real factor) { + const IndexInt idx = dst.index(i, j, k); dst[idx] = src[idx] + factor * dst[idx]; } inline Grid<Real> &getArg0() @@ -375,21 +381,35 @@ struct UpdateSearchVec : public KernelBase { return factor; } typedef Real type2; - void runMessage() - { - debMsg("Executing kernel UpdateSearchVec ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, dst, src, factor); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const int _maxX = maxX; + const int _maxY = maxY; + if (maxZ > 1) { + Grid<Real> &dst = getArg0(); + Grid<Real> &src = getArg1(); + Real &factor = getArg2(); +#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1) + { + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, dst, src, factor); + } + } + else { + const int k = 0; + Grid<Real> &dst = getArg0(); + Grid<Real> &src = getArg1(); + Real &factor = getArg2(); +#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1) + { + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, dst, src, factor); + } + } } Grid<Real> &dst; Grid<Real> &src; @@ -406,8 +426,10 @@ GridCg<APPLYMAT>::GridCg(Grid<Real> &dst, Grid<Real> &search, const FlagGrid &flags, Grid<Real> &tmp, - std::vector<Grid<Real> *> matrixAVec, - std::vector<Grid<Real> *> rhsVec) + Grid<Real> *pA0, + Grid<Real> *pAi, + Grid<Real> *pAj, + Grid<Real> *pAk) : GridCgInterface(), mInited(false), mIterations(0), @@ -417,8 +439,10 @@ GridCg<APPLYMAT>::GridCg(Grid<Real> &dst, mSearch(search), mFlags(flags), mTmp(tmp), - mMatrixA(matrixAVec), - mVecRhs(rhsVec), + mpA0(pA0), + mpAi(pAi), + mpAj(pAj), + mpAk(pAk), mPcMethod(PC_None), mpPCA0(nullptr), mpPCAi(nullptr), @@ -436,54 +460,37 @@ template<class APPLYMAT> void GridCg<APPLYMAT>::doInit() mInited = true; mIterations = 0; - mDst.clear(); - mResidual.copyFrom(mRhs); // p=0, residual = b + mDst.clear(1); + mResidual.copyFrom(mRhs, true, 1); // p=0, residual = b if (mPcMethod == PC_ICP) { - assertMsg(mDst.is3D(), "ICP only supports 3D grids so far"); - InitPreconditionIncompCholesky(mFlags, - *mpPCA0, - *mpPCAi, - *mpPCAj, - *mpPCAk, - *mMatrixA[0], - *mMatrixA[1], - *mMatrixA[2], - *mMatrixA[3]); - ApplyPreconditionIncompCholesky(mTmp, - mResidual, - mFlags, - *mpPCA0, - *mpPCAi, - *mpPCAj, - *mpPCAk, - *mMatrixA[0], - *mMatrixA[1], - *mMatrixA[2], - *mMatrixA[3]); + // assertMsg(mDst.is3D(), "ICP only supports 3D grids so far"); + InitPreconditionIncompCholesky( + mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk); + ApplyPreconditionIncompCholesky( + mTmp, mResidual, mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk); } else if (mPcMethod == PC_mICP) { - assertMsg(mDst.is3D(), "mICP only supports 3D grids so far"); - InitPreconditionModifiedIncompCholesky2( - mFlags, *mpPCA0, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3]); + // assertMsg(mDst.is3D(), "mICP only supports 3D grids so far"); + InitPreconditionModifiedIncompCholesky2(mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk); ApplyPreconditionModifiedIncompCholesky2( - mTmp, mResidual, mFlags, *mpPCA0, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3]); + mTmp, mResidual, mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk); } else if (mPcMethod == PC_MGP) { - InitPreconditionMultigrid( - mMG, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3], mAccuracy); + InitPreconditionMultigrid(mMG, *mpA0, *mpAi, *mpAj, *mpAk, mAccuracy); ApplyPreconditionMultigrid(mMG, mTmp, mResidual); } else { - mTmp.copyFrom(mResidual); + mTmp.copyFrom(mResidual, true, 1); } - mSearch.copyFrom(mTmp); + mSearch.copyFrom(mTmp, true, 1); mSigma = GridDotProduct(mTmp, mResidual); } -template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate() +template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate(Real &time) { + auto start = high_resolution_clock::now(); if (!mInited) doInit(); @@ -493,7 +500,14 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate() // this could reinterpret the mpA pointers (not so clean right now) // tmp = applyMat(search) - APPLYMAT(mFlags, mTmp, mSearch, mMatrixA, mVecRhs); + APPLYMAT(mFlags, mTmp, mSearch, *mpA0, *mpAi, *mpAj, *mpAk); + + auto stop = high_resolution_clock::now(); + auto duration = duration_cast<microseconds>(stop - start); + time += duration.count(); + // std::cout << "APPLYMAT Time taken: " << duration.count() << std::endl; + + start = high_resolution_clock::now(); // alpha = sigma/dot(tmp, search) Real dp = GridDotProduct(mTmp, mSearch); @@ -501,35 +515,49 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate() if (fabs(dp) > 0.) alpha = mSigma / (Real)dp; + stop = high_resolution_clock::now(); + duration = duration_cast<microseconds>(stop - start); + time += duration.count(); + // std::cout << "GridDotProduct Time taken: " << duration.count() << std::endl; + + start = high_resolution_clock::now(); + gridScaledAdd<Real, Real>(mDst, mSearch, alpha); // dst += search * alpha gridScaledAdd<Real, Real>(mResidual, mTmp, -alpha); // residual += tmp * -alpha + stop = high_resolution_clock::now(); + duration = duration_cast<microseconds>(stop - start); + time += duration.count(); + // std::cout << "gridScaledAdd Time taken: " << duration.count() << std::endl; + + start = high_resolution_clock::now(); + if (mPcMethod == PC_ICP) - ApplyPreconditionIncompCholesky(mTmp, - mResidual, - mFlags, - *mpPCA0, - *mpPCAi, - *mpPCAj, - *mpPCAk, - *mMatrixA[0], - *mMatrixA[1], - *mMatrixA[2], - *mMatrixA[3]); + ApplyPreconditionIncompCholesky( + mTmp, mResidual, mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk); else if (mPcMethod == PC_mICP) ApplyPreconditionModifiedIncompCholesky2( - mTmp, mResidual, mFlags, *mpPCA0, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3]); + mTmp, mResidual, mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk); else if (mPcMethod == PC_MGP) ApplyPreconditionMultigrid(mMG, mTmp, mResidual); else - mTmp.copyFrom(mResidual); + mTmp.copyFrom(mResidual, true, 1); + + stop = high_resolution_clock::now(); + duration = duration_cast<microseconds>(stop - start); + time += duration.count(); + // std::cout << "copyFrom Time taken: " << duration.count() << std::endl; + + start = high_resolution_clock::now(); // use the l2 norm of the residual for convergence check? (usually max norm is recommended // instead) if (this->mUseL2Norm) { + // std::cout << "USING L2" << std::endl; mResNorm = GridSumSqr(mResidual).sum; } else { + // std::cout << "NOT USING L2" << std::endl; mResNorm = mResidual.getMaxAbs(); } @@ -539,27 +567,43 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate() return false; } + stop = high_resolution_clock::now(); + duration = duration_cast<microseconds>(stop - start); + time += duration.count(); + // std::cout << "GridSumSqr Time taken: " << duration.count() << std::endl; + + start = high_resolution_clock::now(); + Real sigmaNew = GridDotProduct(mTmp, mResidual); Real beta = sigmaNew / mSigma; + stop = high_resolution_clock::now(); + duration = duration_cast<microseconds>(stop - start); + time += duration.count(); + // std::cout << "GridDotProduct Time taken: " << duration.count() << std::endl; + + start = high_resolution_clock::now(); + // search = tmp + beta * search UpdateSearchVec(mSearch, mTmp, beta); - debMsg("GridCg::iterate i=" << mIterations << " sigmaNew=" << sigmaNew << " sigmaLast=" << mSigma - << " alpha=" << alpha << " beta=" << beta << " ", - CG_DEBUGLEVEL); + stop = high_resolution_clock::now(); + duration = duration_cast<microseconds>(stop - start); + time += duration.count(); + // std::cout << "UpdateSearchVec Time taken: " << duration.count() << std::endl; + + // debMsg("GridCg::iterate i="<<mIterations<<" sigmaNew="<<sigmaNew<<" sigmaLast="<<mSigma<<" + // alpha="<<alpha<<" beta="<<beta<<" ", CG_DEBUGLEVEL); mSigma = sigmaNew; if (!(mResNorm < 1e35)) { if (mPcMethod == PC_MGP) { // diverging solves can be caused by the static multigrid mode, we cannot detect this here, // though only the pressure solve call "knows" whether the MG is static or dynamics... - debMsg( - "GridCg::iterate: Warning - this diverging solve can be caused by the 'static' mode of " - "the MG preconditioner. If the static mode is active, try switching to dynamic.", - 1); + // debMsg("GridCg::iterate: Warning - this diverging solve can be caused by the 'static' mode + // of the MG preconditioner. If the static mode is active, try switching to dynamic.", 1); } - errMsg("GridCg::iterate: The CG solver diverged, residual norm > 1e30, stopping."); + // errMsg("GridCg::iterate: The CG solver diverged, residual norm > 1e30, stopping."); } // debMsg("PB-CG-Norms::p"<<sqrt( GridOpNormNosqrt(mpDst, mpFlags).getValue() ) <<" @@ -571,8 +615,9 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate() template<class APPLYMAT> void GridCg<APPLYMAT>::solve(int maxIter) { + Real time = 0; for (int iter = 0; iter < maxIter; iter++) { - if (!iterate()) + if (!iterate(time)) iter = maxIter; } return; @@ -583,13 +628,13 @@ template<class APPLYMAT> void GridCg<APPLYMAT>::setICPreconditioner( PreconditionType method, Grid<Real> *A0, Grid<Real> *Ai, Grid<Real> *Aj, Grid<Real> *Ak) { - assertMsg(method == PC_ICP || method == PC_mICP, - "GridCg<APPLYMAT>::setICPreconditioner: Invalid method specified."); + // assertMsg(method==PC_ICP || method==PC_mICP, "GridCg<APPLYMAT>::setICPreconditioner: Invalid + // method specified."); mPcMethod = method; if ((!A0->is3D())) { if (gPrint2dWarning) { - debMsg("ICP/mICP pre-conditioning only supported in 3D for now, disabling it.", 1); + // debMsg("ICP/mICP pre-conditioning only supported in 3D for now, disabling it.", 1); gPrint2dWarning = false; } mPcMethod = PC_None; @@ -603,7 +648,7 @@ void GridCg<APPLYMAT>::setICPreconditioner( template<class APPLYMAT> void GridCg<APPLYMAT>::setMGPreconditioner(PreconditionType method, GridMg *MG) { - assertMsg(method == PC_MGP, "GridCg<APPLYMAT>::setMGPreconditioner: Invalid method specified."); + // assertMsg(method==PC_MGP, "GridCg<APPLYMAT>::setMGPreconditioner: Invalid method specified."); mPcMethod = method; mMG = MG; @@ -612,9 +657,6 @@ void GridCg<APPLYMAT>::setMGPreconditioner(PreconditionType method, GridMg *MG) // explicit instantiation template class GridCg<ApplyMatrix>; template class GridCg<ApplyMatrix2D>; -template class GridCg<ApplyMatrixViscosityU>; -template class GridCg<ApplyMatrixViscosityV>; -template class GridCg<ApplyMatrixViscosityW>; //***************************************************************************** // diffusion for real and vec grids, e.g. for viscosity @@ -655,44 +697,33 @@ void cgSolveDiffusion(const FlagGrid &flags, } } - GridCgInterface *gcg; + GridCgInterface *gcg = nullptr; // note , no preconditioning for now... const int maxIter = (int)(cgMaxIterFac * flags.getSize().max()) * (flags.is3D() ? 1 : 4); if (grid.getType() & GridBase::TypeReal) { Grid<Real> &u = ((Grid<Real> &)grid); rhs.copyFrom(u); - vector<Grid<Real> *> matA{&A0, &Ai, &Aj}; - - if (flags.is3D()) { - matA.push_back(&Ak); - gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, matA); - } - else { - gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, matA); - } + if (flags.is3D()) + gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak); + else + gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak); gcg->setAccuracy(cgAccuracy); gcg->solve(maxIter); - debMsg("FluidSolver::solveDiffusion iterations:" << gcg->getIterations() - << ", res:" << gcg->getSigma(), - CG_DEBUGLEVEL); + // debMsg("FluidSolver::solveDiffusion iterations:"<<gcg->getIterations()<<", + // res:"<<gcg->getSigma(), CG_DEBUGLEVEL); } else if ((grid.getType() & GridBase::TypeVec3) || (grid.getType() & GridBase::TypeMAC)) { Grid<Vec3> &vec = ((Grid<Vec3> &)grid); Grid<Real> u(parent); - vector<Grid<Real> *> matA{&A0, &Ai, &Aj}; // core solve is same as for a regular real grid - if (flags.is3D()) { - matA.push_back(&Ak); - gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, matA); - } - else { - gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, matA); - } - + if (flags.is3D()) + gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak); + else + gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak); gcg->setAccuracy(cgAccuracy); // diffuse every component separately @@ -702,15 +733,14 @@ void cgSolveDiffusion(const FlagGrid &flags, rhs.copyFrom(u); gcg->solve(maxIter); - debMsg("FluidSolver::solveDiffusion vec3, iterations:" << gcg->getIterations() - << ", res:" << gcg->getSigma(), - CG_DEBUGLEVEL); + // debMsg("FluidSolver::solveDiffusion vec3, iterations:"<<gcg->getIterations()<<", + // res:"<<gcg->getSigma(), CG_DEBUGLEVEL); setComponent(u, vec, component); } } else { - errMsg("cgSolveDiffusion: Grid Type is not supported (only Real, Vec3, MAC, or Levelset)"); + // errMsg("cgSolveDiffusion: Grid Type is not supported (only Real, Vec3, MAC, or Levelset)"); } delete gcg; diff --git a/extern/mantaflow/preprocessed/conjugategrad.h b/extern/mantaflow/preprocessed/conjugategrad.h index 35cb3960656..189f85de689 100644 --- a/extern/mantaflow/preprocessed/conjugategrad.h +++ b/extern/mantaflow/preprocessed/conjugategrad.h @@ -37,7 +37,7 @@ class GridCgInterface { virtual ~GridCgInterface(){}; // solving functions - virtual bool iterate() = 0; + virtual bool iterate(Real &time) = 0; virtual void solve(int maxIter) = 0; // precond @@ -78,12 +78,16 @@ template<class APPLYMAT> class GridCg : public GridCgInterface { Grid<Real> &search, const FlagGrid &flags, Grid<Real> &tmp, - std::vector<Grid<Real> *> matrixAVec, - std::vector<Grid<Real> *> rhsVec = {}); - ~GridCg(){}; + Grid<Real> *A0, + Grid<Real> *pAi, + Grid<Real> *pAj, + Grid<Real> *pAk); + ~GridCg() + { + } void doInit(); - bool iterate(); + bool iterate(Real &time); void solve(int maxIter); //! init pointers, and copy values from "normal" matrix void setICPreconditioner( @@ -129,10 +133,7 @@ template<class APPLYMAT> class GridCg : public GridCgInterface { const FlagGrid &mFlags; Grid<Real> &mTmp; - //! shape of A matrix defined here (e.g. diagonal, positive neighbor cells, etc) - std::vector<Grid<Real> *> mMatrixA; - //! shape of rhs vector defined here (e.g. 1 rhs for regular fluids solve, 3 rhs for viscosity) - std::vector<Grid<Real> *> mVecRhs; + Grid<Real> *mpA0, *mpAi, *mpAj, *mpAk; PreconditionType mPcMethod; //! preconditioning grids @@ -153,33 +154,32 @@ struct ApplyMatrix : public KernelBase { ApplyMatrix(const FlagGrid &flags, Grid<Real> &dst, const Grid<Real> &src, - const std::vector<Grid<Real> *> matrixA, - const std::vector<Grid<Real> *> vecRhs) - : KernelBase(&flags, 0), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs) + Grid<Real> &A0, + Grid<Real> &Ai, + Grid<Real> &Aj, + Grid<Real> &Ak) + : KernelBase(&flags, 0), flags(flags), dst(dst), src(src), A0(A0), Ai(Ai), Aj(Aj), Ak(Ak) { runMessage(); run(); } - inline void op(IndexInt idx, + inline void op(int i, + int j, + int k, const FlagGrid &flags, Grid<Real> &dst, const Grid<Real> &src, - const std::vector<Grid<Real> *> matrixA, - const std::vector<Grid<Real> *> vecRhs) const + Grid<Real> &A0, + Grid<Real> &Ai, + Grid<Real> &Aj, + Grid<Real> &Ak) { - unusedParameter(vecRhs); // Not needed in this matrix application - - if (matrixA.size() != 4) - errMsg("ConjugateGrad: Invalid A matrix in apply matrix step"); - Grid<Real> &A0 = *matrixA[0]; - Grid<Real> &Ai = *matrixA[1]; - Grid<Real> &Aj = *matrixA[2]; - Grid<Real> &Ak = *matrixA[3]; - + const IndexInt idx = dst.index(i, j, k); if (!flags.isFluid(idx)) { dst[idx] = src[idx]; return; } + const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ(); dst[idx] = src[idx] * A0[idx] + src[idx - X] * Ai[idx - X] + src[idx + X] * Ai[idx] + src[idx - Y] * Aj[idx - Y] + src[idx + Y] * Aj[idx] + src[idx - Z] * Ak[idx - Z] + @@ -200,37 +200,71 @@ struct ApplyMatrix : public KernelBase { return src; } typedef Grid<Real> type2; - inline const std::vector<Grid<Real> *> &getArg3() + inline Grid<Real> &getArg3() { - return matrixA; + return A0; } - typedef std::vector<Grid<Real> *> type3; - inline const std::vector<Grid<Real> *> &getArg4() + typedef Grid<Real> type3; + inline Grid<Real> &getArg4() { - return vecRhs; + return Ai; } - typedef std::vector<Grid<Real> *> type4; - void runMessage() + typedef Grid<Real> type4; + inline Grid<Real> &getArg5() { - debMsg("Executing kernel ApplyMatrix ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + return Aj; + } + typedef Grid<Real> type5; + inline Grid<Real> &getArg6() { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, flags, dst, src, matrixA, vecRhs); + return Ak; } + typedef Grid<Real> type6; + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const int _maxX = maxX; + const int _maxY = maxY; + if (maxZ > 1) { + const FlagGrid &flags = getArg0(); + Grid<Real> &dst = getArg1(); + const Grid<Real> &src = getArg2(); + Grid<Real> &A0 = getArg3(); + Grid<Real> &Ai = getArg4(); + Grid<Real> &Aj = getArg5(); + Grid<Real> &Ak = getArg6(); +#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1) + { + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak); + } + } + else { + const int k = 0; + const FlagGrid &flags = getArg0(); + Grid<Real> &dst = getArg1(); + const Grid<Real> &src = getArg2(); + Grid<Real> &A0 = getArg3(); + Grid<Real> &Ai = getArg4(); + Grid<Real> &Aj = getArg5(); + Grid<Real> &Ak = getArg6(); +#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1) + { + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak); + } + } } const FlagGrid &flags; Grid<Real> &dst; const Grid<Real> &src; - const std::vector<Grid<Real> *> matrixA; - const std::vector<Grid<Real> *> vecRhs; + Grid<Real> &A0; + Grid<Real> &Ai; + Grid<Real> &Aj; + Grid<Real> &Ak; }; //! Kernel: Apply symmetric stored Matrix. 2D version @@ -239,32 +273,34 @@ struct ApplyMatrix2D : public KernelBase { ApplyMatrix2D(const FlagGrid &flags, Grid<Real> &dst, const Grid<Real> &src, - const std::vector<Grid<Real> *> matrixA, - const std::vector<Grid<Real> *> vecRhs) - : KernelBase(&flags, 0), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs) + Grid<Real> &A0, + Grid<Real> &Ai, + Grid<Real> &Aj, + Grid<Real> &Ak) + : KernelBase(&flags, 0), flags(flags), dst(dst), src(src), A0(A0), Ai(Ai), Aj(Aj), Ak(Ak) { runMessage(); run(); } - inline void op(IndexInt idx, + inline void op(int i, + int j, + int k, const FlagGrid &flags, Grid<Real> &dst, const Grid<Real> &src, - const std::vector<Grid<Real> *> matrixA, - const std::vector<Grid<Real> *> vecRhs) const + Grid<Real> &A0, + Grid<Real> &Ai, + Grid<Real> &Aj, + Grid<Real> &Ak) { - unusedParameter(vecRhs); // Not needed in this matrix application - - if (matrixA.size() != 3) - errMsg("ConjugateGrad: Invalid A matrix in apply matrix step"); - Grid<Real> &A0 = *matrixA[0]; - Grid<Real> &Ai = *matrixA[1]; - Grid<Real> &Aj = *matrixA[2]; + unusedParameter(Ak); // only there for parameter compatibility with ApplyMatrix + const IndexInt idx = dst.index(i, j, k); if (!flags.isFluid(idx)) { dst[idx] = src[idx]; return; } + const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ(); dst[idx] = src[idx] * A0[idx] + src[idx - X] * Ai[idx - X] + src[idx + X] * Ai[idx] + src[idx - Y] * Aj[idx - Y] + src[idx + Y] * Aj[idx]; @@ -284,387 +320,73 @@ struct ApplyMatrix2D : public KernelBase { return src; } typedef Grid<Real> type2; - inline const std::vector<Grid<Real> *> &getArg3() - { - return matrixA; - } - typedef std::vector<Grid<Real> *> type3; - inline const std::vector<Grid<Real> *> &getArg4() - { - return vecRhs; - } - typedef std::vector<Grid<Real> *> type4; - void runMessage() - { - debMsg("Executing kernel ApplyMatrix2D ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, flags, dst, src, matrixA, vecRhs); - } - void run() - { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); - } - const FlagGrid &flags; - Grid<Real> &dst; - const Grid<Real> &src; - const std::vector<Grid<Real> *> matrixA; - const std::vector<Grid<Real> *> vecRhs; -}; - -struct ApplyMatrixViscosityU : public KernelBase { - ApplyMatrixViscosityU(const FlagGrid &flags, - Grid<Real> &dst, - const Grid<Real> &src, - const std::vector<Grid<Real> *> matrixA, - const std::vector<Grid<Real> *> vecRhs) - : KernelBase(&flags, 1), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs) - { - runMessage(); - run(); - } - inline void op(int i, - int j, - int k, - const FlagGrid &flags, - Grid<Real> &dst, - const Grid<Real> &src, - const std::vector<Grid<Real> *> matrixA, - const std::vector<Grid<Real> *> vecRhs) const - { - if (matrixA.size() != 15) - errMsg("ConjugateGrad: Invalid A matrix in apply matrix step"); - Grid<Real> &A0 = *matrixA[0]; - Grid<Real> &Aplusi = *matrixA[1]; - Grid<Real> &Aplusj = *matrixA[2]; - Grid<Real> &Aplusk = *matrixA[3]; - Grid<Real> &Aminusi = *matrixA[4]; - Grid<Real> &Aminusj = *matrixA[5]; - Grid<Real> &Aminusk = *matrixA[6]; - - if (vecRhs.size() != 2) - errMsg("ConjugateGrad: Invalid rhs vector in apply matrix step"); - Grid<Real> &srcV = *vecRhs[0]; - Grid<Real> &srcW = *vecRhs[1]; - - dst(i, j, k) = src(i, j, k) * A0(i, j, k) + src(i + 1, j, k) * Aplusi(i, j, k) + - src(i, j + 1, k) * Aplusj(i, j, k) + src(i, j, k + 1) * Aplusk(i, j, k) + - src(i - 1, j, k) * Aminusi(i, j, k) + src(i, j - 1, k) * Aminusj(i, j, k) + - src(i, j, k - 1) * Aminusk(i, j, k); - - dst(i, j, k) += srcV(i, j + 1, k) * (*matrixA[7])(i, j, k) + - srcV(i - 1, j + 1, k) * (*matrixA[8])(i, j, k) + - srcV(i, j, k) * (*matrixA[9])(i, j, k) + - srcV(i - 1, j, k) * (*matrixA[10])(i, j, k) + - srcW(i, j, k + 1) * (*matrixA[11])(i, j, k) + - srcW(i - 1, j, k + 1) * (*matrixA[12])(i, j, k) + - srcW(i, j, k) * (*matrixA[13])(i, j, k) + - srcW(i - 1, j, k) * (*matrixA[14])(i, j, k); - } - inline const FlagGrid &getArg0() - { - return flags; - } - typedef FlagGrid type0; - inline Grid<Real> &getArg1() - { - return dst; - } - typedef Grid<Real> type1; - inline const Grid<Real> &getArg2() - { - return src; - } - typedef Grid<Real> type2; - inline const std::vector<Grid<Real> *> &getArg3() - { - return matrixA; - } - typedef std::vector<Grid<Real> *> type3; - inline const std::vector<Grid<Real> *> &getArg4() - { - return vecRhs; - } - typedef std::vector<Grid<Real> *> type4; - void runMessage() - { - debMsg("Executing kernel ApplyMatrixViscosityU ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - const int _maxX = maxX; - const int _maxY = maxY; - if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, dst, src, matrixA, vecRhs); - } - else { - const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, dst, src, matrixA, vecRhs); - } - } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } - const FlagGrid &flags; - Grid<Real> &dst; - const Grid<Real> &src; - const std::vector<Grid<Real> *> matrixA; - const std::vector<Grid<Real> *> vecRhs; -}; - -struct ApplyMatrixViscosityV : public KernelBase { - ApplyMatrixViscosityV(const FlagGrid &flags, - Grid<Real> &dst, - const Grid<Real> &src, - const std::vector<Grid<Real> *> matrixA, - const std::vector<Grid<Real> *> vecRhs) - : KernelBase(&flags, 1), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs) - { - runMessage(); - run(); - } - inline void op(int i, - int j, - int k, - const FlagGrid &flags, - Grid<Real> &dst, - const Grid<Real> &src, - const std::vector<Grid<Real> *> matrixA, - const std::vector<Grid<Real> *> vecRhs) const - { - if (matrixA.size() != 15) - errMsg("ConjugateGrad: Invalid A matrix in apply matrix step"); - Grid<Real> &A0 = *matrixA[0]; - Grid<Real> &Aplusi = *matrixA[1]; - Grid<Real> &Aplusj = *matrixA[2]; - Grid<Real> &Aplusk = *matrixA[3]; - Grid<Real> &Aminusi = *matrixA[4]; - Grid<Real> &Aminusj = *matrixA[5]; - Grid<Real> &Aminusk = *matrixA[6]; - - if (vecRhs.size() != 2) - errMsg("ConjugateGrad: Invalid rhs vector in apply matrix step"); - Grid<Real> &srcU = *vecRhs[0]; - Grid<Real> &srcW = *vecRhs[1]; - - dst(i, j, k) = src(i, j, k) * A0(i, j, k) + src(i + 1, j, k) * Aplusi(i, j, k) + - src(i, j + 1, k) * Aplusj(i, j, k) + src(i, j, k + 1) * Aplusk(i, j, k) + - src(i - 1, j, k) * Aminusi(i, j, k) + src(i, j - 1, k) * Aminusj(i, j, k) + - src(i, j, k - 1) * Aminusk(i, j, k); - - dst(i, j, k) += srcU(i + 1, j, k) * (*matrixA[7])(i, j, k) + - srcU(i + 1, j - 1, k) * (*matrixA[8])(i, j, k) + - srcU(i, j, k) * (*matrixA[9])(i, j, k) + - srcU(i, j - 1, k) * (*matrixA[10])(i, j, k) + - srcW(i, j, k + 1) * (*matrixA[11])(i, j, k) + - srcW(i, j - 1, k + 1) * (*matrixA[12])(i, j, k) + - srcW(i, j, k) * (*matrixA[13])(i, j, k) + - srcW(i, j - 1, k) * (*matrixA[14])(i, j, k); - } - inline const FlagGrid &getArg0() - { - return flags; - } - typedef FlagGrid type0; - inline Grid<Real> &getArg1() - { - return dst; - } - typedef Grid<Real> type1; - inline const Grid<Real> &getArg2() + inline Grid<Real> &getArg3() { - return src; + return A0; } - typedef Grid<Real> type2; - inline const std::vector<Grid<Real> *> &getArg3() + typedef Grid<Real> type3; + inline Grid<Real> &getArg4() { - return matrixA; + return Ai; } - typedef std::vector<Grid<Real> *> type3; - inline const std::vector<Grid<Real> *> &getArg4() + typedef Grid<Real> type4; + inline Grid<Real> &getArg5() { - return vecRhs; + return Aj; } - typedef std::vector<Grid<Real> *> type4; - void runMessage() + typedef Grid<Real> type5; + inline Grid<Real> &getArg6() { - debMsg("Executing kernel ApplyMatrixViscosityV ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - const int _maxX = maxX; - const int _maxY = maxY; - if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, dst, src, matrixA, vecRhs); - } - else { - const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, dst, src, matrixA, vecRhs); - } + return Ak; } + typedef Grid<Real> type6; + void runMessage(){}; void run() { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } - const FlagGrid &flags; - Grid<Real> &dst; - const Grid<Real> &src; - const std::vector<Grid<Real> *> matrixA; - const std::vector<Grid<Real> *> vecRhs; -}; - -struct ApplyMatrixViscosityW : public KernelBase { - ApplyMatrixViscosityW(const FlagGrid &flags, - Grid<Real> &dst, - const Grid<Real> &src, - const std::vector<Grid<Real> *> matrixA, - const std::vector<Grid<Real> *> vecRhs) - : KernelBase(&flags, 1), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs) - { - runMessage(); - run(); - } - inline void op(int i, - int j, - int k, - const FlagGrid &flags, - Grid<Real> &dst, - const Grid<Real> &src, - const std::vector<Grid<Real> *> matrixA, - const std::vector<Grid<Real> *> vecRhs) const - { - if (matrixA.size() != 15) - errMsg("ConjugateGrad: Invalid A matrix in apply matrix step"); - Grid<Real> &A0 = *matrixA[0]; - Grid<Real> &Aplusi = *matrixA[1]; - Grid<Real> &Aplusj = *matrixA[2]; - Grid<Real> &Aplusk = *matrixA[3]; - Grid<Real> &Aminusi = *matrixA[4]; - Grid<Real> &Aminusj = *matrixA[5]; - Grid<Real> &Aminusk = *matrixA[6]; - - if (vecRhs.size() != 2) - errMsg("ConjugateGrad: Invalid rhs vector in apply matrix step"); - Grid<Real> &srcU = *vecRhs[0]; - Grid<Real> &srcV = *vecRhs[1]; - - dst(i, j, k) = src(i, j, k) * A0(i, j, k) + src(i + 1, j, k) * Aplusi(i, j, k) + - src(i, j + 1, k) * Aplusj(i, j, k) + src(i, j, k + 1) * Aplusk(i, j, k) + - src(i - 1, j, k) * Aminusi(i, j, k) + src(i, j - 1, k) * Aminusj(i, j, k) + - src(i, j, k - 1) * Aminusk(i, j, k); - - dst(i, j, k) += srcU(i + 1, j, k) * (*matrixA[7])(i, j, k) + - srcU(i + 1, j, k - 1) * (*matrixA[8])(i, j, k) + - srcU(i, j, k) * (*matrixA[9])(i, j, k) + - srcU(i, j, k - 1) * (*matrixA[10])(i, j, k) + - srcV(i, j + 1, k) * (*matrixA[11])(i, j, k) + - srcV(i, j + 1, k - 1) * (*matrixA[12])(i, j, k) + - srcV(i, j, k) * (*matrixA[13])(i, j, k) + - srcV(i, j, k - 1) * (*matrixA[14])(i, j, k); - } - inline const FlagGrid &getArg0() - { - return flags; - } - typedef FlagGrid type0; - inline Grid<Real> &getArg1() - { - return dst; - } - typedef Grid<Real> type1; - inline const Grid<Real> &getArg2() - { - return src; - } - typedef Grid<Real> type2; - inline const std::vector<Grid<Real> *> &getArg3() - { - return matrixA; - } - typedef std::vector<Grid<Real> *> type3; - inline const std::vector<Grid<Real> *> &getArg4() - { - return vecRhs; - } - typedef std::vector<Grid<Real> *> type4; - void runMessage() - { - debMsg("Executing kernel ApplyMatrixViscosityW ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, dst, src, matrixA, vecRhs); + const FlagGrid &flags = getArg0(); + Grid<Real> &dst = getArg1(); + const Grid<Real> &src = getArg2(); + Grid<Real> &A0 = getArg3(); + Grid<Real> &Ai = getArg4(); + Grid<Real> &Aj = getArg5(); + Grid<Real> &Ak = getArg6(); +#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1) + { + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, dst, src, matrixA, vecRhs); + const FlagGrid &flags = getArg0(); + Grid<Real> &dst = getArg1(); + const Grid<Real> &src = getArg2(); + Grid<Real> &A0 = getArg3(); + Grid<Real> &Ai = getArg4(); + Grid<Real> &Aj = getArg5(); + Grid<Real> &Ak = getArg6(); +#pragma omp target teams distribute parallel for collapse(1) schedule(static, 1) + { + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; Grid<Real> &dst; const Grid<Real> &src; - const std::vector<Grid<Real> *> matrixA; - const std::vector<Grid<Real> *> vecRhs; + Grid<Real> &A0; + Grid<Real> &Ai; + Grid<Real> &Aj; + Grid<Real> &Ak; }; -/* NOTE: Use this template for new matrix application kernels - -//! Template for matrix application kernels -KERNEL() -void ApplyMatrixTemplate (const FlagGrid& flags, Grid<Real>& dst, const Grid<Real>& src, - const std::vector<Grid<Real> *> matrixA, const std::vector<Grid<Real> *> vecRhs) -{ - // The kernel must define how to use the grids from the matrixA and vecRhs lists -} - -*/ - //! Kernel: Construct the matrix for the poisson equation struct MakeLaplaceMatrix : public KernelBase { @@ -687,7 +409,7 @@ struct MakeLaplaceMatrix : public KernelBase { Grid<Real> &Ai, Grid<Real> &Aj, Grid<Real> &Ak, - const MACGrid *fractions = 0) const + const MACGrid *fractions = 0) { if (!flags.isFluid(i, j, k)) return; @@ -765,37 +487,42 @@ struct MakeLaplaceMatrix : public KernelBase { return fractions; } typedef MACGrid type5; - void runMessage() - { - debMsg("Executing kernel MakeLaplaceMatrix ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, A0, Ai, Aj, Ak, fractions); + const FlagGrid &flags = getArg0(); + Grid<Real> &A0 = getArg1(); + Grid<Real> &Ai = getArg2(); + Grid<Real> &Aj = getArg3(); + Grid<Real> &Ak = getArg4(); + const MACGrid *fractions = getArg5(); +#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1) + { + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, A0, Ai, Aj, Ak, fractions); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, A0, Ai, Aj, Ak, fractions); + const FlagGrid &flags = getArg0(); + Grid<Real> &A0 = getArg1(); + Grid<Real> &Ai = getArg2(); + Grid<Real> &Aj = getArg3(); + Grid<Real> &Ak = getArg4(); + const MACGrid *fractions = getArg5(); +#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1) + { + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, A0, Ai, Aj, Ak, fractions); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; Grid<Real> &A0; Grid<Real> &Ai; diff --git a/extern/mantaflow/preprocessed/fastmarch.cpp b/extern/mantaflow/preprocessed/fastmarch.cpp index 31e43483b49..5499ec9cbdf 100644 --- a/extern/mantaflow/preprocessed/fastmarch.cpp +++ b/extern/mantaflow/preprocessed/fastmarch.cpp @@ -236,13 +236,7 @@ struct SetLevelsetBoundaries : public KernelBase { return phi; } typedef Grid<Real> type0; - void runMessage() - { - debMsg("Executing kernel SetLevelsetBoundaries ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; + void runMessage(){}; void run() { const int _maxX = maxX; @@ -297,14 +291,8 @@ struct knExtrapolateMACSimple : public KernelBase { runMessage(); run(); } - inline void op(int i, - int j, - int k, - MACGrid &vel, - int distance, - Grid<int> &tmp, - const int d, - const int c) const + inline void op( + int i, int j, int k, MACGrid &vel, int distance, Grid<int> &tmp, const int d, const int c) { static const Vec3i nb[6] = {Vec3i(1, 0, 0), Vec3i(-1, 0, 0), @@ -359,37 +347,35 @@ struct knExtrapolateMACSimple : public KernelBase { return c; } typedef int type4; - void runMessage() - { - debMsg("Executing kernel knExtrapolateMACSimple ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, distance, tmp, d, c); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, distance, tmp, d, c); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, distance, tmp, d, c); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, distance, tmp, d, c); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } MACGrid &vel; int distance; Grid<int> &tmp; @@ -405,7 +391,7 @@ struct knExtrapolateIntoBnd : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, FlagGrid &flags, MACGrid &vel, const MACGrid &velTmp) const + inline void op(int i, int j, int k, FlagGrid &flags, MACGrid &vel, const MACGrid &velTmp) { int c = 0; Vec3 v(0, 0, 0); @@ -467,37 +453,35 @@ struct knExtrapolateIntoBnd : public KernelBase { return velTmp; } typedef MACGrid type2; - void runMessage() - { - debMsg("Executing kernel knExtrapolateIntoBnd ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, velTmp); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, velTmp); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, velTmp); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, velTmp); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } FlagGrid &flags; MACGrid &vel; const MACGrid &velTmp; @@ -538,8 +522,7 @@ struct knUnprojectNormalComp : public KernelBase { runMessage(); run(); } - inline void op( - int i, int j, int k, FlagGrid &flags, MACGrid &vel, Grid<Real> &phi, Real maxDist) const + inline void op(int i, int j, int k, FlagGrid &flags, MACGrid &vel, Grid<Real> &phi, Real maxDist) { // apply inside, within range near obstacle surface if (phi(i, j, k) > 0. || phi(i, j, k) < -maxDist) @@ -573,37 +556,35 @@ struct knUnprojectNormalComp : public KernelBase { return maxDist; } typedef Real type3; - void runMessage() - { - debMsg("Executing kernel knUnprojectNormalComp ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, phi, maxDist); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, phi, maxDist); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, phi, maxDist); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, phi, maxDist); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } FlagGrid &flags; MACGrid &vel; Grid<Real> φ @@ -712,7 +693,7 @@ struct knExtrapolateMACFromWeight : public KernelBase { Grid<Vec3> &weight, int distance, const int d, - const int c) const + const int c) { static const Vec3i nb[6] = {Vec3i(1, 0, 0), Vec3i(-1, 0, 0), @@ -766,37 +747,35 @@ struct knExtrapolateMACFromWeight : public KernelBase { return c; } typedef int type4; - void runMessage() - { - debMsg("Executing kernel knExtrapolateMACFromWeight ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, weight, distance, d, c); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, weight, distance, d, c); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, weight, distance, d, c); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, weight, distance, d, c); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } MACGrid &vel; Grid<Vec3> &weight; int distance; @@ -1011,14 +990,8 @@ template<class S> struct knExtrapolateLsSimple : public KernelBase { runMessage(); run(); } - inline void op(int i, - int j, - int k, - Grid<S> &val, - int distance, - Grid<int> &tmp, - const int d, - S direction) const + inline void op( + int i, int j, int k, Grid<S> &val, int distance, Grid<int> &tmp, const int d, S direction) { const int dim = (val.is3D() ? 3 : 2); if (tmp(i, j, k) != 0) @@ -1065,37 +1038,35 @@ template<class S> struct knExtrapolateLsSimple : public KernelBase { return direction; } typedef S type4; - void runMessage() - { - debMsg("Executing kernel knExtrapolateLsSimple ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, val, distance, tmp, d, direction); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, val, distance, tmp, d, direction); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, val, distance, tmp, d, direction); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, val, distance, tmp, d, direction); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<S> &val; int distance; Grid<int> &tmp; @@ -1110,7 +1081,7 @@ template<class S> struct knSetRemaining : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<S> &phi, Grid<int> &tmp, S distance) const + inline void op(int i, int j, int k, Grid<S> &phi, Grid<int> &tmp, S distance) { if (tmp(i, j, k) != 0) return; @@ -1131,37 +1102,35 @@ template<class S> struct knSetRemaining : public KernelBase { return distance; } typedef S type2; - void runMessage() - { - debMsg("Executing kernel knSetRemaining ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, phi, tmp, distance); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, phi, tmp, distance); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, phi, tmp, distance); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, phi, tmp, distance); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<S> φ Grid<int> &tmp; S distance; diff --git a/extern/mantaflow/preprocessed/fileio/iogrids.cpp b/extern/mantaflow/preprocessed/fileio/iogrids.cpp index d138cd2925f..77e3e89ed26 100644 --- a/extern/mantaflow/preprocessed/fileio/iogrids.cpp +++ b/extern/mantaflow/preprocessed/fileio/iogrids.cpp @@ -1256,7 +1256,7 @@ struct knQuantize : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<Real> &grid, Real step) const + inline void op(IndexInt idx, Grid<Real> &grid, Real step) { quantizeReal(grid(idx), step); } @@ -1270,21 +1270,17 @@ struct knQuantize : public KernelBase { return step; } typedef Real type1; - void runMessage() - { - debMsg("Executing kernel knQuantize ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, grid, step); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, grid, step); + } } Grid<Real> &grid; Real step; @@ -1331,7 +1327,7 @@ struct knQuantizeVec3 : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<Vec3> &grid, Real step) const + inline void op(IndexInt idx, Grid<Vec3> &grid, Real step) { for (int c = 0; c < 3; ++c) quantizeReal(grid(idx)[c], step); @@ -1346,21 +1342,17 @@ struct knQuantizeVec3 : public KernelBase { return step; } typedef Real type1; - void runMessage() - { - debMsg("Executing kernel knQuantizeVec3 ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, grid, step); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, grid, step); + } } Grid<Vec3> &grid; Real step; diff --git a/extern/mantaflow/preprocessed/fileio/ioutil.cpp b/extern/mantaflow/preprocessed/fileio/ioutil.cpp index 2bac34c2a65..409760b0a0f 100644 --- a/extern/mantaflow/preprocessed/fileio/ioutil.cpp +++ b/extern/mantaflow/preprocessed/fileio/ioutil.cpp @@ -66,7 +66,7 @@ void *safeGzopen(const char *filename, const char *mode) #endif // NO_ZLIB != 1 } -#if defined(OPENVDB) +#if OPENVDB == 1 // Convert from OpenVDB value to Manta value. template<class S, class T> void convertFrom(S &in, T *out) { diff --git a/extern/mantaflow/preprocessed/fileio/iovdb.cpp b/extern/mantaflow/preprocessed/fileio/iovdb.cpp index 1846ef7ecbb..815bfa19aaa 100644 --- a/extern/mantaflow/preprocessed/fileio/iovdb.cpp +++ b/extern/mantaflow/preprocessed/fileio/iovdb.cpp @@ -29,10 +29,10 @@ #if OPENVDB == 1 # include "openvdb/openvdb.h" -# include "openvdb/points/PointConversion.h" -# include "openvdb/points/PointCount.h" -# include "openvdb/tools/Clip.h" -# include "openvdb/tools/Dense.h" +# include <openvdb/points/PointConversion.h> +# include <openvdb/points/PointCount.h> +# include <openvdb/tools/Clip.h> +# include <openvdb/tools/Dense.h> #endif #define POSITION_NAME "P" @@ -433,9 +433,9 @@ int writeObjectsVDB(const string &filename, debMsg("Writing int grid '" << mantaGrid->getName() << "' to vdb file " << filename, 1); Grid<int> *mantaIntGrid = (Grid<int> *)mantaGrid; if (clipGrid && mantaIntGrid->saveSparse()) { - assertMsg(clipGrid->getSize() == mantaGrid->getSize(), - "writeObjectsVDB: Clip grid and exported grid must have the same size " - << clipGrid->getSize() << " vs " << mantaGrid->getSize()); + // assertMsg(clipGrid->getSize() == mantaGrid->getSize(), "writeObjectsVDB: Clip grid and + // exported grid must have the same size " << clipGrid->getSize() << " vs " << + // mantaGrid->getSize()); } vdbGrid = exportVDB<int, openvdb::Int32Grid>(mantaIntGrid, clip, vdbClipGrid); gridsVDB.push_back(vdbGrid); @@ -448,9 +448,9 @@ int writeObjectsVDB(const string &filename, // Only supply clip grid if real grid is not equal to the clip grid openvdb::FloatGrid::Ptr tmpClipGrid = (mantaRealGrid == clipGrid) ? nullptr : vdbClipGrid; if (clipGrid && mantaRealGrid->saveSparse()) { - assertMsg(clipGrid->getSize() == mantaGrid->getSize(), - "writeObjectsVDB: Clip grid and exported grid must have the same size " - << clipGrid->getSize() << " vs " << mantaGrid->getSize()); + // assertMsg(clipGrid->getSize() == mantaGrid->getSize(), "writeObjectsVDB: Clip grid and + // exported grid must have the same size " << clipGrid->getSize() << " vs " << + // mantaGrid->getSize()); } vdbGrid = exportVDB<Real, openvdb::FloatGrid>(mantaRealGrid, clip, tmpClipGrid); gridsVDB.push_back(vdbGrid); @@ -461,9 +461,9 @@ int writeObjectsVDB(const string &filename, openvdb::GRID_UNKNOWN; Grid<Vec3> *mantaVec3Grid = (Grid<Vec3> *)mantaGrid; if (clipGrid && mantaVec3Grid->saveSparse()) { - assertMsg(clipGrid->getSize() == mantaGrid->getSize(), - "writeObjectsVDB: Clip grid and exported grid must have the same size " - << clipGrid->getSize() << " vs " << mantaGrid->getSize()); + // assertMsg(clipGrid->getSize() == mantaGrid->getSize(), "writeObjectsVDB: Clip grid and + // exported grid must have the same size " << clipGrid->getSize() << " vs " << + // mantaGrid->getSize()); } vdbGrid = exportVDB<Vec3, openvdb::Vec3SGrid>(mantaVec3Grid, clip, vdbClipGrid); gridsVDB.push_back(vdbGrid); @@ -519,7 +519,7 @@ int writeObjectsVDB(const string &filename, } } - // Write only if there is at least one grid, optionally write with compression. + // Write only if the is at least one grid, optionally write with compression. if (gridsVDB.size()) { int vdb_flags = openvdb::io::COMPRESS_ACTIVE_MASK; switch (compression) { @@ -534,8 +534,7 @@ int writeObjectsVDB(const string &filename, } case COMPRESSION_BLOSC: { # if OPENVDB_BLOSC == 1 - // Cannot use |= here, causes segfault with blosc 1.5.0 (== recommended version) - vdb_flags = openvdb::io::COMPRESS_BLOSC; + vdb_flags |= openvdb::io::COMPRESS_BLOSC; # else debMsg("OpenVDB was built without Blosc support, using Zip compression instead", 1); vdb_flags |= openvdb::io::COMPRESS_ZIP; @@ -696,36 +695,28 @@ int readObjectsVDB(const string &filename, std::vector<PbClass *> *objects, floa // Compare metadata with allocated grid setup. This prevents invalid index access. if (notZero(metaRes) && metaRes != origRes) { - debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() - << "' has not been read. Meta grid res " << metaRes - << " vs " << origRes << " current grid size", - 1); + // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read. + // Meta grid res " << metaRes << " vs " << origRes << " current grid size", 1); readFailure++; break; } if (notZero(metaVoxelSize) && metaVoxelSize != voxelSize) { - debMsg("readObjectsVDB Warning: Grid '" - << vdbGrid->getName() << "' has not been read. Meta voxel size " - << metaVoxelSize << " vs " << voxelSize << " current voxel size", - 1); + // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read. + // Meta voxel size " << metaVoxelSize << " vs " << voxelSize << " current voxel size", 1); readFailure++; break; } if (metaBBoxMax.x > origRes.x || metaBBoxMax.y > origRes.y || metaBBoxMax.z > origRes.z) { - debMsg("readObjectsVDB Warning: Grid '" - << vdbGrid->getName() << "' has not been read. Vdb bbox max " << metaBBoxMax - << " vs " << origRes << " current grid size", - 1); + // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read. + // Vdb bbox max " << metaBBoxMax << " vs " << origRes << " current grid size", 1); readFailure++; break; } const Vec3i origOrigin(0); if (metaBBoxMin.x < origOrigin.x || metaBBoxMin.y < origOrigin.y || metaBBoxMin.z < origOrigin.z) { - debMsg("readObjectsVDB Warning: Grid '" - << vdbGrid->getName() << "' has not been read. Vdb bbox min " << metaBBoxMin - << " vs " << origOrigin << " current grid origin", - 1); + // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read. + // Vdb bbox min " << metaBBoxMin << " vs " << origOrigin << " current grid origin", 1); readFailure++; break; } diff --git a/extern/mantaflow/preprocessed/fluidsolver.h b/extern/mantaflow/preprocessed/fluidsolver.h index 6770f8b7b05..0c871bca3a1 100644 --- a/extern/mantaflow/preprocessed/fluidsolver.h +++ b/extern/mantaflow/preprocessed/fluidsolver.h @@ -384,7 +384,6 @@ class FluidSolver : public PbClass { GridStorage<Real> mGrids4dReal; GridStorage<Vec3> mGrids4dVec; GridStorage<Vec4> mGrids4dVec4; - public: PbArgs _args; } diff --git a/extern/mantaflow/preprocessed/general.h b/extern/mantaflow/preprocessed/general.h index 8bf1c2e25de..7a840517cef 100644 --- a/extern/mantaflow/preprocessed/general.h +++ b/extern/mantaflow/preprocessed/general.h @@ -242,39 +242,6 @@ inline bool c_isnan(float c) return d != d; } -//! Swap so that a<b -template<class T> inline void sort(T &a, T &b) -{ - if (a > b) - std::swap(a, b); -} - -//! Swap so that a<b<c -template<class T> inline void sort(T &a, T &b, T &c) -{ - if (a > b) - std::swap(a, b); - if (a > c) - std::swap(a, c); - if (b > c) - std::swap(b, c); -} - -//! Swap so that a<b<c<d -template<class T> inline void sort(T &a, T &b, T &c, T &d) -{ - if (a > b) - std::swap(a, b); - if (c > d) - std::swap(c, d); - if (a > c) - std::swap(a, c); - if (b > d) - std::swap(b, d); - if (b > c) - std::swap(b, c); -} - } // namespace Manta #endif diff --git a/extern/mantaflow/preprocessed/gitinfo.h b/extern/mantaflow/preprocessed/gitinfo.h index 6d367b764af..206d13bdf00 100644 --- a/extern/mantaflow/preprocessed/gitinfo.h +++ b/extern/mantaflow/preprocessed/gitinfo.h @@ -1,3 +1,3 @@ -#define MANTA_GIT_VERSION "commit d5d9a6c28daa8f21426d7a285f48639c0d8fd13f" +#define MANTA_GIT_VERSION "commit 39b7a415721ecbf6643612a24e8eadd221aeb934"
\ No newline at end of file diff --git a/extern/mantaflow/preprocessed/grid.cpp b/extern/mantaflow/preprocessed/grid.cpp index e0ea3830fae..cda3c5b213d 100644 --- a/extern/mantaflow/preprocessed/grid.cpp +++ b/extern/mantaflow/preprocessed/grid.cpp @@ -60,7 +60,8 @@ template<> inline GridBase::GridType typeList<Vec3>() } template<class T> -Grid<T>::Grid(FluidSolver *parent, bool show, bool sparse) : GridBase(parent), mExternalData(false) +Grid<T>::Grid(FluidSolver *parent, bool show, bool sparse, bool offload) + : GridBase(parent), mExternalData(false) { mType = typeList<T>(); mSize = parent->getGridSize(); @@ -78,6 +79,34 @@ Grid<T>::Grid(FluidSolver *parent, bool show, bool sparse) : GridBase(parent), m debMsg("Cannot enable sparse save option without OpenVDB", 1); mSaveSparse = false; #endif + +#if OPENMP && OPENMP_OFFLOAD + if (offload) { + Grid<T> *grid = this; + printf("OMP Allocating %s (%d,%d,%d)\n", + this->getName().c_str(), + this->getSizeX(), + this->getSizeY(), + this->getSizeZ()); + int size = this->getSizeX() * this->getSizeY() * this->getSizeZ(); + + printf("Size OMP is %d\n", size); +# pragma omp target enter data map(to \ + : grid, \ + grid->mSize, \ + grid->mStrideZ, \ + grid->mType, \ + grid->mDx, \ + grid->m3D, \ + grid->mData [0:size]) + //#pragma omp target enter data map(alloc: grid, grid->mData[0:size]) + } + mOffload = offload; +#else + if (offload) + debMsg("Cannot offload grid without OpenMP and OpenMP offloading support", 2); + mOffload = false; +#endif } template<class T> @@ -99,14 +128,119 @@ template<class T> Grid<T>::Grid(const Grid<T> &a) : GridBase(a.getParent()), mEx template<class T> Grid<T>::~Grid() { +#if OPENMP && OPENMP_OFFLOAD + if (mOffload) { + this->mapDeleteOmp(); + } +#endif if (!mExternalData) { mParent->freeGridPointer<T>(mData); } } -template<class T> void Grid<T>::clear() +template<class T> struct knGridClear : public KernelBase { + knGridClear(Grid<T> &dest) : KernelBase(&dest, 0), dest(dest) + { + runMessage(); + run(); + } + inline void op(int i, int j, int k, Grid<T> &dest) + { + dest(i, j, k) = T(0.); + } + inline Grid<T> &getArg0() + { + return dest; + } + typedef Grid<T> type0; + void runMessage(){}; + void run() + { + const int _maxX = maxX; + const int _maxY = maxY; + if (maxZ > 1) { + Grid<T> &dest = getArg0(); +#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1) + { + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, dest); + } + } + else { + const int k = 0; + Grid<T> &dest = getArg0(); +#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1) + { + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, dest); + } + } + } + Grid<T> &dest; +}; +template<class T> struct knGridClear2 : public KernelBase { + knGridClear2(Grid<T> &dest) : KernelBase(&dest, 0), dest(dest) + { + runMessage(); + run(); + } + inline void op(int i, int j, int k, Grid<T> &dest) + { + dest(i, j, k) = T(0.); + } + inline Grid<T> &getArg0() + { + return dest; + } + typedef Grid<T> type0; + void runMessage(){}; + void run() + { + const int _maxX = maxX; + const int _maxY = maxY; + if (maxZ > 1) { + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, dest); + } + } + else { + const int k = 0; +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, dest); + } + } + } + Grid<T> &dest; +}; + +template<class T> void Grid<T>::clear(bool isOmp) { + +#if OPENMP && OPENMP_OFFLOAD + if (isOmp) { + knGridClear<T>(*this); + } + else { + knGridClear2<T>(*this); + } +#else memset(mData, 0, sizeof(T) * mSize.x * mSize.y * mSize.z); +#endif } template<class T> void Grid<T>::swap(Grid<T> &other) @@ -201,29 +335,21 @@ struct CompMinReal : public KernelBase { return val; } typedef Grid<Real> type0; - void runMessage() - { - debMsg("Executing kernel CompMinReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, minVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompMinReal(CompMinReal &o, tbb::split) - : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max()) - { - } - void join(const CompMinReal &o) - { - minVal = min(minVal, o.minVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real minVal = std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, minVal); +#pragma omp critical + { + this->minVal = min(minVal, this->minVal); + } + } } const Grid<Real> &val; Real minVal; @@ -256,29 +382,21 @@ struct CompMaxReal : public KernelBase { return val; } typedef Grid<Real> type0; - void runMessage() - { - debMsg("Executing kernel CompMaxReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, maxVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompMaxReal(CompMaxReal &o, tbb::split) - : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max()) - { - } - void join(const CompMaxReal &o) - { - maxVal = max(maxVal, o.maxVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real maxVal = -std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, maxVal); +#pragma omp critical + { + this->maxVal = max(maxVal, this->maxVal); + } + } } const Grid<Real> &val; Real maxVal; @@ -311,29 +429,21 @@ struct CompMinInt : public KernelBase { return val; } typedef Grid<int> type0; - void runMessage() - { - debMsg("Executing kernel CompMinInt ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, minVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompMinInt(CompMinInt &o, tbb::split) - : KernelBase(o), val(o.val), minVal(std::numeric_limits<int>::max()) - { - } - void join(const CompMinInt &o) - { - minVal = min(minVal, o.minVal); + const IndexInt _sz = size; +#pragma omp parallel + { + int minVal = std::numeric_limits<int>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, minVal); +#pragma omp critical + { + this->minVal = min(minVal, this->minVal); + } + } } const Grid<int> &val; int minVal; @@ -366,29 +476,21 @@ struct CompMaxInt : public KernelBase { return val; } typedef Grid<int> type0; - void runMessage() - { - debMsg("Executing kernel CompMaxInt ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, maxVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompMaxInt(CompMaxInt &o, tbb::split) - : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<int>::max()) - { - } - void join(const CompMaxInt &o) - { - maxVal = max(maxVal, o.maxVal); + const IndexInt _sz = size; +#pragma omp parallel + { + int maxVal = -std::numeric_limits<int>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, maxVal); +#pragma omp critical + { + this->maxVal = max(maxVal, this->maxVal); + } + } } const Grid<int> &val; int maxVal; @@ -422,29 +524,21 @@ struct CompMinVec : public KernelBase { return val; } typedef Grid<Vec3> type0; - void runMessage() - { - debMsg("Executing kernel CompMinVec ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, minVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompMinVec(CompMinVec &o, tbb::split) - : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max()) - { - } - void join(const CompMinVec &o) - { - minVal = min(minVal, o.minVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real minVal = std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, minVal); +#pragma omp critical + { + this->minVal = min(minVal, this->minVal); + } + } } const Grid<Vec3> &val; Real minVal; @@ -478,38 +572,143 @@ struct CompMaxVec : public KernelBase { return val; } typedef Grid<Vec3> type0; - void runMessage() + void runMessage(){}; + void run() + { + const IndexInt _sz = size; +#pragma omp parallel + { + Real maxVal = -std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, maxVal); +#pragma omp critical + { + this->maxVal = max(maxVal, this->maxVal); + } + } + } + const Grid<Vec3> &val; + Real maxVal; +}; + +template<class T> struct knGridCopyGrid : public KernelBase { + knGridCopyGrid(Grid<T> &dest, const Grid<T> &source) + : KernelBase(&dest, 0), dest(dest), source(source) + { + runMessage(); + run(); + } + inline void op(int i, int j, int k, Grid<T> &dest, const Grid<T> &source) + { + dest(i, j, k) = source(i, j, k); + } + inline Grid<T> &getArg0() { - debMsg("Executing kernel CompMaxVec ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) + return dest; + } + typedef Grid<T> type0; + inline const Grid<T> &getArg1() { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, maxVal); + return source; } + typedef Grid<T> type1; + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); + const int _maxX = maxX; + const int _maxY = maxY; + if (maxZ > 1) { + Grid<T> &dest = getArg0(); + const Grid<T> &source = getArg1(); +#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1) + { + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, dest, source); + } + } + else { + const int k = 0; + Grid<T> &dest = getArg0(); + const Grid<T> &source = getArg1(); +#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1) + { + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, dest, source); + } + } } - CompMaxVec(CompMaxVec &o, tbb::split) - : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max()) + Grid<T> &dest; + const Grid<T> &source; +}; +template<class T> struct knGridCopyGrid2 : public KernelBase { + knGridCopyGrid2(Grid<T> &dest, const Grid<T> &source) + : KernelBase(&dest, 0), dest(dest), source(source) { + runMessage(); + run(); } - void join(const CompMaxVec &o) + inline void op(int i, int j, int k, Grid<T> &dest, const Grid<T> &source) { - maxVal = max(maxVal, o.maxVal); + dest(i, j, k) = source(i, j, k); } - const Grid<Vec3> &val; - Real maxVal; + inline Grid<T> &getArg0() + { + return dest; + } + typedef Grid<T> type0; + inline const Grid<T> &getArg1() + { + return source; + } + typedef Grid<T> type1; + void runMessage(){}; + void run() + { + const int _maxX = maxX; + const int _maxY = maxY; + if (maxZ > 1) { + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, dest, source); + } + } + else { + const int k = 0; +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, dest, source); + } + } + } + Grid<T> &dest; + const Grid<T> &source; }; -template<class T> Grid<T> &Grid<T>::copyFrom(const Grid<T> &a, bool copyType) +template<class T> Grid<T> &Grid<T>::copyFrom(const Grid<T> &a, bool copyType, bool isOmp) { assertMsg(a.mSize == mSize, "different grid resolutions " << a.mSize << " vs " << this->mSize); +#if OPENMP && OPENMP_OFFLOAD + if (isOmp) + knGridCopyGrid<T>(*this, a); + else + knGridCopyGrid2<T>(*this, a); +#else memcpy(mData, a.mData, sizeof(T) * mSize.x * mSize.y * mSize.z); +#endif if (copyType) mType = a.mType; // copy type marker return *this; @@ -524,7 +723,7 @@ template<class T> struct knGridSetConstReal : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, T val) const + inline void op(IndexInt idx, Grid<T> &me, T val) { me[idx] = val; } @@ -538,21 +737,17 @@ template<class T> struct knGridSetConstReal : public KernelBase { return val; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel knGridSetConstReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, val); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, val); + } } Grid<T> &me; T val; @@ -563,7 +758,7 @@ template<class T> struct knGridAddConstReal : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, T val) const + inline void op(IndexInt idx, Grid<T> &me, T val) { me[idx] += val; } @@ -577,21 +772,17 @@ template<class T> struct knGridAddConstReal : public KernelBase { return val; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel knGridAddConstReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, val); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, val); + } } Grid<T> &me; T val; @@ -602,7 +793,7 @@ template<class T> struct knGridMultConst : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, T val) const + inline void op(IndexInt idx, Grid<T> &me, T val) { me[idx] *= val; } @@ -616,21 +807,17 @@ template<class T> struct knGridMultConst : public KernelBase { return val; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel knGridMultConst ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, val); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, val); + } } Grid<T> &me; T val; @@ -642,7 +829,7 @@ template<class T> struct knGridSafeDiv : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, const Grid<T> &other) const + inline void op(IndexInt idx, Grid<T> &me, const Grid<T> &other) { me[idx] = safeDivide(me[idx], other[idx]); } @@ -656,21 +843,17 @@ template<class T> struct knGridSafeDiv : public KernelBase { return other; } typedef Grid<T> type1; - void runMessage() - { - debMsg("Executing kernel knGridSafeDiv ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid<T> &me; const Grid<T> &other; @@ -685,7 +868,7 @@ template<class T> struct knGridClamp : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, const T &min, const T &max) const + inline void op(IndexInt idx, Grid<T> &me, const T &min, const T &max) { me[idx] = clamp(me[idx], min, max); } @@ -704,21 +887,17 @@ template<class T> struct knGridClamp : public KernelBase { return max; } typedef T type2; - void runMessage() - { - debMsg("Executing kernel knGridClamp ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, min, max); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, min, max); + } } Grid<T> &me; const T &min; @@ -745,7 +924,7 @@ template<class T> struct knGridStomp : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, const T &threshold) const + inline void op(IndexInt idx, Grid<T> &me, const T &threshold) { stomp(me[idx], threshold); } @@ -759,21 +938,17 @@ template<class T> struct knGridStomp : public KernelBase { return threshold; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel knGridStomp ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, threshold); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, threshold); + } } Grid<T> &me; const T &threshold; @@ -787,7 +962,7 @@ template<class T> struct knPermuteAxes : public KernelBase { run(); } inline void op( - int i, int j, int k, Grid<T> &self, Grid<T> &target, int axis0, int axis1, int axis2) const + int i, int j, int k, Grid<T> &self, Grid<T> &target, int axis0, int axis1, int axis2) { int i0 = axis0 == 0 ? i : (axis0 == 1 ? j : k); int i1 = axis1 == 0 ? i : (axis1 == 1 ? j : k); @@ -819,37 +994,35 @@ template<class T> struct knPermuteAxes : public KernelBase { return axis2; } typedef int type4; - void runMessage() - { - debMsg("Executing kernel knPermuteAxes ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, self, target, axis0, axis1, axis2); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, self, target, axis0, axis1, axis2); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, self, target, axis0, axis1, axis2); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, self, target, axis0, axis1, axis2); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<T> &self; Grid<T> ⌖ int axis0; @@ -864,7 +1037,7 @@ struct knJoinVec : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<Vec3> &a, const Grid<Vec3> &b, bool keepMax) const + inline void op(IndexInt idx, Grid<Vec3> &a, const Grid<Vec3> &b, bool keepMax) { Real a1 = normSquare(a[idx]); Real b1 = normSquare(b[idx]); @@ -885,21 +1058,17 @@ struct knJoinVec : public KernelBase { return keepMax; } typedef bool type2; - void runMessage() - { - debMsg("Executing kernel knJoinVec ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, a, b, keepMax); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, a, b, keepMax); + } } Grid<Vec3> &a; const Grid<Vec3> &b; @@ -912,7 +1081,7 @@ struct knJoinInt : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<int> &a, const Grid<int> &b, bool keepMax) const + inline void op(IndexInt idx, Grid<int> &a, const Grid<int> &b, bool keepMax) { a[idx] = (keepMax) ? max(a[idx], b[idx]) : min(a[idx], b[idx]); } @@ -931,21 +1100,17 @@ struct knJoinInt : public KernelBase { return keepMax; } typedef bool type2; - void runMessage() - { - debMsg("Executing kernel knJoinInt ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, a, b, keepMax); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, a, b, keepMax); + } } Grid<int> &a; const Grid<int> &b; @@ -958,7 +1123,7 @@ struct knJoinReal : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<Real> &a, const Grid<Real> &b, bool keepMax) const + inline void op(IndexInt idx, Grid<Real> &a, const Grid<Real> &b, bool keepMax) { a[idx] = (keepMax) ? max(a[idx], b[idx]) : min(a[idx], b[idx]); } @@ -977,21 +1142,17 @@ struct knJoinReal : public KernelBase { return keepMax; } typedef bool type2; - void runMessage() - { - debMsg("Executing kernel knJoinReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, a, b, keepMax); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, a, b, keepMax); + } } Grid<Real> &a; const Grid<Real> &b; @@ -1218,45 +1379,43 @@ struct knCountCells : public KernelBase { return mask; } typedef Grid<Real> type3; - void runMessage() - { - debMsg("Executing kernel knCountCells ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, flag, bnd, mask, cnt); + +#pragma omp parallel + { + int cnt = 0; +#pragma omp for nowait + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, flag, bnd, mask, cnt); +#pragma omp critical + { + this->cnt += cnt; + } + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, flag, bnd, mask, cnt); +#pragma omp parallel + { + int cnt = 0; +#pragma omp for nowait + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, flag, bnd, mask, cnt); +#pragma omp critical + { + this->cnt += cnt; + } + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, maxY), *this); - } - knCountCells(knCountCells &o, tbb::split) - : KernelBase(o), flags(o.flags), flag(o.flag), bnd(o.bnd), mask(o.mask), cnt(0) - { - } - void join(const knCountCells &o) - { - cnt += o.cnt; - } const FlagGrid &flags; int flag; int bnd; @@ -1408,7 +1567,7 @@ struct knCopyMacToVec3 : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, MACGrid &source, Grid<Vec3> &target) const + inline void op(int i, int j, int k, MACGrid &source, Grid<Vec3> &target) { target(i, j, k) = source(i, j, k); } @@ -1422,37 +1581,35 @@ struct knCopyMacToVec3 : public KernelBase { return target; } typedef Grid<Vec3> type1; - void runMessage() - { - debMsg("Executing kernel knCopyMacToVec3 ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, source, target); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, source, target); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, source, target); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, source, target); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } MACGrid &source; Grid<Vec3> ⌖ }; @@ -1539,7 +1696,7 @@ struct knResampleVec3ToMac : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Vec3> &source, MACGrid &target) const + inline void op(int i, int j, int k, Grid<Vec3> &source, MACGrid &target) { target(i, j, k)[0] = 0.5 * (source(i - 1, j, k)[0] + source(i, j, k))[0]; target(i, j, k)[1] = 0.5 * (source(i, j - 1, k)[1] + source(i, j, k))[1]; @@ -1557,37 +1714,35 @@ struct knResampleVec3ToMac : public KernelBase { return target; } typedef MACGrid type1; - void runMessage() - { - debMsg("Executing kernel knResampleVec3ToMac ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, source, target); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, source, target); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, source, target); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, source, target); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<Vec3> &source; MACGrid ⌖ }; @@ -1636,7 +1791,7 @@ struct knResampleMacToVec3 : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, MACGrid &source, Grid<Vec3> &target) const + inline void op(int i, int j, int k, MACGrid &source, Grid<Vec3> &target) { target(i, j, k) = source.getCentered(i, j, k); } @@ -1650,37 +1805,35 @@ struct knResampleMacToVec3 : public KernelBase { return target; } typedef Grid<Vec3> type1; - void runMessage() - { - debMsg("Executing kernel knResampleMacToVec3 ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, source, target); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, source, target); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, source, target); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, source, target); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } MACGrid &source; Grid<Vec3> ⌖ }; @@ -1729,7 +1882,7 @@ struct knCopyLevelsetToReal : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, LevelsetGrid &source, Grid<Real> &target) const + inline void op(int i, int j, int k, LevelsetGrid &source, Grid<Real> &target) { target(i, j, k) = source(i, j, k); } @@ -1743,37 +1896,35 @@ struct knCopyLevelsetToReal : public KernelBase { return target; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel knCopyLevelsetToReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, source, target); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, source, target); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, source, target); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, source, target); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } LevelsetGrid &source; Grid<Real> ⌖ }; @@ -1834,7 +1985,7 @@ struct knCopyVec3ToReal : public KernelBase { Grid<Vec3> &source, Grid<Real> &targetX, Grid<Real> &targetY, - Grid<Real> &targetZ) const + Grid<Real> &targetZ) { targetX(i, j, k) = source(i, j, k).x; targetY(i, j, k) = source(i, j, k).y; @@ -1860,37 +2011,35 @@ struct knCopyVec3ToReal : public KernelBase { return targetZ; } typedef Grid<Real> type3; - void runMessage() - { - debMsg("Executing kernel knCopyVec3ToReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, source, targetX, targetY, targetZ); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, source, targetX, targetY, targetZ); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, source, targetX, targetY, targetZ); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, source, targetX, targetY, targetZ); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<Vec3> &source; Grid<Real> &targetX; Grid<Real> &targetY; @@ -1958,7 +2107,7 @@ struct knCopyRealToVec3 : public KernelBase { Grid<Real> &sourceX, Grid<Real> &sourceY, Grid<Real> &sourceZ, - Grid<Vec3> &target) const + Grid<Vec3> &target) { target(i, j, k).x = sourceX(i, j, k); target(i, j, k).y = sourceY(i, j, k); @@ -1984,37 +2133,35 @@ struct knCopyRealToVec3 : public KernelBase { return target; } typedef Grid<Vec3> type3; - void runMessage() - { - debMsg("Executing kernel knCopyRealToVec3 ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, sourceX, sourceY, sourceZ, target); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, sourceX, sourceY, sourceZ, target); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, sourceX, sourceY, sourceZ, target); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, sourceX, sourceY, sourceZ, target); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<Real> &sourceX; Grid<Real> &sourceY; Grid<Real> &sourceZ; @@ -2228,7 +2375,7 @@ struct knResetUvGrid : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Vec3> &target, const Vec3 *offset) const + inline void op(int i, int j, int k, Grid<Vec3> &target, const Vec3 *offset) { Vec3 coord = Vec3((Real)i, (Real)j, (Real)k); if (offset) @@ -2245,37 +2392,35 @@ struct knResetUvGrid : public KernelBase { return offset; } typedef Vec3 type1; - void runMessage() - { - debMsg("Executing kernel knResetUvGrid ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, target, offset); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, target, offset); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, target, offset); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, target, offset); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<Vec3> ⌖ const Vec3 *offset; }; @@ -2392,7 +2537,7 @@ template<class T> struct knSetBoundary : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<T> &grid, T value, int w) const + inline void op(int i, int j, int k, Grid<T> &grid, T value, int w) { bool bnd = (i <= w || i >= grid.getSizeX() - 1 - w || j <= w || j >= grid.getSizeY() - 1 - w || (grid.is3D() && (k <= w || k >= grid.getSizeZ() - 1 - w))); @@ -2414,37 +2559,35 @@ template<class T> struct knSetBoundary : public KernelBase { return w; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel knSetBoundary ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, value, w); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, value, w); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, value, w); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, value, w); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<T> &grid; T value; int w; @@ -2461,7 +2604,7 @@ template<class T> struct knSetBoundaryNeumann : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<T> &grid, int w) const + inline void op(int i, int j, int k, Grid<T> &grid, int w) { bool set = false; int si = i, sj = j, sk = k; @@ -2504,37 +2647,35 @@ template<class T> struct knSetBoundaryNeumann : public KernelBase { return w; } typedef int type1; - void runMessage() - { - debMsg("Executing kernel knSetBoundaryNeumann ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, w); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, w); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, w); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, w); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<T> &grid; int w; }; @@ -2552,7 +2693,7 @@ struct knSetBoundaryMAC : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Vec3> &grid, Vec3 value, int w) const + inline void op(int i, int j, int k, Grid<Vec3> &grid, Vec3 value, int w) { if (i <= w || i >= grid.getSizeX() - w || j <= w - 1 || j >= grid.getSizeY() - 1 - w || (grid.is3D() && (k <= w - 1 || k >= grid.getSizeZ() - 1 - w))) @@ -2579,37 +2720,35 @@ struct knSetBoundaryMAC : public KernelBase { return w; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel knSetBoundaryMAC ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, value, w); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, value, w); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, value, w); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, value, w); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<Vec3> &grid; Vec3 value; int w; @@ -2623,7 +2762,7 @@ struct knSetBoundaryMACNorm : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Vec3> &grid, Vec3 value, int w) const + inline void op(int i, int j, int k, Grid<Vec3> &grid, Vec3 value, int w) { if (i <= w || i >= grid.getSizeX() - w) grid(i, j, k).x = value.x; @@ -2647,37 +2786,35 @@ struct knSetBoundaryMACNorm : public KernelBase { return w; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel knSetBoundaryMACNorm ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, value, w); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, value, w); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, value, w); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, value, w); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<Vec3> &grid; Vec3 value; int w; @@ -2730,29 +2867,21 @@ struct knGridTotalSum : public KernelBase { return flags; } typedef FlagGrid type1; - void runMessage() - { - debMsg("Executing kernel knGridTotalSum ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, a, flags, result); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - knGridTotalSum(knGridTotalSum &o, tbb::split) - : KernelBase(o), a(o.a), flags(o.flags), result(0.0) - { - } - void join(const knGridTotalSum &o) - { - result += o.result; + const IndexInt _sz = size; +#pragma omp parallel + { + double result = 0.0; +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, a, flags, result); +#pragma omp critical + { + this->result += result; + } + } } const Grid<Real> &a; FlagGrid *flags; @@ -2783,28 +2912,21 @@ struct knCountFluidCells : public KernelBase { return flags; } typedef FlagGrid type0; - void runMessage() - { - debMsg("Executing kernel knCountFluidCells ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, flags, numEmpty); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - knCountFluidCells(knCountFluidCells &o, tbb::split) : KernelBase(o), flags(o.flags), numEmpty(0) - { - } - void join(const knCountFluidCells &o) - { - numEmpty += o.numEmpty; + const IndexInt _sz = size; +#pragma omp parallel + { + int numEmpty = 0; +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, flags, numEmpty); +#pragma omp critical + { + this->numEmpty += numEmpty; + } + } } FlagGrid &flags; int numEmpty; @@ -2870,7 +2992,7 @@ struct knGetComponent : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, const Grid<Vec3> &source, Grid<Real> &target, int component) const + inline void op(IndexInt idx, const Grid<Vec3> &source, Grid<Real> &target, int component) { target[idx] = source[idx][component]; } @@ -2889,21 +3011,17 @@ struct knGetComponent : public KernelBase { return component; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel knGetComponent ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, source, target, component); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, source, target, component); + } } const Grid<Vec3> &source; Grid<Real> ⌖ @@ -2953,7 +3071,7 @@ struct knSetComponent : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, const Grid<Real> &source, Grid<Vec3> &target, int component) const + inline void op(IndexInt idx, const Grid<Real> &source, Grid<Vec3> &target, int component) { target[idx][component] = source[idx]; } @@ -2972,21 +3090,17 @@ struct knSetComponent : public KernelBase { return component; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel knSetComponent ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, source, target, component); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, source, target, component); + } } const Grid<Real> &source; Grid<Vec3> ⌖ @@ -3327,7 +3441,7 @@ struct knMarkIsolatedFluidCell : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, FlagGrid &flags, const int mark) const + inline void op(IndexInt idx, FlagGrid &flags, const int mark) { if (isIsolatedFluidCell(idx, flags)) flags[idx] = mark; @@ -3342,21 +3456,17 @@ struct knMarkIsolatedFluidCell : public KernelBase { return mark; } typedef int type1; - void runMessage() - { - debMsg("Executing kernel knMarkIsolatedFluidCell ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, flags, mark); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, flags, mark); + } } FlagGrid &flags; const int mark; @@ -3454,6 +3564,156 @@ void PbRegister_copyMACData() } } +template<class T> void Grid<T>::mapToOmp() +{ +#if OPENMP && OPENMP_OFFLOAD + Grid<T> *grid = this; + // printf("Mapping grid (pointer: %p) to device\n", grid); + int size = this->getSizeX() * this->getSizeY() * this->getSizeZ(); + +# pragma omp target enter data map(to \ + : grid, \ + grid->mData [0:size], \ + grid->mSize, \ + grid->mStrideZ, \ + grid->mType, \ + grid->mDx, \ + grid->m3D) +#else + // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and + // -DOPENMP_OFFLOAD=ON" << std::endl; +#endif +} + +template<class T> void Grid<T>::mapAllocOmp() +{ +#if OPENMP && OPENMP_OFFLOAD + Grid<T> *grid = this; + // printf("Mapping grid (pointer: %p) to device\n", grid); + int size = this->getSizeX() * this->getSizeY() * this->getSizeZ(); + +# pragma omp target enter data map(alloc \ + : grid, \ + grid->mData [0:size], \ + grid->mSize, \ + grid->mStrideZ, \ + grid->mType, \ + grid->mDx, \ + grid->m3D) +#else + // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and + // -DOPENMP_OFFLOAD=ON" << std::endl; +#endif +} + +template<class T> void Grid<T>::mapFromOmp() +{ +#if OPENMP && OPENMP_OFFLOAD + Grid<T> *grid = this; + // printf("Mapping grid (pointer: %p) from device\n", grid); + int size = grid->getSizeX() * grid->getSizeY() * grid->getSizeZ(); + +# pragma omp target exit data map(from \ + : grid, \ + grid->mData [0:size], \ + grid->mSize, \ + grid->mStrideZ, \ + grid->mType, \ + grid->mDx, \ + grid->m3D) +#else + // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and + // -DOPENMP_OFFLOAD=ON" << std::endl; +#endif +} + +template<class T> void Grid<T>::mapDeleteOmp() +{ +#if OPENMP && OPENMP_OFFLOAD + Grid<T> *grid = this; + // printf("Mapping grid (pointer: %p) from device\n", grid); + int size = grid->getSizeX() * grid->getSizeY() * grid->getSizeZ(); + +# pragma omp target exit data map(delete \ + : grid, \ + grid->mData [0:size], \ + grid->mSize, \ + grid->mStrideZ, \ + grid->mType, \ + grid->mDx, \ + grid->m3D) +#else + // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and + // -DOPENMP_OFFLOAD=ON" << std::endl; +#endif +} + +template<class T> void Grid<T>::updateFromOmp() +{ +#if OPENMP && OPENMP_OFFLOAD + Grid<T> *grid = this; + // printf("Updating grid (pointer: %p) from device\n", grid); + int size = grid->getSizeX() * grid->getSizeY() * grid->getSizeZ(); + +//#pragma omp target update from(grid, grid->mData[0:size], grid->mSize, grid->mStrideZ) +//#pragma omp target update from(grid) +# pragma omp target update from( \ + grid->mData [0:size], grid->mSize, grid->mStrideZ, grid->mType, grid->mDx, grid->m3D) +#else + // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and + // -DOPENMP_OFFLOAD=ON" << std::endl; +#endif +} + +template<class T> void Grid<T>::updateToOmp() +{ +#if OPENMP && OPENMP_OFFLOAD + Grid<T> *grid = this; + // printf("Updating grid (pointer: %p) from device\n", grid); + int size = grid->getSizeX() * grid->getSizeY() * grid->getSizeZ(); + +//#pragma omp target update to(grid, grid->mData[0:size], grid->mSize, grid->mStrideZ) +//#pragma omp target update to(grid) +# pragma omp target update to( \ + grid->mData [0:size], grid->mSize, grid->mStrideZ, grid->mType, grid->mDx, grid->m3D) +#else + // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and + // -DOPENMP_OFFLOAD=ON" << std::endl; +#endif +} + +// template<class T> void Grid<T>::mapDeclare() { +// #if OPENMP && OPENMP_OFFLOAD +// if (this->getType() & GridBase::TypeReal) { +// printf("Map declare for Real\n"); +// Grid<Real> *grid = (Grid<Real> *)this; +// #pragma omp declare mapper(Grid<Real> grid) map(to: \ +// grid.mData[0:grid.getSizeX()*grid.getSizeY()*grid.getSizeZ()], \ +// grid.mSize, grid.mStrideZ, grid.mType, grid.mDx, grid.m3D) +// #pragma omp target enter data map(to: grid) +// } +// else if (this->getType() & GridBase::TypeInt) { +// printf("Map declare for Int\n"); +// Grid<int> *grid = (Grid<int> *)this; +// #pragma omp declare mapper(Grid<int> grid) map(to: \ +// grid.mData[0:grid.getSizeX()*grid.getSizeY()*grid.getSizeZ()], \ +// grid.mSize, grid.mStrideZ, grid.mType, grid.mDx, grid.m3D) +// #pragma omp target enter data map(to: grid) +// } +// else if (this->getType() & GridBase::TypeVec3) { +// printf("Map declare for Vec3\n"); +// Grid<Vec3> *grid = (Grid<Vec3> *)this; +// #pragma omp declare mapper(Grid<Vec3> grid) map(to: \ +// grid.mData[0:grid.getSizeX()*grid.getSizeY()*grid.getSizeZ()], \ +// grid.mSize, grid.mStrideZ, grid.mType, grid.mDx, grid.m3D) +// #pragma omp target enter data map(to: grid) +// } + +// #else +// //std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and +// -DOPENMP_OFFLOAD=ON" << std::endl; #endif +// } + // explicit instantiation template class Grid<int>; template class Grid<Real>; diff --git a/extern/mantaflow/preprocessed/grid.h b/extern/mantaflow/preprocessed/grid.h index 2c4296e78dd..3f0bcb9dfae 100644 --- a/extern/mantaflow/preprocessed/grid.h +++ b/extern/mantaflow/preprocessed/grid.h @@ -383,13 +383,15 @@ class GridBase : public PbClass { } } - protected: + // TODO (sebbas): Moved attributes to public for now GridType mType; Vec3i mSize; Real mDx; - bool m3D; // precomputed Z shift: to ensure 2D compatibility, always use this instead of sx*sy ! + bool m3D; + // precomputed Z shift: to ensure 2D compatibility, always use this instead of sx*sy ! IndexInt mStrideZ; + protected: public: PbArgs _args; } @@ -401,7 +403,7 @@ class GridBase : public PbClass { template<class T> class Grid : public GridBase { public: //! init new grid, values are set to zero - Grid(FluidSolver *parent, bool show = true, bool sparse = false); + Grid(FluidSolver *parent, bool show = true, bool sparse = false, bool offload = false); static int _W_10(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { PbClass *obj = Pb::objFromPy(_self); @@ -416,7 +418,8 @@ template<class T> class Grid : public GridBase { FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock); bool show = _args.getOpt<bool>("show", 1, true, &_lock); bool sparse = _args.getOpt<bool>("sparse", 2, false, &_lock); - obj = new Grid(parent, show, sparse); + bool offload = _args.getOpt<bool>("offload", 3, false, &_lock); + obj = new Grid(parent, show, sparse, offload); obj->registerObject(_self, &_args); _args.check(); } @@ -490,7 +493,7 @@ template<class T> class Grid : public GridBase { } //! set all cells to zero - void clear(); + void clear(bool isOmp = false); static PyObject *_W_13(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { try { @@ -501,9 +504,10 @@ template<class T> class Grid : public GridBase { PyObject *_retval = nullptr; { ArgLocker _lock; + bool isOmp = _args.getOpt<bool>("isOmp", 0, false, &_lock); pbo->_args.copy(_args); _retval = getPyNone(); - pbo->clear(); + pbo->clear(isOmp); pbo->_args.check(); } pbFinalizePlugin(pbo->getParent(), "Grid::clear", !noTiming); @@ -591,6 +595,11 @@ template<class T> class Grid : public GridBase { { return mSaveSparse; } + //! query if this grid should be saved as a sparse grid + inline bool isOffload() + { + return mOffload; + } //! set data inline void set(int i, int j, int k, T &val) @@ -616,9 +625,8 @@ template<class T> class Grid : public GridBase { return interpol<T>(mData, mSize, mStrideZ, pos); case 2: return interpolCubic<T>(mData, mSize, mStrideZ, pos); - default: - assertMsg(false, "Unknown interpolation order " << order); } + // default: assertMsg(false, "Unknown interpolation order "<<order); } return T(0.); // should never be reached, just to prevent compiler warnings } @@ -628,7 +636,7 @@ template<class T> class Grid : public GridBase { //! content... // Grid<T>& operator=(const Grid<T>& a); //! copy content from other grid (use this one instead of operator= !) - Grid<T> ©From(const Grid<T> &a, bool copyType = true); + Grid<T> ©From(const Grid<T> &a, bool copyType = true, bool isOmp = false); static PyObject *_W_14(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { try { @@ -641,8 +649,9 @@ template<class T> class Grid : public GridBase { ArgLocker _lock; const Grid<T> &a = *_args.getPtr<Grid<T>>("a", 0, &_lock); bool copyType = _args.getOpt<bool>("copyType", 1, true, &_lock); + bool isOmp = _args.getOpt<bool>("isOmp", 2, false, &_lock); pbo->_args.copy(_args); - _retval = toPy(pbo->copyFrom(a, copyType)); + _retval = toPy(pbo->copyFrom(a, copyType, isOmp)); pbo->_args.check(); } pbFinalizePlugin(pbo->getParent(), "Grid::copyFrom", !noTiming); @@ -1276,6 +1285,157 @@ template<class T> class Grid : public GridBase { } } + //! OpenMP data mapping from / to target device + void mapToOmp(); + static PyObject *_W_38(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + { + try { + PbArgs _args(_linargs, _kwds); + Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self)); + bool noTiming = _args.getOpt<bool>("notiming", -1, 0); + pbPreparePlugin(pbo->getParent(), "Grid::mapToOmp", !noTiming); + PyObject *_retval = nullptr; + { + ArgLocker _lock; + pbo->_args.copy(_args); + _retval = getPyNone(); + pbo->mapToOmp(); + pbo->_args.check(); + } + pbFinalizePlugin(pbo->getParent(), "Grid::mapToOmp", !noTiming); + return _retval; + } + catch (std::exception &e) { + pbSetError("Grid::mapToOmp", e.what()); + return 0; + } + } + + void mapFromOmp(); + static PyObject *_W_39(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + { + try { + PbArgs _args(_linargs, _kwds); + Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self)); + bool noTiming = _args.getOpt<bool>("notiming", -1, 0); + pbPreparePlugin(pbo->getParent(), "Grid::mapFromOmp", !noTiming); + PyObject *_retval = nullptr; + { + ArgLocker _lock; + pbo->_args.copy(_args); + _retval = getPyNone(); + pbo->mapFromOmp(); + pbo->_args.check(); + } + pbFinalizePlugin(pbo->getParent(), "Grid::mapFromOmp", !noTiming); + return _retval; + } + catch (std::exception &e) { + pbSetError("Grid::mapFromOmp", e.what()); + return 0; + } + } + + void mapAllocOmp(); + static PyObject *_W_40(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + { + try { + PbArgs _args(_linargs, _kwds); + Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self)); + bool noTiming = _args.getOpt<bool>("notiming", -1, 0); + pbPreparePlugin(pbo->getParent(), "Grid::mapAllocOmp", !noTiming); + PyObject *_retval = nullptr; + { + ArgLocker _lock; + pbo->_args.copy(_args); + _retval = getPyNone(); + pbo->mapAllocOmp(); + pbo->_args.check(); + } + pbFinalizePlugin(pbo->getParent(), "Grid::mapAllocOmp", !noTiming); + return _retval; + } + catch (std::exception &e) { + pbSetError("Grid::mapAllocOmp", e.what()); + return 0; + } + } + + void mapDeleteOmp(); + static PyObject *_W_41(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + { + try { + PbArgs _args(_linargs, _kwds); + Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self)); + bool noTiming = _args.getOpt<bool>("notiming", -1, 0); + pbPreparePlugin(pbo->getParent(), "Grid::mapDeleteOmp", !noTiming); + PyObject *_retval = nullptr; + { + ArgLocker _lock; + pbo->_args.copy(_args); + _retval = getPyNone(); + pbo->mapDeleteOmp(); + pbo->_args.check(); + } + pbFinalizePlugin(pbo->getParent(), "Grid::mapDeleteOmp", !noTiming); + return _retval; + } + catch (std::exception &e) { + pbSetError("Grid::mapDeleteOmp", e.what()); + return 0; + } + } + + void updateToOmp(); + static PyObject *_W_42(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + { + try { + PbArgs _args(_linargs, _kwds); + Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self)); + bool noTiming = _args.getOpt<bool>("notiming", -1, 0); + pbPreparePlugin(pbo->getParent(), "Grid::updateToOmp", !noTiming); + PyObject *_retval = nullptr; + { + ArgLocker _lock; + pbo->_args.copy(_args); + _retval = getPyNone(); + pbo->updateToOmp(); + pbo->_args.check(); + } + pbFinalizePlugin(pbo->getParent(), "Grid::updateToOmp", !noTiming); + return _retval; + } + catch (std::exception &e) { + pbSetError("Grid::updateToOmp", e.what()); + return 0; + } + } + + void updateFromOmp(); + static PyObject *_W_43(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + { + try { + PbArgs _args(_linargs, _kwds); + Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self)); + bool noTiming = _args.getOpt<bool>("notiming", -1, 0); + pbPreparePlugin(pbo->getParent(), "Grid::updateFromOmp", !noTiming); + PyObject *_retval = nullptr; + { + ArgLocker _lock; + pbo->_args.copy(_args); + _retval = getPyNone(); + pbo->updateFromOmp(); + pbo->_args.check(); + } + pbFinalizePlugin(pbo->getParent(), "Grid::updateFromOmp", !noTiming); + return _retval; + } + catch (std::exception &e) { + pbSetError("Grid::updateFromOmp", e.what()); + return 0; + } + } + // c++ only operators template<class S> Grid<T> &operator+=(const Grid<S> &a); template<class S> Grid<T> &operator+=(const S &a); @@ -1299,10 +1459,12 @@ template<class T> class Grid : public GridBase { return mData[index(i, j, k)]; } - protected: + // TODO (sebbas): Moved attributes to public for now T *mData; + protected: bool mExternalData; // True if mData is managed outside of the Fluidsolver bool mSaveSparse; // True if this grid may be cached in a sparse structure + bool mOffload; // True if this grid shall be allocated on an OpenMP offload device too public: PbArgs _args; } @@ -1314,12 +1476,12 @@ template<class T> class Grid : public GridBase { //! Special function for staggered grids class MACGrid : public Grid<Vec3> { public: - MACGrid(FluidSolver *parent, bool show = true, bool sparse = false) - : Grid<Vec3>(parent, show, sparse) + MACGrid(FluidSolver *parent, bool show = true, bool sparse = false, bool offload = false) + : Grid<Vec3>(parent, show, sparse, offload) { mType = (GridType)(TypeMAC | TypeVec3); } - static int _W_38(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + static int _W_44(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { PbClass *obj = Pb::objFromPy(_self); if (obj) @@ -1333,7 +1495,8 @@ class MACGrid : public Grid<Vec3> { FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock); bool show = _args.getOpt<bool>("show", 1, true, &_lock); bool sparse = _args.getOpt<bool>("sparse", 2, false, &_lock); - obj = new MACGrid(parent, show, sparse); + bool offload = _args.getOpt<bool>("offload", 3, false, &_lock); + obj = new MACGrid(parent, show, sparse, offload); obj->registerObject(_self, &_args); _args.check(); } @@ -1392,17 +1555,16 @@ class MACGrid : public Grid<Vec3> { case 1: return interpolComponent<comp>(mData, mSize, mStrideZ, pos); case 2: - return interpolCubicMAC(mData, mSize, mStrideZ, pos)[comp]; // warning - not yet optimized - default: - assertMsg(false, "Unknown interpolation order " << order); - } + return interpolCubicMAC(mData, mSize, mStrideZ, pos)[comp]; + } // warning - not yet optimized + // default: assertMsg(false, "Unknown interpolation order "<<order); } return 0.; // should never be reached, just to prevent compiler warnings } //! set all boundary cells of a MAC grid to certain value (Dirchlet). Respects staggered grid //! locations optionally, only set normal components void setBoundMAC(Vec3 value, int boundaryWidth, bool normalOnly = false); - static PyObject *_W_39(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + static PyObject *_W_45(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { try { PbArgs _args(_linargs, _kwds); @@ -1439,12 +1601,16 @@ class MACGrid : public Grid<Vec3> { //! Special functions for FlagGrid class FlagGrid : public Grid<int> { public: - FlagGrid(FluidSolver *parent, int dim = 3, bool show = true, bool sparse = false) - : Grid<int>(parent, show, sparse) + FlagGrid(FluidSolver *parent, + int dim = 3, + bool show = true, + bool sparse = false, + bool offload = false) + : Grid<int>(parent, show, sparse, offload) { mType = (GridType)(TypeFlags | TypeInt); } - static int _W_40(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + static int _W_46(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { PbClass *obj = Pb::objFromPy(_self); if (obj) @@ -1459,7 +1625,8 @@ class FlagGrid : public Grid<int> { int dim = _args.getOpt<int>("dim", 1, 3, &_lock); bool show = _args.getOpt<bool>("show", 2, true, &_lock); bool sparse = _args.getOpt<bool>("sparse", 3, false, &_lock); - obj = new FlagGrid(parent, dim, show, sparse); + bool offload = _args.getOpt<bool>("offload", 4, false, &_lock); + obj = new FlagGrid(parent, dim, show, sparse, offload); obj->registerObject(_self, &_args); _args.check(); } @@ -1625,7 +1792,7 @@ class FlagGrid : public Grid<int> { const std::string &inflow = " ", const std::string &outflow = " ", Grid<Real> *phiWalls = 0x00); - static PyObject *_W_41(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + static PyObject *_W_47(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { try { PbArgs _args(_linargs, _kwds); @@ -1659,7 +1826,7 @@ class FlagGrid : public Grid<int> { //! set fluid flags inside levelset (liquids) void updateFromLevelset(LevelsetGrid &levelset); - static PyObject *_W_42(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + static PyObject *_W_48(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { try { PbArgs _args(_linargs, _kwds); @@ -1686,7 +1853,7 @@ class FlagGrid : public Grid<int> { //! set all cells (except obs/in/outflow) to type (fluid by default) void fillGrid(int type = TypeFluid); - static PyObject *_W_43(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + static PyObject *_W_49(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { try { PbArgs _args(_linargs, _kwds); @@ -1715,7 +1882,7 @@ class FlagGrid : public Grid<int> { //! warning for large grids! only regular int returned (due to python interface) //! optionally creates mask in RealGrid (1 where flag matches, 0 otherwise) int countCells(int flag, int bnd = 0, Grid<Real> *mask = nullptr); - static PyObject *_W_44(PyObject *_self, PyObject *_linargs, PyObject *_kwds) + static PyObject *_W_50(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { try { PbArgs _args(_linargs, _kwds); @@ -1868,7 +2035,7 @@ template<class T, class S> struct gridAdd : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const + inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) { me[idx] += other[idx]; } @@ -1882,21 +2049,17 @@ template<class T, class S> struct gridAdd : public KernelBase { return other; } typedef Grid<S> type1; - void runMessage() - { - debMsg("Executing kernel gridAdd ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid<T> &me; const Grid<S> &other; @@ -1907,7 +2070,7 @@ template<class T, class S> struct gridSub : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const + inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) { me[idx] -= other[idx]; } @@ -1921,21 +2084,17 @@ template<class T, class S> struct gridSub : public KernelBase { return other; } typedef Grid<S> type1; - void runMessage() - { - debMsg("Executing kernel gridSub ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid<T> &me; const Grid<S> &other; @@ -1946,7 +2105,7 @@ template<class T, class S> struct gridMult : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const + inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) { me[idx] *= other[idx]; } @@ -1960,21 +2119,17 @@ template<class T, class S> struct gridMult : public KernelBase { return other; } typedef Grid<S> type1; - void runMessage() - { - debMsg("Executing kernel gridMult ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid<T> &me; const Grid<S> &other; @@ -1985,7 +2140,7 @@ template<class T, class S> struct gridDiv : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const + inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) { me[idx] /= other[idx]; } @@ -1999,21 +2154,17 @@ template<class T, class S> struct gridDiv : public KernelBase { return other; } typedef Grid<S> type1; - void runMessage() - { - debMsg("Executing kernel gridDiv ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid<T> &me; const Grid<S> &other; @@ -2024,7 +2175,7 @@ template<class T, class S> struct gridAddScalar : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, const S &other) const + inline void op(IndexInt idx, Grid<T> &me, const S &other) { me[idx] += other; } @@ -2038,21 +2189,17 @@ template<class T, class S> struct gridAddScalar : public KernelBase { return other; } typedef S type1; - void runMessage() - { - debMsg("Executing kernel gridAddScalar ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid<T> &me; const S &other; @@ -2063,7 +2210,7 @@ template<class T, class S> struct gridMultScalar : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, const S &other) const + inline void op(IndexInt idx, Grid<T> &me, const S &other) { me[idx] *= other; } @@ -2077,25 +2224,22 @@ template<class T, class S> struct gridMultScalar : public KernelBase { return other; } typedef S type1; - void runMessage() - { - debMsg("Executing kernel gridMultScalar ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid<T> &me; const S &other; }; + template<class T, class S> struct gridScaledAdd : public KernelBase { gridScaledAdd(Grid<T> &me, const Grid<T> &other, const S &factor) : KernelBase(&me, 0), me(me), other(other), factor(factor) @@ -2103,8 +2247,9 @@ template<class T, class S> struct gridScaledAdd : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &me, const Grid<T> &other, const S &factor) const + inline void op(int i, int j, int k, Grid<T> &me, const Grid<T> &other, const S &factor) { + const IndexInt idx = me.index(i, j, k); me[idx] += factor * other[idx]; } inline Grid<T> &getArg0() @@ -2122,21 +2267,35 @@ template<class T, class S> struct gridScaledAdd : public KernelBase { return factor; } typedef S type2; - void runMessage() - { - debMsg("Executing kernel gridScaledAdd ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other, factor); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const int _maxX = maxX; + const int _maxY = maxY; + if (maxZ > 1) { + Grid<T> &me = getArg0(); + const Grid<T> &other = getArg1(); + const S &factor = getArg2(); +#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1) + { + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, me, other, factor); + } + } + else { + const int k = 0; + Grid<T> &me = getArg0(); + const Grid<T> &other = getArg1(); + const S &factor = getArg2(); +#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1) + { + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, me, other, factor); + } + } } Grid<T> &me; const Grid<T> &other; @@ -2149,7 +2308,7 @@ template<class T> struct gridSetConst : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<T> &grid, T value) const + inline void op(IndexInt idx, Grid<T> &grid, T value) { grid[idx] = value; } @@ -2163,21 +2322,17 @@ template<class T> struct gridSetConst : public KernelBase { return value; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel gridSetConst ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, grid, value); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, grid, value); + } } Grid<T> &grid; T value; @@ -2279,7 +2434,7 @@ template<class S> struct knInterpolateGridTempl : public KernelBase { const Grid<S> &source, const Vec3 &sourceFactor, Vec3 offset, - int orderSpace = 1) const + int orderSpace = 1) { Vec3 pos = Vec3(i, j, k) * sourceFactor + offset; if (!source.is3D()) @@ -2311,37 +2466,35 @@ template<class S> struct knInterpolateGridTempl : public KernelBase { return orderSpace; } typedef int type4; - void runMessage() - { - debMsg("Executing kernel knInterpolateGridTempl ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, target, source, sourceFactor, offset, orderSpace); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, target, source, sourceFactor, offset, orderSpace); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, target, source, sourceFactor, offset, orderSpace); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, target, source, sourceFactor, offset, orderSpace); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<S> ⌖ const Grid<S> &source; const Vec3 &sourceFactor; diff --git a/extern/mantaflow/preprocessed/grid.h.reg.cpp b/extern/mantaflow/preprocessed/grid.h.reg.cpp index 2c95c002dad..1f3a80903a4 100644 --- a/extern/mantaflow/preprocessed/grid.h.reg.cpp +++ b/extern/mantaflow/preprocessed/grid.h.reg.cpp @@ -8,11 +8,11 @@ namespace Manta { #ifdef _C_FlagGrid static const Pb::Register _R_26("FlagGrid", "FlagGrid", "Grid<int>"); template<> const char *Namify<FlagGrid>::S = "FlagGrid"; -static const Pb::Register _R_27("FlagGrid", "FlagGrid", FlagGrid::_W_40); -static const Pb::Register _R_28("FlagGrid", "initDomain", FlagGrid::_W_41); -static const Pb::Register _R_29("FlagGrid", "updateFromLevelset", FlagGrid::_W_42); -static const Pb::Register _R_30("FlagGrid", "fillGrid", FlagGrid::_W_43); -static const Pb::Register _R_31("FlagGrid", "countCells", FlagGrid::_W_44); +static const Pb::Register _R_27("FlagGrid", "FlagGrid", FlagGrid::_W_46); +static const Pb::Register _R_28("FlagGrid", "initDomain", FlagGrid::_W_47); +static const Pb::Register _R_29("FlagGrid", "updateFromLevelset", FlagGrid::_W_48); +static const Pb::Register _R_30("FlagGrid", "fillGrid", FlagGrid::_W_49); +static const Pb::Register _R_31("FlagGrid", "countCells", FlagGrid::_W_50); #endif #ifdef _C_Grid static const Pb::Register _R_32("Grid<int>", "Grid<int>", "GridBase"); @@ -45,86 +45,104 @@ static const Pb::Register _R_57("Grid<int>", "setBound", Grid<int>::_W_34); static const Pb::Register _R_58("Grid<int>", "setBoundNeumann", Grid<int>::_W_35); static const Pb::Register _R_59("Grid<int>", "getDataPointer", Grid<int>::_W_36); static const Pb::Register _R_60("Grid<int>", "printGrid", Grid<int>::_W_37); -static const Pb::Register _R_61("Grid<Real>", "Grid<Real>", "GridBase"); +static const Pb::Register _R_61("Grid<int>", "mapToOmp", Grid<int>::_W_38); +static const Pb::Register _R_62("Grid<int>", "mapFromOmp", Grid<int>::_W_39); +static const Pb::Register _R_63("Grid<int>", "mapAllocOmp", Grid<int>::_W_40); +static const Pb::Register _R_64("Grid<int>", "mapDeleteOmp", Grid<int>::_W_41); +static const Pb::Register _R_65("Grid<int>", "updateToOmp", Grid<int>::_W_42); +static const Pb::Register _R_66("Grid<int>", "updateFromOmp", Grid<int>::_W_43); +static const Pb::Register _R_67("Grid<Real>", "Grid<Real>", "GridBase"); template<> const char *Namify<Grid<Real>>::S = "Grid<Real>"; -static const Pb::Register _R_62("Grid<Real>", "Grid", Grid<Real>::_W_10); -static const Pb::Register _R_63("Grid<Real>", "save", Grid<Real>::_W_11); -static const Pb::Register _R_64("Grid<Real>", "load", Grid<Real>::_W_12); -static const Pb::Register _R_65("Grid<Real>", "clear", Grid<Real>::_W_13); -static const Pb::Register _R_66("Grid<Real>", "copyFrom", Grid<Real>::_W_14); -static const Pb::Register _R_67("Grid<Real>", "getGridType", Grid<Real>::_W_15); -static const Pb::Register _R_68("Grid<Real>", "add", Grid<Real>::_W_16); -static const Pb::Register _R_69("Grid<Real>", "sub", Grid<Real>::_W_17); -static const Pb::Register _R_70("Grid<Real>", "setConst", Grid<Real>::_W_18); -static const Pb::Register _R_71("Grid<Real>", "addConst", Grid<Real>::_W_19); -static const Pb::Register _R_72("Grid<Real>", "addScaled", Grid<Real>::_W_20); -static const Pb::Register _R_73("Grid<Real>", "mult", Grid<Real>::_W_21); -static const Pb::Register _R_74("Grid<Real>", "multConst", Grid<Real>::_W_22); -static const Pb::Register _R_75("Grid<Real>", "safeDivide", Grid<Real>::_W_23); -static const Pb::Register _R_76("Grid<Real>", "clamp", Grid<Real>::_W_24); -static const Pb::Register _R_77("Grid<Real>", "stomp", Grid<Real>::_W_25); -static const Pb::Register _R_78("Grid<Real>", "permuteAxes", Grid<Real>::_W_26); -static const Pb::Register _R_79("Grid<Real>", "permuteAxesCopyToGrid", Grid<Real>::_W_27); -static const Pb::Register _R_80("Grid<Real>", "join", Grid<Real>::_W_28); -static const Pb::Register _R_81("Grid<Real>", "getMaxAbs", Grid<Real>::_W_29); -static const Pb::Register _R_82("Grid<Real>", "getMax", Grid<Real>::_W_30); -static const Pb::Register _R_83("Grid<Real>", "getMin", Grid<Real>::_W_31); -static const Pb::Register _R_84("Grid<Real>", "getL1", Grid<Real>::_W_32); -static const Pb::Register _R_85("Grid<Real>", "getL2", Grid<Real>::_W_33); -static const Pb::Register _R_86("Grid<Real>", "setBound", Grid<Real>::_W_34); -static const Pb::Register _R_87("Grid<Real>", "setBoundNeumann", Grid<Real>::_W_35); -static const Pb::Register _R_88("Grid<Real>", "getDataPointer", Grid<Real>::_W_36); -static const Pb::Register _R_89("Grid<Real>", "printGrid", Grid<Real>::_W_37); -static const Pb::Register _R_90("Grid<Vec3>", "Grid<Vec3>", "GridBase"); +static const Pb::Register _R_68("Grid<Real>", "Grid", Grid<Real>::_W_10); +static const Pb::Register _R_69("Grid<Real>", "save", Grid<Real>::_W_11); +static const Pb::Register _R_70("Grid<Real>", "load", Grid<Real>::_W_12); +static const Pb::Register _R_71("Grid<Real>", "clear", Grid<Real>::_W_13); +static const Pb::Register _R_72("Grid<Real>", "copyFrom", Grid<Real>::_W_14); +static const Pb::Register _R_73("Grid<Real>", "getGridType", Grid<Real>::_W_15); +static const Pb::Register _R_74("Grid<Real>", "add", Grid<Real>::_W_16); +static const Pb::Register _R_75("Grid<Real>", "sub", Grid<Real>::_W_17); +static const Pb::Register _R_76("Grid<Real>", "setConst", Grid<Real>::_W_18); +static const Pb::Register _R_77("Grid<Real>", "addConst", Grid<Real>::_W_19); +static const Pb::Register _R_78("Grid<Real>", "addScaled", Grid<Real>::_W_20); +static const Pb::Register _R_79("Grid<Real>", "mult", Grid<Real>::_W_21); +static const Pb::Register _R_80("Grid<Real>", "multConst", Grid<Real>::_W_22); +static const Pb::Register _R_81("Grid<Real>", "safeDivide", Grid<Real>::_W_23); +static const Pb::Register _R_82("Grid<Real>", "clamp", Grid<Real>::_W_24); +static const Pb::Register _R_83("Grid<Real>", "stomp", Grid<Real>::_W_25); +static const Pb::Register _R_84("Grid<Real>", "permuteAxes", Grid<Real>::_W_26); +static const Pb::Register _R_85("Grid<Real>", "permuteAxesCopyToGrid", Grid<Real>::_W_27); +static const Pb::Register _R_86("Grid<Real>", "join", Grid<Real>::_W_28); +static const Pb::Register _R_87("Grid<Real>", "getMaxAbs", Grid<Real>::_W_29); +static const Pb::Register _R_88("Grid<Real>", "getMax", Grid<Real>::_W_30); +static const Pb::Register _R_89("Grid<Real>", "getMin", Grid<Real>::_W_31); +static const Pb::Register _R_90("Grid<Real>", "getL1", Grid<Real>::_W_32); +static const Pb::Register _R_91("Grid<Real>", "getL2", Grid<Real>::_W_33); +static const Pb::Register _R_92("Grid<Real>", "setBound", Grid<Real>::_W_34); +static const Pb::Register _R_93("Grid<Real>", "setBoundNeumann", Grid<Real>::_W_35); +static const Pb::Register _R_94("Grid<Real>", "getDataPointer", Grid<Real>::_W_36); +static const Pb::Register _R_95("Grid<Real>", "printGrid", Grid<Real>::_W_37); +static const Pb::Register _R_96("Grid<Real>", "mapToOmp", Grid<Real>::_W_38); +static const Pb::Register _R_97("Grid<Real>", "mapFromOmp", Grid<Real>::_W_39); +static const Pb::Register _R_98("Grid<Real>", "mapAllocOmp", Grid<Real>::_W_40); +static const Pb::Register _R_99("Grid<Real>", "mapDeleteOmp", Grid<Real>::_W_41); +static const Pb::Register _R_100("Grid<Real>", "updateToOmp", Grid<Real>::_W_42); +static const Pb::Register _R_101("Grid<Real>", "updateFromOmp", Grid<Real>::_W_43); +static const Pb::Register _R_102("Grid<Vec3>", "Grid<Vec3>", "GridBase"); template<> const char *Namify<Grid<Vec3>>::S = "Grid<Vec3>"; -static const Pb::Register _R_91("Grid<Vec3>", "Grid", Grid<Vec3>::_W_10); -static const Pb::Register _R_92("Grid<Vec3>", "save", Grid<Vec3>::_W_11); -static const Pb::Register _R_93("Grid<Vec3>", "load", Grid<Vec3>::_W_12); -static const Pb::Register _R_94("Grid<Vec3>", "clear", Grid<Vec3>::_W_13); -static const Pb::Register _R_95("Grid<Vec3>", "copyFrom", Grid<Vec3>::_W_14); -static const Pb::Register _R_96("Grid<Vec3>", "getGridType", Grid<Vec3>::_W_15); -static const Pb::Register _R_97("Grid<Vec3>", "add", Grid<Vec3>::_W_16); -static const Pb::Register _R_98("Grid<Vec3>", "sub", Grid<Vec3>::_W_17); -static const Pb::Register _R_99("Grid<Vec3>", "setConst", Grid<Vec3>::_W_18); -static const Pb::Register _R_100("Grid<Vec3>", "addConst", Grid<Vec3>::_W_19); -static const Pb::Register _R_101("Grid<Vec3>", "addScaled", Grid<Vec3>::_W_20); -static const Pb::Register _R_102("Grid<Vec3>", "mult", Grid<Vec3>::_W_21); -static const Pb::Register _R_103("Grid<Vec3>", "multConst", Grid<Vec3>::_W_22); -static const Pb::Register _R_104("Grid<Vec3>", "safeDivide", Grid<Vec3>::_W_23); -static const Pb::Register _R_105("Grid<Vec3>", "clamp", Grid<Vec3>::_W_24); -static const Pb::Register _R_106("Grid<Vec3>", "stomp", Grid<Vec3>::_W_25); -static const Pb::Register _R_107("Grid<Vec3>", "permuteAxes", Grid<Vec3>::_W_26); -static const Pb::Register _R_108("Grid<Vec3>", "permuteAxesCopyToGrid", Grid<Vec3>::_W_27); -static const Pb::Register _R_109("Grid<Vec3>", "join", Grid<Vec3>::_W_28); -static const Pb::Register _R_110("Grid<Vec3>", "getMaxAbs", Grid<Vec3>::_W_29); -static const Pb::Register _R_111("Grid<Vec3>", "getMax", Grid<Vec3>::_W_30); -static const Pb::Register _R_112("Grid<Vec3>", "getMin", Grid<Vec3>::_W_31); -static const Pb::Register _R_113("Grid<Vec3>", "getL1", Grid<Vec3>::_W_32); -static const Pb::Register _R_114("Grid<Vec3>", "getL2", Grid<Vec3>::_W_33); -static const Pb::Register _R_115("Grid<Vec3>", "setBound", Grid<Vec3>::_W_34); -static const Pb::Register _R_116("Grid<Vec3>", "setBoundNeumann", Grid<Vec3>::_W_35); -static const Pb::Register _R_117("Grid<Vec3>", "getDataPointer", Grid<Vec3>::_W_36); -static const Pb::Register _R_118("Grid<Vec3>", "printGrid", Grid<Vec3>::_W_37); +static const Pb::Register _R_103("Grid<Vec3>", "Grid", Grid<Vec3>::_W_10); +static const Pb::Register _R_104("Grid<Vec3>", "save", Grid<Vec3>::_W_11); +static const Pb::Register _R_105("Grid<Vec3>", "load", Grid<Vec3>::_W_12); +static const Pb::Register _R_106("Grid<Vec3>", "clear", Grid<Vec3>::_W_13); +static const Pb::Register _R_107("Grid<Vec3>", "copyFrom", Grid<Vec3>::_W_14); +static const Pb::Register _R_108("Grid<Vec3>", "getGridType", Grid<Vec3>::_W_15); +static const Pb::Register _R_109("Grid<Vec3>", "add", Grid<Vec3>::_W_16); +static const Pb::Register _R_110("Grid<Vec3>", "sub", Grid<Vec3>::_W_17); +static const Pb::Register _R_111("Grid<Vec3>", "setConst", Grid<Vec3>::_W_18); +static const Pb::Register _R_112("Grid<Vec3>", "addConst", Grid<Vec3>::_W_19); +static const Pb::Register _R_113("Grid<Vec3>", "addScaled", Grid<Vec3>::_W_20); +static const Pb::Register _R_114("Grid<Vec3>", "mult", Grid<Vec3>::_W_21); +static const Pb::Register _R_115("Grid<Vec3>", "multConst", Grid<Vec3>::_W_22); +static const Pb::Register _R_116("Grid<Vec3>", "safeDivide", Grid<Vec3>::_W_23); +static const Pb::Register _R_117("Grid<Vec3>", "clamp", Grid<Vec3>::_W_24); +static const Pb::Register _R_118("Grid<Vec3>", "stomp", Grid<Vec3>::_W_25); +static const Pb::Register _R_119("Grid<Vec3>", "permuteAxes", Grid<Vec3>::_W_26); +static const Pb::Register _R_120("Grid<Vec3>", "permuteAxesCopyToGrid", Grid<Vec3>::_W_27); +static const Pb::Register _R_121("Grid<Vec3>", "join", Grid<Vec3>::_W_28); +static const Pb::Register _R_122("Grid<Vec3>", "getMaxAbs", Grid<Vec3>::_W_29); +static const Pb::Register _R_123("Grid<Vec3>", "getMax", Grid<Vec3>::_W_30); +static const Pb::Register _R_124("Grid<Vec3>", "getMin", Grid<Vec3>::_W_31); +static const Pb::Register _R_125("Grid<Vec3>", "getL1", Grid<Vec3>::_W_32); +static const Pb::Register _R_126("Grid<Vec3>", "getL2", Grid<Vec3>::_W_33); +static const Pb::Register _R_127("Grid<Vec3>", "setBound", Grid<Vec3>::_W_34); +static const Pb::Register _R_128("Grid<Vec3>", "setBoundNeumann", Grid<Vec3>::_W_35); +static const Pb::Register _R_129("Grid<Vec3>", "getDataPointer", Grid<Vec3>::_W_36); +static const Pb::Register _R_130("Grid<Vec3>", "printGrid", Grid<Vec3>::_W_37); +static const Pb::Register _R_131("Grid<Vec3>", "mapToOmp", Grid<Vec3>::_W_38); +static const Pb::Register _R_132("Grid<Vec3>", "mapFromOmp", Grid<Vec3>::_W_39); +static const Pb::Register _R_133("Grid<Vec3>", "mapAllocOmp", Grid<Vec3>::_W_40); +static const Pb::Register _R_134("Grid<Vec3>", "mapDeleteOmp", Grid<Vec3>::_W_41); +static const Pb::Register _R_135("Grid<Vec3>", "updateToOmp", Grid<Vec3>::_W_42); +static const Pb::Register _R_136("Grid<Vec3>", "updateFromOmp", Grid<Vec3>::_W_43); #endif #ifdef _C_GridBase -static const Pb::Register _R_119("GridBase", "GridBase", "PbClass"); +static const Pb::Register _R_137("GridBase", "GridBase", "PbClass"); template<> const char *Namify<GridBase>::S = "GridBase"; -static const Pb::Register _R_120("GridBase", "GridBase", GridBase::_W_0); -static const Pb::Register _R_121("GridBase", "getSizeX", GridBase::_W_1); -static const Pb::Register _R_122("GridBase", "getSizeY", GridBase::_W_2); -static const Pb::Register _R_123("GridBase", "getSizeZ", GridBase::_W_3); -static const Pb::Register _R_124("GridBase", "getSize", GridBase::_W_4); -static const Pb::Register _R_125("GridBase", "is3D", GridBase::_W_5); -static const Pb::Register _R_126("GridBase", "is4D", GridBase::_W_6); -static const Pb::Register _R_127("GridBase", "getSizeT", GridBase::_W_7); -static const Pb::Register _R_128("GridBase", "getStrideT", GridBase::_W_8); -static const Pb::Register _R_129("GridBase", "setName", GridBase::_W_9); +static const Pb::Register _R_138("GridBase", "GridBase", GridBase::_W_0); +static const Pb::Register _R_139("GridBase", "getSizeX", GridBase::_W_1); +static const Pb::Register _R_140("GridBase", "getSizeY", GridBase::_W_2); +static const Pb::Register _R_141("GridBase", "getSizeZ", GridBase::_W_3); +static const Pb::Register _R_142("GridBase", "getSize", GridBase::_W_4); +static const Pb::Register _R_143("GridBase", "is3D", GridBase::_W_5); +static const Pb::Register _R_144("GridBase", "is4D", GridBase::_W_6); +static const Pb::Register _R_145("GridBase", "getSizeT", GridBase::_W_7); +static const Pb::Register _R_146("GridBase", "getStrideT", GridBase::_W_8); +static const Pb::Register _R_147("GridBase", "setName", GridBase::_W_9); #endif #ifdef _C_MACGrid -static const Pb::Register _R_130("MACGrid", "MACGrid", "Grid<Vec3>"); +static const Pb::Register _R_148("MACGrid", "MACGrid", "Grid<Vec3>"); template<> const char *Namify<MACGrid>::S = "MACGrid"; -static const Pb::Register _R_131("MACGrid", "MACGrid", MACGrid::_W_38); -static const Pb::Register _R_132("MACGrid", "setBoundMAC", MACGrid::_W_39); +static const Pb::Register _R_149("MACGrid", "MACGrid", MACGrid::_W_44); +static const Pb::Register _R_150("MACGrid", "setBoundMAC", MACGrid::_W_45); #endif static const Pb::Register _R_7("GridType_TypeNone", 0); static const Pb::Register _R_8("GridType_TypeReal", 1); @@ -255,6 +273,24 @@ void PbRegister_file_7() KEEP_UNUSED(_R_130); KEEP_UNUSED(_R_131); KEEP_UNUSED(_R_132); + KEEP_UNUSED(_R_133); + KEEP_UNUSED(_R_134); + KEEP_UNUSED(_R_135); + KEEP_UNUSED(_R_136); + KEEP_UNUSED(_R_137); + KEEP_UNUSED(_R_138); + KEEP_UNUSED(_R_139); + KEEP_UNUSED(_R_140); + KEEP_UNUSED(_R_141); + KEEP_UNUSED(_R_142); + KEEP_UNUSED(_R_143); + KEEP_UNUSED(_R_144); + KEEP_UNUSED(_R_145); + KEEP_UNUSED(_R_146); + KEEP_UNUSED(_R_147); + KEEP_UNUSED(_R_148); + KEEP_UNUSED(_R_149); + KEEP_UNUSED(_R_150); } } } // namespace Manta
\ No newline at end of file diff --git a/extern/mantaflow/preprocessed/grid4d.cpp b/extern/mantaflow/preprocessed/grid4d.cpp index 72bd3a6fe50..a896b003736 100644 --- a/extern/mantaflow/preprocessed/grid4d.cpp +++ b/extern/mantaflow/preprocessed/grid4d.cpp @@ -179,29 +179,21 @@ struct kn4dMinReal : public KernelBase { return val; } typedef Grid4d<Real> type0; - void runMessage() - { - debMsg("Executing kernel kn4dMinReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, minVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - kn4dMinReal(kn4dMinReal &o, tbb::split) - : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max()) - { - } - void join(const kn4dMinReal &o) - { - minVal = min(minVal, o.minVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real minVal = std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, minVal); +#pragma omp critical + { + this->minVal = min(minVal, this->minVal); + } + } } Grid4d<Real> &val; Real minVal; @@ -234,29 +226,21 @@ struct kn4dMaxReal : public KernelBase { return val; } typedef Grid4d<Real> type0; - void runMessage() - { - debMsg("Executing kernel kn4dMaxReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, maxVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - kn4dMaxReal(kn4dMaxReal &o, tbb::split) - : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max()) - { - } - void join(const kn4dMaxReal &o) - { - maxVal = max(maxVal, o.maxVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real maxVal = -std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, maxVal); +#pragma omp critical + { + this->maxVal = max(maxVal, this->maxVal); + } + } } Grid4d<Real> &val; Real maxVal; @@ -289,29 +273,21 @@ struct kn4dMinInt : public KernelBase { return val; } typedef Grid4d<int> type0; - void runMessage() - { - debMsg("Executing kernel kn4dMinInt ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, minVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - kn4dMinInt(kn4dMinInt &o, tbb::split) - : KernelBase(o), val(o.val), minVal(std::numeric_limits<int>::max()) - { - } - void join(const kn4dMinInt &o) - { - minVal = min(minVal, o.minVal); + const IndexInt _sz = size; +#pragma omp parallel + { + int minVal = std::numeric_limits<int>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, minVal); +#pragma omp critical + { + this->minVal = min(minVal, this->minVal); + } + } } Grid4d<int> &val; int minVal; @@ -344,29 +320,21 @@ struct kn4dMaxInt : public KernelBase { return val; } typedef Grid4d<int> type0; - void runMessage() - { - debMsg("Executing kernel kn4dMaxInt ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, maxVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - kn4dMaxInt(kn4dMaxInt &o, tbb::split) - : KernelBase(o), val(o.val), maxVal(std::numeric_limits<int>::min()) - { - } - void join(const kn4dMaxInt &o) - { - maxVal = max(maxVal, o.maxVal); + const IndexInt _sz = size; +#pragma omp parallel + { + int maxVal = std::numeric_limits<int>::min(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, maxVal); +#pragma omp critical + { + this->maxVal = max(maxVal, this->maxVal); + } + } } Grid4d<int> &val; int maxVal; @@ -400,29 +368,21 @@ template<class VEC> struct kn4dMinVec : public KernelBase { return val; } typedef Grid4d<VEC> type0; - void runMessage() - { - debMsg("Executing kernel kn4dMinVec ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, minVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - kn4dMinVec(kn4dMinVec &o, tbb::split) - : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max()) - { - } - void join(const kn4dMinVec &o) - { - minVal = min(minVal, o.minVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real minVal = std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, minVal); +#pragma omp critical + { + this->minVal = min(minVal, this->minVal); + } + } } Grid4d<VEC> &val; Real minVal; @@ -456,29 +416,21 @@ template<class VEC> struct kn4dMaxVec : public KernelBase { return val; } typedef Grid4d<VEC> type0; - void runMessage() - { - debMsg("Executing kernel kn4dMaxVec ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, maxVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - kn4dMaxVec(kn4dMaxVec &o, tbb::split) - : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max()) - { - } - void join(const kn4dMaxVec &o) - { - maxVal = max(maxVal, o.maxVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real maxVal = -std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, maxVal); +#pragma omp critical + { + this->maxVal = max(maxVal, this->maxVal); + } + } } Grid4d<VEC> &val; Real maxVal; @@ -507,7 +459,7 @@ template<class T> struct kn4dSetConstReal : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, T val) const + inline void op(IndexInt idx, Grid4d<T> &me, T val) { me[idx] = val; } @@ -521,21 +473,17 @@ template<class T> struct kn4dSetConstReal : public KernelBase { return val; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel kn4dSetConstReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, val); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, val); + } } Grid4d<T> &me; T val; @@ -546,7 +494,7 @@ template<class T> struct kn4dAddConstReal : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, T val) const + inline void op(IndexInt idx, Grid4d<T> &me, T val) { me[idx] += val; } @@ -560,21 +508,17 @@ template<class T> struct kn4dAddConstReal : public KernelBase { return val; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel kn4dAddConstReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, val); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, val); + } } Grid4d<T> &me; T val; @@ -585,7 +529,7 @@ template<class T> struct kn4dMultConst : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, T val) const + inline void op(IndexInt idx, Grid4d<T> &me, T val) { me[idx] *= val; } @@ -599,21 +543,17 @@ template<class T> struct kn4dMultConst : public KernelBase { return val; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel kn4dMultConst ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, val); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, val); + } } Grid4d<T> &me; T val; @@ -624,7 +564,7 @@ template<class T> struct kn4dClamp : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, T min, T max) const + inline void op(IndexInt idx, Grid4d<T> &me, T min, T max) { me[idx] = clamp(me[idx], min, max); } @@ -643,21 +583,17 @@ template<class T> struct kn4dClamp : public KernelBase { return max; } typedef T type2; - void runMessage() - { - debMsg("Executing kernel kn4dClamp ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, min, max); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, min, max); + } } Grid4d<T> &me; T min; @@ -788,7 +724,7 @@ struct knGetComp4d : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, const Grid4d<Vec4> &src, Grid4d<Real> &dst, int c) const + inline void op(IndexInt idx, const Grid4d<Vec4> &src, Grid4d<Real> &dst, int c) { dst[idx] = src[idx][c]; } @@ -807,21 +743,17 @@ struct knGetComp4d : public KernelBase { return c; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel knGetComp4d ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, src, dst, c); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, src, dst, c); + } } const Grid4d<Vec4> &src; Grid4d<Real> &dst; @@ -835,7 +767,7 @@ struct knSetComp4d : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, const Grid4d<Real> &src, Grid4d<Vec4> &dst, int c) const + inline void op(IndexInt idx, const Grid4d<Real> &src, Grid4d<Vec4> &dst, int c) { dst[idx][c] = src[idx]; } @@ -854,21 +786,17 @@ struct knSetComp4d : public KernelBase { return c; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel knSetComp4d ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, src, dst, c); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, src, dst, c); + } } const Grid4d<Real> &src; Grid4d<Vec4> &dst; @@ -959,7 +887,7 @@ template<class T> struct knSetBnd4d : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, int t, Grid4d<T> &grid, T value, int w) const + inline void op(int i, int j, int k, int t, Grid4d<T> &grid, T value, int w) { bool bnd = (i <= w || i >= grid.getSizeX() - 1 - w || j <= w || j >= grid.getSizeY() - 1 - w || k <= w || k >= grid.getSizeZ() - 1 - w || t <= w || t >= grid.getSizeT() - 1 - w); @@ -981,50 +909,47 @@ template<class T> struct knSetBnd4d : public KernelBase { return w; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel knSetBnd4d ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ - << " " - " t " - << minT << " - " << maxT, - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { + const int _maxX = maxX; + const int _maxY = maxY; if (maxT > 1) { - for (int t = __r.begin(); t != (int)__r.end(); t++) - for (int k = 0; k < maxZ; k++) - for (int j = 0; j < maxY; j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, grid, value, w); + const int _maxZ = maxZ; +#pragma omp parallel + { + +#pragma omp for + for (int t = 0; t < maxT; t++) + for (int k = 0; k < _maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, grid, value, w); + } } else if (maxZ > 1) { const int t = 0; - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < maxY; j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, grid, value, w); +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, grid, value, w); + } } else { const int t = 0; const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, grid, value, w); - } - } - void run() - { - if (maxT > 1) { - tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this); - } - else if (maxZ > 1) { - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - } - else { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, grid, value, w); + } } } Grid4d<T> &grid; @@ -1043,7 +968,7 @@ template<class T> struct knSetBnd4dNeumann : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, int t, Grid4d<T> &grid, int w) const + inline void op(int i, int j, int k, int t, Grid4d<T> &grid, int w) { bool set = false; int si = i, sj = j, sk = k, st = t; @@ -1092,50 +1017,47 @@ template<class T> struct knSetBnd4dNeumann : public KernelBase { return w; } typedef int type1; - void runMessage() - { - debMsg("Executing kernel knSetBnd4dNeumann ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ - << " " - " t " - << minT << " - " << maxT, - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { + const int _maxX = maxX; + const int _maxY = maxY; if (maxT > 1) { - for (int t = __r.begin(); t != (int)__r.end(); t++) - for (int k = 0; k < maxZ; k++) - for (int j = 0; j < maxY; j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, grid, w); + const int _maxZ = maxZ; +#pragma omp parallel + { + +#pragma omp for + for (int t = 0; t < maxT; t++) + for (int k = 0; k < _maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, grid, w); + } } else if (maxZ > 1) { const int t = 0; - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < maxY; j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, grid, w); +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, grid, w); + } } else { const int t = 0; const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, grid, w); - } - } - void run() - { - if (maxT > 1) { - tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this); - } - else if (maxZ > 1) { - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - } - else { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, grid, w); + } } } Grid4d<T> &grid; @@ -1329,7 +1251,7 @@ template<class S> struct knSetRegion4d : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, int t, Grid4d<S> &dst, Vec4 start, Vec4 end, S value) const + inline void op(int i, int j, int k, int t, Grid4d<S> &dst, Vec4 start, Vec4 end, S value) { Vec4 p(i, j, k, t); for (int c = 0; c < 4; ++c) @@ -1357,50 +1279,47 @@ template<class S> struct knSetRegion4d : public KernelBase { return value; } typedef S type3; - void runMessage() - { - debMsg("Executing kernel knSetRegion4d ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ - << " " - " t " - << minT << " - " << maxT, - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { + const int _maxX = maxX; + const int _maxY = maxY; if (maxT > 1) { - for (int t = __r.begin(); t != (int)__r.end(); t++) - for (int k = 0; k < maxZ; k++) - for (int j = 0; j < maxY; j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, dst, start, end, value); + const int _maxZ = maxZ; +#pragma omp parallel + { + +#pragma omp for + for (int t = 0; t < maxT; t++) + for (int k = 0; k < _maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, dst, start, end, value); + } } else if (maxZ > 1) { const int t = 0; - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < maxY; j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, dst, start, end, value); +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, dst, start, end, value); + } } else { const int t = 0; const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, dst, start, end, value); - } - } - void run() - { - if (maxT > 1) { - tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this); - } - else if (maxZ > 1) { - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - } - else { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, dst, start, end, value); + } } } Grid4d<S> &dst; @@ -1619,7 +1538,7 @@ template<class S> struct knInterpol4d : public KernelBase { Grid4d<S> &target, Grid4d<S> &source, const Vec4 &srcFac, - const Vec4 &offset) const + const Vec4 &offset) { Vec4 pos = Vec4(i, j, k, t) * srcFac + offset; target(i, j, k, t) = source.getInterpolated(pos); @@ -1644,50 +1563,47 @@ template<class S> struct knInterpol4d : public KernelBase { return offset; } typedef Vec4 type3; - void runMessage() - { - debMsg("Executing kernel knInterpol4d ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ - << " " - " t " - << minT << " - " << maxT, - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { + const int _maxX = maxX; + const int _maxY = maxY; if (maxT > 1) { - for (int t = __r.begin(); t != (int)__r.end(); t++) - for (int k = 0; k < maxZ; k++) - for (int j = 0; j < maxY; j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, target, source, srcFac, offset); + const int _maxZ = maxZ; +#pragma omp parallel + { + +#pragma omp for + for (int t = 0; t < maxT; t++) + for (int k = 0; k < _maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, target, source, srcFac, offset); + } } else if (maxZ > 1) { const int t = 0; - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < maxY; j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, target, source, srcFac, offset); +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, target, source, srcFac, offset); + } } else { const int t = 0; const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, target, source, srcFac, offset); - } - } - void run() - { - if (maxT > 1) { - tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this); - } - else if (maxZ > 1) { - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - } - else { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, target, source, srcFac, offset); + } } } Grid4d<S> ⌖ diff --git a/extern/mantaflow/preprocessed/grid4d.h b/extern/mantaflow/preprocessed/grid4d.h index 1741db590b7..2a2abad1e24 100644 --- a/extern/mantaflow/preprocessed/grid4d.h +++ b/extern/mantaflow/preprocessed/grid4d.h @@ -326,7 +326,6 @@ class Grid4dBase : public PbClass { // precomputed Z,T shift: to ensure 2D compatibility, always use this instead of sx*sy ! IndexInt mStrideZ; IndexInt mStrideT; - public: PbArgs _args; } @@ -951,7 +950,6 @@ template<class T> class Grid4d : public Grid4dBase { protected: T *mData; - public: PbArgs _args; } @@ -1027,7 +1025,7 @@ template<class T, class S> struct Grid4dAdd : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const + inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) { me[idx] += other[idx]; } @@ -1041,21 +1039,17 @@ template<class T, class S> struct Grid4dAdd : public KernelBase { return other; } typedef Grid4d<S> type1; - void runMessage() - { - debMsg("Executing kernel Grid4dAdd ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid4d<T> &me; const Grid4d<S> &other; @@ -1066,7 +1060,7 @@ template<class T, class S> struct Grid4dSub : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const + inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) { me[idx] -= other[idx]; } @@ -1080,21 +1074,17 @@ template<class T, class S> struct Grid4dSub : public KernelBase { return other; } typedef Grid4d<S> type1; - void runMessage() - { - debMsg("Executing kernel Grid4dSub ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid4d<T> &me; const Grid4d<S> &other; @@ -1105,7 +1095,7 @@ template<class T, class S> struct Grid4dMult : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const + inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) { me[idx] *= other[idx]; } @@ -1119,21 +1109,17 @@ template<class T, class S> struct Grid4dMult : public KernelBase { return other; } typedef Grid4d<S> type1; - void runMessage() - { - debMsg("Executing kernel Grid4dMult ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid4d<T> &me; const Grid4d<S> &other; @@ -1144,7 +1130,7 @@ template<class T, class S> struct Grid4dDiv : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const + inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) { me[idx] /= other[idx]; } @@ -1158,21 +1144,17 @@ template<class T, class S> struct Grid4dDiv : public KernelBase { return other; } typedef Grid4d<S> type1; - void runMessage() - { - debMsg("Executing kernel Grid4dDiv ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid4d<T> &me; const Grid4d<S> &other; @@ -1183,7 +1165,7 @@ template<class T, class S> struct Grid4dAddScalar : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, const S &other) const + inline void op(IndexInt idx, Grid4d<T> &me, const S &other) { me[idx] += other; } @@ -1197,21 +1179,17 @@ template<class T, class S> struct Grid4dAddScalar : public KernelBase { return other; } typedef S type1; - void runMessage() - { - debMsg("Executing kernel Grid4dAddScalar ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid4d<T> &me; const S &other; @@ -1222,7 +1200,7 @@ template<class T, class S> struct Grid4dMultScalar : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, const S &other) const + inline void op(IndexInt idx, Grid4d<T> &me, const S &other) { me[idx] *= other; } @@ -1236,21 +1214,17 @@ template<class T, class S> struct Grid4dMultScalar : public KernelBase { return other; } typedef S type1; - void runMessage() - { - debMsg("Executing kernel Grid4dMultScalar ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid4d<T> &me; const S &other; @@ -1262,7 +1236,7 @@ template<class T, class S> struct Grid4dScaledAdd : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other, const S &factor) const + inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other, const S &factor) { me[idx] += factor * other[idx]; } @@ -1281,21 +1255,17 @@ template<class T, class S> struct Grid4dScaledAdd : public KernelBase { return factor; } typedef S type2; - void runMessage() - { - debMsg("Executing kernel Grid4dScaledAdd ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other, factor); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other, factor); + } } Grid4d<T> &me; const Grid4d<T> &other; @@ -1308,7 +1278,7 @@ template<class T> struct Grid4dSafeDiv : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other) const + inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other) { me[idx] = safeDivide(me[idx], other[idx]); } @@ -1322,21 +1292,17 @@ template<class T> struct Grid4dSafeDiv : public KernelBase { return other; } typedef Grid4d<T> type1; - void runMessage() - { - debMsg("Executing kernel Grid4dSafeDiv ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } Grid4d<T> &me; const Grid4d<T> &other; @@ -1347,7 +1313,7 @@ template<class T> struct Grid4dSetConst : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid4d<T> &me, T value) const + inline void op(IndexInt idx, Grid4d<T> &me, T value) { me[idx] = value; } @@ -1361,21 +1327,17 @@ template<class T> struct Grid4dSetConst : public KernelBase { return value; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel Grid4dSetConst ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, value); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, value); + } } Grid4d<T> &me; T value; @@ -1473,7 +1435,7 @@ template<class S> struct KnInterpolateGrid4dTempl : public KernelBase { Grid4d<S> &target, Grid4d<S> &source, const Vec4 &sourceFactor, - Vec4 offset) const + Vec4 offset) { Vec4 pos = Vec4(i, j, k, t) * sourceFactor + offset; if (!source.is3D()) @@ -1502,50 +1464,47 @@ template<class S> struct KnInterpolateGrid4dTempl : public KernelBase { return offset; } typedef Vec4 type3; - void runMessage() - { - debMsg("Executing kernel KnInterpolateGrid4dTempl ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ - << " " - " t " - << minT << " - " << maxT, - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { + const int _maxX = maxX; + const int _maxY = maxY; if (maxT > 1) { - for (int t = __r.begin(); t != (int)__r.end(); t++) - for (int k = 0; k < maxZ; k++) - for (int j = 0; j < maxY; j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, target, source, sourceFactor, offset); + const int _maxZ = maxZ; +#pragma omp parallel + { + +#pragma omp for + for (int t = 0; t < maxT; t++) + for (int k = 0; k < _maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, target, source, sourceFactor, offset); + } } else if (maxZ > 1) { const int t = 0; - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < maxY; j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, target, source, sourceFactor, offset); +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, target, source, sourceFactor, offset); + } } else { const int t = 0; const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < maxX; i++) - op(i, j, k, t, target, source, sourceFactor, offset); - } - } - void run() - { - if (maxT > 1) { - tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this); - } - else if (maxZ > 1) { - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - } - else { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, t, target, source, sourceFactor, offset); + } } } Grid4d<S> ⌖ diff --git a/extern/mantaflow/preprocessed/kernel.h b/extern/mantaflow/preprocessed/kernel.h index dbcc2342a11..90e30cd21e1 100644 --- a/extern/mantaflow/preprocessed/kernel.h +++ b/extern/mantaflow/preprocessed/kernel.h @@ -71,19 +71,6 @@ class ParticleBase; for (int j = bnd; j < (grid).getSizeY() - bnd; ++j) \ for (int i = bnd; i < (grid).getSizeX() - bnd; ++i) -#define FOR_NEIGHBORS_BND(grid, radius, bnd) \ - for (int zj = ((grid).is3D() ? std::max(bnd, k - radius) : 0); \ - zj <= ((grid).is3D() ? std::min(k + radius, (grid).getSizeZ() - 1 - bnd) : 0); \ - zj++) \ - for (int yj = std::max(bnd, j - radius); \ - yj <= std::min(j + radius, (grid).getSizeY() - 1 - bnd); \ - yj++) \ - for (int xj = std::max(bnd, i - radius); \ - xj <= std::min(i + radius, (grid).getSizeX() - 1 - bnd); \ - xj++) - -#define FOR_NEIGHBORS(grid, radius) FOR_NEIGHBORS_BND(grid, radius, 0) - //! Basic data structure for kernel data, initialized based on kernel type (e.g. single, idx, etc). struct KernelBase { int maxX, maxY, maxZ, minZ, maxT, minT; diff --git a/extern/mantaflow/preprocessed/levelset.cpp b/extern/mantaflow/preprocessed/levelset.cpp index c4fecace7ca..1ce18fab3f6 100644 --- a/extern/mantaflow/preprocessed/levelset.cpp +++ b/extern/mantaflow/preprocessed/levelset.cpp @@ -62,7 +62,7 @@ struct InitFmIn : public KernelBase { Grid<int> &fmFlags, Grid<Real> &phi, bool ignoreWalls, - int obstacleType) const + int obstacleType) { const IndexInt idx = flags.index(i, j, k); const Real v = phi[idx]; @@ -104,37 +104,35 @@ struct InitFmIn : public KernelBase { return obstacleType; } typedef int type4; - void runMessage() - { - debMsg("Executing kernel InitFmIn ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; Grid<int> &fmFlags; Grid<Real> φ @@ -165,7 +163,7 @@ struct InitFmOut : public KernelBase { Grid<int> &fmFlags, Grid<Real> &phi, bool ignoreWalls, - int obstacleType) const + int obstacleType) { const IndexInt idx = flags.index(i, j, k); const Real v = phi[idx]; @@ -205,37 +203,35 @@ struct InitFmOut : public KernelBase { return obstacleType; } typedef int type4; - void runMessage() - { - debMsg("Executing kernel InitFmOut ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; Grid<int> &fmFlags; Grid<Real> φ @@ -269,7 +265,7 @@ struct SetUninitialized : public KernelBase { Grid<Real> &phi, const Real val, int ignoreWalls, - int obstacleType) const + int obstacleType) { if (ignoreWalls) { if ((fmFlags(i, j, k) != FlagInited) && ((flags(i, j, k) & obstacleType) == 0)) { @@ -311,37 +307,35 @@ struct SetUninitialized : public KernelBase { return obstacleType; } typedef int type5; - void runMessage() - { - debMsg("Executing kernel SetUninitialized ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const Grid<int> &flags; Grid<int> &fmFlags; Grid<Real> φ @@ -371,7 +365,8 @@ inline bool isAtInterface(const Grid<int> &fmFlags, Grid<Real> &phi, const Vec3i //************************************************************************ // Levelset class def -LevelsetGrid::LevelsetGrid(FluidSolver *parent, bool show) : Grid<Real>(parent, show) +LevelsetGrid::LevelsetGrid(FluidSolver *parent, bool show, bool offload) + : Grid<Real>(parent, show, false /*sparse*/, offload) { mType = (GridType)(TypeLevelset | TypeReal); } @@ -394,7 +389,7 @@ struct KnJoin : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<Real> &a, const Grid<Real> &b) const + inline void op(IndexInt idx, Grid<Real> &a, const Grid<Real> &b) { a[idx] = min(a[idx], b[idx]); } @@ -408,21 +403,17 @@ struct KnJoin : public KernelBase { return b; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel KnJoin ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, a, b); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, a, b); + } } Grid<Real> &a; const Grid<Real> &b; @@ -440,11 +431,8 @@ struct KnSubtract : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, - Grid<Real> &a, - const Grid<Real> &b, - const FlagGrid *flags, - int subtractType) const + inline void op( + IndexInt idx, Grid<Real> &a, const Grid<Real> &b, const FlagGrid *flags, int subtractType) { if (flags && ((*flags)(idx)&subtractType) == 0) return; @@ -471,21 +459,17 @@ struct KnSubtract : public KernelBase { return subtractType; } typedef int type3; - void runMessage() - { - debMsg("Executing kernel KnSubtract ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, a, b, flags, subtractType); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, a, b, flags, subtractType); + } } Grid<Real> &a; const Grid<Real> &b; @@ -668,7 +652,7 @@ struct KnFillApply : public KernelBase { Grid<int> &visited, const Real value, const int boundaryWidth, - const bool outside) const + const bool outside) { if (visited(i, j, k) == ID_VISITED) @@ -706,37 +690,35 @@ struct KnFillApply : public KernelBase { return outside; } typedef bool type4; - void runMessage() - { - debMsg("Executing kernel KnFillApply ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = boundaryWidth; j < _maxY; j++) - for (int i = boundaryWidth; i < _maxX; i++) - op(i, j, k, target, visited, value, boundaryWidth, outside); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = boundaryWidth; j < _maxY; j++) + for (int i = boundaryWidth; i < _maxX; i++) + op(i, j, k, target, visited, value, boundaryWidth, outside); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = boundaryWidth; i < _maxX; i++) - op(i, j, k, target, visited, value, boundaryWidth, outside); +#pragma omp parallel + { + +#pragma omp for + for (int j = boundaryWidth; j < _maxY; j++) + for (int i = boundaryWidth; i < _maxX; i++) + op(i, j, k, target, visited, value, boundaryWidth, outside); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(boundaryWidth, maxY), *this); - } Grid<Real> ⌖ Grid<int> &visited; const Real value; diff --git a/extern/mantaflow/preprocessed/levelset.h b/extern/mantaflow/preprocessed/levelset.h index ae162f73c3d..eea604ca2fd 100644 --- a/extern/mantaflow/preprocessed/levelset.h +++ b/extern/mantaflow/preprocessed/levelset.h @@ -27,7 +27,7 @@ class Mesh; //! Special function for levelsets class LevelsetGrid : public Grid<Real> { public: - LevelsetGrid(FluidSolver *parent, bool show = true); + LevelsetGrid(FluidSolver *parent, bool show = true, bool offload = false); static int _W_0(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { PbClass *obj = Pb::objFromPy(_self); @@ -41,7 +41,8 @@ class LevelsetGrid : public Grid<Real> { ArgLocker _lock; FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock); bool show = _args.getOpt<bool>("show", 1, true, &_lock); - obj = new LevelsetGrid(parent, show); + bool offload = _args.getOpt<bool>("offload", 2, false, &_lock); + obj = new LevelsetGrid(parent, show, offload); obj->registerObject(_self, &_args); _args.check(); } @@ -266,7 +267,6 @@ class LevelsetGrid : public Grid<Real> { } static Real invalidTimeValue(); - public: PbArgs _args; } diff --git a/extern/mantaflow/preprocessed/mesh.cpp b/extern/mantaflow/preprocessed/mesh.cpp index 79ddc4bd879..7eb69438e7f 100644 --- a/extern/mantaflow/preprocessed/mesh.cpp +++ b/extern/mantaflow/preprocessed/mesh.cpp @@ -344,22 +344,19 @@ void Mesh::rebuildChannels() mNodeChannels[i]->resize(mNodes.size()); } -struct _KnAdvectMeshInGrid : public KernelBase { - _KnAdvectMeshInGrid(const KernelBase &base, - vector<Node> &nodes, - const FlagGrid &flags, - const MACGrid &vel, - const Real dt, - vector<Vec3> &u) - : KernelBase(base), nodes(nodes), flags(flags), vel(vel), dt(dt), u(u) +struct KnAdvectMeshInGrid : public KernelBase { + KnAdvectMeshInGrid(vector<Node> &nodes, const FlagGrid &flags, const MACGrid &vel, const Real dt) + : KernelBase(nodes.size()), nodes(nodes), flags(flags), vel(vel), dt(dt), u((size)) { + runMessage(); + run(); } inline void op(IndexInt idx, vector<Node> &nodes, const FlagGrid &flags, const MACGrid &vel, const Real dt, - vector<Vec3> &u) const + vector<Vec3> &u) { if (nodes[idx].flags & Mesh::NfFixed) u[idx] = 0.0; @@ -368,38 +365,6 @@ struct _KnAdvectMeshInGrid : public KernelBase { else u[idx] = vel.getInterpolated(nodes[idx].pos) * dt; } - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, nodes, flags, vel, dt, u); - } - void run() - { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); - } - vector<Node> &nodes; - const FlagGrid &flags; - const MACGrid &vel; - const Real dt; - vector<Vec3> &u; -}; -struct KnAdvectMeshInGrid : public KernelBase { - KnAdvectMeshInGrid(vector<Node> &nodes, const FlagGrid &flags, const MACGrid &vel, const Real dt) - : KernelBase(nodes.size()), - _inner(KernelBase(nodes.size()), nodes, flags, vel, dt, u), - nodes(nodes), - flags(flags), - vel(vel), - dt(dt), - u((size)) - { - runMessage(); - run(); - } - void run() - { - _inner.run(); - } inline operator vector<Vec3>() { return u; @@ -428,14 +393,18 @@ struct KnAdvectMeshInGrid : public KernelBase { return dt; } typedef Real type3; - void runMessage() + void runMessage(){}; + void run() { - debMsg("Executing kernel KnAdvectMeshInGrid ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - _KnAdvectMeshInGrid _inner; + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, nodes, flags, vel, dt, u); + } + } vector<Node> &nodes; const FlagGrid &flags; const MACGrid &vel; @@ -1012,7 +981,7 @@ template<class T> struct ApplyMeshToGrid : public KernelBase { run(); } inline void op( - int i, int j, int k, Grid<T> *grid, Grid<Real> &sdf, T value, FlagGrid *respectFlags) const + int i, int j, int k, Grid<T> *grid, Grid<Real> &sdf, T value, FlagGrid *respectFlags) { if (respectFlags && respectFlags->isObstacle(i, j, k)) return; @@ -1040,37 +1009,35 @@ template<class T> struct ApplyMeshToGrid : public KernelBase { return respectFlags; } typedef FlagGrid type3; - void runMessage() - { - debMsg("Executing kernel ApplyMeshToGrid ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, sdf, value, respectFlags); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, sdf, value, respectFlags); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, sdf, value, respectFlags); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, sdf, value, respectFlags); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<T> *grid; Grid<Real> &sdf; T value; @@ -1431,7 +1398,7 @@ template<class T> struct knSetMdataConst : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &mdata, T value) const + inline void op(IndexInt idx, MeshDataImpl<T> &mdata, T value) { mdata[idx] = value; } @@ -1445,21 +1412,17 @@ template<class T> struct knSetMdataConst : public KernelBase { return value; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel knSetMdataConst ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, mdata, value); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, mdata, value); + } } MeshDataImpl<T> &mdata; T value; @@ -1472,7 +1435,7 @@ template<class T, class S> struct knMdataSet : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) { me[idx] += other[idx]; } @@ -1486,21 +1449,17 @@ template<class T, class S> struct knMdataSet : public KernelBase { return other; } typedef MeshDataImpl<S> type1; - void runMessage() - { - debMsg("Executing kernel knMdataSet ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } MeshDataImpl<T> &me; const MeshDataImpl<S> &other; @@ -1512,7 +1471,7 @@ template<class T, class S> struct knMdataAdd : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) { me[idx] += other[idx]; } @@ -1526,21 +1485,17 @@ template<class T, class S> struct knMdataAdd : public KernelBase { return other; } typedef MeshDataImpl<S> type1; - void runMessage() - { - debMsg("Executing kernel knMdataAdd ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } MeshDataImpl<T> &me; const MeshDataImpl<S> &other; @@ -1552,7 +1507,7 @@ template<class T, class S> struct knMdataSub : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) { me[idx] -= other[idx]; } @@ -1566,21 +1521,17 @@ template<class T, class S> struct knMdataSub : public KernelBase { return other; } typedef MeshDataImpl<S> type1; - void runMessage() - { - debMsg("Executing kernel knMdataSub ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } MeshDataImpl<T> &me; const MeshDataImpl<S> &other; @@ -1592,7 +1543,7 @@ template<class T, class S> struct knMdataMult : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) { me[idx] *= other[idx]; } @@ -1606,21 +1557,17 @@ template<class T, class S> struct knMdataMult : public KernelBase { return other; } typedef MeshDataImpl<S> type1; - void runMessage() - { - debMsg("Executing kernel knMdataMult ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } MeshDataImpl<T> &me; const MeshDataImpl<S> &other; @@ -1632,7 +1579,7 @@ template<class T, class S> struct knMdataDiv : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) { me[idx] /= other[idx]; } @@ -1646,21 +1593,17 @@ template<class T, class S> struct knMdataDiv : public KernelBase { return other; } typedef MeshDataImpl<S> type1; - void runMessage() - { - debMsg("Executing kernel knMdataDiv ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } MeshDataImpl<T> &me; const MeshDataImpl<S> &other; @@ -1673,7 +1616,7 @@ template<class T, class S> struct knMdataSetScalar : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other) { me[idx] = other; } @@ -1687,21 +1630,17 @@ template<class T, class S> struct knMdataSetScalar : public KernelBase { return other; } typedef S type1; - void runMessage() - { - debMsg("Executing kernel knMdataSetScalar ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } MeshDataImpl<T> &me; const S &other; @@ -1713,7 +1652,7 @@ template<class T, class S> struct knMdataAddScalar : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other) { me[idx] += other; } @@ -1727,21 +1666,17 @@ template<class T, class S> struct knMdataAddScalar : public KernelBase { return other; } typedef S type1; - void runMessage() - { - debMsg("Executing kernel knMdataAddScalar ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } MeshDataImpl<T> &me; const S &other; @@ -1753,7 +1688,7 @@ template<class T, class S> struct knMdataMultScalar : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other) { me[idx] *= other; } @@ -1767,21 +1702,17 @@ template<class T, class S> struct knMdataMultScalar : public KernelBase { return other; } typedef S type1; - void runMessage() - { - debMsg("Executing kernel knMdataMultScalar ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } MeshDataImpl<T> &me; const S &other; @@ -1793,10 +1724,7 @@ template<class T, class S> struct knMdataScaledAdd : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, - MeshDataImpl<T> &me, - const MeshDataImpl<T> &other, - const S &factor) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<T> &other, const S &factor) { me[idx] += factor * other[idx]; } @@ -1815,21 +1743,17 @@ template<class T, class S> struct knMdataScaledAdd : public KernelBase { return factor; } typedef S type2; - void runMessage() - { - debMsg("Executing kernel knMdataScaledAdd ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other, factor); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other, factor); + } } MeshDataImpl<T> &me; const MeshDataImpl<T> &other; @@ -1843,7 +1767,7 @@ template<class T> struct knMdataSafeDiv : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<T> &other) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<T> &other) { me[idx] = safeDivide(me[idx], other[idx]); } @@ -1857,21 +1781,17 @@ template<class T> struct knMdataSafeDiv : public KernelBase { return other; } typedef MeshDataImpl<T> type1; - void runMessage() - { - debMsg("Executing kernel knMdataSafeDiv ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } MeshDataImpl<T> &me; const MeshDataImpl<T> &other; @@ -1883,7 +1803,7 @@ template<class T> struct knMdataSetConst : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &mdata, T value) const + inline void op(IndexInt idx, MeshDataImpl<T> &mdata, T value) { mdata[idx] = value; } @@ -1897,21 +1817,17 @@ template<class T> struct knMdataSetConst : public KernelBase { return value; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel knMdataSetConst ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, mdata, value); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, mdata, value); + } } MeshDataImpl<T> &mdata; T value; @@ -1924,7 +1840,7 @@ template<class T> struct knMdataClamp : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, T min, T max) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, T min, T max) { me[idx] = clamp(me[idx], min, max); } @@ -1943,21 +1859,17 @@ template<class T> struct knMdataClamp : public KernelBase { return max; } typedef T type2; - void runMessage() - { - debMsg("Executing kernel knMdataClamp ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, min, max); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, min, max); + } } MeshDataImpl<T> &me; T min; @@ -1969,7 +1881,7 @@ template<class T> struct knMdataClampMin : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, const T vmin) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const T vmin) { me[idx] = std::max(vmin, me[idx]); } @@ -1983,21 +1895,17 @@ template<class T> struct knMdataClampMin : public KernelBase { return vmin; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel knMdataClampMin ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, vmin); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, vmin); + } } MeshDataImpl<T> &me; const T vmin; @@ -2008,7 +1916,7 @@ template<class T> struct knMdataClampMax : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<T> &me, const T vmax) const + inline void op(IndexInt idx, MeshDataImpl<T> &me, const T vmax) { me[idx] = std::min(vmax, me[idx]); } @@ -2022,21 +1930,17 @@ template<class T> struct knMdataClampMax : public KernelBase { return vmax; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel knMdataClampMax ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, vmax); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, vmax); + } } MeshDataImpl<T> &me; const T vmax; @@ -2048,7 +1952,7 @@ struct knMdataClampMinVec3 : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<Vec3> &me, const Real vmin) const + inline void op(IndexInt idx, MeshDataImpl<Vec3> &me, const Real vmin) { me[idx].x = std::max(vmin, me[idx].x); me[idx].y = std::max(vmin, me[idx].y); @@ -2064,21 +1968,17 @@ struct knMdataClampMinVec3 : public KernelBase { return vmin; } typedef Real type1; - void runMessage() - { - debMsg("Executing kernel knMdataClampMinVec3 ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, vmin); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, vmin); + } } MeshDataImpl<Vec3> &me; const Real vmin; @@ -2090,7 +1990,7 @@ struct knMdataClampMaxVec3 : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, MeshDataImpl<Vec3> &me, const Real vmax) const + inline void op(IndexInt idx, MeshDataImpl<Vec3> &me, const Real vmax) { me[idx].x = std::min(vmax, me[idx].x); me[idx].y = std::min(vmax, me[idx].y); @@ -2106,21 +2006,17 @@ struct knMdataClampMaxVec3 : public KernelBase { return vmax; } typedef Real type1; - void runMessage() - { - debMsg("Executing kernel knMdataClampMaxVec3 ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, vmax); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, vmax); + } } MeshDataImpl<Vec3> &me; const Real vmax; @@ -2162,7 +2058,7 @@ template<class T, class S> struct knMdataSetScalarIntFlag : public KernelBase { MeshDataImpl<T> &me, const S &other, const MeshDataImpl<int> &t, - const int itype) const + const int itype) { if (t[idx] & itype) me[idx] = other; @@ -2187,21 +2083,17 @@ template<class T, class S> struct knMdataSetScalarIntFlag : public KernelBase { return itype; } typedef int type3; - void runMessage() - { - debMsg("Executing kernel knMdataSetScalarIntFlag ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other, t, itype); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other, t, itype); + } } MeshDataImpl<T> &me; const S &other; @@ -2311,29 +2203,21 @@ template<typename T> struct KnPtsSum : public KernelBase { return itype; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel KnPtsSum ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, t, itype, result); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - KnPtsSum(KnPtsSum &o, tbb::split) - : KernelBase(o), val(o.val), t(o.t), itype(o.itype), result(T(0.)) - { - } - void join(const KnPtsSum &o) - { - result += o.result; + const IndexInt _sz = size; +#pragma omp parallel + { + T result = T(0.); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, t, itype, result); +#pragma omp critical + { + this->result += result; + } + } } const MeshDataImpl<T> &val; const MeshDataImpl<int> *t; @@ -2363,28 +2247,21 @@ template<typename T> struct KnPtsSumSquare : public KernelBase { return val; } typedef MeshDataImpl<T> type0; - void runMessage() - { - debMsg("Executing kernel KnPtsSumSquare ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, result); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - KnPtsSumSquare(KnPtsSumSquare &o, tbb::split) : KernelBase(o), val(o.val), result(0.) - { - } - void join(const KnPtsSumSquare &o) - { - result += o.result; + const IndexInt _sz = size; +#pragma omp parallel + { + Real result = 0.; +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, result); +#pragma omp critical + { + this->result += result; + } + } } const MeshDataImpl<T> &val; Real result; @@ -2412,28 +2289,21 @@ template<typename T> struct KnPtsSumMagnitude : public KernelBase { return val; } typedef MeshDataImpl<T> type0; - void runMessage() - { - debMsg("Executing kernel KnPtsSumMagnitude ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, result); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - KnPtsSumMagnitude(KnPtsSumMagnitude &o, tbb::split) : KernelBase(o), val(o.val), result(0.) - { - } - void join(const KnPtsSumMagnitude &o) - { - result += o.result; + const IndexInt _sz = size; +#pragma omp parallel + { + Real result = 0.; +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, result); +#pragma omp critical + { + this->result += result; + } + } } const MeshDataImpl<T> &val; Real result; @@ -2479,29 +2349,21 @@ struct CompMdata_Min : public KernelBase { return val; } typedef MeshDataImpl<T> type0; - void runMessage() - { - debMsg("Executing kernel CompMdata_Min ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, minVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompMdata_Min(CompMdata_Min &o, tbb::split) - : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max()) - { - } - void join(const CompMdata_Min &o) - { - minVal = min(minVal, o.minVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real minVal = std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, minVal); +#pragma omp critical + { + this->minVal = min(minVal, this->minVal); + } + } } const MeshDataImpl<T> &val; Real minVal; @@ -2534,29 +2396,21 @@ struct CompMdata_Max : public KernelBase { return val; } typedef MeshDataImpl<T> type0; - void runMessage() - { - debMsg("Executing kernel CompMdata_Max ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, maxVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompMdata_Max(CompMdata_Max &o, tbb::split) - : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max()) - { - } - void join(const CompMdata_Max &o) - { - maxVal = max(maxVal, o.maxVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real maxVal = -std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, maxVal); +#pragma omp critical + { + this->maxVal = max(maxVal, this->maxVal); + } + } } const MeshDataImpl<T> &val; Real maxVal; @@ -2632,29 +2486,21 @@ struct CompMdata_MinVec3 : public KernelBase { return val; } typedef MeshDataImpl<Vec3> type0; - void runMessage() - { - debMsg("Executing kernel CompMdata_MinVec3 ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, minVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompMdata_MinVec3(CompMdata_MinVec3 &o, tbb::split) - : KernelBase(o), val(o.val), minVal(-std::numeric_limits<Real>::max()) - { - } - void join(const CompMdata_MinVec3 &o) - { - minVal = min(minVal, o.minVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real minVal = -std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, minVal); +#pragma omp critical + { + this->minVal = min(minVal, this->minVal); + } + } } const MeshDataImpl<Vec3> &val; Real minVal; @@ -2686,29 +2532,21 @@ struct CompMdata_MaxVec3 : public KernelBase { return val; } typedef MeshDataImpl<Vec3> type0; - void runMessage() - { - debMsg("Executing kernel CompMdata_MaxVec3 ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, maxVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompMdata_MaxVec3(CompMdata_MaxVec3 &o, tbb::split) - : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::min()) - { - } - void join(const CompMdata_MaxVec3 &o) - { - maxVal = max(maxVal, o.maxVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real maxVal = -std::numeric_limits<Real>::min(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, maxVal); +#pragma omp critical + { + this->maxVal = max(maxVal, this->maxVal); + } + } } const MeshDataImpl<Vec3> &val; Real maxVal; diff --git a/extern/mantaflow/preprocessed/mesh.h b/extern/mantaflow/preprocessed/mesh.h index b5de66ce095..d3a69abc4ea 100644 --- a/extern/mantaflow/preprocessed/mesh.h +++ b/extern/mantaflow/preprocessed/mesh.h @@ -796,7 +796,6 @@ class Mesh : public PbClass { std::vector<MeshDataImpl<int> *> mMdataInt; //! indicate that mdata of this mesh is copied, and needs to be freed bool mFreeMdata; - public: PbArgs _args; } @@ -882,7 +881,6 @@ class MeshDataBase : public PbClass { protected: Mesh *mMesh; - public: PbArgs _args; } @@ -1647,7 +1645,6 @@ template<class T> class MeshDataImpl : public MeshDataBase { //! optionally , we might have an associated grid from which to grab new data Grid<T> *mpGridSource; //! unfortunately , we need to distinguish mac vs regular vec3 bool mGridSourceMAC; - public: PbArgs _args; } diff --git a/extern/mantaflow/preprocessed/movingobs.h b/extern/mantaflow/preprocessed/movingobs.h index 83ef6ed0c9f..0661ddf5b37 100644 --- a/extern/mantaflow/preprocessed/movingobs.h +++ b/extern/mantaflow/preprocessed/movingobs.h @@ -154,7 +154,6 @@ class MovingObstacle : public PbClass { int mEmptyType; int mID; static int sIDcnt; - public: PbArgs _args; } diff --git a/extern/mantaflow/preprocessed/multigrid.cpp b/extern/mantaflow/preprocessed/multigrid.cpp index 9e35c6f9368..763eede540d 100644 --- a/extern/mantaflow/preprocessed/multigrid.cpp +++ b/extern/mantaflow/preprocessed/multigrid.cpp @@ -428,7 +428,7 @@ struct knCopyA : public KernelBase { const Grid<Real> *pA0, const Grid<Real> *pAi, const Grid<Real> *pAj, - const Grid<Real> *pAk) const + const Grid<Real> *pAk) { A0[idx * stencilSize0 + 0] = (*pA0)[idx]; A0[idx * stencilSize0 + 1] = (*pAi)[idx]; @@ -476,21 +476,17 @@ struct knCopyA : public KernelBase { return pAk; } typedef Grid<Real> type7; - void runMessage() - { - debMsg("Executing kernel knCopyA ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, sizeRef, A0, stencilSize0, is3D, pA0, pAi, pAj, pAk); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, sizeRef, A0, stencilSize0, is3D, pA0, pAi, pAj, pAk); + } } std::vector<Real> &sizeRef; std::vector<Real> &A0; @@ -523,7 +519,7 @@ struct knActivateVertices : public KernelBase { std::vector<Real> &A0, bool &nonZeroStencilSumFound, bool &trivialEquationsFound, - const GridMg &mg) const + const GridMg &mg) { // active vertices on level 0 are vertices with non-zero diagonal entry in A type_0[idx] = GridMg::vtInactive; @@ -572,21 +568,17 @@ struct knActivateVertices : public KernelBase { return mg; } typedef GridMg type4; - void runMessage() - { - debMsg("Executing kernel knActivateVertices ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, type_0, A0, nonZeroStencilSumFound, trivialEquationsFound, mg); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, type_0, A0, nonZeroStencilSumFound, trivialEquationsFound, mg); + } } std::vector<GridMg::VertexType> &type_0; std::vector<Real> &A0; @@ -642,7 +634,7 @@ struct knSetRhs : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, std::vector<Real> &b, const Grid<Real> &rhs, const GridMg &mg) const + inline void op(IndexInt idx, std::vector<Real> &b, const Grid<Real> &rhs, const GridMg &mg) { b[idx] = rhs[idx]; @@ -666,21 +658,17 @@ struct knSetRhs : public KernelBase { return mg; } typedef GridMg type2; - void runMessage() - { - debMsg("Executing kernel knSetRhs ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, b, rhs, mg); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, b, rhs, mg); + } } std::vector<Real> &b; const Grid<Real> &rhs; @@ -702,7 +690,7 @@ template<class T> struct knSet : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, std::vector<T> &data, T value) const + inline void op(IndexInt idx, std::vector<T> &data, T value) { data[idx] = value; } @@ -716,21 +704,17 @@ template<class T> struct knSet : public KernelBase { return value; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel knSet ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, data, value); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, data, value); + } } std::vector<T> &data; T value; @@ -743,7 +727,7 @@ template<class T> struct knCopyToVector : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, std::vector<T> &dst, const Grid<T> &src) const + inline void op(IndexInt idx, std::vector<T> &dst, const Grid<T> &src) { dst[idx] = src[idx]; } @@ -757,21 +741,17 @@ template<class T> struct knCopyToVector : public KernelBase { return src; } typedef Grid<T> type1; - void runMessage() - { - debMsg("Executing kernel knCopyToVector ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, dst, src); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, dst, src); + } } std::vector<T> &dst; const Grid<T> &src; @@ -784,7 +764,7 @@ template<class T> struct knCopyToGrid : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, const std::vector<T> &src, Grid<T> &dst) const + inline void op(IndexInt idx, const std::vector<T> &src, Grid<T> &dst) { dst[idx] = src[idx]; } @@ -798,21 +778,17 @@ template<class T> struct knCopyToGrid : public KernelBase { return dst; } typedef Grid<T> type1; - void runMessage() - { - debMsg("Executing kernel knCopyToGrid ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, src, dst); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, src, dst); + } } const std::vector<T> &src; Grid<T> &dst; @@ -825,7 +801,7 @@ template<class T> struct knAddAssign : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, std::vector<T> &dst, const std::vector<T> &src) const + inline void op(IndexInt idx, std::vector<T> &dst, const std::vector<T> &src) { dst[idx] += src[idx]; } @@ -839,21 +815,17 @@ template<class T> struct knAddAssign : public KernelBase { return src; } typedef std::vector<T> type1; - void runMessage() - { - debMsg("Executing kernel knAddAssign ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, dst, src); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, dst, src); + } } std::vector<T> &dst; const std::vector<T> &src; @@ -930,7 +902,7 @@ struct knActivateCoarseVertices : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, std::vector<GridMg::VertexType> &type, int unused) const + inline void op(IndexInt idx, std::vector<GridMg::VertexType> &type, int unused) { // set all remaining 'free' vertices to 'removed', if (type[idx] == GridMg::vtFree) @@ -952,21 +924,17 @@ struct knActivateCoarseVertices : public KernelBase { return unused; } typedef int type1; - void runMessage() - { - debMsg("Executing kernel knActivateCoarseVertices ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, type, unused); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, type, unused); + } } std::vector<GridMg::VertexType> &type; int unused; @@ -1052,11 +1020,8 @@ struct knGenCoarseGridOperator : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, - std::vector<Real> &sizeRef, - std::vector<Real> &A, - int l, - const GridMg &mg) const + inline void op( + IndexInt idx, std::vector<Real> &sizeRef, std::vector<Real> &A, int l, const GridMg &mg) { if (mg.mType[l][idx] == GridMg::vtInactive) return; @@ -1178,21 +1143,17 @@ struct knGenCoarseGridOperator : public KernelBase { return mg; } typedef GridMg type3; - void runMessage() - { - debMsg("Executing kernel knGenCoarseGridOperator ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, sizeRef, A, l, mg); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for schedule(static, 1) + for (IndexInt i = 0; i < _sz; i++) + op(i, sizeRef, A, l, mg); + } } std::vector<Real> &sizeRef; std::vector<Real> &A; @@ -1232,7 +1193,7 @@ struct knSmoothColor : public KernelBase { const Vec3i &blockSize, const std::vector<Vec3i> &colorOffs, int l, - const GridMg &mg) const + const GridMg &mg) { Vec3i blockOff(int(idx) % blockSize.x, (int(idx) % (blockSize.x * blockSize.y)) / blockSize.x, @@ -1318,21 +1279,17 @@ struct knSmoothColor : public KernelBase { return mg; } typedef GridMg type5; - void runMessage() - { - debMsg("Executing kernel knSmoothColor ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, numBlocks, x, blockSize, colorOffs, l, mg); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for schedule(static, 1) + for (IndexInt i = 0; i < _sz; i++) + op(i, numBlocks, x, blockSize, colorOffs, l, mg); + } } ThreadSize &numBlocks; std::vector<Real> &x; @@ -1386,7 +1343,7 @@ struct knCalcResidual : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, std::vector<Real> &r, int l, const GridMg &mg) const + inline void op(IndexInt idx, std::vector<Real> &r, int l, const GridMg &mg) { if (mg.mType[l][idx] == GridMg::vtInactive) return; @@ -1443,21 +1400,17 @@ struct knCalcResidual : public KernelBase { return mg; } typedef GridMg type2; - void runMessage() - { - debMsg("Executing kernel knCalcResidual ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, r, l, mg); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for schedule(static, 1) + for (IndexInt i = 0; i < _sz; i++) + op(i, r, l, mg); + } } std::vector<Real> &r; int l; @@ -1506,29 +1459,21 @@ struct knResidualNormSumSqr : public KernelBase { return mg; } typedef GridMg type2; - void runMessage() - { - debMsg("Executing kernel knResidualNormSumSqr ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, r, l, mg, result); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - knResidualNormSumSqr(knResidualNormSumSqr &o, tbb::split) - : KernelBase(o), r(o.r), l(o.l), mg(o.mg), result(Real(0)) - { - } - void join(const knResidualNormSumSqr &o) - { - result += o.result; + const IndexInt _sz = size; +#pragma omp parallel + { + Real result = Real(0); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, r, l, mg, result); +#pragma omp critical + { + this->result += result; + } + } } const vector<Real> &r; int l; @@ -1700,7 +1645,7 @@ struct knRestrict : public KernelBase { std::vector<Real> &dst, const std::vector<Real> &src, int l_dst, - const GridMg &mg) const + const GridMg &mg) { if (mg.mType[l_dst][idx] == GridMg::vtInactive) return; @@ -1746,21 +1691,17 @@ struct knRestrict : public KernelBase { return mg; } typedef GridMg type3; - void runMessage() - { - debMsg("Executing kernel knRestrict ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, dst, src, l_dst, mg); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for schedule(static, 1) + for (IndexInt i = 0; i < _sz; i++) + op(i, dst, src, l_dst, mg); + } } std::vector<Real> &dst; const std::vector<Real> &src; @@ -1784,7 +1725,7 @@ struct knInterpolate : public KernelBase { std::vector<Real> &dst, const std::vector<Real> &src, int l_dst, - const GridMg &mg) const + const GridMg &mg) { if (mg.mType[l_dst][idx] == GridMg::vtInactive) return; @@ -1827,21 +1768,17 @@ struct knInterpolate : public KernelBase { return mg; } typedef GridMg type3; - void runMessage() - { - debMsg("Executing kernel knInterpolate ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, dst, src, l_dst, mg); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for schedule(static, 1) + for (IndexInt i = 0; i < _sz; i++) + op(i, dst, src, l_dst, mg); + } } std::vector<Real> &dst; const std::vector<Real> &src; diff --git a/extern/mantaflow/preprocessed/noisefield.h b/extern/mantaflow/preprocessed/noisefield.h index 6ed8ac0012d..73c9de779ef 100644 --- a/extern/mantaflow/preprocessed/noisefield.h +++ b/extern/mantaflow/preprocessed/noisefield.h @@ -236,7 +236,6 @@ class WaveletNoiseField : public PbClass { static int randomSeed; // global reference count for noise tile static std::atomic<int> mNoiseReferenceCount; - public: PbArgs _args; } diff --git a/extern/mantaflow/preprocessed/particle.cpp b/extern/mantaflow/preprocessed/particle.cpp index ad1c344d307..45234389cfe 100644 --- a/extern/mantaflow/preprocessed/particle.cpp +++ b/extern/mantaflow/preprocessed/particle.cpp @@ -455,7 +455,7 @@ template<class T, class S> struct knPdataAdd : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const + inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) { me[idx] += other[idx]; } @@ -469,21 +469,17 @@ template<class T, class S> struct knPdataAdd : public KernelBase { return other; } typedef ParticleDataImpl<S> type1; - void runMessage() - { - debMsg("Executing kernel knPdataAdd ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } ParticleDataImpl<T> &me; const ParticleDataImpl<S> &other; @@ -495,7 +491,7 @@ template<class T, class S> struct knPdataSub : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const + inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) { me[idx] -= other[idx]; } @@ -509,21 +505,17 @@ template<class T, class S> struct knPdataSub : public KernelBase { return other; } typedef ParticleDataImpl<S> type1; - void runMessage() - { - debMsg("Executing kernel knPdataSub ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } ParticleDataImpl<T> &me; const ParticleDataImpl<S> &other; @@ -535,7 +527,7 @@ template<class T, class S> struct knPdataMult : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const + inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) { me[idx] *= other[idx]; } @@ -549,21 +541,17 @@ template<class T, class S> struct knPdataMult : public KernelBase { return other; } typedef ParticleDataImpl<S> type1; - void runMessage() - { - debMsg("Executing kernel knPdataMult ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } ParticleDataImpl<T> &me; const ParticleDataImpl<S> &other; @@ -575,7 +563,7 @@ template<class T, class S> struct knPdataDiv : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const + inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) { me[idx] /= other[idx]; } @@ -589,21 +577,17 @@ template<class T, class S> struct knPdataDiv : public KernelBase { return other; } typedef ParticleDataImpl<S> type1; - void runMessage() - { - debMsg("Executing kernel knPdataDiv ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } ParticleDataImpl<T> &me; const ParticleDataImpl<S> &other; @@ -615,7 +599,7 @@ template<class T> struct knPdataSafeDiv : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<T> &other) const + inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<T> &other) { me[idx] = safeDivide(me[idx], other[idx]); } @@ -629,21 +613,17 @@ template<class T> struct knPdataSafeDiv : public KernelBase { return other; } typedef ParticleDataImpl<T> type1; - void runMessage() - { - debMsg("Executing kernel knPdataSafeDiv ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } ParticleDataImpl<T> &me; const ParticleDataImpl<T> &other; @@ -656,7 +636,7 @@ template<class T, class S> struct knPdataSetScalar : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) const + inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) { me[idx] = other; } @@ -670,21 +650,17 @@ template<class T, class S> struct knPdataSetScalar : public KernelBase { return other; } typedef S type1; - void runMessage() - { - debMsg("Executing kernel knPdataSetScalar ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } ParticleDataImpl<T> &me; const S &other; @@ -696,7 +672,7 @@ template<class T, class S> struct knPdataAddScalar : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) const + inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) { me[idx] += other; } @@ -710,21 +686,17 @@ template<class T, class S> struct knPdataAddScalar : public KernelBase { return other; } typedef S type1; - void runMessage() - { - debMsg("Executing kernel knPdataAddScalar ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } ParticleDataImpl<T> &me; const S &other; @@ -736,7 +708,7 @@ template<class T, class S> struct knPdataMultScalar : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) const + inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) { me[idx] *= other; } @@ -750,21 +722,17 @@ template<class T, class S> struct knPdataMultScalar : public KernelBase { return other; } typedef S type1; - void runMessage() - { - debMsg("Executing kernel knPdataMultScalar ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other); + } } ParticleDataImpl<T> &me; const S &other; @@ -779,7 +747,7 @@ template<class T, class S> struct knPdataScaledAdd : public KernelBase { inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<T> &other, - const S &factor) const + const S &factor) { me[idx] += factor * other[idx]; } @@ -798,21 +766,17 @@ template<class T, class S> struct knPdataScaledAdd : public KernelBase { return factor; } typedef S type2; - void runMessage() - { - debMsg("Executing kernel knPdataScaledAdd ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other, factor); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other, factor); + } } ParticleDataImpl<T> &me; const ParticleDataImpl<T> &other; @@ -826,7 +790,7 @@ template<class T> struct knPdataClamp : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin, const T vmax) const + inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin, const T vmax) { me[idx] = clamp(me[idx], vmin, vmax); } @@ -845,21 +809,17 @@ template<class T> struct knPdataClamp : public KernelBase { return vmax; } typedef T type2; - void runMessage() - { - debMsg("Executing kernel knPdataClamp ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, vmin, vmax); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, vmin, vmax); + } } ParticleDataImpl<T> &me; const T vmin; @@ -872,7 +832,7 @@ template<class T> struct knPdataClampMin : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin) const + inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin) { me[idx] = std::max(vmin, me[idx]); } @@ -886,21 +846,17 @@ template<class T> struct knPdataClampMin : public KernelBase { return vmin; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel knPdataClampMin ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, vmin); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, vmin); + } } ParticleDataImpl<T> &me; const T vmin; @@ -912,7 +868,7 @@ template<class T> struct knPdataClampMax : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmax) const + inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmax) { me[idx] = std::min(vmax, me[idx]); } @@ -926,21 +882,17 @@ template<class T> struct knPdataClampMax : public KernelBase { return vmax; } typedef T type1; - void runMessage() - { - debMsg("Executing kernel knPdataClampMax ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, vmax); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, vmax); + } } ParticleDataImpl<T> &me; const T vmax; @@ -953,7 +905,7 @@ struct knPdataClampMinVec3 : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmin) const + inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmin) { me[idx].x = std::max(vmin, me[idx].x); me[idx].y = std::max(vmin, me[idx].y); @@ -969,21 +921,17 @@ struct knPdataClampMinVec3 : public KernelBase { return vmin; } typedef Real type1; - void runMessage() - { - debMsg("Executing kernel knPdataClampMinVec3 ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, vmin); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, vmin); + } } ParticleDataImpl<Vec3> &me; const Real vmin; @@ -996,7 +944,7 @@ struct knPdataClampMaxVec3 : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmax) const + inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmax) { me[idx].x = std::min(vmax, me[idx].x); me[idx].y = std::min(vmax, me[idx].y); @@ -1012,21 +960,17 @@ struct knPdataClampMaxVec3 : public KernelBase { return vmax; } typedef Real type1; - void runMessage() - { - debMsg("Executing kernel knPdataClampMaxVec3 ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, vmax); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, vmax); + } } ParticleDataImpl<Vec3> &me; const Real vmax; @@ -1071,7 +1015,7 @@ template<class T, class S> struct knPdataSetScalarIntFlag : public KernelBase { ParticleDataImpl<T> &me, const S &other, const ParticleDataImpl<int> &t, - const int itype) const + const int itype) { if (t[idx] & itype) me[idx] = other; @@ -1096,21 +1040,17 @@ template<class T, class S> struct knPdataSetScalarIntFlag : public KernelBase { return itype; } typedef int type3; - void runMessage() - { - debMsg("Executing kernel knPdataSetScalarIntFlag ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other, t, itype); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other, t, itype); + } } ParticleDataImpl<T> &me; const S &other; @@ -1223,29 +1163,21 @@ template<typename T> struct KnPtsSum : public KernelBase { return itype; } typedef int type2; - void runMessage() - { - debMsg("Executing kernel KnPtsSum ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, t, itype, result); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - KnPtsSum(KnPtsSum &o, tbb::split) - : KernelBase(o), val(o.val), t(o.t), itype(o.itype), result(T(0.)) - { - } - void join(const KnPtsSum &o) - { - result += o.result; + const IndexInt _sz = size; +#pragma omp parallel + { + T result = T(0.); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, t, itype, result); +#pragma omp critical + { + this->result += result; + } + } } const ParticleDataImpl<T> &val; const ParticleDataImpl<int> *t; @@ -1275,28 +1207,21 @@ template<typename T> struct KnPtsSumSquare : public KernelBase { return val; } typedef ParticleDataImpl<T> type0; - void runMessage() - { - debMsg("Executing kernel KnPtsSumSquare ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, result); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - KnPtsSumSquare(KnPtsSumSquare &o, tbb::split) : KernelBase(o), val(o.val), result(0.) - { - } - void join(const KnPtsSumSquare &o) - { - result += o.result; + const IndexInt _sz = size; +#pragma omp parallel + { + Real result = 0.; +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, result); +#pragma omp critical + { + this->result += result; + } + } } const ParticleDataImpl<T> &val; Real result; @@ -1324,28 +1249,21 @@ template<typename T> struct KnPtsSumMagnitude : public KernelBase { return val; } typedef ParticleDataImpl<T> type0; - void runMessage() - { - debMsg("Executing kernel KnPtsSumMagnitude ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, result); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - KnPtsSumMagnitude(KnPtsSumMagnitude &o, tbb::split) : KernelBase(o), val(o.val), result(0.) - { - } - void join(const KnPtsSumMagnitude &o) - { - result += o.result; + const IndexInt _sz = size; +#pragma omp parallel + { + Real result = 0.; +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, result); +#pragma omp critical + { + this->result += result; + } + } } const ParticleDataImpl<T> &val; Real result; @@ -1392,29 +1310,21 @@ struct CompPdata_Min : public KernelBase { return val; } typedef ParticleDataImpl<T> type0; - void runMessage() - { - debMsg("Executing kernel CompPdata_Min ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, minVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompPdata_Min(CompPdata_Min &o, tbb::split) - : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max()) - { - } - void join(const CompPdata_Min &o) - { - minVal = min(minVal, o.minVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real minVal = std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, minVal); +#pragma omp critical + { + this->minVal = min(minVal, this->minVal); + } + } } const ParticleDataImpl<T> &val; Real minVal; @@ -1447,29 +1357,21 @@ struct CompPdata_Max : public KernelBase { return val; } typedef ParticleDataImpl<T> type0; - void runMessage() - { - debMsg("Executing kernel CompPdata_Max ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, maxVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompPdata_Max(CompPdata_Max &o, tbb::split) - : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max()) - { - } - void join(const CompPdata_Max &o) - { - maxVal = max(maxVal, o.maxVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real maxVal = -std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, maxVal); +#pragma omp critical + { + this->maxVal = max(maxVal, this->maxVal); + } + } } const ParticleDataImpl<T> &val; Real maxVal; @@ -1545,29 +1447,21 @@ struct CompPdata_MinVec3 : public KernelBase { return val; } typedef ParticleDataImpl<Vec3> type0; - void runMessage() - { - debMsg("Executing kernel CompPdata_MinVec3 ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, minVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompPdata_MinVec3(CompPdata_MinVec3 &o, tbb::split) - : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max()) - { - } - void join(const CompPdata_MinVec3 &o) - { - minVal = min(minVal, o.minVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real minVal = std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, minVal); +#pragma omp critical + { + this->minVal = min(minVal, this->minVal); + } + } } const ParticleDataImpl<Vec3> &val; Real minVal; @@ -1599,29 +1493,21 @@ struct CompPdata_MaxVec3 : public KernelBase { return val; } typedef ParticleDataImpl<Vec3> type0; - void runMessage() - { - debMsg("Executing kernel CompPdata_MaxVec3 ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, val, maxVal); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CompPdata_MaxVec3(CompPdata_MaxVec3 &o, tbb::split) - : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max()) - { - } - void join(const CompPdata_MaxVec3 &o) - { - maxVal = max(maxVal, o.maxVal); + const IndexInt _sz = size; +#pragma omp parallel + { + Real maxVal = -std::numeric_limits<Real>::max(); +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, val, maxVal); +#pragma omp critical + { + this->maxVal = max(maxVal, this->maxVal); + } + } } const ParticleDataImpl<Vec3> &val; Real maxVal; diff --git a/extern/mantaflow/preprocessed/particle.h b/extern/mantaflow/preprocessed/particle.h index 7e0c64e6d03..1e3abcaca20 100644 --- a/extern/mantaflow/preprocessed/particle.h +++ b/extern/mantaflow/preprocessed/particle.h @@ -205,7 +205,6 @@ class ParticleBase : public PbClass { //! custom seed for particle systems, used by plugins int mSeed; //! fix global random seed storage, used mainly by functions in this class static int globalSeed; - public: PbArgs _args; } @@ -629,7 +628,6 @@ template<class S> class ParticleSystem : public ParticleBase { std::vector<S> mData; //! reduce storage , called by doCompress virtual void compress(); - public: PbArgs _args; } @@ -920,7 +918,6 @@ class ParticleIndexSystem : public ParticleSystem<ParticleIndexData> { return -1; } }; - public: PbArgs _args; } @@ -985,7 +982,6 @@ template<class DATA, class CON> class ConnectedParticleSystem : public ParticleS protected: std::vector<CON> mSegments; virtual void compress(); - public: PbArgs _args; } @@ -1075,7 +1071,6 @@ class ParticleDataBase : public PbClass { protected: ParticleBase *mpParticleSys; - public: PbArgs _args; } @@ -1848,7 +1843,6 @@ template<class T> class ParticleDataImpl : public ParticleDataBase { //! optionally , we might have an associated grid from which to grab new data Grid<T> *mpGridSource; //! unfortunately , we need to distinguish mac vs regular vec3 bool mGridSourceMAC; - public: PbArgs _args; } @@ -1912,19 +1906,17 @@ template<class S> void ParticleSystem<S>::transformPositions(Vec3i dimOld, Vec3i // check for deletion/invalid position, otherwise return velocity -template<class S> struct _GridAdvectKernel : public KernelBase { - _GridAdvectKernel(const KernelBase &base, - std::vector<S> &p, - const MACGrid &vel, - const FlagGrid &flags, - const Real dt, - const bool deleteInObstacle, - const bool stopInObstacle, - const bool skipNew, - const ParticleDataImpl<int> *ptype, - const int exclude, - std::vector<Vec3> &u) - : KernelBase(base), +template<class S> struct GridAdvectKernel : public KernelBase { + GridAdvectKernel(std::vector<S> &p, + const MACGrid &vel, + const FlagGrid &flags, + const Real dt, + const bool deleteInObstacle, + const bool stopInObstacle, + const bool skipNew, + const ParticleDataImpl<int> *ptype, + const int exclude) + : KernelBase(p.size()), p(p), vel(vel), flags(flags), @@ -1934,8 +1926,10 @@ template<class S> struct _GridAdvectKernel : public KernelBase { skipNew(skipNew), ptype(ptype), exclude(exclude), - u(u) + u((size)) { + runMessage(); + run(); } inline void op(IndexInt idx, std::vector<S> &p, @@ -1947,7 +1941,7 @@ template<class S> struct _GridAdvectKernel : public KernelBase { const bool skipNew, const ParticleDataImpl<int> *ptype, const int exclude, - std::vector<Vec3> &u) const + std::vector<Vec3> &u) { if ((p[idx].flag & ParticleBase::PDELETE) || (ptype && ((*ptype)[idx] & exclude)) || (skipNew && (p[idx].flag & ParticleBase::PNEW))) { @@ -1968,66 +1962,6 @@ template<class S> struct _GridAdvectKernel : public KernelBase { } u[idx] = vel.getInterpolated(p[idx].pos) * dt; } - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, p, vel, flags, dt, deleteInObstacle, stopInObstacle, skipNew, ptype, exclude, u); - } - void run() - { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); - } - std::vector<S> &p; - const MACGrid &vel; - const FlagGrid &flags; - const Real dt; - const bool deleteInObstacle; - const bool stopInObstacle; - const bool skipNew; - const ParticleDataImpl<int> *ptype; - const int exclude; - std::vector<Vec3> &u; -}; -template<class S> struct GridAdvectKernel : public KernelBase { - GridAdvectKernel(std::vector<S> &p, - const MACGrid &vel, - const FlagGrid &flags, - const Real dt, - const bool deleteInObstacle, - const bool stopInObstacle, - const bool skipNew, - const ParticleDataImpl<int> *ptype, - const int exclude) - : KernelBase(p.size()), - _inner(KernelBase(p.size()), - p, - vel, - flags, - dt, - deleteInObstacle, - stopInObstacle, - skipNew, - ptype, - exclude, - u), - p(p), - vel(vel), - flags(flags), - dt(dt), - deleteInObstacle(deleteInObstacle), - stopInObstacle(stopInObstacle), - skipNew(skipNew), - ptype(ptype), - exclude(exclude), - u((size)) - { - runMessage(); - run(); - } - void run() - { - _inner.run(); - } inline operator std::vector<Vec3>() { return u; @@ -2081,14 +2015,18 @@ template<class S> struct GridAdvectKernel : public KernelBase { return exclude; } typedef int type8; - void runMessage() + void runMessage(){}; + void run() { - debMsg("Executing kernel GridAdvectKernel ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - _GridAdvectKernel<S> _inner; + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, p, vel, flags, dt, deleteInObstacle, stopInObstacle, skipNew, ptype, exclude, u); + } + } std::vector<S> &p; const MACGrid &vel; const FlagGrid &flags; @@ -2112,7 +2050,7 @@ template<class S> struct KnDeleteInObstacle : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, std::vector<S> &p, const FlagGrid &flags) const + inline void op(IndexInt idx, std::vector<S> &p, const FlagGrid &flags) { if (p[idx].flag & ParticleBase::PDELETE) return; @@ -2130,21 +2068,17 @@ template<class S> struct KnDeleteInObstacle : public KernelBase { return flags; } typedef FlagGrid type1; - void runMessage() - { - debMsg("Executing kernel KnDeleteInObstacle ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, p, flags); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, p, flags); + } } std::vector<S> &p; const FlagGrid &flags; @@ -2189,7 +2123,7 @@ template<class S> struct KnClampPositions : public KernelBase { ParticleDataImpl<Vec3> *posOld = nullptr, bool stopInObstacle = true, const ParticleDataImpl<int> *ptype = nullptr, - const int exclude = 0) const + const int exclude = 0) { if (p[idx].flag & ParticleBase::PDELETE) return; @@ -2235,21 +2169,17 @@ template<class S> struct KnClampPositions : public KernelBase { return exclude; } typedef int type5; - void runMessage() - { - debMsg("Executing kernel KnClampPositions ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, p, flags, posOld, stopInObstacle, ptype, exclude); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, p, flags, posOld, stopInObstacle, ptype, exclude); + } } std::vector<S> &p; const FlagGrid &flags; @@ -2341,13 +2271,7 @@ template<class S> struct KnProjectParticles : public KernelBase { return rand; } typedef RandomStream type2; - void runMessage() - { - debMsg("Executing kernel KnProjectParticles ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; + void runMessage(){}; void run() { const IndexInt _sz = size; @@ -2389,7 +2313,7 @@ template<class S> struct KnProjectOutOfBnd : public KernelBase { const Real bnd, const bool *axis, const ParticleDataImpl<int> *ptype, - const int exclude) const + const int exclude) { if (!part.isActive(idx) || (ptype && ((*ptype)[idx] & exclude))) return; @@ -2438,21 +2362,17 @@ template<class S> struct KnProjectOutOfBnd : public KernelBase { return exclude; } typedef int type5; - void runMessage() - { - debMsg("Executing kernel KnProjectOutOfBnd ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, part, flags, bnd, axis, ptype, exclude); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, part, flags, bnd, axis, ptype, exclude); + } } ParticleSystem<S> ∂ const FlagGrid &flags; diff --git a/extern/mantaflow/preprocessed/plugin/advection.cpp b/extern/mantaflow/preprocessed/plugin/advection.cpp index dd891e22088..6a548841bef 100644 --- a/extern/mantaflow/preprocessed/plugin/advection.cpp +++ b/extern/mantaflow/preprocessed/plugin/advection.cpp @@ -59,7 +59,7 @@ template<class T> struct SemiLagrange : public KernelBase { Real dt, bool isLevelset, int orderSpace, - int orderTrace) const + int orderTrace) { if (orderTrace == 1) { // traceback position @@ -117,37 +117,35 @@ template<class T> struct SemiLagrange : public KernelBase { return orderTrace; } typedef int type7; - void runMessage() - { - debMsg("Executing kernel SemiLagrange ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; const MACGrid &vel; Grid<T> &dst; @@ -189,7 +187,7 @@ struct SemiLagrangeMAC : public KernelBase { const MACGrid &src, Real dt, int orderSpace, - int orderTrace) const + int orderTrace) { if (orderTrace == 1) { // get currect velocity at MAC position @@ -259,37 +257,35 @@ struct SemiLagrangeMAC : public KernelBase { return orderTrace; } typedef int type6; - void runMessage() - { - debMsg("Executing kernel SemiLagrangeMAC ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; const MACGrid &vel; MACGrid &dst; @@ -331,7 +327,7 @@ template<class T> struct MacCormackCorrect : public KernelBase { const Grid<T> &bwd, Real strength, bool isLevelSet, - bool isMAC = false) const + bool isMAC = false) { dst[idx] = fwd[idx]; @@ -380,21 +376,17 @@ template<class T> struct MacCormackCorrect : public KernelBase { return isMAC; } typedef bool type7; - void runMessage() - { - debMsg("Executing kernel MacCormackCorrect ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC); + } } const FlagGrid &flags; Grid<T> &dst; @@ -440,7 +432,7 @@ template<class T> struct MacCormackCorrectMAC : public KernelBase { const Grid<T> &bwd, Real strength, bool isLevelSet, - bool isMAC = false) const + bool isMAC = false) { bool skip[3] = {false, false, false}; @@ -505,37 +497,35 @@ template<class T> struct MacCormackCorrectMAC : public KernelBase { return isMAC; } typedef bool type7; - void runMessage() - { - debMsg("Executing kernel MacCormackCorrectMAC ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; Grid<T> &dst; const Grid<T> &old; @@ -762,7 +752,7 @@ template<class T> struct MacCormackClamp : public KernelBase { const Grid<T> &orig, const Grid<T> &fwd, Real dt, - const int clampMode) const + const int clampMode) { T dval = dst(i, j, k); Vec3i gridUpper = flags.getSize() - 1; @@ -830,37 +820,35 @@ template<class T> struct MacCormackClamp : public KernelBase { return clampMode; } typedef int type6; - void runMessage() - { - debMsg("Executing kernel MacCormackClamp ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; const MACGrid &vel; Grid<T> &dst; @@ -901,7 +889,7 @@ struct MacCormackClampMAC : public KernelBase { const MACGrid &orig, const MACGrid &fwd, Real dt, - const int clampMode) const + const int clampMode) { Vec3 pos(i, j, k); Vec3 dval = dst(i, j, k); @@ -957,37 +945,35 @@ struct MacCormackClampMAC : public KernelBase { return clampMode; } typedef int type6; - void runMessage() - { - debMsg("Executing kernel MacCormackClampMAC ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; const MACGrid &vel; MACGrid &dst; @@ -1016,27 +1002,39 @@ void fnAdvectSemiLagrange(FluidSolver *parent, bool levelset = orig.getType() & GridBase::TypeLevelset; // forward step - GridType fwd(parent); - SemiLagrange<T>(flags, vel, fwd, orig, dt, levelset, orderSpace, orderTrace); + GridType *fwd = new GridType(parent, true, false, false); + SemiLagrange<T>(flags, vel, *fwd, orig, dt, levelset, orderSpace, orderTrace); if (order == 1) { - orig.swap(fwd); +#if OPENMP && OPENMP_OFFLOAD + orig.copyFrom(*fwd, true, false); +#else + orig.swap(*fwd); +#endif } else if (order == 2) { // MacCormack GridType bwd(parent); - GridType newGrid(parent); + GridType *newGrid = new GridType(parent, true, false, false); // bwd <- backwards step - SemiLagrange<T>(flags, vel, bwd, fwd, -dt, levelset, orderSpace, orderTrace); + SemiLagrange<T>(flags, vel, bwd, *fwd, -dt, levelset, orderSpace, orderTrace); // newGrid <- compute correction - MacCormackCorrect<T>(flags, newGrid, orig, fwd, bwd, strength, levelset); + MacCormackCorrect<T>(flags, *newGrid, orig, *fwd, bwd, strength, levelset); // clamp values - MacCormackClamp<T>(flags, vel, newGrid, orig, fwd, dt, clampMode); - - orig.swap(newGrid); - } + MacCormackClamp<T>(flags, vel, *newGrid, orig, *fwd, dt, clampMode); + +#if OPENMP && OPENMP_OFFLOAD + orig.copyFrom(*newGrid, true, false); +#else + orig.swap(*newGrid); +#endif + if (newGrid) + delete newGrid; + } + if (fwd) + delete fwd; } // outflow functions @@ -1087,7 +1085,7 @@ struct extrapolateVelConvectiveBC : public KernelBase { const MACGrid &vel, MACGrid &velDst, const MACGrid &velPrev, - Real timeStep) const + Real timeStep) { if (flags.isOutflow(i, j, k)) { const Vec3 bulkVel = getBulkVel(flags, vel, i, j, k); @@ -1154,37 +1152,35 @@ struct extrapolateVelConvectiveBC : public KernelBase { return timeStep; } typedef Real type4; - void runMessage() - { - debMsg("Executing kernel extrapolateVelConvectiveBC ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, velDst, velPrev, timeStep); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, velDst, velPrev, timeStep); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, velDst, velPrev, timeStep); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, velDst, velPrev, timeStep); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; const MACGrid &vel; MACGrid &velDst; @@ -1200,8 +1196,7 @@ struct copyChangedVels : public KernelBase { runMessage(); run(); } - inline void op( - int i, int j, int k, const FlagGrid &flags, const MACGrid &velDst, MACGrid &vel) const + inline void op(int i, int j, int k, const FlagGrid &flags, const MACGrid &velDst, MACGrid &vel) { if (flags.isOutflow(i, j, k)) vel(i, j, k) = velDst(i, j, k); @@ -1221,37 +1216,35 @@ struct copyChangedVels : public KernelBase { return vel; } typedef MACGrid type2; - void runMessage() - { - debMsg("Executing kernel copyChangedVels ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, velDst, vel); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, velDst, vel); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, velDst, vel); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, velDst, vel); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; const MACGrid &velDst; MACGrid &vel; @@ -1275,7 +1268,7 @@ struct knResetPhiInObs : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const FlagGrid &flags, Grid<Real> &sdf) const + inline void op(int i, int j, int k, const FlagGrid &flags, Grid<Real> &sdf) { if (flags.isObstacle(i, j, k) && (sdf(i, j, k) < 0.)) { sdf(i, j, k) = 0.1; @@ -1291,37 +1284,35 @@ struct knResetPhiInObs : public KernelBase { return sdf; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel knResetPhiInObs ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, sdf); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, sdf); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, sdf); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, sdf); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; Grid<Real> &sdf; }; @@ -1378,33 +1369,45 @@ void fnAdvectSemiLagrange<MACGrid>(FluidSolver *parent, Real dt = parent->getDt(); // forward step - MACGrid fwd(parent); - SemiLagrangeMAC(flags, vel, fwd, orig, dt, orderSpace, orderTrace); + MACGrid *fwd = new MACGrid(parent, true, false, false); + SemiLagrangeMAC(flags, vel, *fwd, orig, dt, orderSpace, orderTrace); if (orderSpace != 1) { debMsg("Warning higher order for MAC grids not yet implemented...", 1); } if (order == 1) { - applyOutflowBC(flags, fwd, orig, dt); - orig.swap(fwd); + applyOutflowBC(flags, *fwd, orig, dt); +#if OPENMP && OPENMP_OFFLOAD + orig.copyFrom(*fwd, true, false); +#else + orig.swap(*fwd); +#endif } else if (order == 2) { // MacCormack MACGrid bwd(parent); - MACGrid newGrid(parent); + MACGrid *newGrid = new MACGrid(parent, true, false, false); // bwd <- backwards step - SemiLagrangeMAC(flags, vel, bwd, fwd, -dt, orderSpace, orderTrace); + SemiLagrangeMAC(flags, vel, bwd, *fwd, -dt, orderSpace, orderTrace); // newGrid <- compute correction - MacCormackCorrectMAC<Vec3>(flags, newGrid, orig, fwd, bwd, strength, false, true); + MacCormackCorrectMAC<Vec3>(flags, *newGrid, orig, *fwd, bwd, strength, false, true); // clamp values - MacCormackClampMAC(flags, vel, newGrid, orig, fwd, dt, clampMode); - - applyOutflowBC(flags, newGrid, orig, dt); - orig.swap(newGrid); - } + MacCormackClampMAC(flags, vel, *newGrid, orig, *fwd, dt, clampMode); + + applyOutflowBC(flags, *newGrid, orig, dt); +#if OPENMP && OPENMP_OFFLOAD + orig.copyFrom(*newGrid, true, false); +#else + orig.swap(*newGrid); +#endif + if (newGrid) + delete newGrid; + } + if (fwd) + delete fwd; } //! Perform semi-lagrangian advection of target Real- or Vec3 grid diff --git a/extern/mantaflow/preprocessed/plugin/apic.cpp b/extern/mantaflow/preprocessed/plugin/apic.cpp index 9df6b597e05..5da507bb44b 100644 --- a/extern/mantaflow/preprocessed/plugin/apic.cpp +++ b/extern/mantaflow/preprocessed/plugin/apic.cpp @@ -239,13 +239,7 @@ struct knApicMapLinearVec3ToMACGrid : public KernelBase { return boundaryWidth; } typedef int type9; - void runMessage() - { - debMsg("Executing kernel knApicMapLinearVec3ToMACGrid ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; + void runMessage(){}; void run() { const IndexInt _sz = size; @@ -370,7 +364,7 @@ struct knApicMapLinearMACGridToVec3 : public KernelBase { const FlagGrid &flags, const ParticleDataImpl<int> *ptype, const int exclude, - const int boundaryWidth) const + const int boundaryWidth) { if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude))) return; @@ -509,21 +503,17 @@ struct knApicMapLinearMACGridToVec3 : public KernelBase { return boundaryWidth; } typedef int type9; - void runMessage() - { - debMsg("Executing kernel knApicMapLinearMACGridToVec3 ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, vp, cpx, cpy, cpz, p, vg, flags, ptype, exclude, boundaryWidth); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, vp, cpx, cpy, cpz, p, vg, flags, ptype, exclude, boundaryWidth); + } } ParticleDataImpl<Vec3> &vp; ParticleDataImpl<Vec3> &cpx; diff --git a/extern/mantaflow/preprocessed/plugin/extforces.cpp b/extern/mantaflow/preprocessed/plugin/extforces.cpp index 88935fa7ae9..a0fb4659aa2 100644 --- a/extern/mantaflow/preprocessed/plugin/extforces.cpp +++ b/extern/mantaflow/preprocessed/plugin/extforces.cpp @@ -52,7 +52,7 @@ struct KnApplyForceField : public KernelBase { const Grid<Vec3> &force, const Grid<Real> *include, bool additive, - bool isMAC) const + bool isMAC) { bool curFluid = flags.isFluid(i, j, k); bool curEmpty = flags.isEmpty(i, j, k); @@ -105,37 +105,35 @@ struct KnApplyForceField : public KernelBase { return isMAC; } typedef bool type5; - void runMessage() - { - debMsg("Executing kernel KnApplyForceField ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, force, include, additive, isMAC); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, force, include, additive, isMAC); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, force, include, additive, isMAC); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, force, include, additive, isMAC); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; MACGrid &vel; const Grid<Vec3> &force; @@ -165,7 +163,7 @@ struct KnApplyForce : public KernelBase { MACGrid &vel, Vec3 force, const Grid<Real> *exclude, - bool additive) const + bool additive) { bool curFluid = flags.isFluid(i, j, k); bool curEmpty = flags.isEmpty(i, j, k); @@ -206,37 +204,35 @@ struct KnApplyForce : public KernelBase { return additive; } typedef bool type4; - void runMessage() - { - debMsg("Executing kernel KnApplyForce ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, force, exclude, additive); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, force, exclude, additive); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, force, exclude, additive); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, force, exclude, additive); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; MACGrid &vel; Vec3 force; @@ -346,7 +342,7 @@ struct KnAddBuoyancy : public KernelBase { const FlagGrid &flags, const Grid<Real> &factor, MACGrid &vel, - Vec3 strength) const + Vec3 strength) { if (!flags.isFluid(i, j, k)) return; @@ -377,37 +373,35 @@ struct KnAddBuoyancy : public KernelBase { return strength; } typedef Vec3 type3; - void runMessage() - { - debMsg("Executing kernel KnAddBuoyancy ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, factor, vel, strength); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, factor, vel, strength); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, factor, vel, strength); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, factor, vel, strength); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; const Grid<Real> &factor; MACGrid &vel; @@ -662,7 +656,7 @@ struct KnSetInflow : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, MACGrid &vel, int dim, int p0, const Vec3 &val) const + inline void op(int i, int j, int k, MACGrid &vel, int dim, int p0, const Vec3 &val) { Vec3i p(i, j, k); if (p[dim] == p0 || p[dim] == p0 + 1) @@ -688,37 +682,35 @@ struct KnSetInflow : public KernelBase { return val; } typedef Vec3 type3; - void runMessage() - { - debMsg("Executing kernel KnSetInflow ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, vel, dim, p0, val); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, vel, dim, p0, val); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, vel, dim, p0, val); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, vel, dim, p0, val); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } MACGrid &vel; int dim; int p0; @@ -784,8 +776,7 @@ struct KnSetWallBcs : public KernelBase { runMessage(); run(); } - inline void op( - int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const MACGrid *obvel) const + inline void op(int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const MACGrid *obvel) { bool curFluid = flags.isFluid(i, j, k); @@ -848,37 +839,35 @@ struct KnSetWallBcs : public KernelBase { return obvel; } typedef MACGrid type2; - void runMessage() - { - debMsg("Executing kernel KnSetWallBcs ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, obvel); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, obvel); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, obvel); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, obvel); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; MACGrid &vel; const MACGrid *obvel; @@ -912,7 +901,7 @@ struct KnSetWallBcsFrac : public KernelBase { MACGrid &velTarget, const MACGrid *obvel, const Grid<Real> *phiObs, - const int &boundaryWidth = 0) const + const int &boundaryWidth = 0) { bool curFluid = flags.isFluid(i, j, k); bool curObs = flags.isObstacle(i, j, k); @@ -1025,37 +1014,35 @@ struct KnSetWallBcsFrac : public KernelBase { return boundaryWidth; } typedef int type5; - void runMessage() - { - debMsg("Executing kernel KnSetWallBcsFrac ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; const MACGrid &vel; MACGrid &velTarget; @@ -1127,8 +1114,7 @@ struct KnAddForceIfLower : public KernelBase { runMessage(); run(); } - inline void op( - int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Vec3> &force) const + inline void op(int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Vec3> &force) { bool curFluid = flags.isFluid(i, j, k); bool curEmpty = flags.isEmpty(i, j, k); @@ -1173,37 +1159,35 @@ struct KnAddForceIfLower : public KernelBase { return force; } typedef Grid<Vec3> type2; - void runMessage() - { - debMsg("Executing kernel KnAddForceIfLower ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, force); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, force); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, force); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, force); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; MACGrid &vel; const Grid<Vec3> &force; @@ -1266,7 +1250,7 @@ struct KnConfForce : public KernelBase { const Grid<Real> &grid, const Grid<Vec3> &curl, Real str, - const Grid<Real> *strGrid) const + const Grid<Real> *strGrid) { Vec3 grad = 0.5 * Vec3(grid(i + 1, j, k) - grid(i - 1, j, k), grid(i, j + 1, k) - grid(i, j - 1, k), @@ -1303,37 +1287,35 @@ struct KnConfForce : public KernelBase { return strGrid; } typedef Grid<Real> type4; - void runMessage() - { - debMsg("Executing kernel KnConfForce ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, force, grid, curl, str, strGrid); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, force, grid, curl, str, strGrid); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, force, grid, curl, str, strGrid); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, force, grid, curl, str, strGrid); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<Vec3> &force; const Grid<Real> &grid; const Grid<Vec3> &curl; @@ -1514,7 +1496,7 @@ struct KnDissolveSmoke : public KernelBase { int speed, bool logFalloff, float dydx, - float fac) const + float fac) { bool curFluid = flags.isFluid(i, j, k); @@ -1602,37 +1584,35 @@ struct KnDissolveSmoke : public KernelBase { return fac; } typedef float type9; - void runMessage() - { - debMsg("Executing kernel KnDissolveSmoke ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; Grid<Real> &density; Grid<Real> *heat; diff --git a/extern/mantaflow/preprocessed/plugin/fire.cpp b/extern/mantaflow/preprocessed/plugin/fire.cpp index f907bdf0504..8f924c57ddc 100644 --- a/extern/mantaflow/preprocessed/plugin/fire.cpp +++ b/extern/mantaflow/preprocessed/plugin/fire.cpp @@ -71,7 +71,7 @@ struct KnProcessBurn : public KernelBase { Real ignitionTemp, Real maxTemp, Real dt, - Vec3 flameSmokeColor) const + Vec3 flameSmokeColor) { // Save initial values Real origFuel = fuel(i, j, k); @@ -179,19 +179,44 @@ struct KnProcessBurn : public KernelBase { return flameSmokeColor; } typedef Vec3 type12; - void runMessage() - { - debMsg("Executing kernel KnProcessBurn ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, + j, + k, + fuel, + density, + react, + red, + green, + blue, + heat, + burningRate, + flameSmoke, + ignitionTemp, + maxTemp, + dt, + flameSmokeColor); + } + } + else { + const int k = 0; +#pragma omp parallel + { + +#pragma omp for for (int j = 1; j < _maxY; j++) for (int i = 1; i < _maxX; i++) op(i, @@ -210,35 +235,8 @@ struct KnProcessBurn : public KernelBase { maxTemp, dt, flameSmokeColor); + } } - else { - const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, - j, - k, - fuel, - density, - react, - red, - green, - blue, - heat, - burningRate, - flameSmoke, - ignitionTemp, - maxTemp, - dt, - flameSmokeColor); - } - } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); } Grid<Real> &fuel; Grid<Real> &density; @@ -344,7 +342,7 @@ struct KnUpdateFlame : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const Grid<Real> &react, Grid<Real> &flame) const + inline void op(int i, int j, int k, const Grid<Real> &react, Grid<Real> &flame) { if (react(i, j, k) > 0.0f) flame(i, j, k) = pow(react(i, j, k), 0.5f); @@ -361,37 +359,35 @@ struct KnUpdateFlame : public KernelBase { return flame; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel KnUpdateFlame ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, react, flame); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, react, flame); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, react, flame); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, react, flame); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const Grid<Real> &react; Grid<Real> &flame; }; diff --git a/extern/mantaflow/preprocessed/plugin/flip.cpp b/extern/mantaflow/preprocessed/plugin/flip.cpp index 8757958d4b0..e63d8236f28 100644 --- a/extern/mantaflow/preprocessed/plugin/flip.cpp +++ b/extern/mantaflow/preprocessed/plugin/flip.cpp @@ -283,7 +283,7 @@ struct knClearFluidFlags : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, FlagGrid &flags, int dummy = 0) const + inline void op(int i, int j, int k, FlagGrid &flags, int dummy = 0) { if (flags.isFluid(i, j, k)) { flags(i, j, k) = (flags(i, j, k) | FlagGrid::TypeEmpty) & ~FlagGrid::TypeFluid; @@ -299,37 +299,35 @@ struct knClearFluidFlags : public KernelBase { return dummy; } typedef int type1; - void runMessage() - { - debMsg("Executing kernel knClearFluidFlags ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, dummy); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, dummy); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, dummy); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, dummy); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } FlagGrid &flags; int dummy; }; @@ -342,7 +340,7 @@ struct knSetNbObstacle : public KernelBase { run(); } inline void op( - int i, int j, int k, FlagGrid &nflags, const FlagGrid &flags, const Grid<Real> &phiObs) const + int i, int j, int k, FlagGrid &nflags, const FlagGrid &flags, const Grid<Real> &phiObs) { if (phiObs(i, j, k) > 0.) return; @@ -381,37 +379,35 @@ struct knSetNbObstacle : public KernelBase { return phiObs; } typedef Grid<Real> type2; - void runMessage() - { - debMsg("Executing kernel knSetNbObstacle ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, nflags, flags, phiObs); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, nflags, flags, phiObs); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, nflags, flags, phiObs); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, nflags, flags, phiObs); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } FlagGrid &nflags; const FlagGrid &flags; const Grid<Real> &phiObs; @@ -438,7 +434,11 @@ void markFluidCells(const BasicParticleSystem &parts, if (phiObs) { FlagGrid tmp(flags); knSetNbObstacle(tmp, flags, *phiObs); +#if OPENMP && OPENMP_OFFLOAD + flags.copyFrom(tmp, true, false); +#else flags.swap(tmp); +#endif } } static PyObject *_W_3(PyObject *_self, PyObject *_linargs, PyObject *_kwds) @@ -818,33 +818,37 @@ struct ComputeUnionLevelsetPindex : public KernelBase { LevelsetGrid &phi, const Real radius, const ParticleDataImpl<int> *ptype, - const int exclude) const + const int exclude) { const Vec3 gridPos = Vec3(i, j, k) + Vec3(0.5); // shifted by half cell Real phiv = radius * 1.0; // outside - const int r = int(radius) + 1; - FOR_NEIGHBORS(phi, r) - { + int r = int(radius) + 1; + int rZ = phi.is3D() ? r : 0; + for (int zj = k - rZ; zj <= k + rZ; zj++) + for (int yj = j - r; yj <= j + r; yj++) + for (int xj = i - r; xj <= i + r; xj++) { + if (!phi.isInBounds(Vec3i(xj, yj, zj))) + continue; - // note, for the particle indices in indexSys the access is periodic (ie, dont skip for eg - // inBounds(sx,10,10) - IndexInt isysIdxS = index.index(xj, yj, zj); - IndexInt pStart = index(isysIdxS), pEnd = 0; - if (phi.isInBounds(isysIdxS + 1)) - pEnd = index(isysIdxS + 1); - else - pEnd = indexSys.size(); - - // now loop over particles in cell - for (IndexInt p = pStart; p < pEnd; ++p) { - const int psrc = indexSys[p].sourceIndex; - if (ptype && ((*ptype)[psrc] & exclude)) - continue; - const Vec3 pos = parts[psrc].pos; - phiv = std::min(phiv, fabs(norm(gridPos - pos)) - radius); - } - } + // note, for the particle indices in indexSys the access is periodic (ie, dont skip for + // eg inBounds(sx,10,10) + IndexInt isysIdxS = index.index(xj, yj, zj); + IndexInt pStart = index(isysIdxS), pEnd = 0; + if (phi.isInBounds(isysIdxS + 1)) + pEnd = index(isysIdxS + 1); + else + pEnd = indexSys.size(); + + // now loop over particles in cell + for (IndexInt p = pStart; p < pEnd; ++p) { + const int psrc = indexSys[p].sourceIndex; + if (ptype && ((*ptype)[psrc] & exclude)) + continue; + const Vec3 pos = parts[psrc].pos; + phiv = std::min(phiv, fabs(norm(gridPos - pos)) - radius); + } + } phi(i, j, k) = phiv; } inline const Grid<int> &getArg0() @@ -882,37 +886,35 @@ struct ComputeUnionLevelsetPindex : public KernelBase { return exclude; } typedef int type6; - void runMessage() - { - debMsg("Executing kernel ComputeUnionLevelsetPindex ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const Grid<int> &index; const BasicParticleSystem &parts; const ParticleIndexSystem &indexSys; @@ -1015,42 +1017,46 @@ struct ComputeAveragedLevelsetWeight : public KernelBase { const ParticleDataImpl<int> *ptype, const int exclude, Grid<Vec3> *save_pAcc = nullptr, - Grid<Real> *save_rAcc = nullptr) const + Grid<Real> *save_rAcc = nullptr) { const Vec3 gridPos = Vec3(i, j, k) + Vec3(0.5); // shifted by half cell Real phiv = radius * 1.0; // outside // loop over neighborhood, similar to ComputeUnionLevelsetPindex const Real sradiusInv = 1. / (4. * radius * radius); - const int r = int(radius) + 1; + int r = int(1. * radius) + 1; + int rZ = phi.is3D() ? r : 0; // accumulators Real wacc = 0.; Vec3 pacc = Vec3(0.); Real racc = 0.; - FOR_NEIGHBORS(phi, r) - { + for (int zj = k - rZ; zj <= k + rZ; zj++) + for (int yj = j - r; yj <= j + r; yj++) + for (int xj = i - r; xj <= i + r; xj++) { + if (!phi.isInBounds(Vec3i(xj, yj, zj))) + continue; - IndexInt isysIdxS = index.index(xj, yj, zj); - IndexInt pStart = index(isysIdxS), pEnd = 0; - if (phi.isInBounds(isysIdxS + 1)) - pEnd = index(isysIdxS + 1); - else - pEnd = indexSys.size(); - for (IndexInt p = pStart; p < pEnd; ++p) { - IndexInt psrc = indexSys[p].sourceIndex; - if (ptype && ((*ptype)[psrc] & exclude)) - continue; + IndexInt isysIdxS = index.index(xj, yj, zj); + IndexInt pStart = index(isysIdxS), pEnd = 0; + if (phi.isInBounds(isysIdxS + 1)) + pEnd = index(isysIdxS + 1); + else + pEnd = indexSys.size(); + for (IndexInt p = pStart; p < pEnd; ++p) { + IndexInt psrc = indexSys[p].sourceIndex; + if (ptype && ((*ptype)[psrc] & exclude)) + continue; - Vec3 pos = parts[psrc].pos; - Real s = normSquare(gridPos - pos) * sradiusInv; - // Real w = std::max(0., cubed(1.-s) ); - Real w = std::max(0., (1. - s)); // a bit smoother - wacc += w; - racc += radius * w; - pacc += pos * w; - } - } + Vec3 pos = parts[psrc].pos; + Real s = normSquare(gridPos - pos) * sradiusInv; + // Real w = std::max(0., cubed(1.-s) ); + Real w = std::max(0., (1. - s)); // a bit smoother + wacc += w; + racc += radius * w; + pacc += pos * w; + } + } if (wacc > VECTOR_EPSILON) { racc /= wacc; @@ -1109,37 +1115,46 @@ struct ComputeAveragedLevelsetWeight : public KernelBase { return save_rAcc; } typedef Grid<Real> type8; - void runMessage() - { - debMsg("Executing kernel ComputeAveragedLevelsetWeight ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, parts, index, indexSys, phi, radius, ptype, exclude, save_pAcc, save_rAcc); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, + j, + k, + parts, + index, + indexSys, + phi, + radius, + ptype, + exclude, + save_pAcc, + save_rAcc); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, parts, index, indexSys, phi, radius, ptype, exclude, save_pAcc, save_rAcc); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, parts, index, indexSys, phi, radius, ptype, exclude, save_pAcc, save_rAcc); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const BasicParticleSystem &parts; const Grid<int> &index; const ParticleIndexSystem &indexSys; @@ -1165,7 +1180,7 @@ template<class T> struct knSmoothGrid : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor) const + inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor) { T val = me(i, j, k) + me(i + 1, j, k) + me(i - 1, j, k) + me(i, j + 1, k) + me(i, j - 1, k); if (me.is3D()) { @@ -1188,37 +1203,35 @@ template<class T> struct knSmoothGrid : public KernelBase { return factor; } typedef Real type2; - void runMessage() - { - debMsg("Executing kernel knSmoothGrid ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, me, tmp, factor); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, me, tmp, factor); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, me, tmp, factor); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, me, tmp, factor); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const Grid<T> &me; Grid<T> &tmp; Real factor; @@ -1231,7 +1244,7 @@ template<class T> struct knSmoothGridNeg : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor) const + inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor) { T val = me(i, j, k) + me(i + 1, j, k) + me(i - 1, j, k) + me(i, j + 1, k) + me(i, j - 1, k); if (me.is3D()) { @@ -1258,37 +1271,35 @@ template<class T> struct knSmoothGridNeg : public KernelBase { return factor; } typedef Real type2; - void runMessage() - { - debMsg("Executing kernel knSmoothGridNeg ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, me, tmp, factor); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, me, tmp, factor); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, me, tmp, factor); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, me, tmp, factor); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const Grid<T> &me; Grid<T> &tmp; Real factor; @@ -1396,7 +1407,7 @@ struct correctLevelset : public KernelBase { const Grid<Real> &rAcc, const Real radius, const Real t_low, - const Real t_high) const + const Real t_high) { if (rAcc(i, j, k) <= VECTOR_EPSILON) return; // outside nothing happens @@ -1463,37 +1474,35 @@ struct correctLevelset : public KernelBase { return t_high; } typedef Real type5; - void runMessage() - { - debMsg("Executing kernel correctLevelset ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } LevelsetGrid φ const Grid<Vec3> &pAcc; const Grid<Real> &rAcc; @@ -1624,7 +1633,7 @@ struct knPushOutofObs : public KernelBase { const Real shift, const Real thresh, const ParticleDataImpl<int> *ptype, - const int exclude) const + const int exclude) { if (!parts.isActive(idx) || (ptype && ((*ptype)[idx] & exclude))) return; @@ -1675,21 +1684,17 @@ struct knPushOutofObs : public KernelBase { return exclude; } typedef int type6; - void runMessage() - { - debMsg("Executing kernel knPushOutofObs ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, parts, flags, phiObs, shift, thresh, ptype, exclude); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, parts, flags, phiObs, shift, thresh, ptype, exclude); + } } BasicParticleSystem &parts; const FlagGrid &flags; @@ -1759,10 +1764,7 @@ template<class T> struct knSafeDivReal : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, - Grid<T> &me, - const Grid<Real> &other, - Real cutoff = VECTOR_EPSILON) const + inline void op(IndexInt idx, Grid<T> &me, const Grid<Real> &other, Real cutoff = VECTOR_EPSILON) { if (other[idx] < cutoff) { me[idx] = 0.; @@ -1787,21 +1789,17 @@ template<class T> struct knSafeDivReal : public KernelBase { return cutoff; } typedef Real type2; - void runMessage() - { - debMsg("Executing kernel knSafeDivReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, me, other, cutoff); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, me, other, cutoff); + } } Grid<T> &me; const Grid<Real> &other; @@ -1879,13 +1877,7 @@ struct knMapLinearVec3ToMACGrid : public KernelBase { return exclude; } typedef int type6; - void runMessage() - { - debMsg("Executing kernel knMapLinearVec3ToMACGrid ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; + void runMessage(){}; void run() { const IndexInt _sz = size; @@ -2022,13 +2014,7 @@ template<class T> struct knMapLinear : public KernelBase { return psource; } typedef ParticleDataImpl<T> type4; - void runMessage() - { - debMsg("Executing kernel knMapLinear ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; + void runMessage(){}; void run() { const IndexInt _sz = size; @@ -2153,7 +2139,7 @@ template<class T> struct knMapFromGrid : public KernelBase { inline void op(IndexInt idx, const BasicParticleSystem &p, const Grid<T> &gsrc, - ParticleDataImpl<T> &target) const + ParticleDataImpl<T> &target) { if (!p.isActive(idx)) return; @@ -2174,21 +2160,17 @@ template<class T> struct knMapFromGrid : public KernelBase { return target; } typedef ParticleDataImpl<T> type2; - void runMessage() - { - debMsg("Executing kernel knMapFromGrid ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, p, gsrc, target); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, p, gsrc, target); + } } const BasicParticleSystem &p; const Grid<T> &gsrc; @@ -2298,7 +2280,7 @@ struct knMapLinearMACGridToVec3_PIC : public KernelBase { const MACGrid &vel, ParticleDataImpl<Vec3> &pvel, const ParticleDataImpl<int> *ptype, - const int exclude) const + const int exclude) { if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude))) return; @@ -2335,21 +2317,17 @@ struct knMapLinearMACGridToVec3_PIC : public KernelBase { return exclude; } typedef int type5; - void runMessage() - { - debMsg("Executing kernel knMapLinearMACGridToVec3_PIC ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, p, flags, vel, pvel, ptype, exclude); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, p, flags, vel, pvel, ptype, exclude); + } } const BasicParticleSystem &p; const FlagGrid &flags; @@ -2438,7 +2416,7 @@ struct knMapLinearMACGridToVec3_FLIP : public KernelBase { ParticleDataImpl<Vec3> &pvel, const Real flipRatio, const ParticleDataImpl<int> *ptype, - const int exclude) const + const int exclude) { if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude))) return; @@ -2486,21 +2464,17 @@ struct knMapLinearMACGridToVec3_FLIP : public KernelBase { return exclude; } typedef int type7; - void runMessage() - { - debMsg("Executing kernel knMapLinearMACGridToVec3_FLIP ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, p, flags, vel, oldVel, pvel, flipRatio, ptype, exclude); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, p, flags, vel, oldVel, pvel, flipRatio, ptype, exclude); + } } const BasicParticleSystem &p; const FlagGrid &flags; @@ -2592,7 +2566,7 @@ struct knCombineVels : public KernelBase { MACGrid &combineVel, const LevelsetGrid *phi, Real narrowBand, - Real thresh) const + Real thresh) { int idx = vel.index(i, j, k); @@ -2648,37 +2622,35 @@ struct knCombineVels : public KernelBase { return thresh; } typedef Real type5; - void runMessage() - { - debMsg("Executing kernel knCombineVels ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } MACGrid &vel; const Grid<Vec3> &w; MACGrid &combineVel; diff --git a/extern/mantaflow/preprocessed/plugin/fluidguiding.cpp b/extern/mantaflow/preprocessed/plugin/fluidguiding.cpp index de881840a2e..2b11f3a2557 100644 --- a/extern/mantaflow/preprocessed/plugin/fluidguiding.cpp +++ b/extern/mantaflow/preprocessed/plugin/fluidguiding.cpp @@ -62,7 +62,7 @@ struct apply1DKernelDirX : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) const + inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) { int nx = in.getSizeX(); int kn = kernel.n; @@ -91,37 +91,35 @@ struct apply1DKernelDirX : public KernelBase { return kernel; } typedef Matrix type2; - void runMessage() - { - debMsg("Executing kernel apply1DKernelDirX ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, in, out, kernel); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, in, out, kernel); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, in, out, kernel); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, in, out, kernel); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const MACGrid ∈ MACGrid &out; const Matrix &kernel; @@ -136,7 +134,7 @@ struct apply1DKernelDirY : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) const + inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) { int ny = in.getSizeY(); int kn = kernel.n; @@ -165,37 +163,35 @@ struct apply1DKernelDirY : public KernelBase { return kernel; } typedef Matrix type2; - void runMessage() - { - debMsg("Executing kernel apply1DKernelDirY ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, in, out, kernel); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, in, out, kernel); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, in, out, kernel); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, in, out, kernel); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const MACGrid ∈ MACGrid &out; const Matrix &kernel; @@ -210,7 +206,7 @@ struct apply1DKernelDirZ : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) const + inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) { int nz = in.getSizeZ(); int kn = kernel.n; @@ -239,37 +235,35 @@ struct apply1DKernelDirZ : public KernelBase { return kernel; } typedef Matrix type2; - void runMessage() - { - debMsg("Executing kernel apply1DKernelDirZ ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, in, out, kernel); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, in, out, kernel); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, in, out, kernel); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, in, out, kernel); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const MACGrid ∈ MACGrid &out; const Matrix &kernel; @@ -569,197 +563,88 @@ void prox_f(MACGrid &v, // ***************************************************************************** -// re-uses main pressure solve from pressure.cpp -void solvePressure(MACGrid &vel, - Grid<Real> &pressure, - const FlagGrid &flags, - Real cgAccuracy = 1e-3, - const Grid<Real> *phi = nullptr, - const Grid<Real> *perCellCorr = nullptr, - const MACGrid *fractions = nullptr, - const MACGrid *obvel = nullptr, - Real gfClamp = 1e-04, - Real cgMaxIterFac = 1.5, - bool precondition = true, - int preconditioner = 1, - bool enforceCompatibility = false, - bool useL2Norm = false, - bool zeroPressureFixing = false, - const Grid<Real> *curv = nullptr, - const Real surfTens = 0.0, - Grid<Real> *retRhs = nullptr); - -//! Main function for fluid guiding , includes "regular" pressure solve - -void PD_fluid_guiding(MACGrid &vel, - MACGrid &velT, - Grid<Real> &pressure, - FlagGrid &flags, - Grid<Real> &weight, - int blurRadius = 5, - Real theta = 1.0, - Real tau = 1.0, - Real sigma = 1.0, - Real epsRel = 1e-3, - Real epsAbs = 1e-3, - int maxIters = 200, - Grid<Real> *phi = nullptr, - Grid<Real> *perCellCorr = nullptr, - MACGrid *fractions = nullptr, - MACGrid *obvel = nullptr, - Real gfClamp = 1e-04, - Real cgMaxIterFac = 1.5, - Real cgAccuracy = 1e-3, - int preconditioner = 1, - bool zeroPressureFixing = false, - const Grid<Real> *curv = nullptr, - const Real surfTens = 0.) -{ - FluidSolver *parent = vel.getParent(); - - // initialize dual/slack variables - MACGrid velC = MACGrid(parent); - velC.copyFrom(vel); - MACGrid x = MACGrid(parent); - MACGrid y = MACGrid(parent); - MACGrid z = MACGrid(parent); - MACGrid x0 = MACGrid(parent); - MACGrid z0 = MACGrid(parent); - - // precomputation - ADMM_precompute_Separable(blurRadius); - MACGrid Q = MACGrid(parent); - precomputeQ(Q, flags, velT, velC, gBlurKernel, sigma); - MACGrid invA = MACGrid(parent); - precomputeInvA(invA, weight, sigma); - - // loop - int iter = 0; - for (iter = 0; iter < maxIters; iter++) { - // x-update - x0.copyFrom(x); - x.multConst(1.0 / sigma); - x.add(y); - prox_f(x, flags, Q, velC, sigma, invA); - x.multConst(-sigma); - x.addScaled(y, sigma); - x.add(x0); - - // z-update - z0.copyFrom(z); - z.addScaled(x, -tau); - Real cgAccuracyAdaptive = cgAccuracy; - - solvePressure(z, - pressure, - flags, - cgAccuracyAdaptive, - phi, - perCellCorr, - fractions, - obvel, - gfClamp, - cgMaxIterFac, - true, - preconditioner, - false, - false, - zeroPressureFixing, - curv, - surfTens); - - // y-update - y.copyFrom(z); - y.sub(z0); - y.multConst(theta); - y.add(z); - - // stopping criterion - bool stop = (iter > 0 && getRNorm(z, z0) < getEpsDual(epsAbs, epsRel, z)); - - if (stop || (iter == maxIters - 1)) - break; - } - - // vel_new = z - vel.copyFrom(z); - - debMsg("PD_fluid_guiding iterations:" << iter, 1); -} -static PyObject *_W_2(PyObject *_self, PyObject *_linargs, PyObject *_kwds) -{ - try { - PbArgs _args(_linargs, _kwds); - FluidSolver *parent = _args.obtainParent(); - bool noTiming = _args.getOpt<bool>("notiming", -1, 0); - pbPreparePlugin(parent, "PD_fluid_guiding", !noTiming); - PyObject *_retval = nullptr; - { - ArgLocker _lock; - MACGrid &vel = *_args.getPtr<MACGrid>("vel", 0, &_lock); - MACGrid &velT = *_args.getPtr<MACGrid>("velT", 1, &_lock); - Grid<Real> &pressure = *_args.getPtr<Grid<Real>>("pressure", 2, &_lock); - FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 3, &_lock); - Grid<Real> &weight = *_args.getPtr<Grid<Real>>("weight", 4, &_lock); - int blurRadius = _args.getOpt<int>("blurRadius", 5, 5, &_lock); - Real theta = _args.getOpt<Real>("theta", 6, 1.0, &_lock); - Real tau = _args.getOpt<Real>("tau", 7, 1.0, &_lock); - Real sigma = _args.getOpt<Real>("sigma", 8, 1.0, &_lock); - Real epsRel = _args.getOpt<Real>("epsRel", 9, 1e-3, &_lock); - Real epsAbs = _args.getOpt<Real>("epsAbs", 10, 1e-3, &_lock); - int maxIters = _args.getOpt<int>("maxIters", 11, 200, &_lock); - Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 12, nullptr, &_lock); - Grid<Real> *perCellCorr = _args.getPtrOpt<Grid<Real>>("perCellCorr", 13, nullptr, &_lock); - MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 14, nullptr, &_lock); - MACGrid *obvel = _args.getPtrOpt<MACGrid>("obvel", 15, nullptr, &_lock); - Real gfClamp = _args.getOpt<Real>("gfClamp", 16, 1e-04, &_lock); - Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 17, 1.5, &_lock); - Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 18, 1e-3, &_lock); - int preconditioner = _args.getOpt<int>("preconditioner", 19, 1, &_lock); - bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 20, false, &_lock); - const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 21, nullptr, &_lock); - const Real surfTens = _args.getOpt<Real>("surfTens", 22, 0., &_lock); - _retval = getPyNone(); - PD_fluid_guiding(vel, - velT, - pressure, - flags, - weight, - blurRadius, - theta, - tau, - sigma, - epsRel, - epsAbs, - maxIters, - phi, - perCellCorr, - fractions, - obvel, - gfClamp, - cgMaxIterFac, - cgAccuracy, - preconditioner, - zeroPressureFixing, - curv, - surfTens); - _args.check(); - } - pbFinalizePlugin(parent, "PD_fluid_guiding", !noTiming); - return _retval; - } - catch (std::exception &e) { - pbSetError("PD_fluid_guiding", e.what()); - return 0; - } -} -static const Pb::Register _RP_PD_fluid_guiding("", "PD_fluid_guiding", _W_2); -extern "C" { -void PbRegister_PD_fluid_guiding() -{ - KEEP_UNUSED(_RP_PD_fluid_guiding); -} -} +// TODO (sebbas): Disabled for now +// // re-uses main pressure solve from pressure.cpp +// void solvePressure( +// MACGrid& vel, Grid<Real>& pressure, const FlagGrid& flags, Real cgAccuracy = 1e-3, +// const Grid<Real>* phi = nullptr, +// const Grid<Real>* perCellCorr = nullptr, +// const MACGrid* fractions = nullptr, +// const MACGrid* obvel = nullptr, +// Real gfClamp = 1e-04, +// Real cgMaxIterFac = 1.5, +// bool precondition = true, +// int preconditioner = 1, +// bool enforceCompatibility = false, +// bool useL2Norm = false, +// bool zeroPressureFixing = false, +// const Grid<Real> *curv = nullptr, +// const Real surfTens = 0.0, +// Grid<Real>* retRhs = nullptr ); + +// //! Main function for fluid guiding , includes "regular" pressure solve +// PYTHON() void PD_fluid_guiding(MACGrid& vel, MACGrid& velT, +// Grid<Real>& pressure, FlagGrid& flags, Grid<Real>& weight, int blurRadius = 5, +// Real theta = 1.0, Real tau = 1.0, Real sigma = 1.0, +// Real epsRel = 1e-3, Real epsAbs = 1e-3, int maxIters = 200, +// // duplicated for pressure solve +// Grid<Real>* phi = nullptr, Grid<Real>* perCellCorr = nullptr, MACGrid* fractions = nullptr, +// MACGrid* obvel = nullptr, Real gfClamp = 1e-04, Real cgMaxIterFac = 1.5, Real cgAccuracy = 1e-3, +// int preconditioner = 1, bool zeroPressureFixing = false, const Grid<Real> *curv = nullptr, +// const Real surfTens = 0.) +// { +// FluidSolver* parent = vel.getParent(); + +// // initialize dual/slack variables +// MACGrid velC = MACGrid(parent); velC.copyFrom(vel); +// MACGrid x = MACGrid(parent); +// MACGrid y = MACGrid(parent); +// MACGrid z = MACGrid(parent); +// MACGrid x0 = MACGrid(parent); +// MACGrid z0 = MACGrid(parent); + +// // precomputation +// ADMM_precompute_Separable(blurRadius); +// MACGrid Q = MACGrid(parent); +// precomputeQ(Q, flags, velT, velC, gBlurKernel, sigma); +// MACGrid invA = MACGrid(parent); +// precomputeInvA(invA, weight, sigma); + +// // loop +// int iter = 0; +// for (iter = 0; iter < maxIters; iter++) { +// // x-update +// x0.copyFrom(x); +// x.multConst(1.0 / sigma); +// x.add(y); +// prox_f(x, flags, Q, velC, sigma, invA); +// x.multConst(-sigma); x.addScaled(y, sigma); x.add(x0); + +// // z-update +// z0.copyFrom(z); +// z.addScaled(x, -tau); +// Real cgAccuracyAdaptive = cgAccuracy; + +// solvePressure (z, pressure, flags, cgAccuracyAdaptive, phi, perCellCorr, fractions, obvel, +// gfClamp, cgMaxIterFac, true, preconditioner, false, false, zeroPressureFixing, curv, surfTens ); + +// // y-update +// y.copyFrom(z); +// y.sub(z0); +// y.multConst(theta); +// y.add(z); + +// // stopping criterion +// bool stop = (iter > 0 && getRNorm(z, z0) < getEpsDual(epsAbs, epsRel, z)); + +// if (stop || (iter == maxIters - 1)) break; +// } + +// // vel_new = z +// vel.copyFrom(z); + +// debMsg("PD_fluid_guiding iterations:" << iter, 1); +// } //! reset precomputation void releaseBlurPrecomp() @@ -768,7 +653,7 @@ void releaseBlurPrecomp() gBlurKernelRadius = -1; gBlurKernel = 0.f; } -static PyObject *_W_3(PyObject *_self, PyObject *_linargs, PyObject *_kwds) +static PyObject *_W_2(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { try { PbArgs _args(_linargs, _kwds); @@ -790,7 +675,7 @@ static PyObject *_W_3(PyObject *_self, PyObject *_linargs, PyObject *_kwds) return 0; } } -static const Pb::Register _RP_releaseBlurPrecomp("", "releaseBlurPrecomp", _W_3); +static const Pb::Register _RP_releaseBlurPrecomp("", "releaseBlurPrecomp", _W_2); extern "C" { void PbRegister_releaseBlurPrecomp() { diff --git a/extern/mantaflow/preprocessed/plugin/initplugins.cpp b/extern/mantaflow/preprocessed/plugin/initplugins.cpp index a0cc2761dab..db507642f68 100644 --- a/extern/mantaflow/preprocessed/plugin/initplugins.cpp +++ b/extern/mantaflow/preprocessed/plugin/initplugins.cpp @@ -56,7 +56,7 @@ struct KnApplyNoiseInfl : public KernelBase { const WaveletNoiseField &noise, const Grid<Real> &sdf, Real scale, - Real sigma) const + Real sigma) { if (!flags.isFluid(i, j, k) || sdf(i, j, k) > sigma) return; @@ -96,37 +96,35 @@ struct KnApplyNoiseInfl : public KernelBase { return sigma; } typedef Real type5; - void runMessage() - { - debMsg("Executing kernel KnApplyNoiseInfl ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, density, noise, sdf, scale, sigma); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, density, noise, sdf, scale, sigma); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, density, noise, sdf, scale, sigma); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, density, noise, sdf, scale, sigma); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; Grid<Real> &density; const WaveletNoiseField &noise; @@ -202,7 +200,7 @@ struct KnAddNoise : public KernelBase { Grid<Real> &density, const WaveletNoiseField &noise, const Grid<Real> *sdf, - Real scale) const + Real scale) { if (!flags.isFluid(i, j, k) || (sdf && (*sdf)(i, j, k) > 0.)) return; @@ -233,37 +231,35 @@ struct KnAddNoise : public KernelBase { return scale; } typedef Real type4; - void runMessage() - { - debMsg("Executing kernel KnAddNoise ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, density, noise, sdf, scale); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, density, noise, sdf, scale); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, density, noise, sdf, scale); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, density, noise, sdf, scale); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; Grid<Real> &density; const WaveletNoiseField &noise; @@ -329,7 +325,7 @@ template<class T> struct knSetPdataNoise : public KernelBase { const BasicParticleSystem &parts, ParticleDataImpl<T> &pdata, const WaveletNoiseField &noise, - Real scale) const + Real scale) { pdata[idx] = noise.evaluate(parts.getPos(idx)) * scale; } @@ -353,21 +349,17 @@ template<class T> struct knSetPdataNoise : public KernelBase { return scale; } typedef Real type3; - void runMessage() - { - debMsg("Executing kernel knSetPdataNoise ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, parts, pdata, noise, scale); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, parts, pdata, noise, scale); + } } const BasicParticleSystem &parts; ParticleDataImpl<T> &pdata; @@ -389,7 +381,7 @@ template<class T> struct knSetPdataNoiseVec : public KernelBase { const BasicParticleSystem &parts, ParticleDataImpl<T> &pdata, const WaveletNoiseField &noise, - Real scale) const + Real scale) { pdata[idx] = noise.evaluateVec(parts.getPos(idx)) * scale; } @@ -413,21 +405,17 @@ template<class T> struct knSetPdataNoiseVec : public KernelBase { return scale; } typedef Real type3; - void runMessage() - { - debMsg("Executing kernel knSetPdataNoiseVec ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, parts, pdata, noise, scale); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, parts, pdata, noise, scale); + } } const BasicParticleSystem &parts; ParticleDataImpl<T> &pdata; @@ -688,7 +676,7 @@ struct KnApplyEmission : public KernelBase { const Grid<Real> &source, const Grid<Real> *emissionTexture, bool isAbsolute, - int type) const + int type) { // if type is given, only apply emission when celltype matches type from flaggrid // and if emission texture is given, only apply emission when some emission is present at cell @@ -733,37 +721,35 @@ struct KnApplyEmission : public KernelBase { return type; } typedef int type5; - void runMessage() - { - debMsg("Executing kernel KnApplyEmission ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; Grid<Real> ⌖ const Grid<Real> &source; @@ -837,7 +823,7 @@ struct KnApplyDensity : public KernelBase { Grid<Real> &density, const Grid<Real> &sdf, Real value, - Real sigma) const + Real sigma) { if (!flags.isFluid(i, j, k) || sdf(i, j, k) > sigma) return; @@ -868,37 +854,35 @@ struct KnApplyDensity : public KernelBase { return sigma; } typedef Real type4; - void runMessage() - { - debMsg("Executing kernel KnApplyDensity ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, density, sdf, value, sigma); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, density, sdf, value, sigma); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, density, sdf, value, sigma); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, density, sdf, value, sigma); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; Grid<Real> &density; const Grid<Real> &sdf; @@ -1041,7 +1025,7 @@ struct KnResetInObstacle : public KernelBase { Grid<Real> *red, Grid<Real> *green, Grid<Real> *blue, - Real resetValue) const + Real resetValue) { if (!flags.isObstacle(i, j, k)) return; @@ -1115,37 +1099,35 @@ struct KnResetInObstacle : public KernelBase { return resetValue; } typedef Real type9; - void runMessage() - { - debMsg("Executing kernel KnResetInObstacle ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } FlagGrid &flags; MACGrid &vel; Grid<Real> *density; @@ -1692,7 +1674,7 @@ struct KnUpdateFractions : public KernelBase { const Grid<Real> &phiObs, MACGrid &fractions, const int &boundaryWidth, - const Real fracThreshold) const + const Real fracThreshold) { // walls at domain bounds and inner objects @@ -1787,37 +1769,35 @@ struct KnUpdateFractions : public KernelBase { return fracThreshold; } typedef Real type4; - void runMessage() - { - debMsg("Executing kernel KnUpdateFractions ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; const Grid<Real> &phiObs; MACGrid &fractions; @@ -1896,7 +1876,7 @@ struct KnUpdateFlagsObs : public KernelBase { const Grid<Real> &phiObs, const Grid<Real> *phiOut, const Grid<Real> *phiIn, - int boundaryWidth) const + int boundaryWidth) { bool isObs = false; @@ -1964,37 +1944,35 @@ struct KnUpdateFlagsObs : public KernelBase { return boundaryWidth; } typedef int type5; - void runMessage() - { - debMsg("Executing kernel KnUpdateFlagsObs ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = boundaryWidth; j < _maxY; j++) - for (int i = boundaryWidth; i < _maxX; i++) - op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = boundaryWidth; j < _maxY; j++) + for (int i = boundaryWidth; i < _maxX; i++) + op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = boundaryWidth; i < _maxX; i++) - op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth); +#pragma omp parallel + { + +#pragma omp for + for (int j = boundaryWidth; j < _maxY; j++) + for (int i = boundaryWidth; i < _maxX; i++) + op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(boundaryWidth, maxY), *this); - } FlagGrid &flags; const MACGrid *fractions; const Grid<Real> &phiObs; @@ -2067,7 +2045,7 @@ struct kninitVortexVelocity : public KernelBase { const Grid<Real> &phiObs, MACGrid &vel, const Vec3 ¢er, - const Real &radius) const + const Real &radius) { if (phiObs(i, j, k) >= -1.) { @@ -2115,37 +2093,35 @@ struct kninitVortexVelocity : public KernelBase { return radius; } typedef Real type3; - void runMessage() - { - debMsg("Executing kernel kninitVortexVelocity ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, phiObs, vel, center, radius); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, phiObs, vel, center, radius); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, phiObs, vel, center, radius); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, phiObs, vel, center, radius); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const Grid<Real> &phiObs; MACGrid &vel; const Vec3 ¢er; @@ -2318,7 +2294,7 @@ template<class T> struct knBlurGrid : public KernelBase { Grid<T> &originGrid, Grid<T> &targetGrid, GaussianKernelCreator &gkSigma, - int cdir) const + int cdir) { targetGrid(i, j, k) = convolveGrid<T>(originGrid, gkSigma, Vec3(i, j, k), cdir); } @@ -2342,37 +2318,35 @@ template<class T> struct knBlurGrid : public KernelBase { return cdir; } typedef int type3; - void runMessage() - { - debMsg("Executing kernel knBlurGrid ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, originGrid, targetGrid, gkSigma, cdir); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, originGrid, targetGrid, gkSigma, cdir); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, originGrid, targetGrid, gkSigma, cdir); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, originGrid, targetGrid, gkSigma, cdir); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<T> &originGrid; Grid<T> &targetGrid; GaussianKernelCreator &gkSigma; @@ -2412,7 +2386,7 @@ struct KnBlurMACGridGauss : public KernelBase { MACGrid &originGrid, MACGrid &target, GaussianKernelCreator &gkSigma, - int cdir) const + int cdir) { Vec3 pos(i, j, k); Vec3 step(1.0, 0.0, 0.0); @@ -2462,37 +2436,35 @@ struct KnBlurMACGridGauss : public KernelBase { return cdir; } typedef int type3; - void runMessage() - { - debMsg("Executing kernel KnBlurMACGridGauss ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, originGrid, target, gkSigma, cdir); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, originGrid, target, gkSigma, cdir); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, originGrid, target, gkSigma, cdir); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, originGrid, target, gkSigma, cdir); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } MACGrid &originGrid; MACGrid ⌖ GaussianKernelCreator &gkSigma; diff --git a/extern/mantaflow/preprocessed/plugin/kepsilon.cpp b/extern/mantaflow/preprocessed/plugin/kepsilon.cpp index 32425a5756e..c5711b95242 100644 --- a/extern/mantaflow/preprocessed/plugin/kepsilon.cpp +++ b/extern/mantaflow/preprocessed/plugin/kepsilon.cpp @@ -61,7 +61,7 @@ struct KnTurbulenceClamp : public KernelBase { Real minK, Real maxK, Real minNu, - Real maxNu) const + Real maxNu) { Real eps = egrid[idx]; Real ke = clamp(kgrid[idx], minK, maxK); @@ -104,21 +104,17 @@ struct KnTurbulenceClamp : public KernelBase { return maxNu; } typedef Real type5; - void runMessage() - { - debMsg("Executing kernel KnTurbulenceClamp ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, kgrid, egrid, minK, maxK, minNu, maxNu); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, kgrid, egrid, minK, maxK, minNu, maxNu); + } } Grid<Real> &kgrid; Grid<Real> &egrid; @@ -163,7 +159,7 @@ struct KnComputeProduction : public KernelBase { Grid<Real> &prod, Grid<Real> &nuT, Grid<Real> *strain, - Real pscale = 1.0f) const + Real pscale = 1.0f) { Real curEps = eps(i, j, k); if (curEps > 0) { @@ -234,37 +230,35 @@ struct KnComputeProduction : public KernelBase { return pscale; } typedef Real type7; - void runMessage() - { - debMsg("Executing kernel KnComputeProduction ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const MACGrid &vel; const Grid<Vec3> &velCenter; const Grid<Real> &ke; @@ -345,7 +339,7 @@ struct KnAddTurbulenceSource : public KernelBase { run(); } inline void op( - IndexInt idx, Grid<Real> &kgrid, Grid<Real> &egrid, const Grid<Real> &pgrid, Real dt) const + IndexInt idx, Grid<Real> &kgrid, Grid<Real> &egrid, const Grid<Real> &pgrid, Real dt) { Real eps = egrid[idx], prod = pgrid[idx], ke = kgrid[idx]; if (ke <= 0) @@ -379,21 +373,17 @@ struct KnAddTurbulenceSource : public KernelBase { return dt; } typedef Real type3; - void runMessage() - { - debMsg("Executing kernel KnAddTurbulenceSource ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, kgrid, egrid, pgrid, dt); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, kgrid, egrid, pgrid, dt); + } } Grid<Real> &kgrid; Grid<Real> &egrid; diff --git a/extern/mantaflow/preprocessed/plugin/pressure.cpp b/extern/mantaflow/preprocessed/plugin/pressure.cpp index 593aeb16859..4674bf4a7bf 100644 --- a/extern/mantaflow/preprocessed/plugin/pressure.cpp +++ b/extern/mantaflow/preprocessed/plugin/pressure.cpp @@ -138,9 +138,10 @@ struct MakeRhs : public KernelBase { } } + // TODO (sebbas): Disabled for now // per cell divergence correction (optional) - if (perCellCorr) - set += perCellCorr->get(i, j, k); + // if(perCellCorr) + // set += perCellCorr->get(i,j,k); // obtain sum, cell count sum += set; @@ -198,19 +199,61 @@ struct MakeRhs : public KernelBase { return gfClamp; } typedef Real type9; - void runMessage() - { - debMsg("Executing kernel MakeRhs ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) + const FlagGrid &flags = getArg0(); + Grid<Real> &rhs = getArg1(); + const MACGrid &vel = getArg2(); + const Grid<Real> *perCellCorr = getArg3(); + const MACGrid *fractions = getArg4(); + const MACGrid *obvel = getArg5(); + const Grid<Real> *phi = getArg6(); + const Grid<Real> *curv = getArg7(); + const Real &surfTens = getArg8(); + const Real &gfClamp = getArg9(); +#pragma omp target teams distribute parallel for reduction(+:cnt, sum) collapse(3) schedule(static,1) + { + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, + j, + k, + flags, + rhs, + vel, + perCellCorr, + fractions, + obvel, + phi, + curv, + surfTens, + gfClamp, + cnt, + sum); + } + { + this->sum = sum; + } + } + else { + const int k = 0; + const FlagGrid &flags = getArg0(); + Grid<Real> &rhs = getArg1(); + const MACGrid &vel = getArg2(); + const Grid<Real> *perCellCorr = getArg3(); + const MACGrid *fractions = getArg4(); + const MACGrid *obvel = getArg5(); + const Grid<Real> *phi = getArg6(); + const Grid<Real> *curv = getArg7(); + const Real &surfTens = getArg8(); + const Real &gfClamp = getArg9(); +#pragma omp target teams distribute parallel for reduction(+:cnt, sum) collapse(2) schedule(static,1) + { for (int j = 1; j < _maxY; j++) for (int i = 1; i < _maxX; i++) op(i, @@ -228,55 +271,11 @@ struct MakeRhs : public KernelBase { gfClamp, cnt, sum); + } + { + this->sum = sum; + } } - else { - const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, - j, - k, - flags, - rhs, - vel, - perCellCorr, - fractions, - obvel, - phi, - curv, - surfTens, - gfClamp, - cnt, - sum); - } - } - void run() - { - if (maxZ > 1) - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(1, maxY), *this); - } - MakeRhs(MakeRhs &o, tbb::split) - : KernelBase(o), - flags(o.flags), - rhs(o.rhs), - vel(o.vel), - perCellCorr(o.perCellCorr), - fractions(o.fractions), - obvel(o.obvel), - phi(o.phi), - curv(o.curv), - surfTens(o.surfTens), - gfClamp(o.gfClamp), - cnt(0), - sum(0) - { - } - void join(const MakeRhs &o) - { - cnt += o.cnt; - sum += o.sum; } const FlagGrid &flags; Grid<Real> &rhs; @@ -302,7 +301,7 @@ struct knCorrectVelocity : public KernelBase { run(); } inline void op( - int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Real> &pressure) const + int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Real> &pressure) { const IndexInt idx = flags.index(i, j, k); if (flags.isFluid(idx)) { @@ -353,37 +352,36 @@ struct knCorrectVelocity : public KernelBase { return pressure; } typedef Grid<Real> type2; - void runMessage() - { - debMsg("Executing kernel knCorrectVelocity ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, pressure); + const FlagGrid &flags = getArg0(); + MACGrid &vel = getArg1(); + const Grid<Real> &pressure = getArg2(); +#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1) + { + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, pressure); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, vel, pressure); + const FlagGrid &flags = getArg0(); + MACGrid &vel = getArg1(); + const Grid<Real> &pressure = getArg2(); +#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1) + { + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, vel, pressure); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; MACGrid &vel; const Grid<Real> &pressure; @@ -441,7 +439,7 @@ struct ApplyGhostFluidDiagonal : public KernelBase { Grid<Real> &A0, const FlagGrid &flags, const Grid<Real> &phi, - const Real gfClamp) const + const Real gfClamp) { const int X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ(); const IndexInt idx = flags.index(i, j, k); @@ -483,37 +481,35 @@ struct ApplyGhostFluidDiagonal : public KernelBase { return gfClamp; } typedef Real type3; - void runMessage() - { - debMsg("Executing kernel ApplyGhostFluidDiagonal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, A0, flags, phi, gfClamp); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, A0, flags, phi, gfClamp); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, A0, flags, phi, gfClamp); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, A0, flags, phi, gfClamp); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<Real> &A0; const FlagGrid &flags; const Grid<Real> φ @@ -551,7 +547,7 @@ struct knCorrectVelocityGhostFluid : public KernelBase { const Grid<Real> &phi, Real gfClamp, const Grid<Real> *curv, - const Real surfTens) const + const Real surfTens) { const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ(); const IndexInt idx = flags.index(i, j, k); @@ -640,37 +636,35 @@ struct knCorrectVelocityGhostFluid : public KernelBase { return surfTens; } typedef Real type6; - void runMessage() - { - debMsg("Executing kernel knCorrectVelocityGhostFluid ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } MACGrid &vel; const FlagGrid &flags; const Grid<Real> &pressure; @@ -710,7 +704,7 @@ struct knReplaceClampedGhostFluidVels : public KernelBase { const FlagGrid &flags, const Grid<Real> &pressure, const Grid<Real> &phi, - Real gfClamp) const + Real gfClamp) { const IndexInt idx = flags.index(i, j, k); const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ(); @@ -758,37 +752,35 @@ struct knReplaceClampedGhostFluidVels : public KernelBase { return gfClamp; } typedef Real type4; - void runMessage() - { - debMsg("Executing kernel knReplaceClampedGhostFluidVels ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, flags, pressure, phi, gfClamp); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, flags, pressure, phi, gfClamp); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, flags, pressure, phi, gfClamp); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, flags, pressure, phi, gfClamp); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } MACGrid &vel; const FlagGrid &flags; const Grid<Real> &pressure; @@ -822,28 +814,21 @@ struct CountEmptyCells : public KernelBase { return flags; } typedef FlagGrid type0; - void runMessage() - { - debMsg("Executing kernel CountEmptyCells ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, flags, numEmpty); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - CountEmptyCells(CountEmptyCells &o, tbb::split) : KernelBase(o), flags(o.flags), numEmpty(0) - { - } - void join(const CountEmptyCells &o) - { - numEmpty += o.numEmpty; + const IndexInt _sz = size; +#pragma omp parallel + { + int numEmpty = 0; +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, flags, numEmpty); +#pragma omp critical + { + this->numEmpty += numEmpty; + } + } } const FlagGrid &flags; int numEmpty; @@ -964,11 +949,19 @@ void computePressureRhs(Grid<Real> &rhs, const Real surfTens = 0.) { // compute divergence and init right hand side - MakeRhs kernMakeRhs( - flags, rhs, vel, perCellCorr, fractions, obvel, phi, curv, surfTens, gfClamp); - - if (enforceCompatibility) - rhs += (Real)(-kernMakeRhs.sum / (Real)kernMakeRhs.cnt); + // auto kernMakeRhs = new MakeRhs(flags, rhs, vel, perCellCorr, fractions, obvel, phi, curv, + // surfTens, gfClamp ); + printf("pressure = %p, flags = %p, rhs = %p, vel = %p\n", + pressure.mData, + flags.mData, + rhs.mData, + vel.mData); + MakeRhs(flags, rhs, vel, perCellCorr, fractions, obvel, phi, curv, surfTens, gfClamp); + + // TODO (sebbas): Disabled for now + // if(enforceCompatibility) + // rhs += (Real)(-kernMakeRhs->sum / (Real)kernMakeRhs->cnt); + // delete kernMakeRhs; } static PyObject *_W_1(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { @@ -1050,6 +1043,13 @@ void solvePressureSystem(Grid<Real> &rhs, MACGrid &vel, Grid<Real> &pressure, const FlagGrid &flags, + Grid<Real> *residual = nullptr, + Grid<Real> *search = nullptr, + Grid<Real> *A0 = nullptr, + Grid<Real> *Ai = nullptr, + Grid<Real> *Aj = nullptr, + Grid<Real> *Ak = nullptr, + Grid<Real> *tmp = nullptr, Real cgAccuracy = 1e-3, const Grid<Real> *phi = nullptr, const Grid<Real> *perCellCorr = nullptr, @@ -1069,19 +1069,37 @@ void solvePressureSystem(Grid<Real> &rhs, // reserve temp grids FluidSolver *parent = flags.getParent(); - Grid<Real> residual(parent); - Grid<Real> search(parent); - Grid<Real> A0(parent); - Grid<Real> Ai(parent); - Grid<Real> Aj(parent); - Grid<Real> Ak(parent); - Grid<Real> tmp(parent); + + bool cleanUp = false; + if (!residual) { + residual = new Grid<Real>(parent, true, false, true); + search = new Grid<Real>(parent, true, false, true); + A0 = new Grid<Real>(parent, true, false, true); + Ai = new Grid<Real>(parent, true, false, true); + Aj = new Grid<Real>(parent, true, false, true); + Ak = new Grid<Real>(parent, true, false, true); + tmp = new Grid<Real>(parent, true, false, true); + cleanUp = true; + } + else { + residual->clear(true); + search->clear(true); + A0->clear(true); + Ai->clear(true); + Aj->clear(true); + Ak->clear(true); + tmp->clear(true); + } + + std::cout << "HERE 5" << std::endl; // setup matrix and boundaries - MakeLaplaceMatrix(flags, A0, Ai, Aj, Ak, fractions); + MakeLaplaceMatrix(flags, *A0, *Ai, *Aj, *Ak, fractions); + // MakeLaplaceMatrix(flags, A0, Ai, Aj, Ak, fractions); + // TODO (sebbas): Disabled for now if (phi) { - ApplyGhostFluidDiagonal(A0, flags, *phi, gfClamp); + ApplyGhostFluidDiagonal(*A0, flags, *phi, gfClamp); } // check whether we need to fix some pressure value... @@ -1125,7 +1143,8 @@ void solvePressureSystem(Grid<Real> &rhs, // debMsg("No empty cells! Fixing pressure of cell "<<fixPidx<<" to zero",1); } if (fixPidx >= 0) { - fixPressure(fixPidx, Real(0), rhs, A0, Ai, Aj, Ak); + fixPressure(fixPidx, Real(0), rhs, *A0, *Ai, *Aj, *Ak); + // fixPressure(fixPidx, Real(0), rhs, A0, Ai, Aj, Ak); static bool msgOnce = false; if (!msgOnce) { debMsg("Pinning pressure of cell " << fixPidx << " to zero", 2); @@ -1133,20 +1152,21 @@ void solvePressureSystem(Grid<Real> &rhs, } } } + std::cout << "HERE 6" << std::endl; // CG setup // note: the last factor increases the max iterations for 2d, which right now can't use a // preconditioner GridCgInterface *gcg; - vector<Grid<Real> *> matA{&A0, &Ai, &Aj}; - - if (vel.is3D()) { - matA.push_back(&Ak); - gcg = new GridCg<ApplyMatrix>(pressure, rhs, residual, search, flags, tmp, matA); - } - else { - gcg = new GridCg<ApplyMatrix2D>(pressure, rhs, residual, search, flags, tmp, matA); - } + if (vel.is3D()) + gcg = new GridCg<ApplyMatrix>(pressure, rhs, *residual, *search, flags, *tmp, A0, Ai, Aj, Ak); + // gcg = new GridCg<ApplyMatrix> (pressure, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, + // &Ak); + else + gcg = new GridCg<ApplyMatrix2D>( + pressure, rhs, *residual, *search, flags, *tmp, A0, Ai, Aj, Ak); + // gcg = new GridCg<ApplyMatrix2D>(pressure, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, + // &Ak); gcg->setAccuracy(cgAccuracy); gcg->setUseL2Norm(useL2Norm); @@ -1155,6 +1175,7 @@ void solvePressureSystem(Grid<Real> &rhs, Grid<Real> *pca0 = nullptr, *pca1 = nullptr, *pca2 = nullptr, *pca3 = nullptr; GridMg *pmg = nullptr; + std::cout << "HERE 7" << std::endl; // optional preconditioning if (preconditioner == PcMIC) { @@ -1180,10 +1201,12 @@ void solvePressureSystem(Grid<Real> &rhs, gcg->setMGPreconditioner(GridCgInterface::PC_MGP, pmg); } + std::cout << "HERE 8" << std::endl; // CG solve + Real time = 0; for (int iter = 0; iter < maxIter; iter++) { - if (!gcg->iterate()) + if (!gcg->iterate(time)) iter = maxIter; if (iter < maxIter) debMsg("FluidSolver::solvePressure iteration " << iter @@ -1193,8 +1216,26 @@ void solvePressureSystem(Grid<Real> &rhs, debMsg("FluidSolver::solvePressure done. Iterations:" << gcg->getIterations() << ", residual:" << gcg->getResNorm(), 2); + // std::cout << "TIME: " << time << std::endl; // Cleanup + if (cleanUp) { + if (residual) + delete residual; + if (search) + delete search; + if (A0) + delete A0; + if (Ai) + delete Ai; + if (Aj) + delete Aj; + if (Ak) + delete Ak; + if (tmp) + delete tmp; + } + if (gcg) delete gcg; if (pca0) @@ -1225,26 +1266,40 @@ static PyObject *_W_2(PyObject *_self, PyObject *_linargs, PyObject *_kwds) MACGrid &vel = *_args.getPtr<MACGrid>("vel", 1, &_lock); Grid<Real> &pressure = *_args.getPtr<Grid<Real>>("pressure", 2, &_lock); const FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 3, &_lock); - Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 4, 1e-3, &_lock); - const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 5, nullptr, &_lock); + Grid<Real> *residual = _args.getPtrOpt<Grid<Real>>("residual", 4, nullptr, &_lock); + Grid<Real> *search = _args.getPtrOpt<Grid<Real>>("search", 5, nullptr, &_lock); + Grid<Real> *A0 = _args.getPtrOpt<Grid<Real>>("A0", 6, nullptr, &_lock); + Grid<Real> *Ai = _args.getPtrOpt<Grid<Real>>("Ai", 7, nullptr, &_lock); + Grid<Real> *Aj = _args.getPtrOpt<Grid<Real>>("Aj", 8, nullptr, &_lock); + Grid<Real> *Ak = _args.getPtrOpt<Grid<Real>>("Ak", 9, nullptr, &_lock); + Grid<Real> *tmp = _args.getPtrOpt<Grid<Real>>("tmp", 10, nullptr, &_lock); + Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 11, 1e-3, &_lock); + const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 12, nullptr, &_lock); const Grid<Real> *perCellCorr = _args.getPtrOpt<Grid<Real>>( - "perCellCorr", 6, nullptr, &_lock); - const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 7, nullptr, &_lock); - Real gfClamp = _args.getOpt<Real>("gfClamp", 8, 1e-04, &_lock); - Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 9, 1.5, &_lock); - bool precondition = _args.getOpt<bool>("precondition", 10, true, &_lock); - int preconditioner = _args.getOpt<int>("preconditioner", 11, PcMIC, &_lock); + "perCellCorr", 13, nullptr, &_lock); + const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 14, nullptr, &_lock); + Real gfClamp = _args.getOpt<Real>("gfClamp", 15, 1e-04, &_lock); + Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 16, 1.5, &_lock); + bool precondition = _args.getOpt<bool>("precondition", 17, true, &_lock); + int preconditioner = _args.getOpt<int>("preconditioner", 18, PcMIC, &_lock); const bool enforceCompatibility = _args.getOpt<bool>( - "enforceCompatibility", 12, false, &_lock); - const bool useL2Norm = _args.getOpt<bool>("useL2Norm", 13, false, &_lock); - const bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 14, false, &_lock); - const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 15, nullptr, &_lock); - const Real surfTens = _args.getOpt<Real>("surfTens", 16, 0., &_lock); + "enforceCompatibility", 19, false, &_lock); + const bool useL2Norm = _args.getOpt<bool>("useL2Norm", 20, false, &_lock); + const bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 21, false, &_lock); + const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 22, nullptr, &_lock); + const Real surfTens = _args.getOpt<Real>("surfTens", 23, 0., &_lock); _retval = getPyNone(); solvePressureSystem(rhs, vel, pressure, flags, + residual, + search, + A0, + Ai, + Aj, + Ak, + tmp, cgAccuracy, phi, perCellCorr, @@ -1370,6 +1425,13 @@ void PbRegister_correctVelocity() void solvePressure(MACGrid &vel, Grid<Real> &pressure, const FlagGrid &flags, + Grid<Real> *residual = nullptr, + Grid<Real> *search = nullptr, + Grid<Real> *A0 = nullptr, + Grid<Real> *Ai = nullptr, + Grid<Real> *Aj = nullptr, + Grid<Real> *Ak = nullptr, + Grid<Real> *tmp = nullptr, Real cgAccuracy = 1e-3, const Grid<Real> *phi = nullptr, const Grid<Real> *perCellCorr = nullptr, @@ -1411,6 +1473,13 @@ void solvePressure(MACGrid &vel, vel, pressure, flags, + residual, + search, + A0, + Ai, + Aj, + Aj, + tmp, cgAccuracy, phi, perCellCorr, @@ -1442,10 +1511,11 @@ void solvePressure(MACGrid &vel, curv, surfTens); + // TODO (sebbas): Disabled for now // optionally , return RHS - if (retRhs) { - retRhs->copyFrom(rhs); - } + // if(retRhs) { + // retRhs->copyFrom(rhs); + // } } static PyObject *_W_4(PyObject *_self, PyObject *_linargs, PyObject *_kwds) { @@ -1460,26 +1530,40 @@ static PyObject *_W_4(PyObject *_self, PyObject *_linargs, PyObject *_kwds) MACGrid &vel = *_args.getPtr<MACGrid>("vel", 0, &_lock); Grid<Real> &pressure = *_args.getPtr<Grid<Real>>("pressure", 1, &_lock); const FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 2, &_lock); - Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 3, 1e-3, &_lock); - const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 4, nullptr, &_lock); + Grid<Real> *residual = _args.getPtrOpt<Grid<Real>>("residual", 3, nullptr, &_lock); + Grid<Real> *search = _args.getPtrOpt<Grid<Real>>("search", 4, nullptr, &_lock); + Grid<Real> *A0 = _args.getPtrOpt<Grid<Real>>("A0", 5, nullptr, &_lock); + Grid<Real> *Ai = _args.getPtrOpt<Grid<Real>>("Ai", 6, nullptr, &_lock); + Grid<Real> *Aj = _args.getPtrOpt<Grid<Real>>("Aj", 7, nullptr, &_lock); + Grid<Real> *Ak = _args.getPtrOpt<Grid<Real>>("Ak", 8, nullptr, &_lock); + Grid<Real> *tmp = _args.getPtrOpt<Grid<Real>>("tmp", 9, nullptr, &_lock); + Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 10, 1e-3, &_lock); + const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 11, nullptr, &_lock); const Grid<Real> *perCellCorr = _args.getPtrOpt<Grid<Real>>( - "perCellCorr", 5, nullptr, &_lock); - const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 6, nullptr, &_lock); - const MACGrid *obvel = _args.getPtrOpt<MACGrid>("obvel", 7, nullptr, &_lock); - Real gfClamp = _args.getOpt<Real>("gfClamp", 8, 1e-04, &_lock); - Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 9, 1.5, &_lock); - bool precondition = _args.getOpt<bool>("precondition", 10, true, &_lock); - int preconditioner = _args.getOpt<int>("preconditioner", 11, PcMIC, &_lock); - bool enforceCompatibility = _args.getOpt<bool>("enforceCompatibility", 12, false, &_lock); - bool useL2Norm = _args.getOpt<bool>("useL2Norm", 13, false, &_lock); - bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 14, false, &_lock); - const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 15, nullptr, &_lock); - const Real surfTens = _args.getOpt<Real>("surfTens", 16, 0., &_lock); - Grid<Real> *retRhs = _args.getPtrOpt<Grid<Real>>("retRhs", 17, nullptr, &_lock); + "perCellCorr", 12, nullptr, &_lock); + const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 13, nullptr, &_lock); + const MACGrid *obvel = _args.getPtrOpt<MACGrid>("obvel", 14, nullptr, &_lock); + Real gfClamp = _args.getOpt<Real>("gfClamp", 15, 1e-04, &_lock); + Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 16, 1.5, &_lock); + bool precondition = _args.getOpt<bool>("precondition", 17, true, &_lock); + int preconditioner = _args.getOpt<int>("preconditioner", 18, PcMIC, &_lock); + bool enforceCompatibility = _args.getOpt<bool>("enforceCompatibility", 19, false, &_lock); + bool useL2Norm = _args.getOpt<bool>("useL2Norm", 20, false, &_lock); + bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 21, false, &_lock); + const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 22, nullptr, &_lock); + const Real surfTens = _args.getOpt<Real>("surfTens", 23, 0., &_lock); + Grid<Real> *retRhs = _args.getPtrOpt<Grid<Real>>("retRhs", 24, nullptr, &_lock); _retval = getPyNone(); solvePressure(vel, pressure, flags, + residual, + search, + A0, + Ai, + Aj, + Ak, + tmp, cgAccuracy, phi, perCellCorr, diff --git a/extern/mantaflow/preprocessed/plugin/ptsplugins.cpp b/extern/mantaflow/preprocessed/plugin/ptsplugins.cpp index 7b2aedb694e..6d75e220d7a 100644 --- a/extern/mantaflow/preprocessed/plugin/ptsplugins.cpp +++ b/extern/mantaflow/preprocessed/plugin/ptsplugins.cpp @@ -34,7 +34,7 @@ struct KnAddForcePvel : public KernelBase { ParticleDataImpl<Vec3> &v, const Vec3 &da, const ParticleDataImpl<int> *ptype, - const int exclude) const + const int exclude) { if (ptype && ((*ptype)[idx] & exclude)) return; @@ -60,21 +60,17 @@ struct KnAddForcePvel : public KernelBase { return exclude; } typedef int type3; - void runMessage() - { - debMsg("Executing kernel KnAddForcePvel ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, v, da, ptype, exclude); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, v, da, ptype, exclude); + } } ParticleDataImpl<Vec3> &v; const Vec3 &da; @@ -150,7 +146,7 @@ struct KnUpdateVelocityFromDeltaPos : public KernelBase { const ParticleDataImpl<Vec3> &x_prev, const Real over_dt, const ParticleDataImpl<int> *ptype, - const int exclude) const + const int exclude) { if (ptype && ((*ptype)[idx] & exclude)) return; @@ -186,21 +182,17 @@ struct KnUpdateVelocityFromDeltaPos : public KernelBase { return exclude; } typedef int type5; - void runMessage() - { - debMsg("Executing kernel KnUpdateVelocityFromDeltaPos ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, p, v, x_prev, over_dt, ptype, exclude); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, p, v, x_prev, over_dt, ptype, exclude); + } } const BasicParticleSystem &p; ParticleDataImpl<Vec3> &v; @@ -273,7 +265,7 @@ struct KnStepEuler : public KernelBase { const ParticleDataImpl<Vec3> &v, const Real dt, const ParticleDataImpl<int> *ptype, - const int exclude) const + const int exclude) { if (ptype && ((*ptype)[idx] & exclude)) return; @@ -304,21 +296,17 @@ struct KnStepEuler : public KernelBase { return exclude; } typedef int type4; - void runMessage() - { - debMsg("Executing kernel KnStepEuler ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, p, v, dt, ptype, exclude); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, p, v, dt, ptype, exclude); + } } BasicParticleSystem &p; const ParticleDataImpl<Vec3> &v; @@ -393,7 +381,7 @@ struct KnSetPartType : public KernelBase { const int mark, const int stype, const FlagGrid &flags, - const int cflag) const + const int cflag) { if (flags.isInBounds(part.getPos(idx), 0) && (flags.getAt(part.getPos(idx)) & cflag) && (ptype[idx] & stype)) @@ -429,21 +417,17 @@ struct KnSetPartType : public KernelBase { return cflag; } typedef int type5; - void runMessage() - { - debMsg("Executing kernel KnSetPartType ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, ptype, part, mark, stype, flags, cflag); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, ptype, part, mark, stype, flags, cflag); + } } ParticleDataImpl<int> &ptype; const BasicParticleSystem ∂ diff --git a/extern/mantaflow/preprocessed/plugin/secondaryparticles.cpp b/extern/mantaflow/preprocessed/plugin/secondaryparticles.cpp index 7a1d8224d94..5d519710296 100644 --- a/extern/mantaflow/preprocessed/plugin/secondaryparticles.cpp +++ b/extern/mantaflow/preprocessed/plugin/secondaryparticles.cpp @@ -99,7 +99,7 @@ struct knFlipComputeSecondaryParticlePotentials : public KernelBase { const Real scaleFromManta, const int itype = FlagGrid::TypeFluid, const int jtype = FlagGrid::TypeObstacle | FlagGrid::TypeOutflow | - FlagGrid::TypeInflow) const + FlagGrid::TypeInflow) { if (!(flags(i, j, k) & itype)) @@ -253,19 +253,48 @@ struct knFlipComputeSecondaryParticlePotentials : public KernelBase { return jtype; } typedef int type16; - void runMessage() - { - debMsg("Executing kernel knFlipComputeSecondaryParticlePotentials ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = radius; j < _maxY; j++) + for (int i = radius; i < _maxX; i++) + op(i, + j, + k, + potTA, + potWC, + potKE, + neighborRatio, + flags, + v, + normal, + radius, + tauMinTA, + tauMaxTA, + tauMinWC, + tauMaxWC, + tauMinKE, + tauMaxKE, + scaleFromManta, + itype, + jtype); + } + } + else { + const int k = 0; +#pragma omp parallel + { + +#pragma omp for for (int j = radius; j < _maxY; j++) for (int i = radius; i < _maxX; i++) op(i, @@ -288,40 +317,9 @@ struct knFlipComputeSecondaryParticlePotentials : public KernelBase { scaleFromManta, itype, jtype); - } - else { - const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = radius; i < _maxX; i++) - op(i, - j, - k, - potTA, - potWC, - potKE, - neighborRatio, - flags, - v, - normal, - radius, - tauMinTA, - tauMaxTA, - tauMinWC, - tauMaxWC, - tauMinKE, - tauMaxKE, - scaleFromManta, - itype, - jtype); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(radius, maxY), *this); - } Grid<Real> &potTA; Grid<Real> &potWC; Grid<Real> &potKE; @@ -670,13 +668,7 @@ struct knFlipSampleSecondaryParticlesMoreCylinders : public KernelBase { return rand; } typedef RandomStream type17; - void runMessage() - { - debMsg("Executing kernel knFlipSampleSecondaryParticlesMoreCylinders ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; + void runMessage(){}; void run() { const int _maxX = maxX; @@ -930,13 +922,7 @@ struct knFlipSampleSecondaryParticles : public KernelBase { return rand; } typedef RandomStream type17; - void runMessage() - { - debMsg("Executing kernel knFlipSampleSecondaryParticles ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; + void runMessage(){}; void run() { const int _maxX = maxX; @@ -1192,7 +1178,7 @@ struct knFlipUpdateSecondaryParticlesLinear : public KernelBase { const Real c_b, const Real dt, const int exclude, - const int antitunneling) const + const int antitunneling) { if (!pts_sec.isActive(idx) || pts_sec[idx].flag & exclude) @@ -1342,36 +1328,32 @@ struct knFlipUpdateSecondaryParticlesLinear : public KernelBase { return antitunneling; } typedef int type14; - void runMessage() - { - debMsg("Executing kernel knFlipUpdateSecondaryParticlesLinear ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, - pts_sec, - v_sec, - l_sec, - f_sec, - flags, - v, - neighborRatio, - gravity, - k_b, - k_d, - c_s, - c_b, - dt, - exclude, - antitunneling); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, + pts_sec, + v_sec, + l_sec, + f_sec, + flags, + v, + neighborRatio, + gravity, + k_b, + k_d, + c_s, + c_b, + dt, + exclude, + antitunneling); + } } BasicParticleSystem &pts_sec; ParticleDataImpl<Vec3> &v_sec; @@ -1449,7 +1431,7 @@ struct knFlipUpdateSecondaryParticlesCubic : public KernelBase { const Real dt, const int exclude, const int antitunneling, - const int itype) const + const int itype) { if (!pts_sec.isActive(idx) || pts_sec[idx].flag & exclude) @@ -1655,38 +1637,34 @@ struct knFlipUpdateSecondaryParticlesCubic : public KernelBase { return itype; } typedef int type16; - void runMessage() - { - debMsg("Executing kernel knFlipUpdateSecondaryParticlesCubic ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, - pts_sec, - v_sec, - l_sec, - f_sec, - flags, - v, - neighborRatio, - radius, - gravity, - k_b, - k_d, - c_s, - c_b, - dt, - exclude, - antitunneling, - itype); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, + pts_sec, + v_sec, + l_sec, + f_sec, + flags, + v, + neighborRatio, + radius, + gravity, + k_b, + k_d, + c_s, + c_b, + dt, + exclude, + antitunneling, + itype); + } } BasicParticleSystem &pts_sec; ParticleDataImpl<Vec3> &v_sec; @@ -1856,7 +1834,7 @@ struct knFlipDeleteParticlesInObstacle : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, BasicParticleSystem &pts, const FlagGrid &flags) const + inline void op(IndexInt idx, BasicParticleSystem &pts, const FlagGrid &flags) { if (!pts.isActive(idx)) @@ -1885,21 +1863,17 @@ struct knFlipDeleteParticlesInObstacle : public KernelBase { return flags; } typedef FlagGrid type1; - void runMessage() - { - debMsg("Executing kernel knFlipDeleteParticlesInObstacle ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, pts, flags); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, pts, flags); + } } BasicParticleSystem &pts; const FlagGrid &flags; @@ -2031,7 +2005,7 @@ struct knSetFlagsFromLevelset : public KernelBase { FlagGrid &flags, const Grid<Real> &phi, const int exclude = FlagGrid::TypeObstacle, - const int itype = FlagGrid::TypeFluid) const + const int itype = FlagGrid::TypeFluid) { if (phi(idx) < 0 && !(flags(idx) & exclude)) flags(idx) = itype; @@ -2056,21 +2030,17 @@ struct knSetFlagsFromLevelset : public KernelBase { return itype; } typedef int type3; - void runMessage() - { - debMsg("Executing kernel knSetFlagsFromLevelset ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, flags, phi, exclude, itype); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, flags, phi, exclude, itype); + } } FlagGrid &flags; const Grid<Real> φ @@ -2126,7 +2096,7 @@ struct knSetMACFromLevelset : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, MACGrid &v, const Grid<Real> &phi, const Vec3 c) const + inline void op(int i, int j, int k, MACGrid &v, const Grid<Real> &phi, const Vec3 c) { if (phi.getInterpolated(Vec3(i, j, k)) > 0) v(i, j, k) = c; @@ -2146,37 +2116,35 @@ struct knSetMACFromLevelset : public KernelBase { return c; } typedef Vec3 type2; - void runMessage() - { - debMsg("Executing kernel knSetMACFromLevelset ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, v, phi, c); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, v, phi, c); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, v, phi, c); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, v, phi, c); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } MACGrid &v; const Grid<Real> φ const Vec3 c; @@ -2268,7 +2236,7 @@ struct knFlipComputePotentialTrappedAir : public KernelBase { const Real tauMax, const Real scaleFromManta, const int itype = FlagGrid::TypeFluid, - const int jtype = FlagGrid::TypeFluid) const + const int jtype = FlagGrid::TypeFluid) { if (!(flags(i, j, k) & itype)) @@ -2342,37 +2310,35 @@ struct knFlipComputePotentialTrappedAir : public KernelBase { return jtype; } typedef int type8; - void runMessage() - { - debMsg("Executing kernel knFlipComputePotentialTrappedAir ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<Real> &pot; const FlagGrid &flags; const MACGrid &v; @@ -2472,7 +2438,7 @@ struct knFlipComputePotentialKineticEnergy : public KernelBase { const Real tauMin, const Real tauMax, const Real scaleFromManta, - const int itype = FlagGrid::TypeFluid) const + const int itype = FlagGrid::TypeFluid) { if (!(flags(i, j, k) & itype)) @@ -2520,37 +2486,35 @@ struct knFlipComputePotentialKineticEnergy : public KernelBase { return itype; } typedef int type6; - void runMessage() - { - debMsg("Executing kernel knFlipComputePotentialKineticEnergy ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<Real> &pot; const FlagGrid &flags; const MACGrid &v; @@ -2650,7 +2614,7 @@ struct knFlipComputePotentialWaveCrest : public KernelBase { const Real tauMax, const Real scaleFromManta, const int itype = FlagGrid::TypeFluid, - const int jtype = FlagGrid::TypeFluid) const + const int jtype = FlagGrid::TypeFluid) { if (!(flags(i, j, k) & itype)) @@ -2736,19 +2700,41 @@ struct knFlipComputePotentialWaveCrest : public KernelBase { return jtype; } typedef int type9; - void runMessage() - { - debMsg("Executing kernel knFlipComputePotentialWaveCrest ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, + j, + k, + pot, + flags, + v, + radius, + normal, + tauMin, + tauMax, + scaleFromManta, + itype, + jtype); + } + } + else { + const int k = 0; +#pragma omp parallel + { + +#pragma omp for for (int j = 1; j < _maxY; j++) for (int i = 1; i < _maxX; i++) op(i, @@ -2764,21 +2750,9 @@ struct knFlipComputePotentialWaveCrest : public KernelBase { scaleFromManta, itype, jtype); - } - else { - const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, pot, flags, v, radius, normal, tauMin, tauMax, scaleFromManta, itype, jtype); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } Grid<Real> &pot; const FlagGrid &flags; const MACGrid &v; @@ -2860,7 +2834,7 @@ struct knFlipComputeSurfaceNormals : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, Grid<Vec3> &normal, const Grid<Real> &phi) const + inline void op(IndexInt idx, Grid<Vec3> &normal, const Grid<Real> &phi) { normal[idx] = getNormalized(normal[idx]); } @@ -2874,21 +2848,17 @@ struct knFlipComputeSurfaceNormals : public KernelBase { return phi; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel knFlipComputeSurfaceNormals ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, normal, phi); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, normal, phi); + } } Grid<Vec3> &normal; const Grid<Real> φ @@ -2958,7 +2928,7 @@ struct knFlipUpdateNeighborRatio : public KernelBase { Grid<Real> &neighborRatio, const int radius, const int itype = FlagGrid::TypeFluid, - const int jtype = FlagGrid::TypeObstacle) const + const int jtype = FlagGrid::TypeObstacle) { if (!(flags(i, j, k) & itype)) @@ -3008,37 +2978,35 @@ struct knFlipUpdateNeighborRatio : public KernelBase { return jtype; } typedef int type4; - void runMessage() - { - debMsg("Executing kernel knFlipUpdateNeighborRatio ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, neighborRatio, radius, itype, jtype); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, neighborRatio, radius, itype, jtype); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, neighborRatio, radius, itype, jtype); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, neighborRatio, radius, itype, jtype); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; Grid<Real> &neighborRatio; const int radius; diff --git a/extern/mantaflow/preprocessed/plugin/surfaceturbulence.cpp b/extern/mantaflow/preprocessed/plugin/surfaceturbulence.cpp index e5aa09117ea..c4be7ab3ea5 100644 --- a/extern/mantaflow/preprocessed/plugin/surfaceturbulence.cpp +++ b/extern/mantaflow/preprocessed/plugin/surfaceturbulence.cpp @@ -569,7 +569,7 @@ struct advectSurfacePoints : public KernelBase { inline void op(IndexInt idx, BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &coarseParticles, - const ParticleDataImplVec3Wrapper &coarseParticlesPrevPos) const + const ParticleDataImplVec3Wrapper &coarseParticlesPrevPos) { if (surfacePoints.isActive(idx)) { Vec3 avgDisplacement(0, 0, 0); @@ -606,21 +606,17 @@ struct advectSurfacePoints : public KernelBase { return coarseParticlesPrevPos; } typedef ParticleDataImplVec3Wrapper type2; - void runMessage() - { - debMsg("Executing kernel advectSurfacePoints ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, coarseParticles, coarseParticlesPrevPos); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, coarseParticles, coarseParticlesPrevPos); + } } BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &coarseParticles; @@ -673,7 +669,7 @@ struct computeSurfaceNormals : public KernelBase { inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &coarseParticles, - ParticleDataImpl<Vec3> &surfaceNormals) const + ParticleDataImpl<Vec3> &surfaceNormals) { Vec3 pos = surfacePoints.getPos(idx); @@ -743,21 +739,17 @@ struct computeSurfaceNormals : public KernelBase { return surfaceNormals; } typedef ParticleDataImpl<Vec3> type2; - void runMessage() - { - debMsg("Executing kernel computeSurfaceNormals ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, coarseParticles, surfaceNormals); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, coarseParticles, surfaceNormals); + } } const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &coarseParticles; @@ -780,7 +772,7 @@ struct computeAveragedNormals : public KernelBase { } inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, - const ParticleDataImpl<Vec3> &surfaceNormals) const + const ParticleDataImpl<Vec3> &surfaceNormals) { Vec3 pos = surfacePoints.getPos(idx); Vec3 newNormal = Vec3(0, 0, 0); @@ -800,21 +792,17 @@ struct computeAveragedNormals : public KernelBase { return surfaceNormals; } typedef ParticleDataImpl<Vec3> type1; - void runMessage() - { - debMsg("Executing kernel computeAveragedNormals ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, surfaceNormals); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, surfaceNormals); + } } const BasicParticleSystemWrapper &surfacePoints; const ParticleDataImpl<Vec3> &surfaceNormals; @@ -832,7 +820,7 @@ struct assignNormals : public KernelBase { } inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, - ParticleDataImpl<Vec3> &surfaceNormals) const + ParticleDataImpl<Vec3> &surfaceNormals) { surfaceNormals[idx] = tempSurfaceVec3[idx]; } @@ -846,21 +834,17 @@ struct assignNormals : public KernelBase { return surfaceNormals; } typedef ParticleDataImpl<Vec3> type1; - void runMessage() - { - debMsg("Executing kernel assignNormals ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, surfaceNormals); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, surfaceNormals); + } } const BasicParticleSystemWrapper &surfacePoints; ParticleDataImpl<Vec3> &surfaceNormals; @@ -963,7 +947,7 @@ struct computeSurfaceDensities : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, void *dummy) const + inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, void *dummy) { Vec3 pos = surfacePoints.getPos(idx); Real density = 0; @@ -984,21 +968,17 @@ struct computeSurfaceDensities : public KernelBase { return dummy; } typedef void type1; - void runMessage() - { - debMsg("Executing kernel computeSurfaceDensities ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, dummy); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, dummy); + } } const BasicParticleSystemWrapper &surfacePoints; void *dummy; @@ -1016,7 +996,7 @@ struct computeSurfaceDisplacements : public KernelBase { } inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, - const ParticleDataImpl<Vec3> &surfaceNormals) const + const ParticleDataImpl<Vec3> &surfaceNormals) { Vec3 pos = surfacePoints.getPos(idx); Vec3 normal = surfaceNormals[idx]; @@ -1068,21 +1048,17 @@ struct computeSurfaceDisplacements : public KernelBase { return surfaceNormals; } typedef ParticleDataImpl<Vec3> type1; - void runMessage() - { - debMsg("Executing kernel computeSurfaceDisplacements ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, surfaceNormals); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, surfaceNormals); + } } const BasicParticleSystemWrapper &surfacePoints; const ParticleDataImpl<Vec3> &surfaceNormals; @@ -1095,7 +1071,7 @@ struct applySurfaceDisplacements : public KernelBase { runMessage(); run(); } - inline void op(IndexInt idx, BasicParticleSystemWrapper &surfacePoints, void *dummy) const + inline void op(IndexInt idx, BasicParticleSystemWrapper &surfacePoints, void *dummy) { surfacePoints.setPos(idx, surfacePoints.getPos(idx) + tempSurfaceVec3[idx]); } @@ -1109,21 +1085,17 @@ struct applySurfaceDisplacements : public KernelBase { return dummy; } typedef void type1; - void runMessage() - { - debMsg("Executing kernel applySurfaceDisplacements ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, dummy); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, dummy); + } } BasicParticleSystemWrapper &surfacePoints; void *dummy; @@ -1152,7 +1124,7 @@ struct constrainSurface : public KernelBase { } inline void op(IndexInt idx, BasicParticleSystemWrapper &surfacePoints, - const BasicParticleSystemWrapper &coarseParticles) const + const BasicParticleSystemWrapper &coarseParticles) { Vec3 pos = surfacePoints.getPos(idx); Real level = computeConstraintLevel(coarseParticles, surfacePoints.getPos(idx)); @@ -1179,21 +1151,17 @@ struct constrainSurface : public KernelBase { return coarseParticles; } typedef BasicParticleSystemWrapper type1; - void runMessage() - { - debMsg("Executing kernel constrainSurface ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, coarseParticles); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, coarseParticles); + } } BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &coarseParticles; @@ -1220,7 +1188,7 @@ struct interpolateNewWaveData : public KernelBase { ParticleDataImpl<Real> &surfaceWaveH, ParticleDataImpl<Real> &surfaceWaveDtH, ParticleDataImpl<Real> &surfaceWaveSeed, - ParticleDataImpl<Real> &surfaceWaveSeedAmplitude) const + ParticleDataImpl<Real> &surfaceWaveSeedAmplitude) { if (surfacePoints.getStatus(idx) & ParticleBase::PNEW) { Vec3 pos = surfacePoints.getPos(idx); @@ -1270,26 +1238,22 @@ struct interpolateNewWaveData : public KernelBase { return surfaceWaveSeedAmplitude; } typedef ParticleDataImpl<Real> type4; - void runMessage() - { - debMsg("Executing kernel interpolateNewWaveData ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, - surfacePoints, - surfaceWaveH, - surfaceWaveDtH, - surfaceWaveSeed, - surfaceWaveSeedAmplitude); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, + surfacePoints, + surfaceWaveH, + surfaceWaveDtH, + surfaceWaveSeed, + surfaceWaveSeedAmplitude); + } } const BasicParticleSystemWrapper &surfacePoints; ParticleDataImpl<Real> &surfaceWaveH; @@ -1345,7 +1309,7 @@ struct addSeed : public KernelBase { inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, ParticleDataImpl<Real> &surfaceWaveH, - const ParticleDataImpl<Real> &surfaceWaveSeed) const + const ParticleDataImpl<Real> &surfaceWaveSeed) { surfaceWaveH[idx] += surfaceWaveSeed[idx]; } @@ -1364,21 +1328,17 @@ struct addSeed : public KernelBase { return surfaceWaveSeed; } typedef ParticleDataImpl<Real> type2; - void runMessage() - { - debMsg("Executing kernel addSeed ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, surfaceWaveH, surfaceWaveSeed); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, surfaceWaveH, surfaceWaveSeed); + } } const BasicParticleSystemWrapper &surfacePoints; ParticleDataImpl<Real> &surfaceWaveH; @@ -1400,7 +1360,7 @@ struct computeSurfaceWaveNormal : public KernelBase { inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, const ParticleDataImpl<Vec3> &surfaceNormals, - const ParticleDataImpl<Real> &surfaceWaveH) const + const ParticleDataImpl<Real> &surfaceWaveH) { Vec3 pos = surfacePoints.getPos(idx); @@ -1464,21 +1424,17 @@ struct computeSurfaceWaveNormal : public KernelBase { return surfaceWaveH; } typedef ParticleDataImpl<Real> type2; - void runMessage() - { - debMsg("Executing kernel computeSurfaceWaveNormal ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, surfaceNormals, surfaceWaveH); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, surfaceNormals, surfaceWaveH); + } } const BasicParticleSystemWrapper &surfacePoints; const ParticleDataImpl<Vec3> &surfaceNormals; @@ -1500,7 +1456,7 @@ struct computeSurfaceWaveLaplacians : public KernelBase { inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, const ParticleDataImpl<Vec3> &surfaceNormals, - const ParticleDataImpl<Real> &surfaceWaveH) const + const ParticleDataImpl<Real> &surfaceWaveH) { Real laplacian = 0; Real wTotal = 0; @@ -1561,21 +1517,17 @@ struct computeSurfaceWaveLaplacians : public KernelBase { return surfaceWaveH; } typedef ParticleDataImpl<Real> type2; - void runMessage() - { - debMsg("Executing kernel computeSurfaceWaveLaplacians ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, surfaceNormals, surfaceWaveH); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, surfaceNormals, surfaceWaveH); + } } const BasicParticleSystemWrapper &surfacePoints; const ParticleDataImpl<Vec3> &surfaceNormals; @@ -1600,7 +1552,7 @@ struct evolveWave : public KernelBase { const BasicParticleSystemWrapper &surfacePoints, ParticleDataImpl<Real> &surfaceWaveH, ParticleDataImpl<Real> &surfaceWaveDtH, - const ParticleDataImpl<Real> &surfaceWaveSeed) const + const ParticleDataImpl<Real> &surfaceWaveSeed) { surfaceWaveDtH[idx] += params.waveSpeed * params.waveSpeed * params.dt * tempSurfaceFloat[idx]; surfaceWaveDtH[idx] /= (1 + params.dt * params.waveDamping); @@ -1635,21 +1587,17 @@ struct evolveWave : public KernelBase { return surfaceWaveSeed; } typedef ParticleDataImpl<Real> type3; - void runMessage() - { - debMsg("Executing kernel evolveWave ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, surfaceWaveH, surfaceWaveDtH, surfaceWaveSeed); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, surfaceWaveH, surfaceWaveDtH, surfaceWaveSeed); + } } const BasicParticleSystemWrapper &surfacePoints; ParticleDataImpl<Real> &surfaceWaveH; @@ -1669,7 +1617,7 @@ struct computeSurfaceCurvature : public KernelBase { } inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, - const ParticleDataImpl<Vec3> &surfaceNormals) const + const ParticleDataImpl<Vec3> &surfaceNormals) { Vec3 pPos = surfacePoints.getPos(idx); Real wTotal = 0; @@ -1710,21 +1658,17 @@ struct computeSurfaceCurvature : public KernelBase { return surfaceNormals; } typedef ParticleDataImpl<Vec3> type1; - void runMessage() - { - debMsg("Executing kernel computeSurfaceCurvature ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, surfaceNormals); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, surfaceNormals); + } } const BasicParticleSystemWrapper &surfacePoints; const ParticleDataImpl<Vec3> &surfaceNormals; @@ -1742,7 +1686,7 @@ struct smoothCurvature : public KernelBase { } inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, - ParticleDataImpl<Real> &surfaceWaveSource) const + ParticleDataImpl<Real> &surfaceWaveSource) { Vec3 pPos = surfacePoints.getPos(idx); Real curv = 0; @@ -1768,21 +1712,17 @@ struct smoothCurvature : public KernelBase { return surfaceWaveSource; } typedef ParticleDataImpl<Real> type1; - void runMessage() - { - debMsg("Executing kernel smoothCurvature ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, surfaceWaveSource); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, surfaceWaveSource); + } } const BasicParticleSystemWrapper &surfacePoints; ParticleDataImpl<Real> &surfaceWaveSource; @@ -1806,7 +1746,7 @@ struct seedWaves : public KernelBase { const BasicParticleSystemWrapper &surfacePoints, ParticleDataImpl<Real> &surfaceWaveSeed, ParticleDataImpl<Real> &surfaceWaveSeedAmplitude, - ParticleDataImpl<Real> &surfaceWaveSource) const + ParticleDataImpl<Real> &surfaceWaveSource) { Real source = smoothstep(params.waveSeedingCurvatureThresholdRegionCenter - params.waveSeedingCurvatureThresholdRegionRadius, @@ -1850,21 +1790,17 @@ struct seedWaves : public KernelBase { return surfaceWaveSource; } typedef ParticleDataImpl<Real> type3; - void runMessage() - { - debMsg("Executing kernel seedWaves ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, surfacePoints, surfaceWaveSeed, surfaceWaveSeedAmplitude, surfaceWaveSource); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, surfacePoints, surfaceWaveSeed, surfaceWaveSeedAmplitude, surfaceWaveSource); + } } const BasicParticleSystemWrapper &surfacePoints; ParticleDataImpl<Real> &surfaceWaveSeed; diff --git a/extern/mantaflow/preprocessed/plugin/viscosity.cpp b/extern/mantaflow/preprocessed/plugin/viscosity.cpp deleted file mode 100644 index a9e1985336e..00000000000 --- a/extern/mantaflow/preprocessed/plugin/viscosity.cpp +++ /dev/null @@ -1,1428 +0,0 @@ - - -// DO NOT EDIT ! -// This file is generated using the MantaFlow preprocessor (prep generate). - -/****************************************************************************** - * - * MantaFlow fluid solver framework - * Copyright 2020 Sebastian Barschkis, Nils Thuerey - * - * This program is free software, distributed under the terms of the - * Apache License, Version 2.0 - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Accurate Viscous Free Surfaces for Buckling, Coiling, and Rotating Liquids - * Batty et al., SCA 2008 - * - ******************************************************************************/ - -#include "conjugategrad.h" -#include "general.h" -#include "grid.h" -#include "vectorbase.h" - -#include <chrono> - -#if OPENMP == 1 || TBB == 1 -# define ENABLE_PARALLEL 0 -#endif - -#if ENABLE_PARALLEL == 1 -# include <thread> -# include <algorithm> - -static const int manta_num_threads = std::thread::hardware_concurrency(); - -# define parallel_block \ - { \ - std::vector<std::thread> threads; \ - { - -# define do_parallel threads.push_back( std::thread([&]() { -# define do_end \ - } ) ); - -# define block_end \ - } \ - for (auto &thread : threads) { \ - thread.join(); \ - } \ - } - -#endif - -#define FOR_INT_IJK(num) \ - for (int k_off = 0; k_off < num; ++k_off) \ - for (int j_off = 0; j_off < num; ++j_off) \ - for (int i_off = 0; i_off < num; ++i_off) - -using namespace std; - -namespace Manta { - -//! Assumes phi0<0 and phi1>=0, phi2>=0, and phi3>=0 or vice versa. -//! In particular, phi0 must not equal any of phi1, phi2 or phi3. -static Real sortedTetFraction(Real phi0, Real phi1, Real phi2, Real phi3) -{ - return phi0 * phi0 * phi0 / ((phi0 - phi1) * (phi0 - phi2) * (phi0 - phi3)); -} - -//! Assumes phi0<0, phi1<0, and phi2>=0, and phi3>=0 or vice versa. -//! In particular, phi0 and phi1 must not equal any of phi2 and phi3. -static Real sortedPrismFraction(Real phi0, Real phi1, Real phi2, Real phi3) -{ - Real a = phi0 / (phi0 - phi2); - Real b = phi0 / (phi0 - phi3); - Real c = phi1 / (phi1 - phi3); - Real d = phi1 / (phi1 - phi2); - return a * b * (1 - d) + b * (1 - c) * d + c * d; -} - -Real volumeFraction(Real phi0, Real phi1, Real phi2, Real phi3) -{ - sort(phi0, phi1, phi2, phi3); - if (phi3 <= 0) - return 1; - else if (phi2 <= 0) - return 1 - sortedTetFraction(phi3, phi2, phi1, phi0); - else if (phi1 <= 0) - return sortedPrismFraction(phi0, phi1, phi2, phi3); - else if (phi0 <= 0) - return sortedTetFraction(phi0, phi1, phi2, phi3); - else - return 0; -} - -//! The average of the two possible decompositions of the cube into five tetrahedra. -Real volumeFraction(Real phi000, - Real phi100, - Real phi010, - Real phi110, - Real phi001, - Real phi101, - Real phi011, - Real phi111) -{ - return (volumeFraction(phi000, phi001, phi101, phi011) + - volumeFraction(phi000, phi101, phi100, phi110) + - volumeFraction(phi000, phi010, phi011, phi110) + - volumeFraction(phi101, phi011, phi111, phi110) + - 2 * volumeFraction(phi000, phi011, phi101, phi110) + - volumeFraction(phi100, phi101, phi001, phi111) + - volumeFraction(phi100, phi001, phi000, phi010) + - volumeFraction(phi100, phi110, phi111, phi010) + - volumeFraction(phi001, phi111, phi011, phi010) + - 2 * volumeFraction(phi100, phi111, phi001, phi010)) / - 12; -} - -//! Kernel loop over grid with 2x base resolution! - -struct KnEstimateVolumeFraction : public KernelBase { - KnEstimateVolumeFraction(Grid<Real> &volumes, - const Grid<Real> &phi, - const Vec3 &startCentre, - const Real dx) - : KernelBase(&volumes, 0), volumes(volumes), phi(phi), startCentre(startCentre), dx(dx) - { - runMessage(); - run(); - } - inline void op(int i, - int j, - int k, - Grid<Real> &volumes, - const Grid<Real> &phi, - const Vec3 &startCentre, - const Real dx) const - { - const Vec3 centre = startCentre + Vec3(i, j, k) * 0.5; - const Real offset = 0.5 * dx; - const int order = 1; // is sufficient - - Real phi000 = phi.getInterpolatedHi(centre + Vec3(-offset, -offset, -offset), order); - Real phi001 = phi.getInterpolatedHi(centre + Vec3(-offset, -offset, +offset), order); - Real phi010 = phi.getInterpolatedHi(centre + Vec3(-offset, +offset, -offset), order); - Real phi011 = phi.getInterpolatedHi(centre + Vec3(-offset, +offset, +offset), order); - Real phi100 = phi.getInterpolatedHi(centre + Vec3(+offset, -offset, -offset), order); - Real phi101 = phi.getInterpolatedHi(centre + Vec3(+offset, -offset, +offset), order); - Real phi110 = phi.getInterpolatedHi(centre + Vec3(+offset, +offset, -offset), order); - Real phi111 = phi.getInterpolatedHi(centre + Vec3(+offset, +offset, +offset), order); - - volumes(i, j, k) = volumeFraction( - phi000, phi100, phi010, phi110, phi001, phi101, phi011, phi111); - } - inline Grid<Real> &getArg0() - { - return volumes; - } - typedef Grid<Real> type0; - inline const Grid<Real> &getArg1() - { - return phi; - } - typedef Grid<Real> type1; - inline const Vec3 &getArg2() - { - return startCentre; - } - typedef Vec3 type2; - inline const Real &getArg3() - { - return dx; - } - typedef Real type3; - void runMessage() - { - debMsg("Executing kernel KnEstimateVolumeFraction ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - const int _maxX = maxX; - const int _maxY = maxY; - if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, volumes, phi, startCentre, dx); - } - else { - const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, volumes, phi, startCentre, dx); - } - } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } - Grid<Real> &volumes; - const Grid<Real> φ - const Vec3 &startCentre; - const Real dx; -}; - -struct KnUpdateVolumeGrid : public KernelBase { - KnUpdateVolumeGrid(Grid<Real> &cVolLiquid, - Grid<Real> &uVolLiquid, - Grid<Real> &vVolLiquid, - Grid<Real> &wVolLiquid, - Grid<Real> &exVolLiquid, - Grid<Real> &eyVolLiquid, - Grid<Real> &ezVolLiquid, - const Grid<Real> &src) - : KernelBase(&cVolLiquid, 0), - cVolLiquid(cVolLiquid), - uVolLiquid(uVolLiquid), - vVolLiquid(vVolLiquid), - wVolLiquid(wVolLiquid), - exVolLiquid(exVolLiquid), - eyVolLiquid(eyVolLiquid), - ezVolLiquid(ezVolLiquid), - src(src) - { - runMessage(); - run(); - } - inline void op(int i, - int j, - int k, - Grid<Real> &cVolLiquid, - Grid<Real> &uVolLiquid, - Grid<Real> &vVolLiquid, - Grid<Real> &wVolLiquid, - Grid<Real> &exVolLiquid, - Grid<Real> &eyVolLiquid, - Grid<Real> &ezVolLiquid, - const Grid<Real> &src) const - { - // Work out c - cVolLiquid(i, j, k) = 0; - FOR_INT_IJK(2) - { - cVolLiquid(i, j, k) += src(2 * i + i_off, 2 * j + j_off, 2 * k + k_off); - } - cVolLiquid(i, j, k) /= 8; - - // Work out u - if (i >= 1) { - uVolLiquid(i, j, k) = 0; - int base_i = 2 * i - 1; - int base_j = 2 * j; - int base_k = 2 * k; - FOR_INT_IJK(2) - { - uVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off); - } - uVolLiquid(i, j, k) /= 8; - } - - // v - if (j >= 1) { - vVolLiquid(i, j, k) = 0; - int base_i = 2 * i; - int base_j = 2 * j - 1; - int base_k = 2 * k; - FOR_INT_IJK(2) - { - vVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off); - } - vVolLiquid(i, j, k) /= 8; - } - - // w - if (k >= 1) { - wVolLiquid(i, j, k) = 0; - int base_i = 2 * i; - int base_j = 2 * j; - int base_k = 2 * k - 1; - FOR_INT_IJK(2) - { - wVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off); - } - wVolLiquid(i, j, k) /= 8; - } - - // e-x - if (j >= 1 && k >= 1) { - exVolLiquid(i, j, k) = 0; - int base_i = 2 * i; - int base_j = 2 * j - 1; - int base_k = 2 * k - 1; - FOR_INT_IJK(2) - { - exVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off); - } - exVolLiquid(i, j, k) /= 8; - } - - // e-y - if (i >= 1 && k >= 1) { - eyVolLiquid(i, j, k) = 0; - int base_i = 2 * i - 1; - int base_j = 2 * j; - int base_k = 2 * k - 1; - FOR_INT_IJK(2) - { - eyVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off); - } - eyVolLiquid(i, j, k) /= 8; - } - - // e-z - if (i >= 1 && j >= 1) { - ezVolLiquid(i, j, k) = 0; - int base_i = 2 * i - 1; - int base_j = 2 * j - 1; - int base_k = 2 * k; - FOR_INT_IJK(2) - { - ezVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off); - } - ezVolLiquid(i, j, k) /= 8; - } - } - inline Grid<Real> &getArg0() - { - return cVolLiquid; - } - typedef Grid<Real> type0; - inline Grid<Real> &getArg1() - { - return uVolLiquid; - } - typedef Grid<Real> type1; - inline Grid<Real> &getArg2() - { - return vVolLiquid; - } - typedef Grid<Real> type2; - inline Grid<Real> &getArg3() - { - return wVolLiquid; - } - typedef Grid<Real> type3; - inline Grid<Real> &getArg4() - { - return exVolLiquid; - } - typedef Grid<Real> type4; - inline Grid<Real> &getArg5() - { - return eyVolLiquid; - } - typedef Grid<Real> type5; - inline Grid<Real> &getArg6() - { - return ezVolLiquid; - } - typedef Grid<Real> type6; - inline const Grid<Real> &getArg7() - { - return src; - } - typedef Grid<Real> type7; - void runMessage() - { - debMsg("Executing kernel KnUpdateVolumeGrid ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - const int _maxX = maxX; - const int _maxY = maxY; - if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, - j, - k, - cVolLiquid, - uVolLiquid, - vVolLiquid, - wVolLiquid, - exVolLiquid, - eyVolLiquid, - ezVolLiquid, - src); - } - else { - const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, - j, - k, - cVolLiquid, - uVolLiquid, - vVolLiquid, - wVolLiquid, - exVolLiquid, - eyVolLiquid, - ezVolLiquid, - src); - } - } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } - Grid<Real> &cVolLiquid; - Grid<Real> &uVolLiquid; - Grid<Real> &vVolLiquid; - Grid<Real> &wVolLiquid; - Grid<Real> &exVolLiquid; - Grid<Real> &eyVolLiquid; - Grid<Real> &ezVolLiquid; - const Grid<Real> &src; -}; - -void computeWeights(const Grid<Real> &phi, - Grid<Real> &doubleSized, - Grid<Real> &cVolLiquid, - Grid<Real> &uVolLiquid, - Grid<Real> &vVolLiquid, - Grid<Real> &wVolLiquid, - Grid<Real> &exVolLiquid, - Grid<Real> &eyVolLiquid, - Grid<Real> &ezVolLiquid, - Real dx) -{ - KnEstimateVolumeFraction(doubleSized, phi, Vec3(0.25 * dx, 0.25 * dx, 0.25 * dx), 0.5 * dx); - KnUpdateVolumeGrid(cVolLiquid, - uVolLiquid, - vVolLiquid, - wVolLiquid, - exVolLiquid, - eyVolLiquid, - ezVolLiquid, - doubleSized); -} - -struct KnUpdateFaceStates : public KernelBase { - KnUpdateFaceStates(const FlagGrid &flags, - Grid<int> &uState, - Grid<int> &vState, - Grid<int> &wState) - : KernelBase(&flags, 0), flags(flags), uState(uState), vState(vState), wState(wState) - { - runMessage(); - run(); - } - inline void op(int i, - int j, - int k, - const FlagGrid &flags, - Grid<int> &uState, - Grid<int> &vState, - Grid<int> &wState) const - { - bool curObs = flags.isObstacle(i, j, k); - uState(i, j, k) = (i > 0 && !flags.isObstacle(i - 1, j, k) && !curObs) ? - FlagGrid::TypeFluid : - FlagGrid::TypeObstacle; - vState(i, j, k) = (j > 0 && !flags.isObstacle(i, j - 1, k) && !curObs) ? - FlagGrid::TypeFluid : - FlagGrid::TypeObstacle; - wState(i, j, k) = (k > 0 && !flags.isObstacle(i, j, k - 1) && !curObs) ? - FlagGrid::TypeFluid : - FlagGrid::TypeObstacle; - } - inline const FlagGrid &getArg0() - { - return flags; - } - typedef FlagGrid type0; - inline Grid<int> &getArg1() - { - return uState; - } - typedef Grid<int> type1; - inline Grid<int> &getArg2() - { - return vState; - } - typedef Grid<int> type2; - inline Grid<int> &getArg3() - { - return wState; - } - typedef Grid<int> type3; - void runMessage() - { - debMsg("Executing kernel KnUpdateFaceStates ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - const int _maxX = maxX; - const int _maxY = maxY; - if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, uState, vState, wState); - } - else { - const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, uState, vState, wState); - } - } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } - const FlagGrid &flags; - Grid<int> &uState; - Grid<int> &vState; - Grid<int> &wState; -}; - -struct KnApplyVelocities : public KernelBase { - KnApplyVelocities(MACGrid &dst, - const Grid<int> &uState, - const Grid<int> &vState, - const Grid<int> &wState, - Grid<Real> &srcU, - Grid<Real> &srcV, - Grid<Real> &srcW) - : KernelBase(&dst, 0), - dst(dst), - uState(uState), - vState(vState), - wState(wState), - srcU(srcU), - srcV(srcV), - srcW(srcW) - { - runMessage(); - run(); - } - inline void op(int i, - int j, - int k, - MACGrid &dst, - const Grid<int> &uState, - const Grid<int> &vState, - const Grid<int> &wState, - Grid<Real> &srcU, - Grid<Real> &srcV, - Grid<Real> &srcW) const - { - dst(i, j, k).x = (uState(i, j, k) == FlagGrid::TypeFluid) ? srcU(i, j, k) : 0; - dst(i, j, k).y = (vState(i, j, k) == FlagGrid::TypeFluid) ? srcV(i, j, k) : 0; - if (dst.is3D()) - dst(i, j, k).z = (wState(i, j, k) == FlagGrid::TypeFluid) ? srcW(i, j, k) : 0; - } - inline MACGrid &getArg0() - { - return dst; - } - typedef MACGrid type0; - inline const Grid<int> &getArg1() - { - return uState; - } - typedef Grid<int> type1; - inline const Grid<int> &getArg2() - { - return vState; - } - typedef Grid<int> type2; - inline const Grid<int> &getArg3() - { - return wState; - } - typedef Grid<int> type3; - inline Grid<Real> &getArg4() - { - return srcU; - } - typedef Grid<Real> type4; - inline Grid<Real> &getArg5() - { - return srcV; - } - typedef Grid<Real> type5; - inline Grid<Real> &getArg6() - { - return srcW; - } - typedef Grid<Real> type6; - void runMessage() - { - debMsg("Executing kernel KnApplyVelocities ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - const int _maxX = maxX; - const int _maxY = maxY; - if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, dst, uState, vState, wState, srcU, srcV, srcW); - } - else { - const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, dst, uState, vState, wState, srcU, srcV, srcW); - } - } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } - MACGrid &dst; - const Grid<int> &uState; - const Grid<int> &vState; - const Grid<int> &wState; - Grid<Real> &srcU; - Grid<Real> &srcV; - Grid<Real> &srcW; -}; - -void solveViscosity(const FlagGrid &flags, - MACGrid &vel, - Grid<Real> &cVolLiquid, - Grid<Real> &uVolLiquid, - Grid<Real> &vVolLiquid, - Grid<Real> &wVolLiquid, - Grid<Real> &exVolLiquid, - Grid<Real> &eyVolLiquid, - Grid<Real> &ezVolLiquid, - Grid<Real> &viscosity, - const Real dt, - const Real dx, - const Real cgAccuracy, - const Real cgMaxIterFac) -{ - const Real factor = dt * square(1.0 / dx); - const int maxIter = (int)(cgMaxIterFac * flags.getSize().max()) * (flags.is3D() ? 1 : 4); - GridCg<ApplyMatrixViscosityU> *uGcg; - GridCg<ApplyMatrixViscosityV> *vGcg; - GridCg<ApplyMatrixViscosityW> *wGcg; - - // Tmp grids for CG solve in U, V, W dimensions - FluidSolver *parent = flags.getParent(); - Grid<Real> uResidual(parent); - Grid<Real> vResidual(parent); - Grid<Real> wResidual(parent); - Grid<Real> uSearch(parent); - Grid<Real> vSearch(parent); - Grid<Real> wSearch(parent); - Grid<Real> uTmp(parent); - Grid<Real> vTmp(parent); - Grid<Real> wTmp(parent); - Grid<Real> uRhs(parent); - Grid<Real> vRhs(parent); - Grid<Real> wRhs(parent); - - // A matrix U grids - Grid<Real> uA0(parent); // diagonal elements in A matrix - Grid<Real> uAplusi(parent); // neighbor at i+1 - Grid<Real> uAplusj(parent); // neighbor at j+1 - Grid<Real> uAplusk(parent); // neighbor at k+1 - Grid<Real> uAminusi(parent); // neighbor at i-1 - Grid<Real> uAminusj(parent); // neighbor at j-1 - Grid<Real> uAminusk(parent); // neighbor at k-1 - Grid<Real> uAhelper1(parent); // additional helper grids for off diagonal elements - Grid<Real> uAhelper2(parent); - Grid<Real> uAhelper3(parent); - Grid<Real> uAhelper4(parent); - Grid<Real> uAhelper5(parent); - Grid<Real> uAhelper6(parent); - Grid<Real> uAhelper7(parent); - Grid<Real> uAhelper8(parent); - - // A matrix V grids - Grid<Real> vA0(parent); - Grid<Real> vAplusi(parent); - Grid<Real> vAplusj(parent); - Grid<Real> vAplusk(parent); - Grid<Real> vAminusi(parent); - Grid<Real> vAminusj(parent); - Grid<Real> vAminusk(parent); - Grid<Real> vAhelper1(parent); - Grid<Real> vAhelper2(parent); - Grid<Real> vAhelper3(parent); - Grid<Real> vAhelper4(parent); - Grid<Real> vAhelper5(parent); - Grid<Real> vAhelper6(parent); - Grid<Real> vAhelper7(parent); - Grid<Real> vAhelper8(parent); - - // A matrix W grids - Grid<Real> wA0(parent); - Grid<Real> wAplusi(parent); - Grid<Real> wAplusj(parent); - Grid<Real> wAplusk(parent); - Grid<Real> wAminusi(parent); - Grid<Real> wAminusj(parent); - Grid<Real> wAminusk(parent); - Grid<Real> wAhelper1(parent); - Grid<Real> wAhelper2(parent); - Grid<Real> wAhelper3(parent); - Grid<Real> wAhelper4(parent); - Grid<Real> wAhelper5(parent); - Grid<Real> wAhelper6(parent); - Grid<Real> wAhelper7(parent); - Grid<Real> wAhelper8(parent); - - // Solution grids for CG solvers - Grid<Real> uSolution(parent); - Grid<Real> vSolution(parent); - Grid<Real> wSolution(parent); - - // Save state of voxel face (fluid or obstacle) - Grid<int> uState(parent); - Grid<int> vState(parent); - Grid<int> wState(parent); - - // Save state of voxel face (fluid or obstacle) - KnUpdateFaceStates(flags, uState, vState, wState); - - // Shorter names for flags, we will use them often - int isFluid = FlagGrid::TypeFluid; - int isObstacle = FlagGrid::TypeObstacle; - - // Main viscosity loop: construct A matrices and rhs's in all dimensions - FOR_IJK_BND(flags, 1) - { - - // U-terms: 2u_xx+ v_xy +uyy + u_zz + w_xz - if (uState(i, j, k) == isFluid) { - - uRhs(i, j, k) = uVolLiquid(i, j, k) * vel(i, j, k).x; - uA0(i, j, k) = uVolLiquid(i, j, k); - - Real viscRight = viscosity(i, j, k); - Real viscLeft = viscosity(i - 1, j, k); - Real volRight = cVolLiquid(i, j, k); - Real volLeft = cVolLiquid(i - 1, j, k); - - Real viscTop = 0.25 * (viscosity(i - 1, j + 1, k) + viscosity(i - 1, j, k) + - viscosity(i, j + 1, k) + viscosity(i, j, k)); - Real viscBottom = 0.25 * (viscosity(i - 1, j, k) + viscosity(i - 1, j - 1, k) + - viscosity(i, j, k) + viscosity(i, j - 1, k)); - Real volTop = ezVolLiquid(i, j + 1, k); - Real volBottom = ezVolLiquid(i, j, k); - - Real viscFront = 0.25 * (viscosity(i - 1, j, k + 1) + viscosity(i - 1, j, k) + - viscosity(i, j, k + 1) + viscosity(i, j, k)); - Real viscBack = 0.25 * (viscosity(i - 1, j, k) + viscosity(i - 1, j, k - 1) + - viscosity(i, j, k) + viscosity(i, j, k - 1)); - Real volFront = eyVolLiquid(i, j, k + 1); - Real volBack = eyVolLiquid(i, j, k); - - Real factorRight = 2 * factor * viscRight * volRight; - Real factorLeft = 2 * factor * viscLeft * volLeft; - Real factorTop = factor * viscTop * volTop; - Real factorBottom = factor * viscBottom * volBottom; - Real factorFront = factor * viscFront * volFront; - Real factorBack = factor * viscBack * volBack; - - // u_x_right - uA0(i, j, k) += factorRight; - if (uState(i + 1, j, k) == isFluid) { - uAplusi(i, j, k) += -factorRight; - } - else if (uState(i + 1, j, k) == isObstacle) { - uRhs(i, j, k) -= -vel(i + 1, j, k).x * factorRight; - } - - // u_x_left - uA0(i, j, k) += factorLeft; - if (uState(i - 1, j, k) == isFluid) { - uAminusi(i, j, k) += -factorLeft; - } - else if (uState(i - 1, j, k) == isObstacle) { - uRhs(i, j, k) -= -vel(i - 1, j, k).x * factorLeft; - } - - // u_y_top - uA0(i, j, k) += factorTop; - if (uState(i, j + 1, k) == isFluid) { - uAplusj(i, j, k) += -factorTop; - } - else if (uState(i, j + 1, k) == isObstacle) { - uRhs(i, j, k) -= -vel(i, j + 1, k).x * factorTop; - } - - // u_y_bottom - uA0(i, j, k) += factorBottom; - if (uState(i, j - 1, k) == isFluid) { - uAminusj(i, j, k) += -factorBottom; - } - else if (uState(i, j - 1, k) == isObstacle) { - uRhs(i, j, k) -= -vel(i, j - 1, k).x * factorBottom; - } - - // u_z_front - uA0(i, j, k) += factorFront; - if (uState(i, j, k + 1) == isFluid) { - uAplusk(i, j, k) += -factorFront; - } - else if (uState(i, j, k + 1) == isObstacle) { - uRhs(i, j, k) -= -vel(i, j, k + 1).x * factorFront; - } - - // u_z_back - uA0(i, j, k) += factorBack; - if (uState(i, j, k - 1) == isFluid) { - uAminusk(i, j, k) += -factorBack; - } - else if (uState(i, j, k - 1) == isObstacle) { - uRhs(i, j, k) -= -vel(i, j, k - 1).x * factorBack; - } - - // v_x_top - if (vState(i, j + 1, k) == isFluid) { - uAhelper1(i, j, k) += -factorTop; - } - else if (vState(i, j + 1, k) == isObstacle) { - uRhs(i, j, k) -= -vel(i, j + 1, k).y * factorTop; - } - - if (vState(i - 1, j + 1, k) == isFluid) { - uAhelper2(i, j, k) += factorTop; - } - else if (vState(i - 1, j + 1, k) == isObstacle) { - uRhs(i, j, k) -= vel(i - 1, j + 1, k).y * factorTop; - } - - // v_x_bottom - if (vState(i, j, k) == isFluid) { - uAhelper3(i, j, k) += factorBottom; - } - else if (vState(i, j, k) == isObstacle) { - uRhs(i, j, k) -= vel(i, j, k).y * factorBottom; - } - - if (vState(i - 1, j, k) == isFluid) { - uAhelper4(i, j, k) += -factorBottom; - } - else if (vState(i - 1, j, k) == isObstacle) { - uRhs(i, j, k) -= -vel(i - 1, j, k).y * factorBottom; - } - - // w_x_front - if (wState(i, j, k + 1) == isFluid) { - uAhelper5(i, j, k) += -factorFront; - } - else if (wState(i, j, k + 1) == isObstacle) { - uRhs(i, j, k) -= -vel(i, j, k + 1).z * factorFront; - } - - if (wState(i - 1, j, k + 1) == isFluid) { - uAhelper6(i, j, k) += factorFront; - } - else if (wState(i - 1, j, k + 1) == isObstacle) { - uRhs(i, j, k) -= vel(i - 1, j, k + 1).z * factorFront; - } - - // w_x_back - if (wState(i, j, k) == isFluid) { - uAhelper7(i, j, k) += factorBack; - } - else if (wState(i, j, k) == isObstacle) { - uRhs(i, j, k) -= vel(i, j, k).z * factorBack; - } - - if (wState(i - 1, j, k) == isFluid) { - uAhelper8(i, j, k) += -factorBack; - } - else if (wState(i - 1, j, k) == isObstacle) { - uRhs(i, j, k) -= -vel(i - 1, j, k).z * factorBack; - } - } - - // V-terms: vxx + 2vyy + vzz + u_yx + w_yz - if (vState(i, j, k) == isFluid) { - - vRhs(i, j, k) = vVolLiquid(i, j, k) * vel(i, j, k).y; - vA0(i, j, k) = vVolLiquid(i, j, k); - - Real viscRight = 0.25 * (viscosity(i, j - 1, k) + viscosity(i + 1, j - 1, k) + - viscosity(i, j, k) + viscosity(i + 1, j, k)); - Real viscLeft = 0.25 * (viscosity(i, j - 1, k) + viscosity(i - 1, j - 1, k) + - viscosity(i, j, k) + viscosity(i - 1, j, k)); - Real volRight = ezVolLiquid(i + 1, j, k); - Real volLeft = ezVolLiquid(i, j, k); - - Real viscTop = viscosity(i, j, k); - Real viscBottom = viscosity(i, j - 1, k); - Real volTop = cVolLiquid(i, j, k); - Real volBottom = cVolLiquid(i, j - 1, k); - - Real viscFront = 0.25 * (viscosity(i, j - 1, k) + viscosity(i, j - 1, k + 1) + - viscosity(i, j, k) + viscosity(i, j, k + 1)); - Real viscBack = 0.25 * (viscosity(i, j - 1, k) + viscosity(i, j - 1, k - 1) + - viscosity(i, j, k) + viscosity(i, j, k - 1)); - Real volFront = exVolLiquid(i, j, k + 1); - Real volBack = exVolLiquid(i, j, k); - - Real factorRight = factor * viscRight * volRight; - Real factorLeft = factor * viscLeft * volLeft; - Real factorTop = 2 * factor * viscTop * volTop; - Real factorBottom = 2 * factor * viscBottom * volBottom; - Real factorFront = factor * viscFront * volFront; - Real factorBack = factor * viscBack * volBack; - - // v_x_right - vA0(i, j, k) += factorRight; - if (vState(i + 1, j, k) == isFluid) { - vAplusi(i, j, k) += -factorRight; - } - else if (vState(i + 1, j, k) == isObstacle) { - vRhs(i, j, k) -= -vel(i + 1, j, k).y * factorRight; - } - - // v_x_left - vA0(i, j, k) += factorLeft; - if (vState(i - 1, j, k) == isFluid) { - vAminusi(i, j, k) += -factorLeft; - } - else if (vState(i - 1, j, k) == isObstacle) { - vRhs(i, j, k) -= -vel(i - 1, j, k).y * factorLeft; - } - - // vy_top - vA0(i, j, k) += factorTop; - if (vState(i, j + 1, k) == isFluid) { - vAplusj(i, j, k) += -factorTop; - } - else if (vState(i, j + 1, k) == isObstacle) { - vRhs(i, j, k) -= -vel(i, j + 1, k).y * factorTop; - } - - // vy_bottom - vA0(i, j, k) += factorBottom; - if (vState(i, j - 1, k) == isFluid) { - vAminusj(i, j, k) += -factorBottom; - } - else if (vState(i, j - 1, k) == isObstacle) { - vRhs(i, j, k) -= -vel(i, j - 1, k).y * factorBottom; - } - - // v_z_front - vA0(i, j, k) += factorFront; - if (vState(i, j, k + 1) == isFluid) { - vAplusk(i, j, k) += -factorFront; - } - else if (vState(i, j, k + 1) == isObstacle) { - vRhs(i, j, k) -= -vel(i, j, k + 1).y * factorFront; - } - - // v_z_back - vA0(i, j, k) += factorBack; - if (vState(i, j, k - 1) == isFluid) { - vAminusk(i, j, k) += -factorBack; - } - else if (vState(i, j, k - 1) == isObstacle) { - vRhs(i, j, k) -= -vel(i, j, k - 1).y * factorBack; - } - - // u_y_right - if (uState(i + 1, j, k) == isFluid) { - vAhelper1(i, j, k) += -factorRight; - } - else if (uState(i + 1, j, k) == isObstacle) { - vRhs(i, j, k) -= -vel(i + 1, j, k).x * factorRight; - } - - if (uState(i + 1, j - 1, k) == isFluid) { - vAhelper2(i, j, k) += factorRight; - } - else if (uState(i + 1, j - 1, k) == isObstacle) { - vRhs(i, j, k) -= vel(i + 1, j - 1, k).x * factorRight; - } - - // u_y_left - if (uState(i, j, k) == isFluid) { - vAhelper3(i, j, k) += factorLeft; - } - else if (uState(i, j, k) == isObstacle) { - vRhs(i, j, k) -= vel(i, j, k).x * factorLeft; - } - - if (uState(i, j - 1, k) == isFluid) { - vAhelper4(i, j, k) += -factorLeft; - } - else if (uState(i, j - 1, k) == isObstacle) { - vRhs(i, j, k) -= -vel(i, j - 1, k).x * factorLeft; - } - - // w_y_front - if (wState(i, j, k + 1) == isFluid) { - vAhelper5(i, j, k) += -factorFront; - } - else if (wState(i, j, k + 1) == isObstacle) { - vRhs(i, j, k) -= -vel(i, j, k + 1).z * factorFront; - } - - if (wState(i, j - 1, k + 1) == isFluid) { - vAhelper6(i, j, k) += factorFront; - } - else if (wState(i, j - 1, k + 1) == isObstacle) { - vRhs(i, j, k) -= vel(i, j - 1, k + 1).z * factorFront; - } - - // w_y_back - if (wState(i, j, k) == isFluid) { - vAhelper7(i, j, k) += factorBack; - } - else if (wState(i, j, k) == isObstacle) { - vRhs(i, j, k) -= vel(i, j, k).z * factorBack; - } - - if (wState(i, j - 1, k) == isFluid) { - vAhelper8(i, j, k) += -factorBack; - } - else if (wState(i, j - 1, k) == isObstacle) { - vRhs(i, j, k) -= -vel(i, j - 1, k).z * factorBack; - } - } - - // W-terms: wxx+ wyy+ 2wzz + u_zx + v_zy - if (wState(i, j, k) == isFluid) { - - wRhs(i, j, k) = wVolLiquid(i, j, k) * vel(i, j, k).z; - wA0(i, j, k) = wVolLiquid(i, j, k); - - Real viscRight = 0.25 * (viscosity(i, j, k) + viscosity(i, j, k - 1) + - viscosity(i + 1, j, k) + viscosity(i + 1, j, k - 1)); - Real viscLeft = 0.25 * (viscosity(i, j, k) + viscosity(i, j, k - 1) + - viscosity(i - 1, j, k) + viscosity(i - 1, j, k - 1)); - Real volRight = eyVolLiquid(i + 1, j, k); - Real volLeft = eyVolLiquid(i, j, k); - - Real viscTop = 0.25 * (viscosity(i, j, k) + viscosity(i, j, k - 1) + viscosity(i, j + 1, k) + - viscosity(i, j + 1, k - 1)); - Real viscBottom = 0.25 * (viscosity(i, j, k) + viscosity(i, j, k - 1) + - viscosity(i, j - 1, k) + viscosity(i, j - 1, k - 1)); - Real volTop = exVolLiquid(i, j + 1, k); - Real volBottom = exVolLiquid(i, j, k); - - Real viscFront = viscosity(i, j, k); - Real viscBack = viscosity(i, j, k - 1); - Real volFront = cVolLiquid(i, j, k); - Real volBack = cVolLiquid(i, j, k - 1); - - Real factorRight = factor * viscRight * volRight; - Real factorLeft = factor * viscLeft * volLeft; - Real factorTop = factor * viscTop * volTop; - Real factorBottom = factor * viscBottom * volBottom; - Real factorFront = 2 * factor * viscFront * volFront; - Real factorBack = 2 * factor * viscBack * volBack; - - // w_x_right - wA0(i, j, k) += factorRight; - if (wState(i + 1, j, k) == isFluid) { - wAplusi(i, j, k) += -factorRight; - } - else if (wState(i + 1, j, k) == isObstacle) { - wRhs(i, j, k) -= -vel(i + 1, j, k).z * factorRight; - } - - // w_x_left - wA0(i, j, k) += factorLeft; - if (wState(i - 1, j, k) == isFluid) { - wAminusi(i, j, k) += -factorLeft; - } - else if (wState(i - 1, j, k) == isObstacle) { - wRhs(i, j, k) -= -vel(i - 1, j, k).z * factorLeft; - } - - // w_y_top - wA0(i, j, k) += factorTop; - if (wState(i, j + 1, k) == isFluid) { - wAplusj(i, j, k) += -factorTop; - } - else if (wState(i, j + 1, k) == isObstacle) { - wRhs(i, j, k) -= -vel(i, j + 1, k).z * factorTop; - } - - // w_y_bottom - wA0(i, j, k) += factorBottom; - if (wState(i, j - 1, k) == isFluid) { - wAminusj(i, j, k) += -factorBottom; - } - else if (wState(i, j - 1, k) == isObstacle) { - wRhs(i, j, k) -= -vel(i, j - 1, k).z * factorBottom; - } - - // w_z_front - wA0(i, j, k) += factorFront; - if (wState(i, j, k + 1) == isFluid) { - wAplusk(i, j, k) += -factorFront; - } - else if (wState(i, j, k + 1) == isObstacle) { - wRhs(i, j, k) -= -vel(i, j, k + 1).z * factorFront; - } - - // w_z_back - wA0(i, j, k) += factorBack; - if (wState(i, j, k - 1) == isFluid) { - wAminusk(i, j, k) += -factorBack; - } - else if (wState(i, j, k - 1) == isObstacle) { - wRhs(i, j, k) -= -vel(i, j, k - 1).z * factorBack; - } - - // u_z_right - if (uState(i + 1, j, k) == isFluid) { - wAhelper1(i, j, k) += -factorRight; - } - else if (uState(i + 1, j, k) == isObstacle) { - wRhs(i, j, k) -= -vel(i + 1, j, k).x * factorRight; - } - - if (uState(i + 1, j, k - 1) == isFluid) { - wAhelper2(i, j, k) += factorRight; - } - else if (uState(i + 1, j, k - 1) == isObstacle) { - wRhs(i, j, k) -= vel(i + 1, j, k - 1).x * factorRight; - } - - // u_z_left - if (uState(i, j, k) == isFluid) { - wAhelper3(i, j, k) += factorLeft; - } - else if (uState(i, j, k) == isObstacle) { - wRhs(i, j, k) -= vel(i, j, k).x * factorLeft; - } - - if (uState(i, j, k - 1) == isFluid) { - wAhelper4(i, j, k) += -factorLeft; - } - else if (uState(i, j, k - 1) == isObstacle) { - wRhs(i, j, k) -= -vel(i, j, k - 1).x * factorLeft; - } - - // v_z_top - if (vState(i, j + 1, k) == isFluid) { - wAhelper5(i, j, k) += -factorTop; - } - else if (vState(i, j + 1, k) == isObstacle) { - wRhs(i, j, k) -= -vel(i, j + 1, k).y * factorTop; - } - - if (vState(i, j + 1, k - 1) == isFluid) { - wAhelper6(i, j, k) += factorTop; - } - else if (vState(i, j + 1, k - 1) == isObstacle) { - wRhs(i, j, k) -= vel(i, j + 1, k - 1).y * factorTop; - } - - // v_z_bottom - if (vState(i, j, k) == isFluid) { - wAhelper7(i, j, k) += factorBottom; - } - else if (vState(i, j, k) == isObstacle) { - wRhs(i, j, k) -= vel(i, j, k).y * factorBottom; - } - - if (vState(i, j, k - 1) == isFluid) { - wAhelper8(i, j, k) += -factorBottom; - } - else if (vState(i, j, k - 1) == isObstacle) { - wRhs(i, j, k) -= -vel(i, j, k - 1).y * factorBottom; - } - } - } - - // CG solver for U - if (flags.is3D()) { - vector<Grid<Real> *> uMatA{&uA0, - &uAplusi, - &uAplusj, - &uAplusk, - &uAminusi, - &uAminusj, - &uAminusk, - &uAhelper1, - &uAhelper2, - &uAhelper3, - &uAhelper4, - &uAhelper5, - &uAhelper6, - &uAhelper7, - &uAhelper8}; - vector<Grid<Real> *> uVecRhs{&vRhs, &wRhs}; - uGcg = new GridCg<ApplyMatrixViscosityU>( - uSolution, uRhs, uResidual, uSearch, flags, uTmp, uMatA, uVecRhs); - } - else { - errMsg("Viscosity: 2D Matrix application not yet supported in viscosity solver"); - } - - // CG solver for V - if (flags.is3D()) { - vector<Grid<Real> *> vMatA{&vA0, - &vAplusi, - &vAplusj, - &vAplusk, - &vAminusi, - &vAminusj, - &vAminusk, - &vAhelper1, - &vAhelper2, - &vAhelper3, - &vAhelper4, - &vAhelper5, - &vAhelper6, - &vAhelper7, - &vAhelper8}; - vector<Grid<Real> *> vVecRhs{&uRhs, &wRhs}; - vGcg = new GridCg<ApplyMatrixViscosityV>( - vSolution, vRhs, vResidual, vSearch, flags, vTmp, vMatA, vVecRhs); - } - else { - errMsg("Viscosity: 2D Matrix application not yet supported in viscosity solver"); - } - - // CG solver for W - if (flags.is3D()) { - vector<Grid<Real> *> wMatA{&wA0, - &wAplusi, - &wAplusj, - &wAplusk, - &wAminusi, - &wAminusj, - &wAminusk, - &wAhelper1, - &wAhelper2, - &wAhelper3, - &wAhelper4, - &wAhelper5, - &wAhelper6, - &wAhelper7, - &wAhelper8}; - vector<Grid<Real> *> wVecRhs{&uRhs, &vRhs}; - wGcg = new GridCg<ApplyMatrixViscosityW>( - wSolution, wRhs, wResidual, wSearch, flags, wTmp, wMatA, wVecRhs); - } - else { - errMsg("Viscosity: 2D Matrix application not yet supported in viscosity solver"); - } - - // Same accuracy for all dimensions - uGcg->setAccuracy(cgAccuracy); - vGcg->setAccuracy(cgAccuracy); - wGcg->setAccuracy(cgAccuracy); - - // CG solve. Preconditioning not supported yet. Instead, U, V, W can optionally be solved in - // parallel. - for (int uIter = 0, vIter = 0, wIter = 0; uIter < maxIter || vIter < maxIter || wIter < maxIter; - uIter++, vIter++, wIter++) { -#if ENABLE_PARALLEL == 1 - parallel_block do_parallel -#endif - if (uIter < maxIter && !uGcg->iterate()) uIter = maxIter; -#if ENABLE_PARALLEL == 1 - do_end do_parallel -#endif - if (vIter < maxIter && !vGcg->iterate()) vIter = maxIter; -#if ENABLE_PARALLEL == 1 - do_end do_parallel -#endif - if (wIter < maxIter && !wGcg->iterate()) wIter = maxIter; -#if ENABLE_PARALLEL == 1 - do_end block_end -#endif - - // Make sure that next CG iteration has updated rhs grids - uRhs.copyFrom(uSearch); - vRhs.copyFrom(vSearch); - wRhs.copyFrom(wSearch); - } - debMsg( - "Viscosity: solveViscosity() done. " - "Iterations (u,v,w): (" - << uGcg->getIterations() << "," << vGcg->getIterations() << "," << wGcg->getIterations() - << "), " - "Residual (u,v,w): (" - << uGcg->getResNorm() << "," << vGcg->getResNorm() << "," << wGcg->getResNorm() << ")", - 2); - - delete uGcg; - delete vGcg; - delete wGcg; - - // Apply solutions to global velocity grid - KnApplyVelocities(vel, uState, vState, wState, uSolution, vSolution, wSolution); -} - -//! To use the viscosity plugin, scenes must call this function before solving pressure. -//! Note that the 'volumes' grid uses 2x the base resolution - -void applyViscosity(const FlagGrid &flags, - const Grid<Real> &phi, - MACGrid &vel, - Grid<Real> &volumes, - Grid<Real> &viscosity, - const Real cgAccuracy = 1e-9, - const Real cgMaxIterFac = 1.5) -{ - const Real dx = flags.getParent()->getDx(); - const Real dt = flags.getParent()->getDt(); - - // Reserve temp grids for volume weight calculation - FluidSolver *parent = flags.getParent(); - Grid<Real> cVolLiquid(parent); - Grid<Real> uVolLiquid(parent); - Grid<Real> vVolLiquid(parent); - Grid<Real> wVolLiquid(parent); - Grid<Real> exVolLiquid(parent); - Grid<Real> eyVolLiquid(parent); - Grid<Real> ezVolLiquid(parent); - - // Ensure final weight grid gets cleared at every step - volumes.clear(); - - // Save viscous fluid volume in double-sized volumes grid - computeWeights(phi, - volumes, - cVolLiquid, - uVolLiquid, - vVolLiquid, - wVolLiquid, - exVolLiquid, - eyVolLiquid, - ezVolLiquid, - dx); - - // Set up A matrix and rhs. Solve with CG. Update velocity grid. - solveViscosity(flags, - vel, - cVolLiquid, - uVolLiquid, - vVolLiquid, - wVolLiquid, - exVolLiquid, - eyVolLiquid, - ezVolLiquid, - viscosity, - dt, - dx, - cgAccuracy, - cgMaxIterFac); -} -static PyObject *_W_0(PyObject *_self, PyObject *_linargs, PyObject *_kwds) -{ - try { - PbArgs _args(_linargs, _kwds); - FluidSolver *parent = _args.obtainParent(); - bool noTiming = _args.getOpt<bool>("notiming", -1, 0); - pbPreparePlugin(parent, "applyViscosity", !noTiming); - PyObject *_retval = nullptr; - { - ArgLocker _lock; - const FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 0, &_lock); - const Grid<Real> &phi = *_args.getPtr<Grid<Real>>("phi", 1, &_lock); - MACGrid &vel = *_args.getPtr<MACGrid>("vel", 2, &_lock); - Grid<Real> &volumes = *_args.getPtr<Grid<Real>>("volumes", 3, &_lock); - Grid<Real> &viscosity = *_args.getPtr<Grid<Real>>("viscosity", 4, &_lock); - const Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 5, 1e-9, &_lock); - const Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 6, 1.5, &_lock); - _retval = getPyNone(); - applyViscosity(flags, phi, vel, volumes, viscosity, cgAccuracy, cgMaxIterFac); - _args.check(); - } - pbFinalizePlugin(parent, "applyViscosity", !noTiming); - return _retval; - } - catch (std::exception &e) { - pbSetError("applyViscosity", e.what()); - return 0; - } -} -static const Pb::Register _RP_applyViscosity("", "applyViscosity", _W_0); -extern "C" { -void PbRegister_applyViscosity() -{ - KEEP_UNUSED(_RP_applyViscosity); -} -} - -} // namespace Manta - -#if ENABLE_PARALLEL == 1 - -# undef parallel_block -# undef do_parallel -# undef do_end -# undef block_end -# undef parallel_for -# undef parallel_end - -#endif diff --git a/extern/mantaflow/preprocessed/plugin/vortexplugins.cpp b/extern/mantaflow/preprocessed/plugin/vortexplugins.cpp index 18222c4ccda..6386e835447 100644 --- a/extern/mantaflow/preprocessed/plugin/vortexplugins.cpp +++ b/extern/mantaflow/preprocessed/plugin/vortexplugins.cpp @@ -188,8 +188,7 @@ struct KnAcceleration : public KernelBase { runMessage(); run(); } - inline void op( - IndexInt idx, MACGrid &a, const MACGrid &v1, const MACGrid &v0, const Real idt) const + inline void op(IndexInt idx, MACGrid &a, const MACGrid &v1, const MACGrid &v0, const Real idt) { a[idx] = (v1[idx] - v0[idx]) * idt; } @@ -213,21 +212,17 @@ struct KnAcceleration : public KernelBase { return idt; } typedef Real type3; - void runMessage() - { - debMsg("Executing kernel KnAcceleration ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, a, v1, v0, idt); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, a, v1, v0, idt); + } } MACGrid &a; const MACGrid &v1; @@ -576,18 +571,17 @@ void VICintegration(VortexSheetMesh &mesh, // prepare CG solver const int maxIter = (int)(cgMaxIterFac * vel.getSize().max()); - vector<Grid<Real> *> matA{&A0, &Ai, &Aj, &Ak}; - GridCgInterface *gcg = new GridCg<ApplyMatrix>( - solution, rhs, residual, search, flags, temp1, matA); + solution, rhs, residual, search, flags, temp1, &A0, &Ai, &Aj, &Ak); gcg->setAccuracy(cgAccuracy); gcg->setUseL2Norm(true); gcg->setICPreconditioner( (GridCgInterface::PreconditionType)precondition, &pca0, &pca1, &pca2, &pca3); // iterations + Real time = 0; for (int iter = 0; iter < maxIter; iter++) { - if (!gcg->iterate()) + if (!gcg->iterate(time)) iter = maxIter; } debMsg("VICintegration CG iterations:" << gcg->getIterations() << ", res:" << gcg->getSigma(), diff --git a/extern/mantaflow/preprocessed/plugin/waveletturbulence.cpp b/extern/mantaflow/preprocessed/plugin/waveletturbulence.cpp index 7d867542132..20e6a098d72 100644 --- a/extern/mantaflow/preprocessed/plugin/waveletturbulence.cpp +++ b/extern/mantaflow/preprocessed/plugin/waveletturbulence.cpp @@ -170,7 +170,7 @@ struct KnInterpolateMACGrid : public KernelBase { const MACGrid &source, const Vec3 &sourceFactor, const Vec3 &off, - int orderSpace) const + int orderSpace) { Vec3 pos = Vec3(i, j, k) * sourceFactor + off; @@ -207,37 +207,35 @@ struct KnInterpolateMACGrid : public KernelBase { return orderSpace; } typedef int type4; - void runMessage() - { - debMsg("Executing kernel KnInterpolateMACGrid ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, target, source, sourceFactor, off, orderSpace); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, target, source, sourceFactor, off, orderSpace); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, target, source, sourceFactor, off, orderSpace); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, target, source, sourceFactor, off, orderSpace); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } MACGrid ⌖ const MACGrid &source; const Vec3 &sourceFactor; @@ -319,7 +317,7 @@ struct knApplySimpleNoiseVec3 : public KernelBase { Grid<Vec3> &target, const WaveletNoiseField &noise, Real scale, - const Grid<Real> *weight) const + const Grid<Real> *weight) { if (!flags.isFluid(i, j, k)) return; @@ -353,37 +351,35 @@ struct knApplySimpleNoiseVec3 : public KernelBase { return weight; } typedef Grid<Real> type4; - void runMessage() - { - debMsg("Executing kernel knApplySimpleNoiseVec3 ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, target, noise, scale, weight); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, target, noise, scale, weight); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, target, noise, scale, weight); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, target, noise, scale, weight); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; Grid<Vec3> ⌖ const WaveletNoiseField &noise; @@ -461,7 +457,7 @@ struct knApplySimpleNoiseReal : public KernelBase { Grid<Real> &target, const WaveletNoiseField &noise, Real scale, - const Grid<Real> *weight) const + const Grid<Real> *weight) { if (!flags.isFluid(i, j, k)) return; @@ -495,37 +491,35 @@ struct knApplySimpleNoiseReal : public KernelBase { return weight; } typedef Grid<Real> type4; - void runMessage() - { - debMsg("Executing kernel knApplySimpleNoiseReal ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, target, noise, scale, weight); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, target, noise, scale, weight); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, target, noise, scale, weight); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, target, noise, scale, weight); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; Grid<Real> ⌖ const WaveletNoiseField &noise; @@ -615,7 +609,7 @@ struct knApplyNoiseVec3 : public KernelBase { const Grid<Real> *weight, const Grid<Vec3> *uv, bool uvInterpol, - const Vec3 &sourceFactor) const + const Vec3 &sourceFactor) { if (!flags.isFluid(i, j, k)) return; @@ -694,19 +688,40 @@ struct knApplyNoiseVec3 : public KernelBase { return sourceFactor; } typedef Vec3 type8; - void runMessage() - { - debMsg("Executing kernel knApplyNoiseVec3 ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, + j, + k, + flags, + target, + noise, + scale, + scaleSpatial, + weight, + uv, + uvInterpol, + sourceFactor); + } + } + else { + const int k = 0; +#pragma omp parallel + { + +#pragma omp for for (int j = 0; j < _maxY; j++) for (int i = 0; i < _maxX; i++) op(i, @@ -721,32 +736,9 @@ struct knApplyNoiseVec3 : public KernelBase { uv, uvInterpol, sourceFactor); - } - else { - const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, - j, - k, - flags, - target, - noise, - scale, - scaleSpatial, - weight, - uv, - uvInterpol, - sourceFactor); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; Grid<Vec3> ⌖ const WaveletNoiseField &noise; @@ -834,7 +826,7 @@ struct KnApplyComputeEnergy : public KernelBase { run(); } inline void op( - int i, int j, int k, const FlagGrid &flags, const MACGrid &vel, Grid<Real> &energy) const + int i, int j, int k, const FlagGrid &flags, const MACGrid &vel, Grid<Real> &energy) { Real e = 0.f; if (flags.isFluid(i, j, k)) { @@ -858,37 +850,35 @@ struct KnApplyComputeEnergy : public KernelBase { return energy; } typedef Grid<Real> type2; - void runMessage() - { - debMsg("Executing kernel KnApplyComputeEnergy ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, energy); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, energy); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, flags, vel, energy); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, flags, vel, energy); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const FlagGrid &flags; const MACGrid &vel; Grid<Real> &energy; @@ -1019,7 +1009,7 @@ struct KnComputeStrainRateMag : public KernelBase { run(); } inline void op( - int i, int j, int k, const MACGrid &vel, const Grid<Vec3> &velCenter, Grid<Real> &prod) const + int i, int j, int k, const MACGrid &vel, const Grid<Vec3> &velCenter, Grid<Real> &prod) { // compute Sij = 1/2 * (dU_i/dx_j + dU_j/dx_i) Vec3 diag = Vec3(vel(i + 1, j, k).x, vel(i, j + 1, k).y, 0.) - vel(i, j, k); @@ -1056,37 +1046,35 @@ struct KnComputeStrainRateMag : public KernelBase { return prod; } typedef Grid<Real> type2; - void runMessage() - { - debMsg("Executing kernel KnComputeStrainRateMag ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, velCenter, prod); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, velCenter, prod); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, vel, velCenter, prod); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, vel, velCenter, prod); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const MACGrid &vel; const Grid<Vec3> &velCenter; Grid<Real> ∏ diff --git a/extern/mantaflow/preprocessed/plugin/waves.cpp b/extern/mantaflow/preprocessed/plugin/waves.cpp index 53c56b8c506..8becb5f6341 100644 --- a/extern/mantaflow/preprocessed/plugin/waves.cpp +++ b/extern/mantaflow/preprocessed/plugin/waves.cpp @@ -38,7 +38,7 @@ struct knCalcSecDeriv2d : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, const Grid<Real> &v, Grid<Real> &ret) const + inline void op(int i, int j, int k, const Grid<Real> &v, Grid<Real> &ret) { ret(i, j, k) = (-4. * v(i, j, k) + v(i - 1, j, k) + v(i + 1, j, k) + v(i, j - 1, k) + v(i, j + 1, k)); @@ -53,37 +53,35 @@ struct knCalcSecDeriv2d : public KernelBase { return ret; } typedef Grid<Real> type1; - void runMessage() - { - debMsg("Executing kernel knCalcSecDeriv2d ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, v, ret); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, v, ret); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, v, ret); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, v, ret); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const Grid<Real> &v; Grid<Real> &ret; }; @@ -151,44 +149,43 @@ struct knTotalSum : public KernelBase { return h; } typedef Grid<Real> type0; - void runMessage() - { - debMsg("Executing kernel knTotalSum ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, h, sum); + +#pragma omp parallel + { + double sum = 0; +#pragma omp for nowait + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, h, sum); +#pragma omp critical + { + this->sum += sum; + } + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, h, sum); +#pragma omp parallel + { + double sum = 0; +#pragma omp for nowait + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, h, sum); +#pragma omp critical + { + this->sum += sum; + } + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(1, maxY), *this); - } - knTotalSum(knTotalSum &o, tbb::split) : KernelBase(o), h(o.h), sum(0) - { - } - void join(const knTotalSum &o) - { - sum += o.sum; - } Grid<Real> &h; double sum; }; @@ -296,7 +293,7 @@ struct MakeRhsWE : public KernelBase { const Grid<Real> &ut, const Grid<Real> &utm1, Real s, - bool crankNic = false) const + bool crankNic = false) { rhs(i, j, k) = (2. * ut(i, j, k) - utm1(i, j, k)); if (crankNic) { @@ -334,37 +331,35 @@ struct MakeRhsWE : public KernelBase { return crankNic; } typedef bool type5; - void runMessage() - { - debMsg("Executing kernel MakeRhsWE ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 1; j < _maxY; j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, rhs, ut, utm1, s, crankNic); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, rhs, ut, utm1, s, crankNic); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 1; i < _maxX; i++) - op(i, j, k, flags, rhs, ut, utm1, s, crankNic); +#pragma omp parallel + { + +#pragma omp for + for (int j = 1; j < _maxY; j++) + for (int i = 1; i < _maxX; i++) + op(i, j, k, flags, rhs, ut, utm1, s, crankNic); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this); - } const FlagGrid &flags; Grid<Real> &rhs; const Grid<Real> &ut; @@ -423,21 +418,17 @@ void cgSolveWE(const FlagGrid &flags, const int maxIter = (int)(cgMaxIterFac * flags.getSize().max()) * (flags.is3D() ? 1 : 4); GridCgInterface *gcg; - vector<Grid<Real> *> matA{&A0, &Ai, &Aj}; - - if (flags.is3D()) { - matA.push_back(&Ak); - gcg = new GridCg<ApplyMatrix>(out, rhs, residual, search, flags, tmp, matA); - } - else { - gcg = new GridCg<ApplyMatrix2D>(out, rhs, residual, search, flags, tmp, matA); - } + if (flags.is3D()) + gcg = new GridCg<ApplyMatrix>(out, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak); + else + gcg = new GridCg<ApplyMatrix2D>(out, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak); gcg->setAccuracy(cgAccuracy); // no preconditioning for now... + Real time = 0; for (int iter = 0; iter < maxIter; iter++) { - if (!gcg->iterate()) + if (!gcg->iterate(time)) iter = maxIter; } debMsg("cgSolveWaveEq iterations:" << gcg->getIterations() << ", res:" << gcg->getSigma(), 1); diff --git a/extern/mantaflow/preprocessed/registration.cpp b/extern/mantaflow/preprocessed/registration.cpp index fd32463b95f..dfbd4074d23 100644 --- a/extern/mantaflow/preprocessed/registration.cpp +++ b/extern/mantaflow/preprocessed/registration.cpp @@ -89,7 +89,6 @@ extern void PbRegister_processBurn(); extern void PbRegister_updateFlame(); extern void PbRegister_getSpiralVelocity(); extern void PbRegister_setGradientYWeight(); -extern void PbRegister_PD_fluid_guiding(); extern void PbRegister_releaseBlurPrecomp(); extern void PbRegister_KEpsilonComputeProduction(); extern void PbRegister_KEpsilonSources(); @@ -145,7 +144,6 @@ extern void PbRegister_flipComputeSurfaceNormals(); extern void PbRegister_flipUpdateNeighborRatio(); extern void PbRegister_particleSurfaceTurbulence(); extern void PbRegister_debugCheckParts(); -extern void PbRegister_applyViscosity(); extern void PbRegister_markAsFixed(); extern void PbRegister_texcoordInflow(); extern void PbRegister_meshSmokeInflow(); @@ -287,7 +285,6 @@ void MantaEnsureRegistration() PbRegister_updateFlame(); PbRegister_getSpiralVelocity(); PbRegister_setGradientYWeight(); - PbRegister_PD_fluid_guiding(); PbRegister_releaseBlurPrecomp(); PbRegister_KEpsilonComputeProduction(); PbRegister_KEpsilonSources(); @@ -343,7 +340,6 @@ void MantaEnsureRegistration() PbRegister_flipUpdateNeighborRatio(); PbRegister_particleSurfaceTurbulence(); PbRegister_debugCheckParts(); - PbRegister_applyViscosity(); PbRegister_markAsFixed(); PbRegister_texcoordInflow(); PbRegister_meshSmokeInflow(); diff --git a/extern/mantaflow/preprocessed/shapes.cpp b/extern/mantaflow/preprocessed/shapes.cpp index 4095758cbc0..546d39a7ed9 100644 --- a/extern/mantaflow/preprocessed/shapes.cpp +++ b/extern/mantaflow/preprocessed/shapes.cpp @@ -52,8 +52,7 @@ template<class T> struct ApplyShapeToGrid : public KernelBase { runMessage(); run(); } - inline void op( - int i, int j, int k, Grid<T> *grid, Shape *shape, T value, FlagGrid *respectFlags) const + inline void op(int i, int j, int k, Grid<T> *grid, Shape *shape, T value, FlagGrid *respectFlags) { if (respectFlags && respectFlags->isObstacle(i, j, k)) return; @@ -80,37 +79,35 @@ template<class T> struct ApplyShapeToGrid : public KernelBase { return respectFlags; } typedef FlagGrid type3; - void runMessage() - { - debMsg("Executing kernel ApplyShapeToGrid ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, shape, value, respectFlags); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, shape, value, respectFlags); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, shape, value, respectFlags); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, shape, value, respectFlags); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<T> *grid; Shape *shape; T value; @@ -141,7 +138,7 @@ template<class T> struct ApplyShapeToGridSmooth : public KernelBase { Real sigma, Real shift, T value, - FlagGrid *respectFlags) const + FlagGrid *respectFlags) { if (respectFlags && respectFlags->isObstacle(i, j, k)) return; @@ -181,37 +178,35 @@ template<class T> struct ApplyShapeToGridSmooth : public KernelBase { return respectFlags; } typedef FlagGrid type5; - void runMessage() - { - debMsg("Executing kernel ApplyShapeToGridSmooth ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, phi, sigma, shift, value, respectFlags); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, phi, sigma, shift, value, respectFlags); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, phi, sigma, shift, value, respectFlags); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, phi, sigma, shift, value, respectFlags); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<T> *grid; Grid<Real> φ Real sigma; @@ -230,7 +225,7 @@ struct ApplyShapeToMACGrid : public KernelBase { run(); } inline void op( - int i, int j, int k, MACGrid *grid, Shape *shape, Vec3 value, FlagGrid *respectFlags) const + int i, int j, int k, MACGrid *grid, Shape *shape, Vec3 value, FlagGrid *respectFlags) { if (respectFlags && respectFlags->isObstacle(i, j, k)) return; @@ -261,37 +256,35 @@ struct ApplyShapeToMACGrid : public KernelBase { return respectFlags; } typedef FlagGrid type3; - void runMessage() - { - debMsg("Executing kernel ApplyShapeToMACGrid ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, shape, value, respectFlags); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, shape, value, respectFlags); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, grid, shape, value, respectFlags); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, grid, shape, value, respectFlags); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } MACGrid *grid; Shape *shape; Vec3 value; @@ -429,7 +422,7 @@ struct BoxSDF : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Real> &phi, const Vec3 &p1, const Vec3 &p2) const + inline void op(int i, int j, int k, Grid<Real> &phi, const Vec3 &p1, const Vec3 &p2) { const Vec3 p(i + 0.5, j + 0.5, k + 0.5); if (p.x <= p2.x && p.x >= p1.x && p.y <= p2.y && p.y >= p1.y && p.z <= p2.z && p.z >= p1.z) { @@ -505,37 +498,35 @@ struct BoxSDF : public KernelBase { return p2; } typedef Vec3 type2; - void runMessage() - { - debMsg("Executing kernel BoxSDF ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, phi, p1, p2); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, phi, p1, p2); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, phi, p1, p2); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, phi, p1, p2); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<Real> φ const Vec3 &p1; const Vec3 &p2; @@ -647,7 +638,7 @@ struct SphereSDF : public KernelBase { runMessage(); run(); } - inline void op(int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 scale) const + inline void op(int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 scale) { phi(i, j, k) = norm((Vec3(i + 0.5, j + 0.5, k + 0.5) - center) / scale) - radius; } @@ -671,37 +662,35 @@ struct SphereSDF : public KernelBase { return scale; } typedef Vec3 type3; - void runMessage() - { - debMsg("Executing kernel SphereSDF ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, phi, center, radius, scale); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, phi, center, radius, scale); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, phi, center, radius, scale); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, phi, center, radius, scale); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<Real> φ Vec3 center; Real radius; @@ -774,7 +763,7 @@ struct CylinderSDF : public KernelBase { run(); } inline void op( - int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 zaxis, Real maxz) const + int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 zaxis, Real maxz) { Vec3 p = Vec3(i + 0.5, j + 0.5, k + 0.5) - center; Real z = fabs(dot(p, zaxis)); @@ -820,37 +809,35 @@ struct CylinderSDF : public KernelBase { return maxz; } typedef Real type4; - void runMessage() - { - debMsg("Executing kernel CylinderSDF ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, phi, center, radius, zaxis, maxz); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, phi, center, radius, zaxis, maxz); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, phi, center, radius, zaxis, maxz); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, phi, center, radius, zaxis, maxz); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } Grid<Real> φ Vec3 center; Real radius; @@ -920,13 +907,8 @@ struct SlopeSDF : public KernelBase { runMessage(); run(); } - inline void op(int i, - int j, - int k, - const Vec3 &n, - Grid<Real> &phiObs, - const Real &fac, - const Real &origin) const + inline void op( + int i, int j, int k, const Vec3 &n, Grid<Real> &phiObs, const Real &fac, const Real &origin) { phiObs(i, j, k) = (n.x * (double)i + n.y * (double)j + n.z * (double)k - origin) * fac; @@ -951,37 +933,35 @@ struct SlopeSDF : public KernelBase { return origin; } typedef Real type3; - void runMessage() - { - debMsg("Executing kernel SlopeSDF ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const + void runMessage(){}; + void run() { const int _maxX = maxX; const int _maxY = maxY; if (maxZ > 1) { - for (int k = __r.begin(); k != (int)__r.end(); k++) - for (int j = 0; j < _maxY; j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, n, phiObs, fac, origin); + +#pragma omp parallel + { + +#pragma omp for + for (int k = minZ; k < maxZ; k++) + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, n, phiObs, fac, origin); + } } else { const int k = 0; - for (int j = __r.begin(); j != (int)__r.end(); j++) - for (int i = 0; i < _maxX; i++) - op(i, j, k, n, phiObs, fac, origin); +#pragma omp parallel + { + +#pragma omp for + for (int j = 0; j < _maxY; j++) + for (int i = 0; i < _maxX; i++) + op(i, j, k, n, phiObs, fac, origin); + } } } - void run() - { - if (maxZ > 1) - tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this); - else - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this); - } const Vec3 &n; Grid<Real> &phiObs; const Real &fac; diff --git a/extern/mantaflow/preprocessed/shapes.h b/extern/mantaflow/preprocessed/shapes.h index 5a400eaed09..fa645389bfe 100644 --- a/extern/mantaflow/preprocessed/shapes.h +++ b/extern/mantaflow/preprocessed/shapes.h @@ -269,7 +269,6 @@ class Shape : public PbClass { protected: GridType mType; - public: PbArgs _args; } @@ -320,7 +319,6 @@ class NullShape : public Shape { { gridSetConst<Real>(phi, 1000.0f); } - public: PbArgs _args; } @@ -396,7 +394,6 @@ class Box : public Shape { protected: Vec3 mP0, mP1; - public: PbArgs _args; } @@ -458,7 +455,6 @@ class Sphere : public Shape { protected: Vec3 mCenter, mScale; Real mRadius; - public: PbArgs _args; } @@ -583,7 +579,6 @@ class Cylinder : public Shape { protected: Vec3 mCenter, mZDir; Real mRadius, mZ; - public: PbArgs _args; } @@ -660,7 +655,6 @@ class Slope : public Shape { Real mAnglexy, mAngleyz; Real mOrigin; Vec3 mGs; - public: PbArgs _args; } diff --git a/extern/mantaflow/preprocessed/test.cpp b/extern/mantaflow/preprocessed/test.cpp index b90c886efe7..ae9b533a7ba 100644 --- a/extern/mantaflow/preprocessed/test.cpp +++ b/extern/mantaflow/preprocessed/test.cpp @@ -50,28 +50,21 @@ struct reductionTest : public KernelBase { return v; } typedef Grid<Real> type0; - void runMessage() - { - debMsg("Executing kernel reductionTest ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, v, sum); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - reductionTest(reductionTest &o, tbb::split) : KernelBase(o), v(o.v), sum(0) - { - } - void join(const reductionTest &o) - { - sum += o.sum; + const IndexInt _sz = size; +#pragma omp parallel + { + double sum = 0; +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, v, sum); +#pragma omp critical + { + this->sum += sum; + } + } } const Grid<Real> &v; double sum; @@ -101,28 +94,21 @@ struct minReduction : public KernelBase { return v; } typedef Grid<Real> type0; - void runMessage() - { - debMsg("Executing kernel minReduction ", 3); - debMsg("Kernel range" - << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, v, sum); - } + void runMessage(){}; void run() { - tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); - } - minReduction(minReduction &o, tbb::split) : KernelBase(o), v(o.v), sum(0) - { - } - void join(const minReduction &o) - { - sum = min(sum, o.sum); + const IndexInt _sz = size; +#pragma omp parallel + { + double sum = 0; +#pragma omp for nowait + for (IndexInt i = 0; i < _sz; i++) + op(i, v, sum); +#pragma omp critical + { + this->sum = min(sum, this->sum); + } + } } const Grid<Real> &v; double sum; diff --git a/extern/mantaflow/preprocessed/turbulencepart.cpp b/extern/mantaflow/preprocessed/turbulencepart.cpp index 168ae9cc2f2..0dddf8f9cc6 100644 --- a/extern/mantaflow/preprocessed/turbulencepart.cpp +++ b/extern/mantaflow/preprocessed/turbulencepart.cpp @@ -136,7 +136,7 @@ struct KnSynthesizeTurbulence : public KernelBase { int octaves, Real scale, Real invL0, - Real kmin) const + Real kmin) { const Real PERSISTENCE = 0.56123f; @@ -217,21 +217,17 @@ struct KnSynthesizeTurbulence : public KernelBase { return kmin; } typedef Real type9; - void runMessage() - { - debMsg("Executing kernel KnSynthesizeTurbulence ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, p, flags, noise, kGrid, alpha, dt, octaves, scale, invL0, kmin); - } + void runMessage(){}; void run() { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, p, flags, noise, kGrid, alpha, dt, octaves, scale, invL0, kmin); + } } TurbulenceParticleSystem &p; FlagGrid &flags; diff --git a/extern/mantaflow/preprocessed/turbulencepart.h b/extern/mantaflow/preprocessed/turbulencepart.h index 5177aeb2d96..81c94d77722 100644 --- a/extern/mantaflow/preprocessed/turbulencepart.h +++ b/extern/mantaflow/preprocessed/turbulencepart.h @@ -199,7 +199,6 @@ class TurbulenceParticleSystem : public ParticleSystem<TurbulenceParticleData> { private: WaveletNoiseField &noise; - public: PbArgs _args; } diff --git a/extern/mantaflow/preprocessed/vortexpart.cpp b/extern/mantaflow/preprocessed/vortexpart.cpp index 0eba2743ee8..db22ff85208 100644 --- a/extern/mantaflow/preprocessed/vortexpart.cpp +++ b/extern/mantaflow/preprocessed/vortexpart.cpp @@ -60,56 +60,24 @@ inline Vec3 VortexKernel(const Vec3 &p, const vector<VortexParticleData> &vp, Re return u; } -struct _KnVpAdvectMesh : public KernelBase { - _KnVpAdvectMesh(const KernelBase &base, - vector<Node> &nodes, - const vector<VortexParticleData> &vp, - Real scale, - vector<Vec3> &u) - : KernelBase(base), nodes(nodes), vp(vp), scale(scale), u(u) +struct KnVpAdvectMesh : public KernelBase { + KnVpAdvectMesh(vector<Node> &nodes, const vector<VortexParticleData> &vp, Real scale) + : KernelBase(nodes.size()), nodes(nodes), vp(vp), scale(scale), u((size)) { + runMessage(); + run(); } inline void op(IndexInt idx, vector<Node> &nodes, const vector<VortexParticleData> &vp, Real scale, - vector<Vec3> &u) const + vector<Vec3> &u) { if (nodes[idx].flags & Mesh::NfFixed) u[idx] = 0.0; else u[idx] = VortexKernel(nodes[idx].pos, vp, scale); } - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, nodes, vp, scale, u); - } - void run() - { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); - } - vector<Node> &nodes; - const vector<VortexParticleData> &vp; - Real scale; - vector<Vec3> &u; -}; -struct KnVpAdvectMesh : public KernelBase { - KnVpAdvectMesh(vector<Node> &nodes, const vector<VortexParticleData> &vp, Real scale) - : KernelBase(nodes.size()), - _inner(KernelBase(nodes.size()), nodes, vp, scale, u), - nodes(nodes), - vp(vp), - scale(scale), - u((size)) - { - runMessage(); - run(); - } - void run() - { - _inner.run(); - } inline operator vector<Vec3>() { return u; @@ -133,62 +101,37 @@ struct KnVpAdvectMesh : public KernelBase { return scale; } typedef Real type2; - void runMessage() + void runMessage(){}; + void run() { - debMsg("Executing kernel KnVpAdvectMesh ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - _KnVpAdvectMesh _inner; + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, nodes, vp, scale, u); + } + } vector<Node> &nodes; const vector<VortexParticleData> &vp; Real scale; vector<Vec3> u; }; -struct _KnVpAdvectSelf : public KernelBase { - _KnVpAdvectSelf(const KernelBase &base, - vector<VortexParticleData> &vp, - Real scale, - vector<Vec3> &u) - : KernelBase(base), vp(vp), scale(scale), u(u) - { - } - inline void op(IndexInt idx, vector<VortexParticleData> &vp, Real scale, vector<Vec3> &u) const - { - if (vp[idx].flag & ParticleBase::PDELETE) - u[idx] = 0.0; - else - u[idx] = VortexKernel(vp[idx].pos, vp, scale); - } - void operator()(const tbb::blocked_range<IndexInt> &__r) const - { - for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++) - op(idx, vp, scale, u); - } - void run() - { - tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); - } - vector<VortexParticleData> &vp; - Real scale; - vector<Vec3> &u; -}; struct KnVpAdvectSelf : public KernelBase { KnVpAdvectSelf(vector<VortexParticleData> &vp, Real scale) - : KernelBase(vp.size()), - _inner(KernelBase(vp.size()), vp, scale, u), - vp(vp), - scale(scale), - u((size)) + : KernelBase(vp.size()), vp(vp), scale(scale), u((size)) { runMessage(); run(); } - void run() + inline void op(IndexInt idx, vector<VortexParticleData> &vp, Real scale, vector<Vec3> &u) { - _inner.run(); + if (vp[idx].flag & ParticleBase::PDELETE) + u[idx] = 0.0; + else + u[idx] = VortexKernel(vp[idx].pos, vp, scale); } inline operator vector<Vec3>() { @@ -208,14 +151,18 @@ struct KnVpAdvectSelf : public KernelBase { return scale; } typedef Real type1; - void runMessage() + void runMessage(){}; + void run() { - debMsg("Executing kernel KnVpAdvectSelf ", 3); - debMsg("Kernel range" - << " size " << size << " ", - 4); - }; - _KnVpAdvectSelf _inner; + const IndexInt _sz = size; +#pragma omp parallel + { + +#pragma omp for + for (IndexInt i = 0; i < _sz; i++) + op(i, vp, scale, u); + } + } vector<VortexParticleData> &vp; Real scale; vector<Vec3> u; diff --git a/extern/mantaflow/preprocessed/vortexpart.h b/extern/mantaflow/preprocessed/vortexpart.h index 8f80cf910eb..e48fbc7f507 100644 --- a/extern/mantaflow/preprocessed/vortexpart.h +++ b/extern/mantaflow/preprocessed/vortexpart.h @@ -127,7 +127,6 @@ class VortexParticleSystem : public ParticleSystem<VortexParticleData> { } virtual ParticleBase *clone(); - public: PbArgs _args; } diff --git a/extern/mantaflow/preprocessed/vortexsheet.h b/extern/mantaflow/preprocessed/vortexsheet.h index 0fc0f3a1258..01c32e4e806 100644 --- a/extern/mantaflow/preprocessed/vortexsheet.h +++ b/extern/mantaflow/preprocessed/vortexsheet.h @@ -240,7 +240,6 @@ class VortexSheetMesh : public Mesh { VorticityChannel mVorticity; TexCoord3Channel mTex1, mTex2; TurbulenceChannel mTurb; - public: PbArgs _args; } diff --git a/intern/mantaflow/intern/MANTA_main.cpp b/intern/mantaflow/intern/MANTA_main.cpp index 6c8e45ceeb4..0959e017352 100644 --- a/intern/mantaflow/intern/MANTA_main.cpp +++ b/intern/mantaflow/intern/MANTA_main.cpp @@ -59,8 +59,8 @@ int MANTA::with_debug(0); MANTA::MANTA(int *res, FluidModifierData *fmd) : mCurrentID(++solverID), mMaxRes(fmd->domain->maxres) { - if (with_debug) - cout << "FLUID: " << mCurrentID << " with res(" << res[0] << ", " << res[1] << ", " << res[2] + //if (with_debug) + cout << "============= FLUID: " << mCurrentID << " with res(" << res[0] << ", " << res[1] << ", " << res[2] << ")" << endl; FluidDomainSettings *fds = fmd->domain; @@ -279,6 +279,7 @@ MANTA::MANTA(int *res, FluidModifierData *fmd) } /* All requested initializations must not fail in constructor. */ BLI_assert(initSuccess); + UNUSED_VARS(initSuccess); updatePointers(fmd); } diff --git a/intern/mantaflow/intern/strings/fluid_script.h b/intern/mantaflow/intern/strings/fluid_script.h index 3bf8e66c110..eb29b95affb 100644 --- a/intern/mantaflow/intern/strings/fluid_script.h +++ b/intern/mantaflow/intern/strings/fluid_script.h @@ -273,13 +273,32 @@ def fluid_adapt_time_step_$ID$():\n\ const std::string fluid_alloc = "\n\ mantaMsg('Fluid alloc data')\n\ -flags_s$ID$ = s$ID$.create(FlagGrid, name='$NAME_FLAGS$')\n\ -vel_s$ID$ = s$ID$.create(MACGrid, name='$NAME_VELOCITY$', sparse=True)\n\ +flags_s$ID$ = s$ID$.create(FlagGrid, name='$NAME_FLAGS$', offload=True)\n\ +mantaMsg('Fluid alloc data vel')\n\ +vel_s$ID$ = s$ID$.create(MACGrid, name='$NAME_VELOCITY$', sparse=True, offload=True)\n\ velTmp_s$ID$ = s$ID$.create(MACGrid, name='$NAME_VELOCITYTMP$', sparse=True)\n\ x_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_X$')\n\ y_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_Y$')\n\ z_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_Z$')\n\ -pressure_s$ID$ = s$ID$.create(RealGrid, name='$NAME_PRESSURE$')\n\ +mantaMsg('Fluid alloc data pressure')\n\ +pressure_s$ID$ = s$ID$.create(RealGrid, name='$NAME_PRESSURE$', offload=True)\n\ +mantaMsg('Fluid alloc data rhs')\n\ +rhs_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\ +mantaMsg('Fluid alloc data A0')\n\ +A0_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\ +mantaMsg('Fluid alloc data Ai')\n\ +Ai_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\ +mantaMsg('Fluid alloc data Aj')\n\ +Aj_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\ +mantaMsg('Fluid alloc data Ak')\n\ +Ak_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\ +mantaMsg('Fluid alloc data search')\n\ +search_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\ +mantaMsg('Fluid alloc data residual')\n\ +residual_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\ +mantaMsg('Fluid alloc data tmp')\n\ +tmp_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\ +mantaMsg('Fluid alloc data 6')\n\ phiObs_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHIOBS$')\n\ phiSIn_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHISIN$') # helper for static flow objects\n\ phiIn_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHIIN$')\n\ @@ -298,7 +317,8 @@ phiOut_s$ID$.setConst(9999)\n\ \n\ # Keep track of important objects in dict to load them later on\n\ fluid_data_dict_final_s$ID$ = { 'vel' : vel_s$ID$ }\n\ -fluid_data_dict_resume_s$ID$ = { 'phiObs' : phiObs_s$ID$, 'phiIn' : phiIn_s$ID$, 'phiOut' : phiOut_s$ID$, 'flags' : flags_s$ID$, 'velTmp' : velTmp_s$ID$ }\n"; +fluid_data_dict_resume_s$ID$ = { 'phiObs' : phiObs_s$ID$, 'phiIn' : phiIn_s$ID$, 'phiOut' : phiOut_s$ID$, 'flags' : flags_s$ID$, 'velTmp' : velTmp_s$ID$ }\n\ +mantaMsg('Fluid alloc DONE')\n"; const std::string fluid_alloc_obstacle = "\n\ @@ -477,7 +497,8 @@ mantaMsg('Delete guiding solver')\n\ if 'sg$ID$' in globals(): del sg$ID$\n\ \n\ # Release unreferenced memory (if there is some left)\n\ -gc.collect()\n"; +gc.collect()\n\ +mantaMsg('Done deleting')\n"; ////////////////////////////////////////////////////////////////////// // BAKE diff --git a/intern/mantaflow/intern/strings/liquid_script.h b/intern/mantaflow/intern/strings/liquid_script.h index c44727bd47e..1773d4869aa 100644 --- a/intern/mantaflow/intern/strings/liquid_script.h +++ b/intern/mantaflow/intern/strings/liquid_script.h @@ -274,9 +274,14 @@ def liquid_step_$ID$():\n\ velTmp_s$ID$.copyFrom(vel_s$ID$)\n\ \n\ mantaMsg('Advecting phi')\n\ + #phi_s$ID$.updateToOmp()\n\ + #vel_s$ID$.updateToOmp()\n\ advectSemiLagrange(flags=flags_s$ID$, vel=vel_s$ID$, grid=phi_s$ID$, order=1) # first order is usually enough\n\ + \n\ mantaMsg('Advecting velocity')\n\ advectSemiLagrange(flags=flags_s$ID$, vel=vel_s$ID$, grid=vel_s$ID$, order=2)\n\ + #phi_s$ID$.updateFromOmp()\n\ + #vel_s$ID$.updateFromOmp()\n\ \n\ # create level set of particles\n\ gridParticleIndex(parts=pp_s$ID$, flags=flags_s$ID$, indexSys=pindex_s$ID$, index=gpi_s$ID$)\n\ @@ -323,21 +328,45 @@ def liquid_step_$ID$():\n\ getLaplacian(laplacian=curvature_s$ID$, grid=phi_s$ID$)\n\ curvature_s$ID$.clamp(-1.0, 1.0)\n\ \n\ + #vel_s$ID$.updateToOmp()\n\ setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\ + #vel_s$ID$.updateFromOmp()\n\ + \n\ if using_viscosity_s$ID$:\n\ viscosity_s$ID$.setConst(viscosityValue_s$ID$)\n\ applyViscosity(flags=flags_s$ID$, phi=phi_s$ID$, vel=vel_s$ID$, volumes=volumes_s$ID$, viscosity=viscosity_s$ID$)\n\ \n\ + #vel_s$ID$.updateToOmp()\n\ setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\ + #vel_s$ID$.updateFromOmp()\n\ + \n\ if using_guiding_s$ID$:\n\ mantaMsg('Guiding and pressure')\n\ PD_fluid_guiding(vel=vel_s$ID$, velT=velT_s$ID$, flags=flags_s$ID$, phi=phi_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, weight=weightGuide_s$ID$, blurRadius=beta_sg$ID$, pressure=pressure_s$ID$, tau=tau_sg$ID$, sigma=sigma_sg$ID$, theta=theta_sg$ID$, zeroPressureFixing=domainClosed_s$ID$)\n\ else:\n\ - mantaMsg('Pressure')\n\ - solvePressure(flags=flags_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, obvel=obvel_s$ID$ if using_fractions_s$ID$ else None, zeroPressureFixing=domainClosed_s$ID$)\n\ + print('Pressure')\n\ + # openmp sync to device\n\ + flags_s$ID$.updateToOmp()\n\ + vel_s$ID$.updateToOmp()\n\ + print('Pressure 2')\n\ + \n\ + #solvePressure(flags=flags_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, obvel=obvel_s$ID$ if using_fractions_s$ID$ else None, zeroPressureFixing=domainClosed_s$ID$)\n\ + computePressureRhs(rhs=rhs_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, flags=flags_s$ID$, preconditioner=PcNone)\n\ + print('Pressure 21')\n\ + solvePressureSystem(rhs=rhs_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, flags=flags_s$ID$, useL2Norm=True, preconditioner=PcNone, residual=residual_s$ID$, search=search_s$ID$, A0=A0_s$ID$, Ai=Ai_s$ID$, Aj=Aj_s$ID$, Ak=Ak_s$ID$, tmp=tmp_s$ID$)\n\ + print('Pressure 22')\n\ + correctVelocity(vel=vel_s$ID$, pressure=pressure_s$ID$, flags=flags_s$ID$, preconditioner=PcNone)\n\ + \n\ + print('Pressure 3')\n\ + # openmp sync from device\n\ + pressure_s$ID$.updateFromOmp()\n\ + vel_s$ID$.updateFromOmp()\n\ \n\ extrapolateMACSimple(flags=flags_s$ID$, vel=vel_s$ID$, distance=4, intoObs=True if using_fractions_s$ID$ else False)\n\ + \n\ + #vel_s$ID$.updateToOmp()\n\ setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\ + #vel_s$ID$.updateFromOmp()\n\ \n\ if not using_fractions_s$ID$:\n\ extrapolateMACSimple(flags=flags_s$ID$, vel=vel_s$ID$)\n\ diff --git a/source/blender/makesdna/DNA_fluid_defaults.h b/source/blender/makesdna/DNA_fluid_defaults.h index 4135c4d40a8..ddb1248741e 100644 --- a/source/blender/makesdna/DNA_fluid_defaults.h +++ b/source/blender/makesdna/DNA_fluid_defaults.h @@ -76,7 +76,7 @@ .adapt_margin = 4, \ .adapt_res = 0, \ .adapt_threshold = 0.02f, \ - .maxres = 32, \ + .maxres = 64, \ .solver_res = 3, \ .border_collisions = 0, \ .flags = FLUID_DOMAIN_USE_DISSOLVE_LOG | FLUID_DOMAIN_USE_ADAPTIVE_TIME, \ diff --git a/source/creator/CMakeLists.txt b/source/creator/CMakeLists.txt index 47fb2642da1..cf05bdca72f 100644 --- a/source/creator/CMakeLists.txt +++ b/source/creator/CMakeLists.txt @@ -688,6 +688,13 @@ if(UNIX AND NOT APPLE) DESTINATION ${TARGETDIR_VER}/python/lib/python${PYTHON_VERSION}/site-packages ) endif() + if(WITH_MOD_FLUID) + message(STATUS "============== Here") + install( + PROGRAMS $<TARGET_FILE:extern_mantaflow> + DESTINATION ${TARGETDIR_VER}/python/lib/python${PYTHON_VERSION}/site-packages + ) + endif() elseif(WIN32) set(BLENDER_TEXT_FILES_DESTINATION ".") |