Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastián Barschkis <sebbas@sebbas.org>2021-09-13 16:03:52 +0300
committerSebastián Barschkis <sebbas@sebbas.org>2021-09-13 16:03:52 +0300
commit063ce7f550f1612ab0e34c4ecb4b57f8401b84b4 (patch)
tree53584b6c514510b0bab33a480b3ec85274b48a6b
parent4b06420e65040c642d2b0a7a1c9bf7515d3cec0c (diff)
Fluid: Initial changes for OpenMP GPU support (branch: fluid-mantaflow-gpu)
Contains basic support for OpenMP GPU offloading, that is, offloading of fluid KERNEL loops to the GPU. This branch offloads pressure and advection calls only - the two most expensive operations per step. In theory, though, any function can be offloaded. For now, this branch needs to be built with a compiler that supports Nvidia GPU offloading. Exact GPU models need to be specified via CMake.
-rw-r--r--CMakeLists.txt16
-rw-r--r--build_files/build_environment/cmake/llvm.cmake14
-rw-r--r--build_files/cmake/platform/platform_unix.cmake3
-rw-r--r--extern/curve_fit_nd/intern/curve_fit_corners_detect.c2
-rw-r--r--extern/curve_fit_nd/intern/curve_fit_cubic.c10
-rw-r--r--extern/curve_fit_nd/intern/curve_fit_inline.h4
-rw-r--r--extern/draco/draco/src/draco/animation/keyframe_animation.cc1
-rw-r--r--extern/mantaflow/CMakeLists.txt297
-rw-r--r--extern/mantaflow/UPDATE.sh18
-rw-r--r--extern/mantaflow/helper/util/rcmatrix.h2
-rw-r--r--extern/mantaflow/preprocessed/commonkernels.h853
-rw-r--r--extern/mantaflow/preprocessed/conjugategrad.cpp328
-rw-r--r--extern/mantaflow/preprocessed/conjugategrad.h599
-rw-r--r--extern/mantaflow/preprocessed/fastmarch.cpp289
-rw-r--r--extern/mantaflow/preprocessed/fileio/iogrids.cpp48
-rw-r--r--extern/mantaflow/preprocessed/fileio/ioutil.cpp2
-rw-r--r--extern/mantaflow/preprocessed/fileio/iovdb.cpp55
-rw-r--r--extern/mantaflow/preprocessed/fluidsolver.h1
-rw-r--r--extern/mantaflow/preprocessed/general.h33
-rw-r--r--extern/mantaflow/preprocessed/gitinfo.h2
-rw-r--r--extern/mantaflow/preprocessed/grid.cpp1538
-rw-r--r--extern/mantaflow/preprocessed/grid.h479
-rw-r--r--extern/mantaflow/preprocessed/grid.h.reg.cpp190
-rw-r--r--extern/mantaflow/preprocessed/grid4d.cpp624
-rw-r--r--extern/mantaflow/preprocessed/grid4d.h287
-rw-r--r--extern/mantaflow/preprocessed/kernel.h13
-rw-r--r--extern/mantaflow/preprocessed/levelset.cpp232
-rw-r--r--extern/mantaflow/preprocessed/levelset.h6
-rw-r--r--extern/mantaflow/preprocessed/mesh.cpp780
-rw-r--r--extern/mantaflow/preprocessed/mesh.h3
-rw-r--r--extern/mantaflow/preprocessed/movingobs.h1
-rw-r--r--extern/mantaflow/preprocessed/multigrid.cpp351
-rw-r--r--extern/mantaflow/preprocessed/noisefield.h1
-rw-r--r--extern/mantaflow/preprocessed/particle.cpp596
-rw-r--r--extern/mantaflow/preprocessed/particle.h194
-rw-r--r--extern/mantaflow/preprocessed/plugin/advection.cpp445
-rw-r--r--extern/mantaflow/preprocessed/plugin/apic.cpp32
-rw-r--r--extern/mantaflow/preprocessed/plugin/extforces.cpp398
-rw-r--r--extern/mantaflow/preprocessed/plugin/fire.cpp118
-rw-r--r--extern/mantaflow/preprocessed/plugin/flip.cpp600
-rw-r--r--extern/mantaflow/preprocessed/plugin/fluidguiding.cpp409
-rw-r--r--extern/mantaflow/preprocessed/plugin/initplugins.cpp488
-rw-r--r--extern/mantaflow/preprocessed/plugin/kepsilon.cpp92
-rw-r--r--extern/mantaflow/preprocessed/plugin/pressure.cpp538
-rw-r--r--extern/mantaflow/preprocessed/plugin/ptsplugins.cpp96
-rw-r--r--extern/mantaflow/preprocessed/plugin/secondaryparticles.cpp514
-rw-r--r--extern/mantaflow/preprocessed/plugin/surfaceturbulence.cpp394
-rw-r--r--extern/mantaflow/preprocessed/plugin/viscosity.cpp1428
-rw-r--r--extern/mantaflow/preprocessed/plugin/vortexplugins.cpp32
-rw-r--r--extern/mantaflow/preprocessed/plugin/waveletturbulence.cpp286
-rw-r--r--extern/mantaflow/preprocessed/plugin/waves.cpp161
-rw-r--r--extern/mantaflow/preprocessed/registration.cpp4
-rw-r--r--extern/mantaflow/preprocessed/shapes.cpp316
-rw-r--r--extern/mantaflow/preprocessed/shapes.h6
-rw-r--r--extern/mantaflow/preprocessed/test.cpp66
-rw-r--r--extern/mantaflow/preprocessed/turbulencepart.cpp24
-rw-r--r--extern/mantaflow/preprocessed/turbulencepart.h1
-rw-r--r--extern/mantaflow/preprocessed/vortexpart.cpp121
-rw-r--r--extern/mantaflow/preprocessed/vortexpart.h1
-rw-r--r--extern/mantaflow/preprocessed/vortexsheet.h1
-rw-r--r--intern/mantaflow/intern/MANTA_main.cpp5
-rw-r--r--intern/mantaflow/intern/strings/fluid_script.h31
-rw-r--r--intern/mantaflow/intern/strings/liquid_script.h33
-rw-r--r--source/blender/makesdna/DNA_fluid_defaults.h2
-rw-r--r--source/creator/CMakeLists.txt7
65 files changed, 6167 insertions, 8354 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 47712f0ac1e..a257068801a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -747,7 +747,7 @@ set_and_warn_dependency(WITH_TBB WITH_CYCLES OFF)
set_and_warn_dependency(WITH_TBB WITH_USD OFF)
set_and_warn_dependency(WITH_TBB WITH_OPENIMAGEDENOISE OFF)
set_and_warn_dependency(WITH_TBB WITH_OPENVDB OFF)
-set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID OFF)
+#set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID OFF)
# NanoVDB requires OpenVDB to convert the data structure
set_and_warn_dependency(WITH_OPENVDB WITH_NANOVDB OFF)
@@ -1239,15 +1239,29 @@ endif()
if(WITH_OPENMP)
if(NOT OPENMP_CUSTOM)
find_package(OpenMP)
+
+ list(APPEND CMAKE_MODULE_PATH "${LLVM_LIBPATH}/cmake/openmp")
+ find_package(OpenMPTarget REQUIRED NVPTX)
endif()
if(OPENMP_FOUND)
if(NOT WITH_OPENMP_STATIC)
+ message(STATUS "============ No Static OpenMP")
+ message(${OpenMPTarget_NVPTX_FLAGS})
+
string(APPEND CMAKE_C_FLAGS " ${OpenMP_C_FLAGS}")
string(APPEND CMAKE_CXX_FLAGS " ${OpenMP_CXX_FLAGS}")
string(APPEND CMAKE_EXE_LINKER_FLAGS " ${OpenMP_LINKER_FLAGS}")
string(APPEND CMAKE_MODULE_LINKER_FLAGS " ${OpenMP_LINKER_FLAGS}")
+
+ string(APPEND CMAKE_C_FLAGS " -gline-tables-only -fopenmp-targets=nvptx64-nvidia-cuda")
+ string(APPEND CMAKE_CXX_FLAGS " -gline-tables-only -fopenmp-targets=nvptx64-nvidia-cuda")
+ string(APPEND CMAKE_EXE_LINKER_FLAGS " -fopenmp-targets=nvptx64-nvidia-cuda")
+ string(APPEND CMAKE_MODULE_LINKER_FLAGS " -fopenmp-targets=nvptx64-nvidia-cuda")
+
+ set(PLATFORM_LINKLIBS "${PLATFORM_LINKLIBS};-fopenmp=libomp;-fopenmp-targets=nvptx64-nvidia-cuda")
else()
+ message(STATUS "============= Static OpenMP")
# Typically avoid adding flags as defines but we can't
# pass OpenMP flags to the linker for static builds, meaning
# we can't add any OpenMP related flags to CFLAGS variables
diff --git a/build_files/build_environment/cmake/llvm.cmake b/build_files/build_environment/cmake/llvm.cmake
index 7a8ce2ddfec..7f54e83c92e 100644
--- a/build_files/build_environment/cmake/llvm.cmake
+++ b/build_files/build_environment/cmake/llvm.cmake
@@ -30,6 +30,10 @@ if(APPLE)
set(BUILD_CLANG_TOOLS ON)
endif()
+if(UNIX AND NOT APPLE)
+ set(LLVM_BUILD_OPENMP ^^openmp)
+ set(LLVM_TARGETS ${LLVM_TARGETS} ^^NVPTX)
+endif()
set(LLVM_EXTRA_ARGS
-DLLVM_USE_CRT_RELEASE=MD
@@ -40,10 +44,18 @@ set(LLVM_EXTRA_ARGS
-DLLVM_ENABLE_TERMINFO=OFF
-DLLVM_BUILD_LLVM_C_DYLIB=OFF
-DLLVM_ENABLE_UNWIND_TABLES=OFF
- -DLLVM_ENABLE_PROJECTS=clang${LLVM_BUILD_CLANG_TOOLS_EXTRA}
+ -DLLVM_ENABLE_PROJECTS=clang${LLVM_BUILD_CLANG_TOOLS_EXTRA}${LLVM_BUILD_OPENMP}
${LLVM_XML2_ARGS}
)
+if(UNIX AND NOT APPLE)
+ list(APPEND LLVM_EXTRA_ARGS
+ -DCLANG_OPENMP_NVPTX_DEFAULT_ARCH=sm_61
+ -DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES=61
+ )
+endif()
+
+
if(WIN32)
set(LLVM_GENERATOR "Ninja")
else()
diff --git a/build_files/cmake/platform/platform_unix.cmake b/build_files/cmake/platform/platform_unix.cmake
index fc0c37e4c8b..065c0b48416 100644
--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@@ -63,7 +63,8 @@ if(EXISTS ${LIBDIR})
# OpenMP usually can't be statically linked into shared libraries,
# due to not being compiled with position independent code.
if(NOT WITH_PYTHON_MODULE)
- set(WITH_OPENMP_STATIC ON)
+ #message(STATUS "============= ENABLING Static OpenMP")
+ #set(WITH_OPENMP_STATIC ON)
endif()
set(Boost_NO_BOOST_CMAKE ON)
set(BOOST_ROOT ${LIBDIR}/boost)
diff --git a/extern/curve_fit_nd/intern/curve_fit_corners_detect.c b/extern/curve_fit_nd/intern/curve_fit_corners_detect.c
index 415ef40fdd5..b600a41f9cd 100644
--- a/extern/curve_fit_nd/intern/curve_fit_corners_detect.c
+++ b/extern/curve_fit_nd/intern/curve_fit_corners_detect.c
@@ -81,7 +81,7 @@ static double cos_vnvnvn(
normalize_vn_vnvn(dvec1, v1, v2, dims);
double d = dot_vnvn(dvec0, dvec1, dims);
/* sanity check */
- d = max(-1.0, min(1.0, d));
+ d = maxV(-1.0, minV(1.0, d));
return d;
}
diff --git a/extern/curve_fit_nd/intern/curve_fit_cubic.c b/extern/curve_fit_nd/intern/curve_fit_cubic.c
index 47c5344c821..65042d7dbef 100644
--- a/extern/curve_fit_nd/intern/curve_fit_cubic.c
+++ b/extern/curve_fit_nd/intern/curve_fit_cubic.c
@@ -29,9 +29,9 @@
* \ingroup curve_fit
*/
-#ifdef _MSC_VER
+//#ifdef _MSC_VER
# define _USE_MATH_DEFINES
-#endif
+//#endif
#include <math.h>
#include <float.h>
@@ -456,7 +456,7 @@ static double points_calc_circumference_factor(
const double len_tangent = dot < 0.0 ? len_vnvn(tan_l, tan_r, dims) : len_negated_vnvn(tan_l, tan_r, dims);
if (len_tangent > DBL_EPSILON) {
/* only clamp to avoid precision error */
- double angle = acos(max(-fabs(dot), -1.0));
+ double angle = acos(maxV(-fabs(dot), -1.0));
/* Angle may be less than the length when the tangents define >180 degrees of the circle,
* (tangents that point away from each other).
* We could try support this but will likely cause extreme >1 scales which could cause other issues. */
@@ -607,7 +607,7 @@ static void cubic_from_points_offset_fallback(
for (uint k = 0; k < 2; k++) {
sub_vn_vnvn(tmp, p0, pt, dims);
project_vn_vnvn_normalized(tmp, tmp, a[k], dims);
- dists[k] = max(dists[k], dot_vnvn(tmp, a[k], dims));
+ dists[k] = maxV(dists[k], dot_vnvn(tmp, a[k], dims));
}
}
@@ -796,7 +796,7 @@ static void cubic_from_points(
dist_sq_test += sq((pt[j] - center[j]) * clamp_scale);
}
#endif
- dist_sq_max = max(dist_sq_max, dist_sq_test);
+ dist_sq_max = maxV(dist_sq_max, dist_sq_test);
}
}
diff --git a/extern/curve_fit_nd/intern/curve_fit_inline.h b/extern/curve_fit_nd/intern/curve_fit_inline.h
index f9eaa4c647c..6b47d3c12b0 100644
--- a/extern/curve_fit_nd/intern/curve_fit_inline.h
+++ b/extern/curve_fit_nd/intern/curve_fit_inline.h
@@ -45,12 +45,12 @@ MINLINE double sq(const double d)
}
#ifndef _MSC_VER
-MINLINE double min(const double a, const double b)
+MINLINE double minV(const double a, const double b)
{
return b < a ? b : a;
}
-MINLINE double max(const double a, const double b)
+MINLINE double maxV(const double a, const double b)
{
return a < b ? b : a;
}
diff --git a/extern/draco/draco/src/draco/animation/keyframe_animation.cc b/extern/draco/draco/src/draco/animation/keyframe_animation.cc
index eaf94a3305d..e602140ea0f 100644
--- a/extern/draco/draco/src/draco/animation/keyframe_animation.cc
+++ b/extern/draco/draco/src/draco/animation/keyframe_animation.cc
@@ -13,6 +13,7 @@
// limitations under the License.
//
#include "draco/animation/keyframe_animation.h"
+#include <iostream>
namespace draco {
diff --git a/extern/mantaflow/CMakeLists.txt b/extern/mantaflow/CMakeLists.txt
index 9b047eb1a3e..cdb3afcff4a 100644
--- a/extern/mantaflow/CMakeLists.txt
+++ b/extern/mantaflow/CMakeLists.txt
@@ -55,26 +55,30 @@ if(NOT WITH_MANTA_DEPENDENCIES)
add_definitions(-DNO_CNPY=1)
endif()
-set(MANTA_HLP
- helper
-)
-set(MANTA_PP
- preprocessed
-)
if(WITH_MANTA_DEPENDENCIES)
set(MANTA_DEP
dependencies
)
endif()
-if(WITH_TBB)
- add_definitions(-DTBB=1)
+if(WITH_OPENMP)
+ add_definitions(-DOPENMP=1)
+
+ if(WITH_OPENMP_STATIC)
+ list(APPEND LIB
+ ${OpenMP_LIBRARIES}
+ )
+ endif()
+endif()
+
+if(WITH_OPENMP)
+ add_definitions(-DOPENMP_OFFLOAD=1)
endif()
if(WITH_OPENVDB)
add_definitions(-DOPENVDB=1)
# OpenVDB headers use deprecated TBB headers, silence warning.
- add_definitions(-DTBB_SUPPRESS_DEPRECATED_MESSAGES=1)
+ #add_definitions(-DTBB_SUPPRESS_DEPRECATED_MESSAGES=1)
endif()
if(WITH_OPENVDB_BLOSC)
@@ -90,12 +94,12 @@ if(WITH_MANTA_NUMPY AND WITH_PYTHON_NUMPY)
endif()
set(INC
- ${MANTA_PP}
- ${MANTA_PP}/fileio
- ${MANTA_PP}/python
- ${MANTA_PP}/plugin
- ${MANTA_HLP}/pwrapper
- ${MANTA_HLP}/util
+ preprocessed
+ preprocessed/fileio
+ preprocessed/python
+ preprocessed/plugin
+ helper/pwrapper
+ helper/util
)
if(WITH_MANTA_DEPENDENCIES)
@@ -115,15 +119,6 @@ if(WITH_MANTA_NUMPY AND WITH_PYTHON_NUMPY)
)
endif()
-if(WITH_TBB)
- list(APPEND INC_SYS
- ${TBB_INCLUDE_DIRS}
- )
- list(APPEND LIB
- ${TBB_LIBRARIES}
- )
-endif()
-
if(WITH_OPENVDB)
list(APPEND INC_SYS
${OPENVDB_INCLUDE_DIRS}
@@ -142,120 +137,120 @@ if(WITH_OPENVDB)
endif()
set(SRC
- ${MANTA_PP}/commonkernels.h
- ${MANTA_PP}/commonkernels.h.reg.cpp
- ${MANTA_PP}/conjugategrad.cpp
- ${MANTA_PP}/conjugategrad.h
- ${MANTA_PP}/conjugategrad.h.reg.cpp
- ${MANTA_PP}/edgecollapse.cpp
- ${MANTA_PP}/edgecollapse.h
- ${MANTA_PP}/edgecollapse.h.reg.cpp
- ${MANTA_PP}/fastmarch.cpp
- ${MANTA_PP}/fastmarch.h
- ${MANTA_PP}/fastmarch.h.reg.cpp
- ${MANTA_PP}/fileio/iogrids.cpp
- ${MANTA_PP}/fileio/iomeshes.cpp
- ${MANTA_PP}/fileio/ioparticles.cpp
- ${MANTA_PP}/fileio/ioutil.cpp
- ${MANTA_PP}/fileio/iovdb.cpp
- ${MANTA_PP}/fileio/mantaio.cpp
- ${MANTA_PP}/fileio/mantaio.h
- ${MANTA_PP}/fileio/mantaio.h.reg.cpp
- ${MANTA_PP}/fluidsolver.cpp
- ${MANTA_PP}/fluidsolver.h
- ${MANTA_PP}/fluidsolver.h.reg.cpp
- ${MANTA_PP}/general.cpp
- ${MANTA_PP}/general.h
- ${MANTA_PP}/general.h.reg.cpp
- ${MANTA_PP}/gitinfo.h
- ${MANTA_PP}/grid.cpp
- ${MANTA_PP}/grid.h
- ${MANTA_PP}/grid.h.reg.cpp
- ${MANTA_PP}/grid4d.cpp
- ${MANTA_PP}/grid4d.h
- ${MANTA_PP}/grid4d.h.reg.cpp
- ${MANTA_PP}/kernel.cpp
- ${MANTA_PP}/kernel.h
- ${MANTA_PP}/kernel.h.reg.cpp
- ${MANTA_PP}/levelset.cpp
- ${MANTA_PP}/levelset.h
- ${MANTA_PP}/levelset.h.reg.cpp
- ${MANTA_PP}/mesh.cpp
- ${MANTA_PP}/mesh.h
- ${MANTA_PP}/mesh.h.reg.cpp
- ${MANTA_PP}/movingobs.cpp
- ${MANTA_PP}/movingobs.h
- ${MANTA_PP}/movingobs.h.reg.cpp
- ${MANTA_PP}/multigrid.cpp
- ${MANTA_PP}/multigrid.h
- ${MANTA_PP}/multigrid.h.reg.cpp
- ${MANTA_PP}/noisefield.cpp
- ${MANTA_PP}/noisefield.h
- ${MANTA_PP}/noisefield.h.reg.cpp
- ${MANTA_PP}/particle.cpp
- ${MANTA_PP}/particle.h
- ${MANTA_PP}/particle.h.reg.cpp
- ${MANTA_PP}/plugin/advection.cpp
- ${MANTA_PP}/plugin/apic.cpp
- ${MANTA_PP}/plugin/extforces.cpp
- ${MANTA_PP}/plugin/fire.cpp
- ${MANTA_PP}/plugin/flip.cpp
- ${MANTA_PP}/plugin/fluidguiding.cpp
- ${MANTA_PP}/plugin/initplugins.cpp
- ${MANTA_PP}/plugin/kepsilon.cpp
- ${MANTA_PP}/plugin/meshplugins.cpp
- ${MANTA_PP}/plugin/pressure.cpp
- ${MANTA_PP}/plugin/ptsplugins.cpp
- ${MANTA_PP}/plugin/secondaryparticles.cpp
- ${MANTA_PP}/plugin/surfaceturbulence.cpp
- ${MANTA_PP}/plugin/viscosity.cpp
- ${MANTA_PP}/plugin/vortexplugins.cpp
- ${MANTA_PP}/plugin/waveletturbulence.cpp
- ${MANTA_PP}/plugin/waves.cpp
- ${MANTA_PP}/python/defines.py
- ${MANTA_PP}/python/defines.py.reg.cpp
- ${MANTA_PP}/registration.cpp
- ${MANTA_PP}/shapes.cpp
- ${MANTA_PP}/shapes.h
- ${MANTA_PP}/shapes.h.reg.cpp
- ${MANTA_PP}/test.cpp
- ${MANTA_PP}/timing.cpp
- ${MANTA_PP}/timing.h
- ${MANTA_PP}/timing.h.reg.cpp
- ${MANTA_PP}/turbulencepart.cpp
- ${MANTA_PP}/turbulencepart.h
- ${MANTA_PP}/turbulencepart.h.reg.cpp
- ${MANTA_PP}/vortexpart.cpp
- ${MANTA_PP}/vortexpart.h
- ${MANTA_PP}/vortexpart.h.reg.cpp
- ${MANTA_PP}/vortexsheet.cpp
- ${MANTA_PP}/vortexsheet.h
- ${MANTA_PP}/vortexsheet.h.reg.cpp
+ preprocessed/commonkernels.h
+ preprocessed/commonkernels.h.reg.cpp
+ preprocessed/conjugategrad.cpp
+ preprocessed/conjugategrad.h
+ preprocessed/conjugategrad.h.reg.cpp
+ preprocessed/edgecollapse.cpp
+ preprocessed/edgecollapse.h
+ preprocessed/edgecollapse.h.reg.cpp
+ preprocessed/fastmarch.cpp
+ preprocessed/fastmarch.h
+ preprocessed/fastmarch.h.reg.cpp
+ preprocessed/fileio/iogrids.cpp
+ preprocessed/fileio/iomeshes.cpp
+ preprocessed/fileio/ioparticles.cpp
+ preprocessed/fileio/ioutil.cpp
+ preprocessed/fileio/iovdb.cpp
+ preprocessed/fileio/mantaio.cpp
+ preprocessed/fileio/mantaio.h
+ preprocessed/fileio/mantaio.h.reg.cpp
+ preprocessed/fluidsolver.cpp
+ preprocessed/fluidsolver.h
+ preprocessed/fluidsolver.h.reg.cpp
+ preprocessed/general.cpp
+ preprocessed/general.h
+ preprocessed/general.h.reg.cpp
+ preprocessed/gitinfo.h
+ preprocessed/grid.cpp
+ preprocessed/grid.h
+ preprocessed/grid.h.reg.cpp
+ preprocessed/grid4d.cpp
+ preprocessed/grid4d.h
+ preprocessed/grid4d.h.reg.cpp
+ preprocessed/kernel.cpp
+ preprocessed/kernel.h
+ preprocessed/kernel.h.reg.cpp
+ preprocessed/levelset.cpp
+ preprocessed/levelset.h
+ preprocessed/levelset.h.reg.cpp
+ preprocessed/mesh.cpp
+ preprocessed/mesh.h
+ preprocessed/mesh.h.reg.cpp
+ preprocessed/movingobs.cpp
+ preprocessed/movingobs.h
+ preprocessed/movingobs.h.reg.cpp
+ preprocessed/multigrid.cpp
+ preprocessed/multigrid.h
+ preprocessed/multigrid.h.reg.cpp
+ preprocessed/noisefield.cpp
+ preprocessed/noisefield.h
+ preprocessed/noisefield.h.reg.cpp
+ preprocessed/particle.cpp
+ preprocessed/particle.h
+ preprocessed/particle.h.reg.cpp
+ preprocessed/plugin/advection.cpp
+ preprocessed/plugin/apic.cpp
+ preprocessed/plugin/extforces.cpp
+ preprocessed/plugin/fire.cpp
+ preprocessed/plugin/flip.cpp
+ preprocessed/plugin/fluidguiding.cpp
+ preprocessed/plugin/initplugins.cpp
+ preprocessed/plugin/kepsilon.cpp
+ preprocessed/plugin/meshplugins.cpp
+ preprocessed/plugin/pressure.cpp
+ preprocessed/plugin/ptsplugins.cpp
+ preprocessed/plugin/secondaryparticles.cpp
+ preprocessed/plugin/surfaceturbulence.cpp
+# preprocessed/plugin/viscosity.cpp
+ preprocessed/plugin/vortexplugins.cpp
+ preprocessed/plugin/waveletturbulence.cpp
+ preprocessed/plugin/waves.cpp
+ preprocessed/python/defines.py
+ preprocessed/python/defines.py.reg.cpp
+ preprocessed/registration.cpp
+ preprocessed/shapes.cpp
+ preprocessed/shapes.h
+ preprocessed/shapes.h.reg.cpp
+ preprocessed/test.cpp
+ preprocessed/timing.cpp
+ preprocessed/timing.h
+ preprocessed/timing.h.reg.cpp
+ preprocessed/turbulencepart.cpp
+ preprocessed/turbulencepart.h
+ preprocessed/turbulencepart.h.reg.cpp
+ preprocessed/vortexpart.cpp
+ preprocessed/vortexpart.h
+ preprocessed/vortexpart.h.reg.cpp
+ preprocessed/vortexsheet.cpp
+ preprocessed/vortexsheet.h
+ preprocessed/vortexsheet.h.reg.cpp
- ${MANTA_HLP}/pwrapper/manta.h
- ${MANTA_HLP}/pwrapper/pclass.cpp
- ${MANTA_HLP}/pwrapper/pclass.h
- ${MANTA_HLP}/pwrapper/pconvert.cpp
- ${MANTA_HLP}/pwrapper/pconvert.h
- ${MANTA_HLP}/pwrapper/pvec3.cpp
- ${MANTA_HLP}/pwrapper/pythonInclude.h
- ${MANTA_HLP}/pwrapper/registry.cpp
- ${MANTA_HLP}/pwrapper/registry.h
- ${MANTA_HLP}/util/integrator.h
- ${MANTA_HLP}/util/interpol.h
- ${MANTA_HLP}/util/interpolHigh.h
- ${MANTA_HLP}/util/matrixbase.h
- ${MANTA_HLP}/util/mcubes.h
- ${MANTA_HLP}/util/quaternion.h
- ${MANTA_HLP}/util/randomstream.h
- ${MANTA_HLP}/util/rcmatrix.h
- ${MANTA_HLP}/util/simpleimage.cpp
- ${MANTA_HLP}/util/simpleimage.h
- ${MANTA_HLP}/util/solvana.h
- ${MANTA_HLP}/util/vector4d.cpp
- ${MANTA_HLP}/util/vector4d.h
- ${MANTA_HLP}/util/vectorbase.cpp
- ${MANTA_HLP}/util/vectorbase.h
+ helper/pwrapper/manta.h
+ helper/pwrapper/pclass.cpp
+ helper/pwrapper/pclass.h
+ helper/pwrapper/pconvert.cpp
+ helper/pwrapper/pconvert.h
+ helper/pwrapper/pvec3.cpp
+ helper/pwrapper/pythonInclude.h
+ helper/pwrapper/registry.cpp
+ helper/pwrapper/registry.h
+ helper/util/integrator.h
+ helper/util/interpol.h
+ helper/util/interpolHigh.h
+ helper/util/matrixbase.h
+ helper/util/mcubes.h
+ helper/util/quaternion.h
+ helper/util/randomstream.h
+ helper/util/rcmatrix.h
+ helper/util/simpleimage.cpp
+ helper/util/simpleimage.h
+ helper/util/solvana.h
+ helper/util/vector4d.cpp
+ helper/util/vector4d.h
+ helper/util/vectorbase.cpp
+ helper/util/vectorbase.h
)
if(WITH_MANTA_DEPENDENCIES)
@@ -266,16 +261,34 @@ if(WITH_MANTA_DEPENDENCIES)
endif()
if(WITH_MANTA_NUMPY AND WITH_PYTHON_NUMPY)
list(APPEND SRC
- ${MANTA_PP}/plugin/numpyconvert.cpp
- ${MANTA_PP}/plugin/tfplugins.cpp
- ${MANTA_HLP}/pwrapper/numpyWrap.cpp
- ${MANTA_HLP}/pwrapper/numpyWrap.h
+ preprocessed/plugin/numpyconvert.cpp
+ preprocessed/plugin/tfplugins.cpp
+ helper/pwrapper/numpyWrap.cpp
+ helper/pwrapper/numpyWrap.h
)
endif()
set(LIB
${PYTHON_LINKFLAGS}
${PYTHON_LIBRARIES}
+ ${OPENVDB_LIBRARIES}
)
-blender_add_lib(extern_mantaflow "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
+#blender_add_lib(extern_mantaflow "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
+
+add_library(extern_mantaflow SHARED "${SRC}")
+
+include_directories(preprocessed)
+include_directories(preprocessed/fileio)
+include_directories(preprocessed/python)
+include_directories(preprocessed/plugin)
+include_directories(helper/pwrapper)
+include_directories(helper/util)
+include_directories(${PYTHON_INCLUDE_DIRS})
+include_directories(${ZLIB_INCLUDE_DIRS})
+include_directories(${OPENVDB_INCLUDE_DIRS})
+
+#target_include_directories(extern_mantaflow PRIVATE "${INC}")
+target_link_libraries(extern_mantaflow PRIVATE "${LIB}")
+
+blender_source_group(extern_mantaflow "${SRC}")
diff --git a/extern/mantaflow/UPDATE.sh b/extern/mantaflow/UPDATE.sh
index 1158ff13455..83895d8c84d 100644
--- a/extern/mantaflow/UPDATE.sh
+++ b/extern/mantaflow/UPDATE.sh
@@ -7,8 +7,11 @@
# ==================== 1) ENVIRONMENT SETUP =============================================
# YOUR INSTALLATION PATHS GO HERE:
-MANTA_INSTALLATION=/Users/sebbas/Developer/Mantaflow/mantaflowDevelop
-BLENDER_INSTALLATION=/Users/sebbas/Developer/Blender
+MANTA_INSTALLATION=/home/sebbas/Developer/Mantaflow
+BLENDER_INSTALLATION=/home/sebbas/Developer/Blender
+
+CC=/home/sebbas/Developer/LLVM-Project/install/bin/clang
+CXX=/home/sebbas/Developer/LLVM-Project/install/bin/clang++
# Try to check out Mantaflow repository before building?
CLEAN_REPOSITORY=0
@@ -20,8 +23,13 @@ WITH_DEPENDENCIES=0
USE_NUMPY=0
# Choose which multithreading platform to use for Mantaflow preprocessing
-USE_OMP=0
-USE_TBB=1
+USE_OMP=1
+USE_TBB=0
+
+# Use OpenMP offloading too?
+if [[ "$USE_OMP" -eq "1" ]]; then
+ USE_OMP_OFFLOAD=1
+fi
if [[ "$USE_OMP" -eq "1" && "$USE_TBB" -eq "1" ]]; then
echo "Cannot build Mantaflow for OpenMP and TBB at the same time"
@@ -56,7 +64,7 @@ fi
MANTA_BUILD_PATH=$MANTA_INSTALLATION/build_blender/
mkdir -p $MANTA_BUILD_PATH
cd $MANTA_BUILD_PATH
-cmake ../mantaflowgit -DGUI=0 -DOPENMP=$USE_OMP -DTBB=$USE_TBB -DBLENDER=1 -DPREPDEBUG=1 -DNUMPY=$USE_NUMPY && make -j8
+cmake ../mantaflowgit -DGUI=0 -DOPENMP=$USE_OMP -DTBB=$USE_TBB -DOPENMP_OFFLOAD=$USE_OMP_OFFLOAD -DBLENDER=1 -DPREPDEBUG=1 -DNUMPY=$USE_NUMPY -DPYTHON_VERSION=3 -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX && make -j8
# ==================== 3) COPY MANTAFLOW FILES TO BLENDER ROOT ===========================
diff --git a/extern/mantaflow/helper/util/rcmatrix.h b/extern/mantaflow/helper/util/rcmatrix.h
index 330fd1f64f7..f1f0efe6416 100644
--- a/extern/mantaflow/helper/util/rcmatrix.h
+++ b/extern/mantaflow/helper/util/rcmatrix.h
@@ -1035,7 +1035,7 @@ template<class N, class T> struct RCFixedMatrix {
typedef RCMatrix<int, Real> Matrix;
typedef RCFixedMatrix<int, Real> FixedMatrix;
-}
+} // namespace Manta
#undef parallel_for
#undef parallel_end
diff --git a/extern/mantaflow/preprocessed/commonkernels.h b/extern/mantaflow/preprocessed/commonkernels.h
index 7fa6f185146..7ac13fbdfc4 100644
--- a/extern/mantaflow/preprocessed/commonkernels.h
+++ b/extern/mantaflow/preprocessed/commonkernels.h
@@ -34,7 +34,7 @@ struct InvertCheckFluid : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, const FlagGrid &flags, Grid<Real> &grid) const
+ inline void op(IndexInt idx, const FlagGrid &flags, Grid<Real> &grid)
{
if (flags.isFluid(idx) && grid[idx] > 0)
grid[idx] = 1.0 / grid[idx];
@@ -49,21 +49,17 @@ struct InvertCheckFluid : public KernelBase {
return grid;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel InvertCheckFluid ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, flags, grid);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, flags, grid);
+ }
}
const FlagGrid &flags;
Grid<Real> &grid;
@@ -77,9 +73,9 @@ struct GridSumSqr : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, const Grid<Real> &grid, double &sum)
+ inline void op(int i, int j, int k, const Grid<Real> &grid, double &sum)
{
- sum += square((double)grid[idx]);
+ sum += square((double)grid(i, j, k));
}
inline operator double()
{
@@ -94,28 +90,37 @@ struct GridSumSqr : public KernelBase {
return grid;
}
typedef Grid<Real> type0;
- void runMessage()
- {
- debMsg("Executing kernel GridSumSqr ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, grid, sum);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- GridSumSqr(GridSumSqr &o, tbb::split) : KernelBase(o), grid(o.grid), sum(0)
- {
- }
- void join(const GridSumSqr &o)
- {
- sum += o.sum;
+ const int _maxX = maxX;
+ const int _maxY = maxY;
+ if (maxZ > 1) {
+ const Grid<Real> &grid = getArg0();
+#pragma omp target teams distribute parallel for reduction(+ : sum) collapse(2) schedule(static, 1)
+ {
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, sum);
+ }
+ {
+ this->sum = sum;
+ }
+ }
+ else {
+ const int k = 0;
+ const Grid<Real> &grid = getArg0();
+#pragma omp target teams distribute parallel for reduction(+ : sum) collapse(1) schedule(static, 1)
+ {
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, sum);
+ }
+ {
+ this->sum = sum;
+ }
+ }
}
const Grid<Real> &grid;
double sum;
@@ -129,7 +134,7 @@ struct CurlOp : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const Grid<Vec3> &grid, Grid<Vec3> &dst) const
+ inline void op(int i, int j, int k, const Grid<Vec3> &grid, Grid<Vec3> &dst)
{
Vec3 v = Vec3(0.,
0.,
@@ -153,37 +158,35 @@ struct CurlOp : public KernelBase {
return dst;
}
typedef Grid<Vec3> type1;
- void runMessage()
- {
- debMsg("Executing kernel CurlOp ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, grid, dst);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, grid, dst);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, grid, dst);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, grid, dst);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const Grid<Vec3> &grid;
Grid<Vec3> &dst;
};
@@ -197,7 +200,7 @@ struct DivergenceOpMAC : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Real> &div, const MACGrid &grid) const
+ inline void op(int i, int j, int k, Grid<Real> &div, const MACGrid &grid)
{
Vec3 del = Vec3(grid(i + 1, j, k).x, grid(i, j + 1, k).y, 0.) - grid(i, j, k);
if (grid.is3D())
@@ -216,37 +219,35 @@ struct DivergenceOpMAC : public KernelBase {
return grid;
}
typedef MACGrid type1;
- void runMessage()
- {
- debMsg("Executing kernel DivergenceOpMAC ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, div, grid);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, div, grid);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, div, grid);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, div, grid);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<Real> &div;
const MACGrid &grid;
};
@@ -259,7 +260,7 @@ struct GradientOpMAC : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, MACGrid &gradient, const Grid<Real> &grid) const
+ inline void op(int i, int j, int k, MACGrid &gradient, const Grid<Real> &grid)
{
Vec3 grad = (Vec3(grid(i, j, k)) - Vec3(grid(i - 1, j, k), grid(i, j - 1, k), 0.));
if (grid.is3D())
@@ -278,37 +279,35 @@ struct GradientOpMAC : public KernelBase {
return grid;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel GradientOpMAC ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, gradient, grid);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, gradient, grid);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, gradient, grid);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, gradient, grid);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
MACGrid &gradient;
const Grid<Real> &grid;
};
@@ -321,7 +320,7 @@ struct GradientOp : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Vec3> &gradient, const Grid<Real> &grid) const
+ inline void op(int i, int j, int k, Grid<Vec3> &gradient, const Grid<Real> &grid)
{
Vec3 grad = 0.5 * Vec3(grid(i + 1, j, k) - grid(i - 1, j, k),
grid(i, j + 1, k) - grid(i, j - 1, k),
@@ -340,37 +339,35 @@ struct GradientOp : public KernelBase {
return grid;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel GradientOp ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, gradient, grid);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, gradient, grid);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, gradient, grid);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, gradient, grid);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<Vec3> &gradient;
const Grid<Real> &grid;
};
@@ -383,7 +380,7 @@ struct LaplaceOp : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Real> &laplace, const Grid<Real> &grid) const
+ inline void op(int i, int j, int k, Grid<Real> &laplace, const Grid<Real> &grid)
{
laplace(i, j, k) = grid(i + 1, j, k) - 2.0 * grid(i, j, k) + grid(i - 1, j, k);
laplace(i, j, k) += grid(i, j + 1, k) - 2.0 * grid(i, j, k) + grid(i, j - 1, k);
@@ -401,37 +398,35 @@ struct LaplaceOp : public KernelBase {
return grid;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel LaplaceOp ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, laplace, grid);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, laplace, grid);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, laplace, grid);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, laplace, grid);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<Real> &laplace;
const Grid<Real> &grid;
};
@@ -444,7 +439,7 @@ struct CurvatureOp : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Real> &curv, const Grid<Real> &grid, const Real h) const
+ inline void op(int i, int j, int k, Grid<Real> &curv, const Grid<Real> &grid, const Real h)
{
const Real over_h = 1.0 / h;
const Real x = 0.5 * (grid(i + 1, j, k) - grid(i - 1, j, k)) * over_h;
@@ -492,37 +487,35 @@ struct CurvatureOp : public KernelBase {
return h;
}
typedef Real type2;
- void runMessage()
- {
- debMsg("Executing kernel CurvatureOp ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, curv, grid, h);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, curv, grid, h);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, curv, grid, h);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, curv, grid, h);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<Real> &curv;
const Grid<Real> &grid;
const Real h;
@@ -536,7 +529,7 @@ struct GetShiftedComponent : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const Grid<Vec3> &grid, Grid<Real> &comp, int dim) const
+ inline void op(int i, int j, int k, const Grid<Vec3> &grid, Grid<Real> &comp, int dim)
{
Vec3i ishift(i, j, k);
ishift[dim]--;
@@ -557,37 +550,35 @@ struct GetShiftedComponent : public KernelBase {
return dim;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel GetShiftedComponent ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, grid, comp, dim);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, grid, comp, dim);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, grid, comp, dim);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, grid, comp, dim);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const Grid<Vec3> &grid;
Grid<Real> &comp;
int dim;
@@ -602,7 +593,7 @@ struct GetComponent : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, const Grid<Vec3> &grid, Grid<Real> &comp, int dim) const
+ inline void op(IndexInt idx, const Grid<Vec3> &grid, Grid<Real> &comp, int dim)
{
comp[idx] = grid[idx][dim];
}
@@ -621,21 +612,17 @@ struct GetComponent : public KernelBase {
return dim;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel GetComponent ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, grid, comp, dim);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, grid, comp, dim);
+ }
}
const Grid<Vec3> &grid;
Grid<Real> &comp;
@@ -650,7 +637,7 @@ struct GridNorm : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<Real> &n, const Grid<Vec3> &grid) const
+ inline void op(IndexInt idx, Grid<Real> &n, const Grid<Vec3> &grid)
{
n[idx] = norm(grid[idx]);
}
@@ -664,21 +651,17 @@ struct GridNorm : public KernelBase {
return grid;
}
typedef Grid<Vec3> type1;
- void runMessage()
- {
- debMsg("Executing kernel GridNorm ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, n, grid);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, n, grid);
+ }
}
Grid<Real> &n;
const Grid<Vec3> &grid;
@@ -693,7 +676,7 @@ struct SetComponent : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<Vec3> &grid, const Grid<Real> &comp, int dim) const
+ inline void op(IndexInt idx, Grid<Vec3> &grid, const Grid<Real> &comp, int dim)
{
grid[idx][dim] = comp[idx];
}
@@ -712,21 +695,17 @@ struct SetComponent : public KernelBase {
return dim;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel SetComponent ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, grid, comp, dim);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, grid, comp, dim);
+ }
}
Grid<Vec3> &grid;
const Grid<Real> &comp;
@@ -742,7 +721,7 @@ struct GetCentered : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Vec3> &center, const MACGrid &vel) const
+ inline void op(int i, int j, int k, Grid<Vec3> &center, const MACGrid &vel)
{
Vec3 v = 0.5 * (vel(i, j, k) + Vec3(vel(i + 1, j, k).x, vel(i, j + 1, k).y, 0.));
if (vel.is3D())
@@ -761,37 +740,35 @@ struct GetCentered : public KernelBase {
return vel;
}
typedef MACGrid type1;
- void runMessage()
- {
- debMsg("Executing kernel GetCentered ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, center, vel);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, center, vel);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, center, vel);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, center, vel);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<Vec3> &center;
const MACGrid &vel;
};
@@ -804,7 +781,7 @@ struct GetMAC : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, MACGrid &vel, const Grid<Vec3> &center) const
+ inline void op(int i, int j, int k, MACGrid &vel, const Grid<Vec3> &center)
{
Vec3 v = 0.5 * (center(i, j, k) + Vec3(center(i - 1, j, k).x, center(i, j - 1, k).y, 0.));
if (vel.is3D())
@@ -823,37 +800,35 @@ struct GetMAC : public KernelBase {
return center;
}
typedef Grid<Vec3> type1;
- void runMessage()
- {
- debMsg("Executing kernel GetMAC ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, center);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, center);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, center);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, center);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
MACGrid &vel;
const Grid<Vec3> &center;
};
@@ -866,7 +841,7 @@ struct FillInBoundary : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Vec3> &grid, int g) const
+ inline void op(int i, int j, int k, Grid<Vec3> &grid, int g)
{
if (i == 0)
grid(i, j, k) = grid(i + 1, j, k);
@@ -891,37 +866,35 @@ struct FillInBoundary : public KernelBase {
return g;
}
typedef int type1;
- void runMessage()
- {
- debMsg("Executing kernel FillInBoundary ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, g);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, g);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, g);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, g);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<Vec3> &grid;
int g;
};
@@ -939,7 +912,7 @@ struct kn_conv_mex_in_to_MAC : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const double *p_lin_array, MACGrid *p_result) const
+ inline void op(int i, int j, int k, const double *p_lin_array, MACGrid *p_result)
{
int ijk = i + j * p_result->getSizeX() + k * p_result->getSizeX() * p_result->getSizeY();
const int n = p_result->getSizeX() * p_result->getSizeY() * p_result->getSizeZ();
@@ -958,37 +931,35 @@ struct kn_conv_mex_in_to_MAC : public KernelBase {
return p_result;
}
typedef MACGrid type1;
- void runMessage()
- {
- debMsg("Executing kernel kn_conv_mex_in_to_MAC ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_lin_array, p_result);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_lin_array, p_result);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_lin_array, p_result);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_lin_array, p_result);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const double *p_lin_array;
MACGrid *p_result;
};
@@ -1000,7 +971,7 @@ struct kn_conv_MAC_to_mex_out : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const MACGrid *p_mac, double *p_result) const
+ inline void op(int i, int j, int k, const MACGrid *p_mac, double *p_result)
{
int ijk = i + j * p_mac->getSizeX() + k * p_mac->getSizeX() * p_mac->getSizeY();
const int n = p_mac->getSizeX() * p_mac->getSizeY() * p_mac->getSizeZ();
@@ -1019,37 +990,35 @@ struct kn_conv_MAC_to_mex_out : public KernelBase {
return p_result;
}
typedef double type1;
- void runMessage()
- {
- debMsg("Executing kernel kn_conv_MAC_to_mex_out ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_mac, p_result);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_mac, p_result);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_mac, p_result);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_mac, p_result);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const MACGrid *p_mac;
double *p_result;
};
@@ -1063,7 +1032,7 @@ struct kn_conv_mex_in_to_Vec3 : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const double *p_lin_array, Grid<Vec3> *p_result) const
+ inline void op(int i, int j, int k, const double *p_lin_array, Grid<Vec3> *p_result)
{
int ijk = i + j * p_result->getSizeX() + k * p_result->getSizeX() * p_result->getSizeY();
const int n = p_result->getSizeX() * p_result->getSizeY() * p_result->getSizeZ();
@@ -1082,37 +1051,35 @@ struct kn_conv_mex_in_to_Vec3 : public KernelBase {
return p_result;
}
typedef Grid<Vec3> type1;
- void runMessage()
- {
- debMsg("Executing kernel kn_conv_mex_in_to_Vec3 ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_lin_array, p_result);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_lin_array, p_result);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_lin_array, p_result);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_lin_array, p_result);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const double *p_lin_array;
Grid<Vec3> *p_result;
};
@@ -1124,7 +1091,7 @@ struct kn_conv_Vec3_to_mex_out : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const Grid<Vec3> *p_Vec3, double *p_result) const
+ inline void op(int i, int j, int k, const Grid<Vec3> *p_Vec3, double *p_result)
{
int ijk = i + j * p_Vec3->getSizeX() + k * p_Vec3->getSizeX() * p_Vec3->getSizeY();
const int n = p_Vec3->getSizeX() * p_Vec3->getSizeY() * p_Vec3->getSizeZ();
@@ -1143,37 +1110,35 @@ struct kn_conv_Vec3_to_mex_out : public KernelBase {
return p_result;
}
typedef double type1;
- void runMessage()
- {
- debMsg("Executing kernel kn_conv_Vec3_to_mex_out ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_Vec3, p_result);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_Vec3, p_result);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_Vec3, p_result);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_Vec3, p_result);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const Grid<Vec3> *p_Vec3;
double *p_result;
};
@@ -1187,7 +1152,7 @@ struct kn_conv_mex_in_to_Real : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const double *p_lin_array, Grid<Real> *p_result) const
+ inline void op(int i, int j, int k, const double *p_lin_array, Grid<Real> *p_result)
{
int ijk = i + j * p_result->getSizeX() + k * p_result->getSizeX() * p_result->getSizeY();
@@ -1203,37 +1168,35 @@ struct kn_conv_mex_in_to_Real : public KernelBase {
return p_result;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel kn_conv_mex_in_to_Real ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_lin_array, p_result);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_lin_array, p_result);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_lin_array, p_result);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_lin_array, p_result);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const double *p_lin_array;
Grid<Real> *p_result;
};
@@ -1245,7 +1208,7 @@ struct kn_conv_Real_to_mex_out : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const Grid<Real> *p_grid, double *p_result) const
+ inline void op(int i, int j, int k, const Grid<Real> *p_grid, double *p_result)
{
int ijk = i + j * p_grid->getSizeX() + k * p_grid->getSizeX() * p_grid->getSizeY();
@@ -1261,37 +1224,35 @@ struct kn_conv_Real_to_mex_out : public KernelBase {
return p_result;
}
typedef double type1;
- void runMessage()
- {
- debMsg("Executing kernel kn_conv_Real_to_mex_out ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_grid, p_result);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_grid, p_result);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, p_grid, p_result);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, p_grid, p_result);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const Grid<Real> *p_grid;
double *p_result;
};
diff --git a/extern/mantaflow/preprocessed/conjugategrad.cpp b/extern/mantaflow/preprocessed/conjugategrad.cpp
index bdcceb29520..df184f654b6 100644
--- a/extern/mantaflow/preprocessed/conjugategrad.cpp
+++ b/extern/mantaflow/preprocessed/conjugategrad.cpp
@@ -18,6 +18,8 @@
#include "conjugategrad.h"
#include "commonkernels.h"
+#include <chrono>
+using namespace std::chrono;
using namespace std;
namespace Manta {
@@ -213,9 +215,9 @@ struct GridDotProduct : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, const Grid<Real> &a, const Grid<Real> &b, double &result)
+ inline void op(int i, int j, int k, const Grid<Real> &a, const Grid<Real> &b, double &result)
{
- result += (a[idx] * b[idx]);
+ result += (a(i, j, k) * b(i, j, k));
}
inline operator double()
{
@@ -235,28 +237,39 @@ struct GridDotProduct : public KernelBase {
return b;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel GridDotProduct ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, a, b, result);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- GridDotProduct(GridDotProduct &o, tbb::split) : KernelBase(o), a(o.a), b(o.b), result(0.0)
- {
- }
- void join(const GridDotProduct &o)
- {
- result += o.result;
+ const int _maxX = maxX;
+ const int _maxY = maxY;
+ if (maxZ > 1) {
+ const Grid<Real> &a = getArg0();
+ const Grid<Real> &b = getArg1();
+#pragma omp target teams distribute parallel for reduction(+:result) collapse(2) schedule(static,1)
+ {
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, a, b, result);
+ }
+ {
+ this->result = result;
+ }
+ }
+ else {
+ const int k = 0;
+ const Grid<Real> &a = getArg0();
+ const Grid<Real> &b = getArg1();
+#pragma omp target teams distribute parallel for reduction(+:result) collapse(1) schedule(static,1)
+ {
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, a, b, result);
+ }
+ {
+ this->result = result;
+ }
+ }
}
const Grid<Real> &a;
const Grid<Real> &b;
@@ -315,29 +328,21 @@ struct InitSigma : public KernelBase {
return temp;
}
typedef Grid<Real> type3;
- void runMessage()
- {
- debMsg("Executing kernel InitSigma ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, flags, dst, rhs, temp, sigma);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- InitSigma(InitSigma &o, tbb::split)
- : KernelBase(o), flags(o.flags), dst(o.dst), rhs(o.rhs), temp(o.temp), sigma(0)
- {
- }
- void join(const InitSigma &o)
- {
- sigma += o.sigma;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ double sigma = 0;
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, flags, dst, rhs, temp, sigma);
+#pragma omp critical
+ {
+ this->sigma += sigma;
+ }
+ }
}
const FlagGrid &flags;
Grid<Real> &dst;
@@ -356,8 +361,9 @@ struct UpdateSearchVec : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<Real> &dst, Grid<Real> &src, Real factor) const
+ inline void op(int i, int j, int k, Grid<Real> &dst, Grid<Real> &src, Real factor)
{
+ const IndexInt idx = dst.index(i, j, k);
dst[idx] = src[idx] + factor * dst[idx];
}
inline Grid<Real> &getArg0()
@@ -375,21 +381,35 @@ struct UpdateSearchVec : public KernelBase {
return factor;
}
typedef Real type2;
- void runMessage()
- {
- debMsg("Executing kernel UpdateSearchVec ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, dst, src, factor);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const int _maxX = maxX;
+ const int _maxY = maxY;
+ if (maxZ > 1) {
+ Grid<Real> &dst = getArg0();
+ Grid<Real> &src = getArg1();
+ Real &factor = getArg2();
+#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
+ {
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, dst, src, factor);
+ }
+ }
+ else {
+ const int k = 0;
+ Grid<Real> &dst = getArg0();
+ Grid<Real> &src = getArg1();
+ Real &factor = getArg2();
+#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
+ {
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, dst, src, factor);
+ }
+ }
}
Grid<Real> &dst;
Grid<Real> &src;
@@ -406,8 +426,10 @@ GridCg<APPLYMAT>::GridCg(Grid<Real> &dst,
Grid<Real> &search,
const FlagGrid &flags,
Grid<Real> &tmp,
- std::vector<Grid<Real> *> matrixAVec,
- std::vector<Grid<Real> *> rhsVec)
+ Grid<Real> *pA0,
+ Grid<Real> *pAi,
+ Grid<Real> *pAj,
+ Grid<Real> *pAk)
: GridCgInterface(),
mInited(false),
mIterations(0),
@@ -417,8 +439,10 @@ GridCg<APPLYMAT>::GridCg(Grid<Real> &dst,
mSearch(search),
mFlags(flags),
mTmp(tmp),
- mMatrixA(matrixAVec),
- mVecRhs(rhsVec),
+ mpA0(pA0),
+ mpAi(pAi),
+ mpAj(pAj),
+ mpAk(pAk),
mPcMethod(PC_None),
mpPCA0(nullptr),
mpPCAi(nullptr),
@@ -436,54 +460,37 @@ template<class APPLYMAT> void GridCg<APPLYMAT>::doInit()
mInited = true;
mIterations = 0;
- mDst.clear();
- mResidual.copyFrom(mRhs); // p=0, residual = b
+ mDst.clear(1);
+ mResidual.copyFrom(mRhs, true, 1); // p=0, residual = b
if (mPcMethod == PC_ICP) {
- assertMsg(mDst.is3D(), "ICP only supports 3D grids so far");
- InitPreconditionIncompCholesky(mFlags,
- *mpPCA0,
- *mpPCAi,
- *mpPCAj,
- *mpPCAk,
- *mMatrixA[0],
- *mMatrixA[1],
- *mMatrixA[2],
- *mMatrixA[3]);
- ApplyPreconditionIncompCholesky(mTmp,
- mResidual,
- mFlags,
- *mpPCA0,
- *mpPCAi,
- *mpPCAj,
- *mpPCAk,
- *mMatrixA[0],
- *mMatrixA[1],
- *mMatrixA[2],
- *mMatrixA[3]);
+ // assertMsg(mDst.is3D(), "ICP only supports 3D grids so far");
+ InitPreconditionIncompCholesky(
+ mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk);
+ ApplyPreconditionIncompCholesky(
+ mTmp, mResidual, mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk);
}
else if (mPcMethod == PC_mICP) {
- assertMsg(mDst.is3D(), "mICP only supports 3D grids so far");
- InitPreconditionModifiedIncompCholesky2(
- mFlags, *mpPCA0, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3]);
+ // assertMsg(mDst.is3D(), "mICP only supports 3D grids so far");
+ InitPreconditionModifiedIncompCholesky2(mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk);
ApplyPreconditionModifiedIncompCholesky2(
- mTmp, mResidual, mFlags, *mpPCA0, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3]);
+ mTmp, mResidual, mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk);
}
else if (mPcMethod == PC_MGP) {
- InitPreconditionMultigrid(
- mMG, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3], mAccuracy);
+ InitPreconditionMultigrid(mMG, *mpA0, *mpAi, *mpAj, *mpAk, mAccuracy);
ApplyPreconditionMultigrid(mMG, mTmp, mResidual);
}
else {
- mTmp.copyFrom(mResidual);
+ mTmp.copyFrom(mResidual, true, 1);
}
- mSearch.copyFrom(mTmp);
+ mSearch.copyFrom(mTmp, true, 1);
mSigma = GridDotProduct(mTmp, mResidual);
}
-template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate()
+template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate(Real &time)
{
+ auto start = high_resolution_clock::now();
if (!mInited)
doInit();
@@ -493,7 +500,14 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate()
// this could reinterpret the mpA pointers (not so clean right now)
// tmp = applyMat(search)
- APPLYMAT(mFlags, mTmp, mSearch, mMatrixA, mVecRhs);
+ APPLYMAT(mFlags, mTmp, mSearch, *mpA0, *mpAi, *mpAj, *mpAk);
+
+ auto stop = high_resolution_clock::now();
+ auto duration = duration_cast<microseconds>(stop - start);
+ time += duration.count();
+ // std::cout << "APPLYMAT Time taken: " << duration.count() << std::endl;
+
+ start = high_resolution_clock::now();
// alpha = sigma/dot(tmp, search)
Real dp = GridDotProduct(mTmp, mSearch);
@@ -501,35 +515,49 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate()
if (fabs(dp) > 0.)
alpha = mSigma / (Real)dp;
+ stop = high_resolution_clock::now();
+ duration = duration_cast<microseconds>(stop - start);
+ time += duration.count();
+ // std::cout << "GridDotProduct Time taken: " << duration.count() << std::endl;
+
+ start = high_resolution_clock::now();
+
gridScaledAdd<Real, Real>(mDst, mSearch, alpha); // dst += search * alpha
gridScaledAdd<Real, Real>(mResidual, mTmp, -alpha); // residual += tmp * -alpha
+ stop = high_resolution_clock::now();
+ duration = duration_cast<microseconds>(stop - start);
+ time += duration.count();
+ // std::cout << "gridScaledAdd Time taken: " << duration.count() << std::endl;
+
+ start = high_resolution_clock::now();
+
if (mPcMethod == PC_ICP)
- ApplyPreconditionIncompCholesky(mTmp,
- mResidual,
- mFlags,
- *mpPCA0,
- *mpPCAi,
- *mpPCAj,
- *mpPCAk,
- *mMatrixA[0],
- *mMatrixA[1],
- *mMatrixA[2],
- *mMatrixA[3]);
+ ApplyPreconditionIncompCholesky(
+ mTmp, mResidual, mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk);
else if (mPcMethod == PC_mICP)
ApplyPreconditionModifiedIncompCholesky2(
- mTmp, mResidual, mFlags, *mpPCA0, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3]);
+ mTmp, mResidual, mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk);
else if (mPcMethod == PC_MGP)
ApplyPreconditionMultigrid(mMG, mTmp, mResidual);
else
- mTmp.copyFrom(mResidual);
+ mTmp.copyFrom(mResidual, true, 1);
+
+ stop = high_resolution_clock::now();
+ duration = duration_cast<microseconds>(stop - start);
+ time += duration.count();
+ // std::cout << "copyFrom Time taken: " << duration.count() << std::endl;
+
+ start = high_resolution_clock::now();
// use the l2 norm of the residual for convergence check? (usually max norm is recommended
// instead)
if (this->mUseL2Norm) {
+ // std::cout << "USING L2" << std::endl;
mResNorm = GridSumSqr(mResidual).sum;
}
else {
+ // std::cout << "NOT USING L2" << std::endl;
mResNorm = mResidual.getMaxAbs();
}
@@ -539,27 +567,43 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate()
return false;
}
+ stop = high_resolution_clock::now();
+ duration = duration_cast<microseconds>(stop - start);
+ time += duration.count();
+ // std::cout << "GridSumSqr Time taken: " << duration.count() << std::endl;
+
+ start = high_resolution_clock::now();
+
Real sigmaNew = GridDotProduct(mTmp, mResidual);
Real beta = sigmaNew / mSigma;
+ stop = high_resolution_clock::now();
+ duration = duration_cast<microseconds>(stop - start);
+ time += duration.count();
+ // std::cout << "GridDotProduct Time taken: " << duration.count() << std::endl;
+
+ start = high_resolution_clock::now();
+
// search = tmp + beta * search
UpdateSearchVec(mSearch, mTmp, beta);
- debMsg("GridCg::iterate i=" << mIterations << " sigmaNew=" << sigmaNew << " sigmaLast=" << mSigma
- << " alpha=" << alpha << " beta=" << beta << " ",
- CG_DEBUGLEVEL);
+ stop = high_resolution_clock::now();
+ duration = duration_cast<microseconds>(stop - start);
+ time += duration.count();
+ // std::cout << "UpdateSearchVec Time taken: " << duration.count() << std::endl;
+
+ // debMsg("GridCg::iterate i="<<mIterations<<" sigmaNew="<<sigmaNew<<" sigmaLast="<<mSigma<<"
+ // alpha="<<alpha<<" beta="<<beta<<" ", CG_DEBUGLEVEL);
mSigma = sigmaNew;
if (!(mResNorm < 1e35)) {
if (mPcMethod == PC_MGP) {
// diverging solves can be caused by the static multigrid mode, we cannot detect this here,
// though only the pressure solve call "knows" whether the MG is static or dynamics...
- debMsg(
- "GridCg::iterate: Warning - this diverging solve can be caused by the 'static' mode of "
- "the MG preconditioner. If the static mode is active, try switching to dynamic.",
- 1);
+ // debMsg("GridCg::iterate: Warning - this diverging solve can be caused by the 'static' mode
+ // of the MG preconditioner. If the static mode is active, try switching to dynamic.", 1);
}
- errMsg("GridCg::iterate: The CG solver diverged, residual norm > 1e30, stopping.");
+ // errMsg("GridCg::iterate: The CG solver diverged, residual norm > 1e30, stopping.");
}
// debMsg("PB-CG-Norms::p"<<sqrt( GridOpNormNosqrt(mpDst, mpFlags).getValue() ) <<"
@@ -571,8 +615,9 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate()
template<class APPLYMAT> void GridCg<APPLYMAT>::solve(int maxIter)
{
+ Real time = 0;
for (int iter = 0; iter < maxIter; iter++) {
- if (!iterate())
+ if (!iterate(time))
iter = maxIter;
}
return;
@@ -583,13 +628,13 @@ template<class APPLYMAT>
void GridCg<APPLYMAT>::setICPreconditioner(
PreconditionType method, Grid<Real> *A0, Grid<Real> *Ai, Grid<Real> *Aj, Grid<Real> *Ak)
{
- assertMsg(method == PC_ICP || method == PC_mICP,
- "GridCg<APPLYMAT>::setICPreconditioner: Invalid method specified.");
+ // assertMsg(method==PC_ICP || method==PC_mICP, "GridCg<APPLYMAT>::setICPreconditioner: Invalid
+ // method specified.");
mPcMethod = method;
if ((!A0->is3D())) {
if (gPrint2dWarning) {
- debMsg("ICP/mICP pre-conditioning only supported in 3D for now, disabling it.", 1);
+ // debMsg("ICP/mICP pre-conditioning only supported in 3D for now, disabling it.", 1);
gPrint2dWarning = false;
}
mPcMethod = PC_None;
@@ -603,7 +648,7 @@ void GridCg<APPLYMAT>::setICPreconditioner(
template<class APPLYMAT>
void GridCg<APPLYMAT>::setMGPreconditioner(PreconditionType method, GridMg *MG)
{
- assertMsg(method == PC_MGP, "GridCg<APPLYMAT>::setMGPreconditioner: Invalid method specified.");
+ // assertMsg(method==PC_MGP, "GridCg<APPLYMAT>::setMGPreconditioner: Invalid method specified.");
mPcMethod = method;
mMG = MG;
@@ -612,9 +657,6 @@ void GridCg<APPLYMAT>::setMGPreconditioner(PreconditionType method, GridMg *MG)
// explicit instantiation
template class GridCg<ApplyMatrix>;
template class GridCg<ApplyMatrix2D>;
-template class GridCg<ApplyMatrixViscosityU>;
-template class GridCg<ApplyMatrixViscosityV>;
-template class GridCg<ApplyMatrixViscosityW>;
//*****************************************************************************
// diffusion for real and vec grids, e.g. for viscosity
@@ -655,44 +697,33 @@ void cgSolveDiffusion(const FlagGrid &flags,
}
}
- GridCgInterface *gcg;
+ GridCgInterface *gcg = nullptr;
// note , no preconditioning for now...
const int maxIter = (int)(cgMaxIterFac * flags.getSize().max()) * (flags.is3D() ? 1 : 4);
if (grid.getType() & GridBase::TypeReal) {
Grid<Real> &u = ((Grid<Real> &)grid);
rhs.copyFrom(u);
- vector<Grid<Real> *> matA{&A0, &Ai, &Aj};
-
- if (flags.is3D()) {
- matA.push_back(&Ak);
- gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, matA);
- }
- else {
- gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, matA);
- }
+ if (flags.is3D())
+ gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
+ else
+ gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
gcg->setAccuracy(cgAccuracy);
gcg->solve(maxIter);
- debMsg("FluidSolver::solveDiffusion iterations:" << gcg->getIterations()
- << ", res:" << gcg->getSigma(),
- CG_DEBUGLEVEL);
+ // debMsg("FluidSolver::solveDiffusion iterations:"<<gcg->getIterations()<<",
+ // res:"<<gcg->getSigma(), CG_DEBUGLEVEL);
}
else if ((grid.getType() & GridBase::TypeVec3) || (grid.getType() & GridBase::TypeMAC)) {
Grid<Vec3> &vec = ((Grid<Vec3> &)grid);
Grid<Real> u(parent);
- vector<Grid<Real> *> matA{&A0, &Ai, &Aj};
// core solve is same as for a regular real grid
- if (flags.is3D()) {
- matA.push_back(&Ak);
- gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, matA);
- }
- else {
- gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, matA);
- }
-
+ if (flags.is3D())
+ gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
+ else
+ gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
gcg->setAccuracy(cgAccuracy);
// diffuse every component separately
@@ -702,15 +733,14 @@ void cgSolveDiffusion(const FlagGrid &flags,
rhs.copyFrom(u);
gcg->solve(maxIter);
- debMsg("FluidSolver::solveDiffusion vec3, iterations:" << gcg->getIterations()
- << ", res:" << gcg->getSigma(),
- CG_DEBUGLEVEL);
+ // debMsg("FluidSolver::solveDiffusion vec3, iterations:"<<gcg->getIterations()<<",
+ // res:"<<gcg->getSigma(), CG_DEBUGLEVEL);
setComponent(u, vec, component);
}
}
else {
- errMsg("cgSolveDiffusion: Grid Type is not supported (only Real, Vec3, MAC, or Levelset)");
+ // errMsg("cgSolveDiffusion: Grid Type is not supported (only Real, Vec3, MAC, or Levelset)");
}
delete gcg;
diff --git a/extern/mantaflow/preprocessed/conjugategrad.h b/extern/mantaflow/preprocessed/conjugategrad.h
index 35cb3960656..189f85de689 100644
--- a/extern/mantaflow/preprocessed/conjugategrad.h
+++ b/extern/mantaflow/preprocessed/conjugategrad.h
@@ -37,7 +37,7 @@ class GridCgInterface {
virtual ~GridCgInterface(){};
// solving functions
- virtual bool iterate() = 0;
+ virtual bool iterate(Real &time) = 0;
virtual void solve(int maxIter) = 0;
// precond
@@ -78,12 +78,16 @@ template<class APPLYMAT> class GridCg : public GridCgInterface {
Grid<Real> &search,
const FlagGrid &flags,
Grid<Real> &tmp,
- std::vector<Grid<Real> *> matrixAVec,
- std::vector<Grid<Real> *> rhsVec = {});
- ~GridCg(){};
+ Grid<Real> *A0,
+ Grid<Real> *pAi,
+ Grid<Real> *pAj,
+ Grid<Real> *pAk);
+ ~GridCg()
+ {
+ }
void doInit();
- bool iterate();
+ bool iterate(Real &time);
void solve(int maxIter);
//! init pointers, and copy values from "normal" matrix
void setICPreconditioner(
@@ -129,10 +133,7 @@ template<class APPLYMAT> class GridCg : public GridCgInterface {
const FlagGrid &mFlags;
Grid<Real> &mTmp;
- //! shape of A matrix defined here (e.g. diagonal, positive neighbor cells, etc)
- std::vector<Grid<Real> *> mMatrixA;
- //! shape of rhs vector defined here (e.g. 1 rhs for regular fluids solve, 3 rhs for viscosity)
- std::vector<Grid<Real> *> mVecRhs;
+ Grid<Real> *mpA0, *mpAi, *mpAj, *mpAk;
PreconditionType mPcMethod;
//! preconditioning grids
@@ -153,33 +154,32 @@ struct ApplyMatrix : public KernelBase {
ApplyMatrix(const FlagGrid &flags,
Grid<Real> &dst,
const Grid<Real> &src,
- const std::vector<Grid<Real> *> matrixA,
- const std::vector<Grid<Real> *> vecRhs)
- : KernelBase(&flags, 0), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs)
+ Grid<Real> &A0,
+ Grid<Real> &Ai,
+ Grid<Real> &Aj,
+ Grid<Real> &Ak)
+ : KernelBase(&flags, 0), flags(flags), dst(dst), src(src), A0(A0), Ai(Ai), Aj(Aj), Ak(Ak)
{
runMessage();
run();
}
- inline void op(IndexInt idx,
+ inline void op(int i,
+ int j,
+ int k,
const FlagGrid &flags,
Grid<Real> &dst,
const Grid<Real> &src,
- const std::vector<Grid<Real> *> matrixA,
- const std::vector<Grid<Real> *> vecRhs) const
+ Grid<Real> &A0,
+ Grid<Real> &Ai,
+ Grid<Real> &Aj,
+ Grid<Real> &Ak)
{
- unusedParameter(vecRhs); // Not needed in this matrix application
-
- if (matrixA.size() != 4)
- errMsg("ConjugateGrad: Invalid A matrix in apply matrix step");
- Grid<Real> &A0 = *matrixA[0];
- Grid<Real> &Ai = *matrixA[1];
- Grid<Real> &Aj = *matrixA[2];
- Grid<Real> &Ak = *matrixA[3];
-
+ const IndexInt idx = dst.index(i, j, k);
if (!flags.isFluid(idx)) {
dst[idx] = src[idx];
return;
}
+ const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ();
dst[idx] = src[idx] * A0[idx] + src[idx - X] * Ai[idx - X] + src[idx + X] * Ai[idx] +
src[idx - Y] * Aj[idx - Y] + src[idx + Y] * Aj[idx] + src[idx - Z] * Ak[idx - Z] +
@@ -200,37 +200,71 @@ struct ApplyMatrix : public KernelBase {
return src;
}
typedef Grid<Real> type2;
- inline const std::vector<Grid<Real> *> &getArg3()
+ inline Grid<Real> &getArg3()
{
- return matrixA;
+ return A0;
}
- typedef std::vector<Grid<Real> *> type3;
- inline const std::vector<Grid<Real> *> &getArg4()
+ typedef Grid<Real> type3;
+ inline Grid<Real> &getArg4()
{
- return vecRhs;
+ return Ai;
}
- typedef std::vector<Grid<Real> *> type4;
- void runMessage()
+ typedef Grid<Real> type4;
+ inline Grid<Real> &getArg5()
{
- debMsg("Executing kernel ApplyMatrix ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ return Aj;
+ }
+ typedef Grid<Real> type5;
+ inline Grid<Real> &getArg6()
{
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, flags, dst, src, matrixA, vecRhs);
+ return Ak;
}
+ typedef Grid<Real> type6;
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const int _maxX = maxX;
+ const int _maxY = maxY;
+ if (maxZ > 1) {
+ const FlagGrid &flags = getArg0();
+ Grid<Real> &dst = getArg1();
+ const Grid<Real> &src = getArg2();
+ Grid<Real> &A0 = getArg3();
+ Grid<Real> &Ai = getArg4();
+ Grid<Real> &Aj = getArg5();
+ Grid<Real> &Ak = getArg6();
+#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
+ {
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak);
+ }
+ }
+ else {
+ const int k = 0;
+ const FlagGrid &flags = getArg0();
+ Grid<Real> &dst = getArg1();
+ const Grid<Real> &src = getArg2();
+ Grid<Real> &A0 = getArg3();
+ Grid<Real> &Ai = getArg4();
+ Grid<Real> &Aj = getArg5();
+ Grid<Real> &Ak = getArg6();
+#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
+ {
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak);
+ }
+ }
}
const FlagGrid &flags;
Grid<Real> &dst;
const Grid<Real> &src;
- const std::vector<Grid<Real> *> matrixA;
- const std::vector<Grid<Real> *> vecRhs;
+ Grid<Real> &A0;
+ Grid<Real> &Ai;
+ Grid<Real> &Aj;
+ Grid<Real> &Ak;
};
//! Kernel: Apply symmetric stored Matrix. 2D version
@@ -239,32 +273,34 @@ struct ApplyMatrix2D : public KernelBase {
ApplyMatrix2D(const FlagGrid &flags,
Grid<Real> &dst,
const Grid<Real> &src,
- const std::vector<Grid<Real> *> matrixA,
- const std::vector<Grid<Real> *> vecRhs)
- : KernelBase(&flags, 0), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs)
+ Grid<Real> &A0,
+ Grid<Real> &Ai,
+ Grid<Real> &Aj,
+ Grid<Real> &Ak)
+ : KernelBase(&flags, 0), flags(flags), dst(dst), src(src), A0(A0), Ai(Ai), Aj(Aj), Ak(Ak)
{
runMessage();
run();
}
- inline void op(IndexInt idx,
+ inline void op(int i,
+ int j,
+ int k,
const FlagGrid &flags,
Grid<Real> &dst,
const Grid<Real> &src,
- const std::vector<Grid<Real> *> matrixA,
- const std::vector<Grid<Real> *> vecRhs) const
+ Grid<Real> &A0,
+ Grid<Real> &Ai,
+ Grid<Real> &Aj,
+ Grid<Real> &Ak)
{
- unusedParameter(vecRhs); // Not needed in this matrix application
-
- if (matrixA.size() != 3)
- errMsg("ConjugateGrad: Invalid A matrix in apply matrix step");
- Grid<Real> &A0 = *matrixA[0];
- Grid<Real> &Ai = *matrixA[1];
- Grid<Real> &Aj = *matrixA[2];
+ unusedParameter(Ak); // only there for parameter compatibility with ApplyMatrix
+ const IndexInt idx = dst.index(i, j, k);
if (!flags.isFluid(idx)) {
dst[idx] = src[idx];
return;
}
+ const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ();
dst[idx] = src[idx] * A0[idx] + src[idx - X] * Ai[idx - X] + src[idx + X] * Ai[idx] +
src[idx - Y] * Aj[idx - Y] + src[idx + Y] * Aj[idx];
@@ -284,387 +320,73 @@ struct ApplyMatrix2D : public KernelBase {
return src;
}
typedef Grid<Real> type2;
- inline const std::vector<Grid<Real> *> &getArg3()
- {
- return matrixA;
- }
- typedef std::vector<Grid<Real> *> type3;
- inline const std::vector<Grid<Real> *> &getArg4()
- {
- return vecRhs;
- }
- typedef std::vector<Grid<Real> *> type4;
- void runMessage()
- {
- debMsg("Executing kernel ApplyMatrix2D ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, flags, dst, src, matrixA, vecRhs);
- }
- void run()
- {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- const FlagGrid &flags;
- Grid<Real> &dst;
- const Grid<Real> &src;
- const std::vector<Grid<Real> *> matrixA;
- const std::vector<Grid<Real> *> vecRhs;
-};
-
-struct ApplyMatrixViscosityU : public KernelBase {
- ApplyMatrixViscosityU(const FlagGrid &flags,
- Grid<Real> &dst,
- const Grid<Real> &src,
- const std::vector<Grid<Real> *> matrixA,
- const std::vector<Grid<Real> *> vecRhs)
- : KernelBase(&flags, 1), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs)
- {
- runMessage();
- run();
- }
- inline void op(int i,
- int j,
- int k,
- const FlagGrid &flags,
- Grid<Real> &dst,
- const Grid<Real> &src,
- const std::vector<Grid<Real> *> matrixA,
- const std::vector<Grid<Real> *> vecRhs) const
- {
- if (matrixA.size() != 15)
- errMsg("ConjugateGrad: Invalid A matrix in apply matrix step");
- Grid<Real> &A0 = *matrixA[0];
- Grid<Real> &Aplusi = *matrixA[1];
- Grid<Real> &Aplusj = *matrixA[2];
- Grid<Real> &Aplusk = *matrixA[3];
- Grid<Real> &Aminusi = *matrixA[4];
- Grid<Real> &Aminusj = *matrixA[5];
- Grid<Real> &Aminusk = *matrixA[6];
-
- if (vecRhs.size() != 2)
- errMsg("ConjugateGrad: Invalid rhs vector in apply matrix step");
- Grid<Real> &srcV = *vecRhs[0];
- Grid<Real> &srcW = *vecRhs[1];
-
- dst(i, j, k) = src(i, j, k) * A0(i, j, k) + src(i + 1, j, k) * Aplusi(i, j, k) +
- src(i, j + 1, k) * Aplusj(i, j, k) + src(i, j, k + 1) * Aplusk(i, j, k) +
- src(i - 1, j, k) * Aminusi(i, j, k) + src(i, j - 1, k) * Aminusj(i, j, k) +
- src(i, j, k - 1) * Aminusk(i, j, k);
-
- dst(i, j, k) += srcV(i, j + 1, k) * (*matrixA[7])(i, j, k) +
- srcV(i - 1, j + 1, k) * (*matrixA[8])(i, j, k) +
- srcV(i, j, k) * (*matrixA[9])(i, j, k) +
- srcV(i - 1, j, k) * (*matrixA[10])(i, j, k) +
- srcW(i, j, k + 1) * (*matrixA[11])(i, j, k) +
- srcW(i - 1, j, k + 1) * (*matrixA[12])(i, j, k) +
- srcW(i, j, k) * (*matrixA[13])(i, j, k) +
- srcW(i - 1, j, k) * (*matrixA[14])(i, j, k);
- }
- inline const FlagGrid &getArg0()
- {
- return flags;
- }
- typedef FlagGrid type0;
- inline Grid<Real> &getArg1()
- {
- return dst;
- }
- typedef Grid<Real> type1;
- inline const Grid<Real> &getArg2()
- {
- return src;
- }
- typedef Grid<Real> type2;
- inline const std::vector<Grid<Real> *> &getArg3()
- {
- return matrixA;
- }
- typedef std::vector<Grid<Real> *> type3;
- inline const std::vector<Grid<Real> *> &getArg4()
- {
- return vecRhs;
- }
- typedef std::vector<Grid<Real> *> type4;
- void runMessage()
- {
- debMsg("Executing kernel ApplyMatrixViscosityU ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- const int _maxX = maxX;
- const int _maxY = maxY;
- if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, dst, src, matrixA, vecRhs);
- }
- else {
- const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, dst, src, matrixA, vecRhs);
- }
- }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
- const FlagGrid &flags;
- Grid<Real> &dst;
- const Grid<Real> &src;
- const std::vector<Grid<Real> *> matrixA;
- const std::vector<Grid<Real> *> vecRhs;
-};
-
-struct ApplyMatrixViscosityV : public KernelBase {
- ApplyMatrixViscosityV(const FlagGrid &flags,
- Grid<Real> &dst,
- const Grid<Real> &src,
- const std::vector<Grid<Real> *> matrixA,
- const std::vector<Grid<Real> *> vecRhs)
- : KernelBase(&flags, 1), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs)
- {
- runMessage();
- run();
- }
- inline void op(int i,
- int j,
- int k,
- const FlagGrid &flags,
- Grid<Real> &dst,
- const Grid<Real> &src,
- const std::vector<Grid<Real> *> matrixA,
- const std::vector<Grid<Real> *> vecRhs) const
- {
- if (matrixA.size() != 15)
- errMsg("ConjugateGrad: Invalid A matrix in apply matrix step");
- Grid<Real> &A0 = *matrixA[0];
- Grid<Real> &Aplusi = *matrixA[1];
- Grid<Real> &Aplusj = *matrixA[2];
- Grid<Real> &Aplusk = *matrixA[3];
- Grid<Real> &Aminusi = *matrixA[4];
- Grid<Real> &Aminusj = *matrixA[5];
- Grid<Real> &Aminusk = *matrixA[6];
-
- if (vecRhs.size() != 2)
- errMsg("ConjugateGrad: Invalid rhs vector in apply matrix step");
- Grid<Real> &srcU = *vecRhs[0];
- Grid<Real> &srcW = *vecRhs[1];
-
- dst(i, j, k) = src(i, j, k) * A0(i, j, k) + src(i + 1, j, k) * Aplusi(i, j, k) +
- src(i, j + 1, k) * Aplusj(i, j, k) + src(i, j, k + 1) * Aplusk(i, j, k) +
- src(i - 1, j, k) * Aminusi(i, j, k) + src(i, j - 1, k) * Aminusj(i, j, k) +
- src(i, j, k - 1) * Aminusk(i, j, k);
-
- dst(i, j, k) += srcU(i + 1, j, k) * (*matrixA[7])(i, j, k) +
- srcU(i + 1, j - 1, k) * (*matrixA[8])(i, j, k) +
- srcU(i, j, k) * (*matrixA[9])(i, j, k) +
- srcU(i, j - 1, k) * (*matrixA[10])(i, j, k) +
- srcW(i, j, k + 1) * (*matrixA[11])(i, j, k) +
- srcW(i, j - 1, k + 1) * (*matrixA[12])(i, j, k) +
- srcW(i, j, k) * (*matrixA[13])(i, j, k) +
- srcW(i, j - 1, k) * (*matrixA[14])(i, j, k);
- }
- inline const FlagGrid &getArg0()
- {
- return flags;
- }
- typedef FlagGrid type0;
- inline Grid<Real> &getArg1()
- {
- return dst;
- }
- typedef Grid<Real> type1;
- inline const Grid<Real> &getArg2()
+ inline Grid<Real> &getArg3()
{
- return src;
+ return A0;
}
- typedef Grid<Real> type2;
- inline const std::vector<Grid<Real> *> &getArg3()
+ typedef Grid<Real> type3;
+ inline Grid<Real> &getArg4()
{
- return matrixA;
+ return Ai;
}
- typedef std::vector<Grid<Real> *> type3;
- inline const std::vector<Grid<Real> *> &getArg4()
+ typedef Grid<Real> type4;
+ inline Grid<Real> &getArg5()
{
- return vecRhs;
+ return Aj;
}
- typedef std::vector<Grid<Real> *> type4;
- void runMessage()
+ typedef Grid<Real> type5;
+ inline Grid<Real> &getArg6()
{
- debMsg("Executing kernel ApplyMatrixViscosityV ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- const int _maxX = maxX;
- const int _maxY = maxY;
- if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, dst, src, matrixA, vecRhs);
- }
- else {
- const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, dst, src, matrixA, vecRhs);
- }
+ return Ak;
}
+ typedef Grid<Real> type6;
+ void runMessage(){};
void run()
{
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
- const FlagGrid &flags;
- Grid<Real> &dst;
- const Grid<Real> &src;
- const std::vector<Grid<Real> *> matrixA;
- const std::vector<Grid<Real> *> vecRhs;
-};
-
-struct ApplyMatrixViscosityW : public KernelBase {
- ApplyMatrixViscosityW(const FlagGrid &flags,
- Grid<Real> &dst,
- const Grid<Real> &src,
- const std::vector<Grid<Real> *> matrixA,
- const std::vector<Grid<Real> *> vecRhs)
- : KernelBase(&flags, 1), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs)
- {
- runMessage();
- run();
- }
- inline void op(int i,
- int j,
- int k,
- const FlagGrid &flags,
- Grid<Real> &dst,
- const Grid<Real> &src,
- const std::vector<Grid<Real> *> matrixA,
- const std::vector<Grid<Real> *> vecRhs) const
- {
- if (matrixA.size() != 15)
- errMsg("ConjugateGrad: Invalid A matrix in apply matrix step");
- Grid<Real> &A0 = *matrixA[0];
- Grid<Real> &Aplusi = *matrixA[1];
- Grid<Real> &Aplusj = *matrixA[2];
- Grid<Real> &Aplusk = *matrixA[3];
- Grid<Real> &Aminusi = *matrixA[4];
- Grid<Real> &Aminusj = *matrixA[5];
- Grid<Real> &Aminusk = *matrixA[6];
-
- if (vecRhs.size() != 2)
- errMsg("ConjugateGrad: Invalid rhs vector in apply matrix step");
- Grid<Real> &srcU = *vecRhs[0];
- Grid<Real> &srcV = *vecRhs[1];
-
- dst(i, j, k) = src(i, j, k) * A0(i, j, k) + src(i + 1, j, k) * Aplusi(i, j, k) +
- src(i, j + 1, k) * Aplusj(i, j, k) + src(i, j, k + 1) * Aplusk(i, j, k) +
- src(i - 1, j, k) * Aminusi(i, j, k) + src(i, j - 1, k) * Aminusj(i, j, k) +
- src(i, j, k - 1) * Aminusk(i, j, k);
-
- dst(i, j, k) += srcU(i + 1, j, k) * (*matrixA[7])(i, j, k) +
- srcU(i + 1, j, k - 1) * (*matrixA[8])(i, j, k) +
- srcU(i, j, k) * (*matrixA[9])(i, j, k) +
- srcU(i, j, k - 1) * (*matrixA[10])(i, j, k) +
- srcV(i, j + 1, k) * (*matrixA[11])(i, j, k) +
- srcV(i, j + 1, k - 1) * (*matrixA[12])(i, j, k) +
- srcV(i, j, k) * (*matrixA[13])(i, j, k) +
- srcV(i, j, k - 1) * (*matrixA[14])(i, j, k);
- }
- inline const FlagGrid &getArg0()
- {
- return flags;
- }
- typedef FlagGrid type0;
- inline Grid<Real> &getArg1()
- {
- return dst;
- }
- typedef Grid<Real> type1;
- inline const Grid<Real> &getArg2()
- {
- return src;
- }
- typedef Grid<Real> type2;
- inline const std::vector<Grid<Real> *> &getArg3()
- {
- return matrixA;
- }
- typedef std::vector<Grid<Real> *> type3;
- inline const std::vector<Grid<Real> *> &getArg4()
- {
- return vecRhs;
- }
- typedef std::vector<Grid<Real> *> type4;
- void runMessage()
- {
- debMsg("Executing kernel ApplyMatrixViscosityW ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, dst, src, matrixA, vecRhs);
+ const FlagGrid &flags = getArg0();
+ Grid<Real> &dst = getArg1();
+ const Grid<Real> &src = getArg2();
+ Grid<Real> &A0 = getArg3();
+ Grid<Real> &Ai = getArg4();
+ Grid<Real> &Aj = getArg5();
+ Grid<Real> &Ak = getArg6();
+#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
+ {
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, dst, src, matrixA, vecRhs);
+ const FlagGrid &flags = getArg0();
+ Grid<Real> &dst = getArg1();
+ const Grid<Real> &src = getArg2();
+ Grid<Real> &A0 = getArg3();
+ Grid<Real> &Ai = getArg4();
+ Grid<Real> &Aj = getArg5();
+ Grid<Real> &Ak = getArg6();
+#pragma omp target teams distribute parallel for collapse(1) schedule(static, 1)
+ {
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
Grid<Real> &dst;
const Grid<Real> &src;
- const std::vector<Grid<Real> *> matrixA;
- const std::vector<Grid<Real> *> vecRhs;
+ Grid<Real> &A0;
+ Grid<Real> &Ai;
+ Grid<Real> &Aj;
+ Grid<Real> &Ak;
};
-/* NOTE: Use this template for new matrix application kernels
-
-//! Template for matrix application kernels
-KERNEL()
-void ApplyMatrixTemplate (const FlagGrid& flags, Grid<Real>& dst, const Grid<Real>& src,
- const std::vector<Grid<Real> *> matrixA, const std::vector<Grid<Real> *> vecRhs)
-{
- // The kernel must define how to use the grids from the matrixA and vecRhs lists
-}
-
-*/
-
//! Kernel: Construct the matrix for the poisson equation
struct MakeLaplaceMatrix : public KernelBase {
@@ -687,7 +409,7 @@ struct MakeLaplaceMatrix : public KernelBase {
Grid<Real> &Ai,
Grid<Real> &Aj,
Grid<Real> &Ak,
- const MACGrid *fractions = 0) const
+ const MACGrid *fractions = 0)
{
if (!flags.isFluid(i, j, k))
return;
@@ -765,37 +487,42 @@ struct MakeLaplaceMatrix : public KernelBase {
return fractions;
}
typedef MACGrid type5;
- void runMessage()
- {
- debMsg("Executing kernel MakeLaplaceMatrix ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, A0, Ai, Aj, Ak, fractions);
+ const FlagGrid &flags = getArg0();
+ Grid<Real> &A0 = getArg1();
+ Grid<Real> &Ai = getArg2();
+ Grid<Real> &Aj = getArg3();
+ Grid<Real> &Ak = getArg4();
+ const MACGrid *fractions = getArg5();
+#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
+ {
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, A0, Ai, Aj, Ak, fractions);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, A0, Ai, Aj, Ak, fractions);
+ const FlagGrid &flags = getArg0();
+ Grid<Real> &A0 = getArg1();
+ Grid<Real> &Ai = getArg2();
+ Grid<Real> &Aj = getArg3();
+ Grid<Real> &Ak = getArg4();
+ const MACGrid *fractions = getArg5();
+#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
+ {
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, A0, Ai, Aj, Ak, fractions);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
Grid<Real> &A0;
Grid<Real> &Ai;
diff --git a/extern/mantaflow/preprocessed/fastmarch.cpp b/extern/mantaflow/preprocessed/fastmarch.cpp
index 31e43483b49..5499ec9cbdf 100644
--- a/extern/mantaflow/preprocessed/fastmarch.cpp
+++ b/extern/mantaflow/preprocessed/fastmarch.cpp
@@ -236,13 +236,7 @@ struct SetLevelsetBoundaries : public KernelBase {
return phi;
}
typedef Grid<Real> type0;
- void runMessage()
- {
- debMsg("Executing kernel SetLevelsetBoundaries ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
+ void runMessage(){};
void run()
{
const int _maxX = maxX;
@@ -297,14 +291,8 @@ struct knExtrapolateMACSimple : public KernelBase {
runMessage();
run();
}
- inline void op(int i,
- int j,
- int k,
- MACGrid &vel,
- int distance,
- Grid<int> &tmp,
- const int d,
- const int c) const
+ inline void op(
+ int i, int j, int k, MACGrid &vel, int distance, Grid<int> &tmp, const int d, const int c)
{
static const Vec3i nb[6] = {Vec3i(1, 0, 0),
Vec3i(-1, 0, 0),
@@ -359,37 +347,35 @@ struct knExtrapolateMACSimple : public KernelBase {
return c;
}
typedef int type4;
- void runMessage()
- {
- debMsg("Executing kernel knExtrapolateMACSimple ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, distance, tmp, d, c);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, distance, tmp, d, c);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, distance, tmp, d, c);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, distance, tmp, d, c);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
MACGrid &vel;
int distance;
Grid<int> &tmp;
@@ -405,7 +391,7 @@ struct knExtrapolateIntoBnd : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, FlagGrid &flags, MACGrid &vel, const MACGrid &velTmp) const
+ inline void op(int i, int j, int k, FlagGrid &flags, MACGrid &vel, const MACGrid &velTmp)
{
int c = 0;
Vec3 v(0, 0, 0);
@@ -467,37 +453,35 @@ struct knExtrapolateIntoBnd : public KernelBase {
return velTmp;
}
typedef MACGrid type2;
- void runMessage()
- {
- debMsg("Executing kernel knExtrapolateIntoBnd ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, velTmp);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, velTmp);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, velTmp);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, velTmp);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
FlagGrid &flags;
MACGrid &vel;
const MACGrid &velTmp;
@@ -538,8 +522,7 @@ struct knUnprojectNormalComp : public KernelBase {
runMessage();
run();
}
- inline void op(
- int i, int j, int k, FlagGrid &flags, MACGrid &vel, Grid<Real> &phi, Real maxDist) const
+ inline void op(int i, int j, int k, FlagGrid &flags, MACGrid &vel, Grid<Real> &phi, Real maxDist)
{
// apply inside, within range near obstacle surface
if (phi(i, j, k) > 0. || phi(i, j, k) < -maxDist)
@@ -573,37 +556,35 @@ struct knUnprojectNormalComp : public KernelBase {
return maxDist;
}
typedef Real type3;
- void runMessage()
- {
- debMsg("Executing kernel knUnprojectNormalComp ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, phi, maxDist);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, phi, maxDist);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, phi, maxDist);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, phi, maxDist);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
FlagGrid &flags;
MACGrid &vel;
Grid<Real> &phi;
@@ -712,7 +693,7 @@ struct knExtrapolateMACFromWeight : public KernelBase {
Grid<Vec3> &weight,
int distance,
const int d,
- const int c) const
+ const int c)
{
static const Vec3i nb[6] = {Vec3i(1, 0, 0),
Vec3i(-1, 0, 0),
@@ -766,37 +747,35 @@ struct knExtrapolateMACFromWeight : public KernelBase {
return c;
}
typedef int type4;
- void runMessage()
- {
- debMsg("Executing kernel knExtrapolateMACFromWeight ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, weight, distance, d, c);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, weight, distance, d, c);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, weight, distance, d, c);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, weight, distance, d, c);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
MACGrid &vel;
Grid<Vec3> &weight;
int distance;
@@ -1011,14 +990,8 @@ template<class S> struct knExtrapolateLsSimple : public KernelBase {
runMessage();
run();
}
- inline void op(int i,
- int j,
- int k,
- Grid<S> &val,
- int distance,
- Grid<int> &tmp,
- const int d,
- S direction) const
+ inline void op(
+ int i, int j, int k, Grid<S> &val, int distance, Grid<int> &tmp, const int d, S direction)
{
const int dim = (val.is3D() ? 3 : 2);
if (tmp(i, j, k) != 0)
@@ -1065,37 +1038,35 @@ template<class S> struct knExtrapolateLsSimple : public KernelBase {
return direction;
}
typedef S type4;
- void runMessage()
- {
- debMsg("Executing kernel knExtrapolateLsSimple ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, val, distance, tmp, d, direction);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, val, distance, tmp, d, direction);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, val, distance, tmp, d, direction);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, val, distance, tmp, d, direction);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<S> &val;
int distance;
Grid<int> &tmp;
@@ -1110,7 +1081,7 @@ template<class S> struct knSetRemaining : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<S> &phi, Grid<int> &tmp, S distance) const
+ inline void op(int i, int j, int k, Grid<S> &phi, Grid<int> &tmp, S distance)
{
if (tmp(i, j, k) != 0)
return;
@@ -1131,37 +1102,35 @@ template<class S> struct knSetRemaining : public KernelBase {
return distance;
}
typedef S type2;
- void runMessage()
- {
- debMsg("Executing kernel knSetRemaining ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, phi, tmp, distance);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, phi, tmp, distance);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, phi, tmp, distance);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, phi, tmp, distance);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<S> &phi;
Grid<int> &tmp;
S distance;
diff --git a/extern/mantaflow/preprocessed/fileio/iogrids.cpp b/extern/mantaflow/preprocessed/fileio/iogrids.cpp
index d138cd2925f..77e3e89ed26 100644
--- a/extern/mantaflow/preprocessed/fileio/iogrids.cpp
+++ b/extern/mantaflow/preprocessed/fileio/iogrids.cpp
@@ -1256,7 +1256,7 @@ struct knQuantize : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<Real> &grid, Real step) const
+ inline void op(IndexInt idx, Grid<Real> &grid, Real step)
{
quantizeReal(grid(idx), step);
}
@@ -1270,21 +1270,17 @@ struct knQuantize : public KernelBase {
return step;
}
typedef Real type1;
- void runMessage()
- {
- debMsg("Executing kernel knQuantize ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, grid, step);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, grid, step);
+ }
}
Grid<Real> &grid;
Real step;
@@ -1331,7 +1327,7 @@ struct knQuantizeVec3 : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<Vec3> &grid, Real step) const
+ inline void op(IndexInt idx, Grid<Vec3> &grid, Real step)
{
for (int c = 0; c < 3; ++c)
quantizeReal(grid(idx)[c], step);
@@ -1346,21 +1342,17 @@ struct knQuantizeVec3 : public KernelBase {
return step;
}
typedef Real type1;
- void runMessage()
- {
- debMsg("Executing kernel knQuantizeVec3 ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, grid, step);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, grid, step);
+ }
}
Grid<Vec3> &grid;
Real step;
diff --git a/extern/mantaflow/preprocessed/fileio/ioutil.cpp b/extern/mantaflow/preprocessed/fileio/ioutil.cpp
index 2bac34c2a65..409760b0a0f 100644
--- a/extern/mantaflow/preprocessed/fileio/ioutil.cpp
+++ b/extern/mantaflow/preprocessed/fileio/ioutil.cpp
@@ -66,7 +66,7 @@ void *safeGzopen(const char *filename, const char *mode)
#endif // NO_ZLIB != 1
}
-#if defined(OPENVDB)
+#if OPENVDB == 1
// Convert from OpenVDB value to Manta value.
template<class S, class T> void convertFrom(S &in, T *out)
{
diff --git a/extern/mantaflow/preprocessed/fileio/iovdb.cpp b/extern/mantaflow/preprocessed/fileio/iovdb.cpp
index 1846ef7ecbb..815bfa19aaa 100644
--- a/extern/mantaflow/preprocessed/fileio/iovdb.cpp
+++ b/extern/mantaflow/preprocessed/fileio/iovdb.cpp
@@ -29,10 +29,10 @@
#if OPENVDB == 1
# include "openvdb/openvdb.h"
-# include "openvdb/points/PointConversion.h"
-# include "openvdb/points/PointCount.h"
-# include "openvdb/tools/Clip.h"
-# include "openvdb/tools/Dense.h"
+# include <openvdb/points/PointConversion.h>
+# include <openvdb/points/PointCount.h>
+# include <openvdb/tools/Clip.h>
+# include <openvdb/tools/Dense.h>
#endif
#define POSITION_NAME "P"
@@ -433,9 +433,9 @@ int writeObjectsVDB(const string &filename,
debMsg("Writing int grid '" << mantaGrid->getName() << "' to vdb file " << filename, 1);
Grid<int> *mantaIntGrid = (Grid<int> *)mantaGrid;
if (clipGrid && mantaIntGrid->saveSparse()) {
- assertMsg(clipGrid->getSize() == mantaGrid->getSize(),
- "writeObjectsVDB: Clip grid and exported grid must have the same size "
- << clipGrid->getSize() << " vs " << mantaGrid->getSize());
+ // assertMsg(clipGrid->getSize() == mantaGrid->getSize(), "writeObjectsVDB: Clip grid and
+ // exported grid must have the same size " << clipGrid->getSize() << " vs " <<
+ // mantaGrid->getSize());
}
vdbGrid = exportVDB<int, openvdb::Int32Grid>(mantaIntGrid, clip, vdbClipGrid);
gridsVDB.push_back(vdbGrid);
@@ -448,9 +448,9 @@ int writeObjectsVDB(const string &filename,
// Only supply clip grid if real grid is not equal to the clip grid
openvdb::FloatGrid::Ptr tmpClipGrid = (mantaRealGrid == clipGrid) ? nullptr : vdbClipGrid;
if (clipGrid && mantaRealGrid->saveSparse()) {
- assertMsg(clipGrid->getSize() == mantaGrid->getSize(),
- "writeObjectsVDB: Clip grid and exported grid must have the same size "
- << clipGrid->getSize() << " vs " << mantaGrid->getSize());
+ // assertMsg(clipGrid->getSize() == mantaGrid->getSize(), "writeObjectsVDB: Clip grid and
+ // exported grid must have the same size " << clipGrid->getSize() << " vs " <<
+ // mantaGrid->getSize());
}
vdbGrid = exportVDB<Real, openvdb::FloatGrid>(mantaRealGrid, clip, tmpClipGrid);
gridsVDB.push_back(vdbGrid);
@@ -461,9 +461,9 @@ int writeObjectsVDB(const string &filename,
openvdb::GRID_UNKNOWN;
Grid<Vec3> *mantaVec3Grid = (Grid<Vec3> *)mantaGrid;
if (clipGrid && mantaVec3Grid->saveSparse()) {
- assertMsg(clipGrid->getSize() == mantaGrid->getSize(),
- "writeObjectsVDB: Clip grid and exported grid must have the same size "
- << clipGrid->getSize() << " vs " << mantaGrid->getSize());
+ // assertMsg(clipGrid->getSize() == mantaGrid->getSize(), "writeObjectsVDB: Clip grid and
+ // exported grid must have the same size " << clipGrid->getSize() << " vs " <<
+ // mantaGrid->getSize());
}
vdbGrid = exportVDB<Vec3, openvdb::Vec3SGrid>(mantaVec3Grid, clip, vdbClipGrid);
gridsVDB.push_back(vdbGrid);
@@ -519,7 +519,7 @@ int writeObjectsVDB(const string &filename,
}
}
- // Write only if there is at least one grid, optionally write with compression.
+ // Write only if there is at least one grid, optionally write with compression.
if (gridsVDB.size()) {
int vdb_flags = openvdb::io::COMPRESS_ACTIVE_MASK;
switch (compression) {
@@ -534,8 +534,7 @@ int writeObjectsVDB(const string &filename,
}
case COMPRESSION_BLOSC: {
# if OPENVDB_BLOSC == 1
- // Cannot use |= here, causes segfault with blosc 1.5.0 (== recommended version)
- vdb_flags = openvdb::io::COMPRESS_BLOSC;
+ vdb_flags |= openvdb::io::COMPRESS_BLOSC;
# else
debMsg("OpenVDB was built without Blosc support, using Zip compression instead", 1);
vdb_flags |= openvdb::io::COMPRESS_ZIP;
@@ -696,36 +695,28 @@ int readObjectsVDB(const string &filename, std::vector<PbClass *> *objects, floa
// Compare metadata with allocated grid setup. This prevents invalid index access.
if (notZero(metaRes) && metaRes != origRes) {
- debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName()
- << "' has not been read. Meta grid res " << metaRes
- << " vs " << origRes << " current grid size",
- 1);
+ // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read.
+ // Meta grid res " << metaRes << " vs " << origRes << " current grid size", 1);
readFailure++;
break;
}
if (notZero(metaVoxelSize) && metaVoxelSize != voxelSize) {
- debMsg("readObjectsVDB Warning: Grid '"
- << vdbGrid->getName() << "' has not been read. Meta voxel size "
- << metaVoxelSize << " vs " << voxelSize << " current voxel size",
- 1);
+ // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read.
+ // Meta voxel size " << metaVoxelSize << " vs " << voxelSize << " current voxel size", 1);
readFailure++;
break;
}
if (metaBBoxMax.x > origRes.x || metaBBoxMax.y > origRes.y || metaBBoxMax.z > origRes.z) {
- debMsg("readObjectsVDB Warning: Grid '"
- << vdbGrid->getName() << "' has not been read. Vdb bbox max " << metaBBoxMax
- << " vs " << origRes << " current grid size",
- 1);
+ // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read.
+ // Vdb bbox max " << metaBBoxMax << " vs " << origRes << " current grid size", 1);
readFailure++;
break;
}
const Vec3i origOrigin(0);
if (metaBBoxMin.x < origOrigin.x || metaBBoxMin.y < origOrigin.y ||
metaBBoxMin.z < origOrigin.z) {
- debMsg("readObjectsVDB Warning: Grid '"
- << vdbGrid->getName() << "' has not been read. Vdb bbox min " << metaBBoxMin
- << " vs " << origOrigin << " current grid origin",
- 1);
+ // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read.
+ // Vdb bbox min " << metaBBoxMin << " vs " << origOrigin << " current grid origin", 1);
readFailure++;
break;
}
diff --git a/extern/mantaflow/preprocessed/fluidsolver.h b/extern/mantaflow/preprocessed/fluidsolver.h
index 6770f8b7b05..0c871bca3a1 100644
--- a/extern/mantaflow/preprocessed/fluidsolver.h
+++ b/extern/mantaflow/preprocessed/fluidsolver.h
@@ -384,7 +384,6 @@ class FluidSolver : public PbClass {
GridStorage<Real> mGrids4dReal;
GridStorage<Vec3> mGrids4dVec;
GridStorage<Vec4> mGrids4dVec4;
-
public:
PbArgs _args;
}
diff --git a/extern/mantaflow/preprocessed/general.h b/extern/mantaflow/preprocessed/general.h
index 8bf1c2e25de..7a840517cef 100644
--- a/extern/mantaflow/preprocessed/general.h
+++ b/extern/mantaflow/preprocessed/general.h
@@ -242,39 +242,6 @@ inline bool c_isnan(float c)
return d != d;
}
-//! Swap so that a<b
-template<class T> inline void sort(T &a, T &b)
-{
- if (a > b)
- std::swap(a, b);
-}
-
-//! Swap so that a<b<c
-template<class T> inline void sort(T &a, T &b, T &c)
-{
- if (a > b)
- std::swap(a, b);
- if (a > c)
- std::swap(a, c);
- if (b > c)
- std::swap(b, c);
-}
-
-//! Swap so that a<b<c<d
-template<class T> inline void sort(T &a, T &b, T &c, T &d)
-{
- if (a > b)
- std::swap(a, b);
- if (c > d)
- std::swap(c, d);
- if (a > c)
- std::swap(a, c);
- if (b > d)
- std::swap(b, d);
- if (b > c)
- std::swap(b, c);
-}
-
} // namespace Manta
#endif
diff --git a/extern/mantaflow/preprocessed/gitinfo.h b/extern/mantaflow/preprocessed/gitinfo.h
index 6d367b764af..206d13bdf00 100644
--- a/extern/mantaflow/preprocessed/gitinfo.h
+++ b/extern/mantaflow/preprocessed/gitinfo.h
@@ -1,3 +1,3 @@
-#define MANTA_GIT_VERSION "commit d5d9a6c28daa8f21426d7a285f48639c0d8fd13f"
+#define MANTA_GIT_VERSION "commit 39b7a415721ecbf6643612a24e8eadd221aeb934"
\ No newline at end of file
diff --git a/extern/mantaflow/preprocessed/grid.cpp b/extern/mantaflow/preprocessed/grid.cpp
index e0ea3830fae..cda3c5b213d 100644
--- a/extern/mantaflow/preprocessed/grid.cpp
+++ b/extern/mantaflow/preprocessed/grid.cpp
@@ -60,7 +60,8 @@ template<> inline GridBase::GridType typeList<Vec3>()
}
template<class T>
-Grid<T>::Grid(FluidSolver *parent, bool show, bool sparse) : GridBase(parent), mExternalData(false)
+Grid<T>::Grid(FluidSolver *parent, bool show, bool sparse, bool offload)
+ : GridBase(parent), mExternalData(false)
{
mType = typeList<T>();
mSize = parent->getGridSize();
@@ -78,6 +79,34 @@ Grid<T>::Grid(FluidSolver *parent, bool show, bool sparse) : GridBase(parent), m
debMsg("Cannot enable sparse save option without OpenVDB", 1);
mSaveSparse = false;
#endif
+
+#if OPENMP && OPENMP_OFFLOAD
+ if (offload) {
+ Grid<T> *grid = this;
+ printf("OMP Allocating %s (%d,%d,%d)\n",
+ this->getName().c_str(),
+ this->getSizeX(),
+ this->getSizeY(),
+ this->getSizeZ());
+ int size = this->getSizeX() * this->getSizeY() * this->getSizeZ();
+
+ printf("Size OMP is %d\n", size);
+# pragma omp target enter data map(to \
+ : grid, \
+ grid->mSize, \
+ grid->mStrideZ, \
+ grid->mType, \
+ grid->mDx, \
+ grid->m3D, \
+ grid->mData [0:size])
+ //#pragma omp target enter data map(alloc: grid, grid->mData[0:size])
+ }
+ mOffload = offload;
+#else
+ if (offload)
+ debMsg("Cannot offload grid without OpenMP and OpenMP offloading support", 2);
+ mOffload = false;
+#endif
}
template<class T>
@@ -99,14 +128,119 @@ template<class T> Grid<T>::Grid(const Grid<T> &a) : GridBase(a.getParent()), mEx
template<class T> Grid<T>::~Grid()
{
+#if OPENMP && OPENMP_OFFLOAD
+ if (mOffload) {
+ this->mapDeleteOmp();
+ }
+#endif
if (!mExternalData) {
mParent->freeGridPointer<T>(mData);
}
}
-template<class T> void Grid<T>::clear()
+template<class T> struct knGridClear : public KernelBase {  // Kernel that assigns T(0.) to cells of `dest` using OpenMP `target` (offload) loop directives.
+ knGridClear(Grid<T> &dest) : KernelBase(&dest, 0), dest(dest)  // Constructing the object executes the kernel: runMessage() then run().
+ {
+ runMessage();
+ run();
+ }
+ inline void op(int i, int j, int k, Grid<T> &dest)  // Per-cell body: writes T(0.) to dest(i, j, k).
+ {
+ dest(i, j, k) = T(0.);
+ }
+ inline Grid<T> &getArg0()  // Accessor for the kernel's only argument (the grid being cleared).
+ {
+ return dest;
+ }
+ typedef Grid<T> type0;
+ void runMessage(){};  // Intentionally empty: no message is emitted for this kernel.
+ void run()  // Loops over the kernel range; maxX/maxY/maxZ/minZ are not declared here (presumably KernelBase members - confirm).
+ {
+ const int _maxX = maxX;  // Loop bounds for i and j are copied into locals before the directive.
+ const int _maxY = maxY;
+ if (maxZ > 1) {  // More than one z-slice: iterate k, j and i.
+ Grid<T> &dest = getArg0();  // Local reference that shadows the member `dest` inside this branch.
+#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
+ {  // NOTE(review): the loop directive is followed by a braced block, not directly by the `for`; OpenMP loop constructs expect a loop nest - confirm every supported compiler accepts this.
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, dest);
+ }
+ }
+ else {  // maxZ <= 1: only the k = 0 slice is visited, iterating j and i.
+ const int k = 0;
+ Grid<T> &dest = getArg0();  // Same shadowing local as in the branch above.
+#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
+ {  // NOTE(review): same braced-block-after-directive pattern as the 3D branch.
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, dest);
+ }
+ }
+ }
+ Grid<T> &dest;  // Grid whose cells are overwritten.
+};
+template<class T> struct knGridClear2 : public KernelBase {  // Kernel that assigns T(0.) to cells of `dest` using `omp parallel` / `omp for` (no `target` directive).
+ knGridClear2(Grid<T> &dest) : KernelBase(&dest, 0), dest(dest)  // Constructing the object executes the kernel: runMessage() then run().
+ {
+ runMessage();
+ run();
+ }
+ inline void op(int i, int j, int k, Grid<T> &dest)  // Per-cell body: writes T(0.) to dest(i, j, k).
+ {
+ dest(i, j, k) = T(0.);
+ }
+ inline Grid<T> &getArg0()  // Accessor for the kernel's only argument (the grid being cleared).
+ {
+ return dest;
+ }
+ typedef Grid<T> type0;
+ void runMessage(){};  // Intentionally empty: no message is emitted for this kernel.
+ void run()  // Loops over the kernel range; maxX/maxY/maxZ/minZ are not declared here (presumably KernelBase members - confirm).
+ {
+ const int _maxX = maxX;  // Loop bounds for i and j are copied into locals before the parallel region.
+ const int _maxY = maxY;
+ if (maxZ > 1) {  // More than one z-slice: iterate k, j and i.
+ // Only the outermost (k) loop is bound to the `omp for`; there is no collapse clause.
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, dest);  // `dest` here is the member reference (no shadowing local in this kernel).
+ }
+ }
+ else {  // maxZ <= 1: only the k = 0 slice is visited.
+ const int k = 0;
+#pragma omp parallel
+ {
+ // The `omp for` below applies to the j loop; the i loop runs inside each j iteration.
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, dest);
+ }
+ }
+ }
+ Grid<T> &dest;  // Grid whose cells are overwritten.
+};
+
+template<class T> void Grid<T>::clear(bool isOmp)  // Zeroes this grid; `isOmp` only matters when OPENMP && OPENMP_OFFLOAD is compiled in.
{
+ // Offload-enabled builds clear through a kernel object; its constructor performs the work.
+#if OPENMP && OPENMP_OFFLOAD
+ if (isOmp) {  // Kernel variant written with `omp target ...` directives.
+ knGridClear<T>(*this);
+ }
+ else {  // Kernel variant written with plain `omp parallel` / `omp for`.
+ knGridClear2<T>(*this);
+ }
+#else  // No offload support: zero sizeof(T) * mSize.x * mSize.y * mSize.z bytes at mData; isOmp is not consulted.
memset(mData, 0, sizeof(T) * mSize.x * mSize.y * mSize.z);
+#endif
}
template<class T> void Grid<T>::swap(Grid<T> &other)
@@ -201,29 +335,21 @@ struct CompMinReal : public KernelBase {
return val;
}
typedef Grid<Real> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompMinReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, minVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompMinReal(CompMinReal &o, tbb::split)
- : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max())
- {
- }
- void join(const CompMinReal &o)
- {
- minVal = min(minVal, o.minVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real minVal = std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, minVal);
+#pragma omp critical
+ {
+ this->minVal = min(minVal, this->minVal);
+ }
+ }
}
const Grid<Real> &val;
Real minVal;
@@ -256,29 +382,21 @@ struct CompMaxReal : public KernelBase {
return val;
}
typedef Grid<Real> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompMaxReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, maxVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompMaxReal(CompMaxReal &o, tbb::split)
- : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max())
- {
- }
- void join(const CompMaxReal &o)
- {
- maxVal = max(maxVal, o.maxVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real maxVal = -std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, maxVal);
+#pragma omp critical
+ {
+ this->maxVal = max(maxVal, this->maxVal);
+ }
+ }
}
const Grid<Real> &val;
Real maxVal;
@@ -311,29 +429,21 @@ struct CompMinInt : public KernelBase {
return val;
}
typedef Grid<int> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompMinInt ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, minVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompMinInt(CompMinInt &o, tbb::split)
- : KernelBase(o), val(o.val), minVal(std::numeric_limits<int>::max())
- {
- }
- void join(const CompMinInt &o)
- {
- minVal = min(minVal, o.minVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ int minVal = std::numeric_limits<int>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, minVal);
+#pragma omp critical
+ {
+ this->minVal = min(minVal, this->minVal);
+ }
+ }
}
const Grid<int> &val;
int minVal;
@@ -366,29 +476,21 @@ struct CompMaxInt : public KernelBase {
return val;
}
typedef Grid<int> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompMaxInt ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, maxVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompMaxInt(CompMaxInt &o, tbb::split)
- : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<int>::max())
- {
- }
- void join(const CompMaxInt &o)
- {
- maxVal = max(maxVal, o.maxVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ int maxVal = -std::numeric_limits<int>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, maxVal);
+#pragma omp critical
+ {
+ this->maxVal = max(maxVal, this->maxVal);
+ }
+ }
}
const Grid<int> &val;
int maxVal;
@@ -422,29 +524,21 @@ struct CompMinVec : public KernelBase {
return val;
}
typedef Grid<Vec3> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompMinVec ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, minVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompMinVec(CompMinVec &o, tbb::split)
- : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max())
- {
- }
- void join(const CompMinVec &o)
- {
- minVal = min(minVal, o.minVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real minVal = std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, minVal);
+#pragma omp critical
+ {
+ this->minVal = min(minVal, this->minVal);
+ }
+ }
}
const Grid<Vec3> &val;
Real minVal;
@@ -478,38 +572,143 @@ struct CompMaxVec : public KernelBase {
return val;
}
typedef Grid<Vec3> type0;
- void runMessage()
+ void runMessage(){};
+ void run()
+ {
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real maxVal = -std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, maxVal);
+#pragma omp critical
+ {
+ this->maxVal = max(maxVal, this->maxVal);
+ }
+ }
+ }
+ const Grid<Vec3> &val;
+ Real maxVal;
+};
+
+template<class T> struct knGridCopyGrid : public KernelBase {
+ knGridCopyGrid(Grid<T> &dest, const Grid<T> &source)
+ : KernelBase(&dest, 0), dest(dest), source(source)
+ {
+ runMessage();
+ run();
+ }
+ inline void op(int i, int j, int k, Grid<T> &dest, const Grid<T> &source)
+ {
+ dest(i, j, k) = source(i, j, k);
+ }
+ inline Grid<T> &getArg0()
{
- debMsg("Executing kernel CompMaxVec ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
+ return dest;
+ }
+ typedef Grid<T> type0;
+ inline const Grid<T> &getArg1()
{
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, maxVal);
+ return source;
}
+ typedef Grid<T> type1;
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
+ const int _maxX = maxX;
+ const int _maxY = maxY;
+ if (maxZ > 1) {
+ Grid<T> &dest = getArg0();
+ const Grid<T> &source = getArg1();
+#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
+ {
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, dest, source);
+ }
+ }
+ else {
+ const int k = 0;
+ Grid<T> &dest = getArg0();
+ const Grid<T> &source = getArg1();
+#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
+ {
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, dest, source);
+ }
+ }
}
- CompMaxVec(CompMaxVec &o, tbb::split)
- : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max())
+ Grid<T> &dest;
+ const Grid<T> &source;
+};
+template<class T> struct knGridCopyGrid2 : public KernelBase {
+ knGridCopyGrid2(Grid<T> &dest, const Grid<T> &source)
+ : KernelBase(&dest, 0), dest(dest), source(source)
{
+ runMessage();
+ run();
}
- void join(const CompMaxVec &o)
+ inline void op(int i, int j, int k, Grid<T> &dest, const Grid<T> &source)
{
- maxVal = max(maxVal, o.maxVal);
+ dest(i, j, k) = source(i, j, k);
}
- const Grid<Vec3> &val;
- Real maxVal;
+ inline Grid<T> &getArg0()
+ {
+ return dest;
+ }
+ typedef Grid<T> type0;
+ inline const Grid<T> &getArg1()
+ {
+ return source;
+ }
+ typedef Grid<T> type1;
+ void runMessage(){};
+ void run()
+ {
+ const int _maxX = maxX;
+ const int _maxY = maxY;
+ if (maxZ > 1) {
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, dest, source);
+ }
+ }
+ else {
+ const int k = 0;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, dest, source);
+ }
+ }
+ }
+ Grid<T> &dest;
+ const Grid<T> &source;
};
-template<class T> Grid<T> &Grid<T>::copyFrom(const Grid<T> &a, bool copyType)
+template<class T> Grid<T> &Grid<T>::copyFrom(const Grid<T> &a, bool copyType, bool isOmp)
{
assertMsg(a.mSize == mSize, "different grid resolutions " << a.mSize << " vs " << this->mSize);
+#if OPENMP && OPENMP_OFFLOAD
+ if (isOmp)
+ knGridCopyGrid<T>(*this, a);
+ else
+ knGridCopyGrid2<T>(*this, a);
+#else
memcpy(mData, a.mData, sizeof(T) * mSize.x * mSize.y * mSize.z);
+#endif
if (copyType)
mType = a.mType; // copy type marker
return *this;
@@ -524,7 +723,7 @@ template<class T> struct knGridSetConstReal : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, T val) const
+ inline void op(IndexInt idx, Grid<T> &me, T val) // per-element work: overwrites me[idx] with val
 {
 me[idx] = val;
 }
@@ -538,21 +737,17 @@ template<class T> struct knGridSetConstReal : public KernelBase {
return val;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel knGridSetConstReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, val);
- }
+ void runMessage(){}; // no-op: empty body, prints nothing
 void run()
 {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size; // element count copied into a local that serves as the loop bound
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++) // flat indices 0 .. _sz-1 are divided among the threads of the enclosing parallel region
+ op(i, me, val); // one op() call per index, with the stored grid and value
+ }
 }
Grid<T> &me;
T val;
@@ -563,7 +758,7 @@ template<class T> struct knGridAddConstReal : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, T val) const
+ inline void op(IndexInt idx, Grid<T> &me, T val) // per-element work: adds val to me[idx] in place
 {
 me[idx] += val;
 }
@@ -577,21 +772,17 @@ template<class T> struct knGridAddConstReal : public KernelBase {
return val;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel knGridAddConstReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, val);
- }
+ void runMessage(){}; // no-op: empty body, prints nothing
 void run()
 {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size; // element count copied into a local that serves as the loop bound
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++) // iterations over the flat index are shared among the threads of the parallel region
+ op(i, me, val); // one op() call per index
+ }
 }
Grid<T> &me;
T val;
@@ -602,7 +793,7 @@ template<class T> struct knGridMultConst : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, T val) const
+ inline void op(IndexInt idx, Grid<T> &me, T val) // per-element work: multiplies me[idx] by val in place
 {
 me[idx] *= val;
 }
@@ -616,21 +807,17 @@ template<class T> struct knGridMultConst : public KernelBase {
return val;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel knGridMultConst ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, val);
- }
+ void runMessage(){}; // no-op: empty body, prints nothing
 void run()
 {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size; // element count copied into a local that serves as the loop bound
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++) // iterations over the flat index are shared among the threads of the parallel region
+ op(i, me, val); // one op() call per index
+ }
 }
Grid<T> &me;
T val;
@@ -642,7 +829,7 @@ template<class T> struct knGridSafeDiv : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, const Grid<T> &other) const
+ inline void op(IndexInt idx, Grid<T> &me, const Grid<T> &other) // per-element work: me[idx] is replaced by safeDivide(me[idx], other[idx])
 {
 me[idx] = safeDivide(me[idx], other[idx]);
 }
@@ -656,21 +843,17 @@ template<class T> struct knGridSafeDiv : public KernelBase {
return other;
}
typedef Grid<T> type1;
- void runMessage()
- {
- debMsg("Executing kernel knGridSafeDiv ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){}; // no-op: empty body, prints nothing
 void run()
 {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size; // element count copied into a local that serves as the loop bound
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++) // iterations over the flat index are shared among the threads of the parallel region
+ op(i, me, other); // one op() call per index, pairing me[i] with other[i]
+ }
 }
Grid<T> &me;
const Grid<T> &other;
@@ -685,7 +868,7 @@ template<class T> struct knGridClamp : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, const T &min, const T &max) const
+ inline void op(IndexInt idx, Grid<T> &me, const T &min, const T &max) // per-element work: me[idx] is replaced by clamp(me[idx], min, max)
 {
 me[idx] = clamp(me[idx], min, max);
 }
@@ -704,21 +887,17 @@ template<class T> struct knGridClamp : public KernelBase {
return max;
}
typedef T type2;
- void runMessage()
- {
- debMsg("Executing kernel knGridClamp ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, min, max);
- }
+ void runMessage(){}; // no-op: empty body, prints nothing
 void run()
 {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size; // element count copied into a local that serves as the loop bound
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++) // iterations over the flat index are shared among the threads of the parallel region
+ op(i, me, min, max); // one op() call per index with the stored bounds
+ }
 }
Grid<T> &me;
const T &min;
@@ -745,7 +924,7 @@ template<class T> struct knGridStomp : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, const T &threshold) const
+ inline void op(IndexInt idx, Grid<T> &me, const T &threshold) // per-element work: hands me[idx] and threshold to stomp()
 {
 stomp(me[idx], threshold);
 }
@@ -759,21 +938,17 @@ template<class T> struct knGridStomp : public KernelBase {
return threshold;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel knGridStomp ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, threshold);
- }
+ void runMessage(){}; // no-op: empty body, prints nothing
 void run()
 {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size; // element count copied into a local that serves as the loop bound
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++) // iterations over the flat index are shared among the threads of the parallel region
+ op(i, me, threshold); // one op() call per index
+ }
 }
Grid<T> &me;
const T &threshold;
@@ -787,7 +962,7 @@ template<class T> struct knPermuteAxes : public KernelBase {
run();
}
inline void op(
- int i, int j, int k, Grid<T> &self, Grid<T> &target, int axis0, int axis1, int axis2) const
+ int i, int j, int k, Grid<T> &self, Grid<T> &target, int axis0, int axis1, int axis2)
{
int i0 = axis0 == 0 ? i : (axis0 == 1 ? j : k);
int i1 = axis1 == 0 ? i : (axis1 == 1 ? j : k);
@@ -819,37 +994,35 @@ template<class T> struct knPermuteAxes : public KernelBase {
return axis2;
}
typedef int type4;
- void runMessage()
- {
- debMsg("Executing kernel knPermuteAxes ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [0, maxX), j in [0, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, self, target, axis0, axis1, axis2);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, self, target, axis0, axis1, axis2);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, self, target, axis0, axis1, axis2);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++) // single k = 0 slice: the j loop is the work-shared one
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, self, target, axis0, axis1, axis2);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<T> &self;
Grid<T> &target;
int axis0;
@@ -864,7 +1037,7 @@ struct knJoinVec : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<Vec3> &a, const Grid<Vec3> &b, bool keepMax) const
+ inline void op(IndexInt idx, Grid<Vec3> &a, const Grid<Vec3> &b, bool keepMax)
{
Real a1 = normSquare(a[idx]);
Real b1 = normSquare(b[idx]);
@@ -885,21 +1058,17 @@ struct knJoinVec : public KernelBase {
return keepMax;
}
typedef bool type2;
- void runMessage()
- {
- debMsg("Executing kernel knJoinVec ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, a, b, keepMax);
- }
+ void runMessage(){}; // no-op: empty body, prints nothing
 void run()
 {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size; // element count copied into a local that serves as the loop bound
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++) // iterations over the flat index are shared among the threads of the parallel region
+ op(i, a, b, keepMax); // one op() call per index
+ }
 }
Grid<Vec3> &a;
const Grid<Vec3> &b;
@@ -912,7 +1081,7 @@ struct knJoinInt : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<int> &a, const Grid<int> &b, bool keepMax) const
+ inline void op(IndexInt idx, Grid<int> &a, const Grid<int> &b, bool keepMax) // per-element work: a[idx] becomes max(a[idx], b[idx]) if keepMax, else min(a[idx], b[idx])
 {
 a[idx] = (keepMax) ? max(a[idx], b[idx]) : min(a[idx], b[idx]);
 }
@@ -931,21 +1100,17 @@ struct knJoinInt : public KernelBase {
return keepMax;
}
typedef bool type2;
- void runMessage()
- {
- debMsg("Executing kernel knJoinInt ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, a, b, keepMax);
- }
+ void runMessage(){}; // no-op: empty body, prints nothing
 void run()
 {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size; // element count copied into a local that serves as the loop bound
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++) // iterations over the flat index are shared among the threads of the parallel region
+ op(i, a, b, keepMax); // one op() call per index
+ }
 }
Grid<int> &a;
const Grid<int> &b;
@@ -958,7 +1123,7 @@ struct knJoinReal : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<Real> &a, const Grid<Real> &b, bool keepMax) const
+ inline void op(IndexInt idx, Grid<Real> &a, const Grid<Real> &b, bool keepMax) // per-element work: a[idx] becomes max(a[idx], b[idx]) if keepMax, else min(a[idx], b[idx])
 {
 a[idx] = (keepMax) ? max(a[idx], b[idx]) : min(a[idx], b[idx]);
 }
@@ -977,21 +1142,17 @@ struct knJoinReal : public KernelBase {
return keepMax;
}
typedef bool type2;
- void runMessage()
- {
- debMsg("Executing kernel knJoinReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, a, b, keepMax);
- }
+ void runMessage(){}; // no-op: empty body, prints nothing
 void run()
 {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size; // element count copied into a local that serves as the loop bound
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++) // iterations over the flat index are shared among the threads of the parallel region
+ op(i, a, b, keepMax); // one op() call per index
+ }
 }
Grid<Real> &a;
const Grid<Real> &b;
@@ -1218,45 +1379,43 @@ struct knCountCells : public KernelBase {
return mask;
}
typedef Grid<Real> type3;
- void runMessage()
- {
- debMsg("Executing kernel knCountCells ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // reduction: each thread counts into a private cnt, then adds it to the member cnt; cells span i in [0, maxX), j in [0, maxY), k in [minZ, maxZ) (or k = 0 when maxZ <= 1)
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, flag, bnd, mask, cnt);
+
+#pragma omp parallel
+ {
+ int cnt = 0; // declared inside the parallel region, so one copy per thread; shadows the member cnt
+#pragma omp for nowait
+ for (int k = minZ; k < maxZ; k++) // nowait: no barrier after the loop, a thread goes on to the critical section once its iterations are done
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, flag, bnd, mask, cnt); // op() is handed the thread-local counter, not the member
+#pragma omp critical
+ {
+ this->cnt += cnt; // merge this thread's partial count into the member, one thread at a time
+ }
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, flag, bnd, mask, cnt);
+#pragma omp parallel
+ {
+ int cnt = 0; // per-thread partial count, as in the branch above
+#pragma omp for nowait
+ for (int j = 0; j < _maxY; j++) // single k = 0 slice: the j loop is the work-shared one
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, flag, bnd, mask, cnt);
+#pragma omp critical
+ {
+ this->cnt += cnt; // serialized accumulation into the member
+ }
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
- knCountCells(knCountCells &o, tbb::split)
- : KernelBase(o), flags(o.flags), flag(o.flag), bnd(o.bnd), mask(o.mask), cnt(0)
- {
- }
- void join(const knCountCells &o)
- {
- cnt += o.cnt;
- }
const FlagGrid &flags;
int flag;
int bnd;
@@ -1408,7 +1567,7 @@ struct knCopyMacToVec3 : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, MACGrid &source, Grid<Vec3> &target) const
+ inline void op(int i, int j, int k, MACGrid &source, Grid<Vec3> &target) // per-cell work: target(i, j, k) receives source(i, j, k)
 {
 target(i, j, k) = source(i, j, k);
 }
@@ -1422,37 +1581,35 @@ struct knCopyMacToVec3 : public KernelBase {
return target;
}
typedef Grid<Vec3> type1;
- void runMessage()
- {
- debMsg("Executing kernel knCopyMacToVec3 ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [0, maxX), j in [0, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, source, target);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, source, target);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, source, target);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++) // single k = 0 slice: the j loop is the work-shared one
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, source, target);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
MACGrid &source;
Grid<Vec3> &target;
};
@@ -1539,7 +1696,7 @@ struct knResampleVec3ToMac : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Vec3> &source, MACGrid &target) const
+ inline void op(int i, int j, int k, Grid<Vec3> &source, MACGrid &target)
{
target(i, j, k)[0] = 0.5 * (source(i - 1, j, k)[0] + source(i, j, k))[0];
target(i, j, k)[1] = 0.5 * (source(i, j - 1, k)[1] + source(i, j, k))[1];
@@ -1557,37 +1714,35 @@ struct knResampleVec3ToMac : public KernelBase {
return target;
}
typedef MACGrid type1;
- void runMessage()
- {
- debMsg("Executing kernel knResampleVec3ToMac ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [1, maxX), j in [1, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, source, target);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 1; j < _maxY; j++) // j and i start at 1, so index 0 in x and y is never passed to op()
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, source, target);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, source, target);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++) // single k = 0 slice: the j loop (again starting at 1) is the work-shared one
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, source, target);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<Vec3> &source;
MACGrid &target;
};
@@ -1636,7 +1791,7 @@ struct knResampleMacToVec3 : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, MACGrid &source, Grid<Vec3> &target) const
+ inline void op(int i, int j, int k, MACGrid &source, Grid<Vec3> &target) // per-cell work: target(i, j, k) receives source.getCentered(i, j, k)
 {
 target(i, j, k) = source.getCentered(i, j, k);
 }
@@ -1650,37 +1805,35 @@ struct knResampleMacToVec3 : public KernelBase {
return target;
}
typedef Grid<Vec3> type1;
- void runMessage()
- {
- debMsg("Executing kernel knResampleMacToVec3 ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [1, maxX), j in [1, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, source, target);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 1; j < _maxY; j++) // j and i start at 1, so index 0 in x and y is never passed to op()
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, source, target);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, source, target);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++) // single k = 0 slice: the j loop (again starting at 1) is the work-shared one
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, source, target);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
MACGrid &source;
Grid<Vec3> &target;
};
@@ -1729,7 +1882,7 @@ struct knCopyLevelsetToReal : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, LevelsetGrid &source, Grid<Real> &target) const
+ inline void op(int i, int j, int k, LevelsetGrid &source, Grid<Real> &target) // per-cell work: target(i, j, k) receives source(i, j, k)
 {
 target(i, j, k) = source(i, j, k);
 }
@@ -1743,37 +1896,35 @@ struct knCopyLevelsetToReal : public KernelBase {
return target;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel knCopyLevelsetToReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [0, maxX), j in [0, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, source, target);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, source, target);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, source, target);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++) // single k = 0 slice: the j loop is the work-shared one
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, source, target);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
LevelsetGrid &source;
Grid<Real> &target;
};
@@ -1834,7 +1985,7 @@ struct knCopyVec3ToReal : public KernelBase {
Grid<Vec3> &source,
Grid<Real> &targetX,
Grid<Real> &targetY,
- Grid<Real> &targetZ) const
+ Grid<Real> &targetZ)
{
targetX(i, j, k) = source(i, j, k).x;
targetY(i, j, k) = source(i, j, k).y;
@@ -1860,37 +2011,35 @@ struct knCopyVec3ToReal : public KernelBase {
return targetZ;
}
typedef Grid<Real> type3;
- void runMessage()
- {
- debMsg("Executing kernel knCopyVec3ToReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [0, maxX), j in [0, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, source, targetX, targetY, targetZ);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, source, targetX, targetY, targetZ);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, source, targetX, targetY, targetZ);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++) // single k = 0 slice: the j loop is the work-shared one
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, source, targetX, targetY, targetZ);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<Vec3> &source;
Grid<Real> &targetX;
Grid<Real> &targetY;
@@ -1958,7 +2107,7 @@ struct knCopyRealToVec3 : public KernelBase {
Grid<Real> &sourceX,
Grid<Real> &sourceY,
Grid<Real> &sourceZ,
- Grid<Vec3> &target) const
+ Grid<Vec3> &target)
{
target(i, j, k).x = sourceX(i, j, k);
target(i, j, k).y = sourceY(i, j, k);
@@ -1984,37 +2133,35 @@ struct knCopyRealToVec3 : public KernelBase {
return target;
}
typedef Grid<Vec3> type3;
- void runMessage()
- {
- debMsg("Executing kernel knCopyRealToVec3 ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [0, maxX), j in [0, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, sourceX, sourceY, sourceZ, target);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, sourceX, sourceY, sourceZ, target);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, sourceX, sourceY, sourceZ, target);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++) // single k = 0 slice: the j loop is the work-shared one
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, sourceX, sourceY, sourceZ, target);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<Real> &sourceX;
Grid<Real> &sourceY;
Grid<Real> &sourceZ;
@@ -2228,7 +2375,7 @@ struct knResetUvGrid : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Vec3> &target, const Vec3 *offset) const
+ inline void op(int i, int j, int k, Grid<Vec3> &target, const Vec3 *offset)
{
Vec3 coord = Vec3((Real)i, (Real)j, (Real)k);
if (offset)
@@ -2245,37 +2392,35 @@ struct knResetUvGrid : public KernelBase {
return offset;
}
typedef Vec3 type1;
- void runMessage()
- {
- debMsg("Executing kernel knResetUvGrid ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [0, maxX), j in [0, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, target, offset);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, target, offset);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, target, offset);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++) // single k = 0 slice: the j loop is the work-shared one
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, target, offset);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<Vec3> &target;
const Vec3 *offset;
};
@@ -2392,7 +2537,7 @@ template<class T> struct knSetBoundary : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<T> &grid, T value, int w) const
+ inline void op(int i, int j, int k, Grid<T> &grid, T value, int w)
{
bool bnd = (i <= w || i >= grid.getSizeX() - 1 - w || j <= w || j >= grid.getSizeY() - 1 - w ||
(grid.is3D() && (k <= w || k >= grid.getSizeZ() - 1 - w)));
@@ -2414,37 +2559,35 @@ template<class T> struct knSetBoundary : public KernelBase {
return w;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel knSetBoundary ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [0, maxX), j in [0, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, value, w);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, value, w);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, value, w);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++) // single k = 0 slice: the j loop is the work-shared one
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, value, w);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<T> &grid;
T value;
int w;
@@ -2461,7 +2604,7 @@ template<class T> struct knSetBoundaryNeumann : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<T> &grid, int w) const
+ inline void op(int i, int j, int k, Grid<T> &grid, int w)
{
bool set = false;
int si = i, sj = j, sk = k;
@@ -2504,37 +2647,35 @@ template<class T> struct knSetBoundaryNeumann : public KernelBase {
return w;
}
typedef int type1;
- void runMessage()
- {
- debMsg("Executing kernel knSetBoundaryNeumann ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [0, maxX), j in [0, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, w);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, w);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, w);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++) // single k = 0 slice: the j loop is the work-shared one
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, w);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<T> &grid;
int w;
};
@@ -2552,7 +2693,7 @@ struct knSetBoundaryMAC : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Vec3> &grid, Vec3 value, int w) const
+ inline void op(int i, int j, int k, Grid<Vec3> &grid, Vec3 value, int w)
{
if (i <= w || i >= grid.getSizeX() - w || j <= w - 1 || j >= grid.getSizeY() - 1 - w ||
(grid.is3D() && (k <= w - 1 || k >= grid.getSizeZ() - 1 - w)))
@@ -2579,37 +2720,35 @@ struct knSetBoundaryMAC : public KernelBase {
return w;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel knSetBoundaryMAC ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [0, maxX), j in [0, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, value, w);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, value, w);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, value, w);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++) // single k = 0 slice: the j loop is the work-shared one
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, value, w);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<Vec3> &grid;
Vec3 value;
int w;
@@ -2623,7 +2762,7 @@ struct knSetBoundaryMACNorm : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Vec3> &grid, Vec3 value, int w) const
+ inline void op(int i, int j, int k, Grid<Vec3> &grid, Vec3 value, int w)
{
if (i <= w || i >= grid.getSizeX() - w)
grid(i, j, k).x = value.x;
@@ -2647,37 +2786,35 @@ struct knSetBoundaryMACNorm : public KernelBase {
return w;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel knSetBoundaryMACNorm ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){}; // no-op: empty body, prints nothing
+ void run() // calls op() for i in [0, maxX), j in [0, maxY) and k in [minZ, maxZ), or for k = 0 only when maxZ <= 1
 {
 const int _maxX = maxX;
 const int _maxY = maxY;
 if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, value, w);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++) // omp for applies to this k loop only; the nested j/i loops run serially within each k
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, value, w);
+ }
 }
 else {
 const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, value, w);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++) // single k = 0 slice: the j loop is the work-shared one
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, value, w);
+ }
 }
 }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<Vec3> &grid;
Vec3 value;
int w;
@@ -2730,29 +2867,21 @@ struct knGridTotalSum : public KernelBase {
return flags;
}
typedef FlagGrid type1;
- void runMessage()
- {
- debMsg("Executing kernel knGridTotalSum ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, a, flags, result);
- }
+ void runMessage(){}; // no-op: empty body, prints nothing
 void run()
 {
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- knGridTotalSum(knGridTotalSum &o, tbb::split)
- : KernelBase(o), a(o.a), flags(o.flags), result(0.0)
- {
- }
- void join(const knGridTotalSum &o)
- {
- result += o.result;
+ const IndexInt _sz = size; // element count copied into a local that serves as the loop bound
+#pragma omp parallel
+ {
+ double result = 0.0; // declared inside the parallel region, so one copy per thread; shadows the member result
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++) // nowait: no barrier after the loop, a thread goes on to the critical section once its iterations are done
+ op(i, a, flags, result); // op() is handed the thread-local accumulator, not the member
+#pragma omp critical
+ {
+ this->result += result; // merge this thread's partial sum into the member, one thread at a time
+ }
+ }
 }
const Grid<Real> &a;
FlagGrid *flags;
@@ -2783,28 +2912,21 @@ struct knCountFluidCells : public KernelBase {
return flags;
}
typedef FlagGrid type0;
- void runMessage()
- {
- debMsg("Executing kernel knCountFluidCells ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, flags, numEmpty);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- knCountFluidCells(knCountFluidCells &o, tbb::split) : KernelBase(o), flags(o.flags), numEmpty(0)
- {
- }
- void join(const knCountFluidCells &o)
- {
- numEmpty += o.numEmpty;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ int numEmpty = 0;
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, flags, numEmpty);
+#pragma omp critical
+ {
+ this->numEmpty += numEmpty;
+ }
+ }
}
FlagGrid &flags;
int numEmpty;
@@ -2870,7 +2992,7 @@ struct knGetComponent : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, const Grid<Vec3> &source, Grid<Real> &target, int component) const
+ inline void op(IndexInt idx, const Grid<Vec3> &source, Grid<Real> &target, int component)
{
target[idx] = source[idx][component];
}
@@ -2889,21 +3011,17 @@ struct knGetComponent : public KernelBase {
return component;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel knGetComponent ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, source, target, component);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, source, target, component);
+ }
}
const Grid<Vec3> &source;
Grid<Real> &target;
@@ -2953,7 +3071,7 @@ struct knSetComponent : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, const Grid<Real> &source, Grid<Vec3> &target, int component) const
+ inline void op(IndexInt idx, const Grid<Real> &source, Grid<Vec3> &target, int component)
{
target[idx][component] = source[idx];
}
@@ -2972,21 +3090,17 @@ struct knSetComponent : public KernelBase {
return component;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel knSetComponent ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, source, target, component);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, source, target, component);
+ }
}
const Grid<Real> &source;
Grid<Vec3> &target;
@@ -3327,7 +3441,7 @@ struct knMarkIsolatedFluidCell : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, FlagGrid &flags, const int mark) const
+ inline void op(IndexInt idx, FlagGrid &flags, const int mark)
{
if (isIsolatedFluidCell(idx, flags))
flags[idx] = mark;
@@ -3342,21 +3456,17 @@ struct knMarkIsolatedFluidCell : public KernelBase {
return mark;
}
typedef int type1;
- void runMessage()
- {
- debMsg("Executing kernel knMarkIsolatedFluidCell ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, flags, mark);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, flags, mark);
+ }
}
FlagGrid &flags;
const int mark;
@@ -3454,6 +3564,156 @@ void PbRegister_copyMACData()
}
}
+template<class T> void Grid<T>::mapToOmp()
+{
+#if OPENMP && OPENMP_OFFLOAD
+ Grid<T> *grid = this;
+ // Enter-data mapping (map-type "to") of the cell array and the grid meta data.
+ // Cell count is computed in IndexInt (the type of mStrideZ) so the int product cannot wrap.
+ const IndexInt size = (IndexInt)this->getSizeX() * this->getSizeY() * this->getSizeZ();
+
+# pragma omp target enter data map(to \
+ : grid, \
+ grid->mData [0:size], \
+ grid->mSize, \
+ grid->mStrideZ, \
+ grid->mType, \
+ grid->mDx, \
+ grid->m3D)
+#else
+ // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and
+ // -DOPENMP_OFFLOAD=ON" << std::endl;
+#endif
+}
+
+template<class T> void Grid<T>::mapAllocOmp()
+{
+#if OPENMP && OPENMP_OFFLOAD
+ Grid<T> *grid = this;
+ // Enter-data mapping (map-type "alloc") of the cell array and the grid meta data.
+ // Cell count is computed in IndexInt (the type of mStrideZ) so the int product cannot wrap.
+ const IndexInt size = (IndexInt)this->getSizeX() * this->getSizeY() * this->getSizeZ();
+
+# pragma omp target enter data map(alloc \
+ : grid, \
+ grid->mData [0:size], \
+ grid->mSize, \
+ grid->mStrideZ, \
+ grid->mType, \
+ grid->mDx, \
+ grid->m3D)
+#else
+ // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and
+ // -DOPENMP_OFFLOAD=ON" << std::endl;
+#endif
+}
+
+template<class T> void Grid<T>::mapFromOmp()
+{
+#if OPENMP && OPENMP_OFFLOAD
+ Grid<T> *grid = this;
+ // Exit-data mapping (map-type "from") of the cell array and the grid meta data.
+ // Cell count is computed in IndexInt (the type of mStrideZ) so the int product cannot wrap.
+ const IndexInt size = (IndexInt)grid->getSizeX() * grid->getSizeY() * grid->getSizeZ();
+
+# pragma omp target exit data map(from \
+ : grid, \
+ grid->mData [0:size], \
+ grid->mSize, \
+ grid->mStrideZ, \
+ grid->mType, \
+ grid->mDx, \
+ grid->m3D)
+#else
+ // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and
+ // -DOPENMP_OFFLOAD=ON" << std::endl;
+#endif
+}
+
+template<class T> void Grid<T>::mapDeleteOmp()
+{
+#if OPENMP && OPENMP_OFFLOAD
+ Grid<T> *grid = this;
+ // Exit-data mapping (map-type "delete") of the cell array and the grid meta data.
+ // Cell count is computed in IndexInt (the type of mStrideZ) so the int product cannot wrap.
+ const IndexInt size = (IndexInt)grid->getSizeX() * grid->getSizeY() * grid->getSizeZ();
+
+# pragma omp target exit data map(delete \
+ : grid, \
+ grid->mData [0:size], \
+ grid->mSize, \
+ grid->mStrideZ, \
+ grid->mType, \
+ grid->mDx, \
+ grid->m3D)
+#else
+ // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and
+ // -DOPENMP_OFFLOAD=ON" << std::endl;
+#endif
+}
+
+template<class T> void Grid<T>::updateFromOmp()
+{
+#if OPENMP && OPENMP_OFFLOAD
+ Grid<T> *grid = this;
+ // "target update from" of the cell array and the grid meta data (no mapping is created here).
+ // Cell count is computed in IndexInt (the type of mStrideZ) so the int product cannot wrap.
+ const IndexInt size = (IndexInt)grid->getSizeX() * grid->getSizeY() * grid->getSizeZ();
+
+//#pragma omp target update from(grid, grid->mData[0:size], grid->mSize, grid->mStrideZ)
+//#pragma omp target update from(grid)
+# pragma omp target update from( \
+ grid->mData [0:size], grid->mSize, grid->mStrideZ, grid->mType, grid->mDx, grid->m3D)
+#else
+ // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and
+ // -DOPENMP_OFFLOAD=ON" << std::endl;
+#endif
+}
+
+template<class T> void Grid<T>::updateToOmp()
+{
+#if OPENMP && OPENMP_OFFLOAD
+ Grid<T> *grid = this;
+ // "target update to" of the cell array and the grid meta data (no mapping is created here).
+ // Cell count is computed in IndexInt (the type of mStrideZ) so the int product cannot wrap.
+ const IndexInt size = (IndexInt)grid->getSizeX() * grid->getSizeY() * grid->getSizeZ();
+
+//#pragma omp target update to(grid, grid->mData[0:size], grid->mSize, grid->mStrideZ)
+//#pragma omp target update to(grid)
+# pragma omp target update to( \
+ grid->mData [0:size], grid->mSize, grid->mStrideZ, grid->mType, grid->mDx, grid->m3D)
+#else
+ // std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and
+ // -DOPENMP_OFFLOAD=ON" << std::endl;
+#endif
+}
+
+// template<class T> void Grid<T>::mapDeclare() {
+// #if OPENMP && OPENMP_OFFLOAD
+// if (this->getType() & GridBase::TypeReal) {
+// printf("Map declare for Real\n");
+// Grid<Real> *grid = (Grid<Real> *)this;
+// #pragma omp declare mapper(Grid<Real> grid) map(to: \
+// grid.mData[0:grid.getSizeX()*grid.getSizeY()*grid.getSizeZ()], \
+// grid.mSize, grid.mStrideZ, grid.mType, grid.mDx, grid.m3D)
+// #pragma omp target enter data map(to: grid)
+// }
+// else if (this->getType() & GridBase::TypeInt) {
+// printf("Map declare for Int\n");
+// Grid<int> *grid = (Grid<int> *)this;
+// #pragma omp declare mapper(Grid<int> grid) map(to: \
+// grid.mData[0:grid.getSizeX()*grid.getSizeY()*grid.getSizeZ()], \
+// grid.mSize, grid.mStrideZ, grid.mType, grid.mDx, grid.m3D)
+// #pragma omp target enter data map(to: grid)
+// }
+// else if (this->getType() & GridBase::TypeVec3) {
+// printf("Map declare for Vec3\n");
+// Grid<Vec3> *grid = (Grid<Vec3> *)this;
+// #pragma omp declare mapper(Grid<Vec3> grid) map(to: \
+// grid.mData[0:grid.getSizeX()*grid.getSizeY()*grid.getSizeZ()], \
+// grid.mSize, grid.mStrideZ, grid.mType, grid.mDx, grid.m3D)
+// #pragma omp target enter data map(to: grid)
+// }
+
+// #else
+// //std::cout << "Error! Device offloading not enabled. Compile with -DOPENMP=ON and
+// -DOPENMP_OFFLOAD=ON" << std::endl; #endif
+// }
+
// explicit instantiation
template class Grid<int>;
template class Grid<Real>;
diff --git a/extern/mantaflow/preprocessed/grid.h b/extern/mantaflow/preprocessed/grid.h
index 2c4296e78dd..3f0bcb9dfae 100644
--- a/extern/mantaflow/preprocessed/grid.h
+++ b/extern/mantaflow/preprocessed/grid.h
@@ -383,13 +383,15 @@ class GridBase : public PbClass {
}
}
- protected:
+ // TODO (sebbas): Moved attributes to public for now
GridType mType;
Vec3i mSize;
Real mDx;
- bool m3D; // precomputed Z shift: to ensure 2D compatibility, always use this instead of sx*sy !
+ bool m3D;
+ // precomputed Z shift: to ensure 2D compatibility, always use this instead of sx*sy !
IndexInt mStrideZ;
+ protected:
public:
PbArgs _args;
}
@@ -401,7 +403,7 @@ class GridBase : public PbClass {
template<class T> class Grid : public GridBase {
public:
//! init new grid, values are set to zero
- Grid(FluidSolver *parent, bool show = true, bool sparse = false);
+ Grid(FluidSolver *parent, bool show = true, bool sparse = false, bool offload = false);
static int _W_10(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
PbClass *obj = Pb::objFromPy(_self);
@@ -416,7 +418,8 @@ template<class T> class Grid : public GridBase {
FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock);
bool show = _args.getOpt<bool>("show", 1, true, &_lock);
bool sparse = _args.getOpt<bool>("sparse", 2, false, &_lock);
- obj = new Grid(parent, show, sparse);
+ bool offload = _args.getOpt<bool>("offload", 3, false, &_lock);
+ obj = new Grid(parent, show, sparse, offload);
obj->registerObject(_self, &_args);
_args.check();
}
@@ -490,7 +493,7 @@ template<class T> class Grid : public GridBase {
}
//! set all cells to zero
- void clear();
+ void clear(bool isOmp = false);
static PyObject *_W_13(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
@@ -501,9 +504,10 @@ template<class T> class Grid : public GridBase {
PyObject *_retval = nullptr;
{
ArgLocker _lock;
+ bool isOmp = _args.getOpt<bool>("isOmp", 0, false, &_lock);
pbo->_args.copy(_args);
_retval = getPyNone();
- pbo->clear();
+ pbo->clear(isOmp);
pbo->_args.check();
}
pbFinalizePlugin(pbo->getParent(), "Grid::clear", !noTiming);
@@ -591,6 +595,11 @@ template<class T> class Grid : public GridBase {
{
return mSaveSparse;
}
+ //! query if this grid shall also be allocated on an OpenMP offload device
+ inline bool isOffload()
+ {
+ return mOffload;
+ }
//! set data
inline void set(int i, int j, int k, T &val)
@@ -616,9 +625,8 @@ template<class T> class Grid : public GridBase {
return interpol<T>(mData, mSize, mStrideZ, pos);
case 2:
return interpolCubic<T>(mData, mSize, mStrideZ, pos);
- default:
- assertMsg(false, "Unknown interpolation order " << order);
}
+ // default: assertMsg(false, "Unknown interpolation order "<<order); }
return T(0.); // should never be reached, just to prevent compiler warnings
}
@@ -628,7 +636,7 @@ template<class T> class Grid : public GridBase {
//! content...
// Grid<T>& operator=(const Grid<T>& a);
//! copy content from other grid (use this one instead of operator= !)
- Grid<T> &copyFrom(const Grid<T> &a, bool copyType = true);
+ Grid<T> &copyFrom(const Grid<T> &a, bool copyType = true, bool isOmp = false);
static PyObject *_W_14(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
@@ -641,8 +649,9 @@ template<class T> class Grid : public GridBase {
ArgLocker _lock;
const Grid<T> &a = *_args.getPtr<Grid<T>>("a", 0, &_lock);
bool copyType = _args.getOpt<bool>("copyType", 1, true, &_lock);
+ bool isOmp = _args.getOpt<bool>("isOmp", 2, false, &_lock);
pbo->_args.copy(_args);
- _retval = toPy(pbo->copyFrom(a, copyType));
+ _retval = toPy(pbo->copyFrom(a, copyType, isOmp));
pbo->_args.check();
}
pbFinalizePlugin(pbo->getParent(), "Grid::copyFrom", !noTiming);
@@ -1276,6 +1285,157 @@ template<class T> class Grid : public GridBase {
}
}
+ //! OpenMP data mapping from / to target device
+ void mapToOmp();
+ static PyObject *_W_38(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ {
+ try {
+ PbArgs _args(_linargs, _kwds);
+ Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
+ bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
+ pbPreparePlugin(pbo->getParent(), "Grid::mapToOmp", !noTiming);
+ PyObject *_retval = nullptr;
+ {
+ ArgLocker _lock;
+ pbo->_args.copy(_args);
+ _retval = getPyNone();
+ pbo->mapToOmp();
+ pbo->_args.check();
+ }
+ pbFinalizePlugin(pbo->getParent(), "Grid::mapToOmp", !noTiming);
+ return _retval;
+ }
+ catch (std::exception &e) {
+ pbSetError("Grid::mapToOmp", e.what());
+ return 0;
+ }
+ }
+
+ void mapFromOmp();
+ static PyObject *_W_39(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ {
+ try {
+ PbArgs _args(_linargs, _kwds);
+ Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
+ bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
+ pbPreparePlugin(pbo->getParent(), "Grid::mapFromOmp", !noTiming);
+ PyObject *_retval = nullptr;
+ {
+ ArgLocker _lock;
+ pbo->_args.copy(_args);
+ _retval = getPyNone();
+ pbo->mapFromOmp();
+ pbo->_args.check();
+ }
+ pbFinalizePlugin(pbo->getParent(), "Grid::mapFromOmp", !noTiming);
+ return _retval;
+ }
+ catch (std::exception &e) {
+ pbSetError("Grid::mapFromOmp", e.what());
+ return 0;
+ }
+ }
+
+ void mapAllocOmp();
+ static PyObject *_W_40(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ {
+ try {
+ PbArgs _args(_linargs, _kwds);
+ Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
+ bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
+ pbPreparePlugin(pbo->getParent(), "Grid::mapAllocOmp", !noTiming);
+ PyObject *_retval = nullptr;
+ {
+ ArgLocker _lock;
+ pbo->_args.copy(_args);
+ _retval = getPyNone();
+ pbo->mapAllocOmp();
+ pbo->_args.check();
+ }
+ pbFinalizePlugin(pbo->getParent(), "Grid::mapAllocOmp", !noTiming);
+ return _retval;
+ }
+ catch (std::exception &e) {
+ pbSetError("Grid::mapAllocOmp", e.what());
+ return 0;
+ }
+ }
+
+ void mapDeleteOmp();
+ static PyObject *_W_41(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ {
+ try {
+ PbArgs _args(_linargs, _kwds);
+ Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
+ bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
+ pbPreparePlugin(pbo->getParent(), "Grid::mapDeleteOmp", !noTiming);
+ PyObject *_retval = nullptr;
+ {
+ ArgLocker _lock;
+ pbo->_args.copy(_args);
+ _retval = getPyNone();
+ pbo->mapDeleteOmp();
+ pbo->_args.check();
+ }
+ pbFinalizePlugin(pbo->getParent(), "Grid::mapDeleteOmp", !noTiming);
+ return _retval;
+ }
+ catch (std::exception &e) {
+ pbSetError("Grid::mapDeleteOmp", e.what());
+ return 0;
+ }
+ }
+
+ void updateToOmp();
+ static PyObject *_W_42(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ {
+ try {
+ PbArgs _args(_linargs, _kwds);
+ Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
+ bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
+ pbPreparePlugin(pbo->getParent(), "Grid::updateToOmp", !noTiming);
+ PyObject *_retval = nullptr;
+ {
+ ArgLocker _lock;
+ pbo->_args.copy(_args);
+ _retval = getPyNone();
+ pbo->updateToOmp();
+ pbo->_args.check();
+ }
+ pbFinalizePlugin(pbo->getParent(), "Grid::updateToOmp", !noTiming);
+ return _retval;
+ }
+ catch (std::exception &e) {
+ pbSetError("Grid::updateToOmp", e.what());
+ return 0;
+ }
+ }
+
+ void updateFromOmp();
+ static PyObject *_W_43(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ {
+ try {
+ PbArgs _args(_linargs, _kwds);
+ Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
+ bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
+ pbPreparePlugin(pbo->getParent(), "Grid::updateFromOmp", !noTiming);
+ PyObject *_retval = nullptr;
+ {
+ ArgLocker _lock;
+ pbo->_args.copy(_args);
+ _retval = getPyNone();
+ pbo->updateFromOmp();
+ pbo->_args.check();
+ }
+ pbFinalizePlugin(pbo->getParent(), "Grid::updateFromOmp", !noTiming);
+ return _retval;
+ }
+ catch (std::exception &e) {
+ pbSetError("Grid::updateFromOmp", e.what());
+ return 0;
+ }
+ }
+
// c++ only operators
template<class S> Grid<T> &operator+=(const Grid<S> &a);
template<class S> Grid<T> &operator+=(const S &a);
@@ -1299,10 +1459,12 @@ template<class T> class Grid : public GridBase {
return mData[index(i, j, k)];
}
- protected:
+ // TODO (sebbas): Moved attributes to public for now
T *mData;
+ protected:
bool mExternalData; // True if mData is managed outside of the Fluidsolver
bool mSaveSparse; // True if this grid may be cached in a sparse structure
+ bool mOffload; // True if this grid shall be allocated on an OpenMP offload device too
public:
PbArgs _args;
}
@@ -1314,12 +1476,12 @@ template<class T> class Grid : public GridBase {
//! Special function for staggered grids
class MACGrid : public Grid<Vec3> {
public:
- MACGrid(FluidSolver *parent, bool show = true, bool sparse = false)
- : Grid<Vec3>(parent, show, sparse)
+ MACGrid(FluidSolver *parent, bool show = true, bool sparse = false, bool offload = false)
+ : Grid<Vec3>(parent, show, sparse, offload)
{
mType = (GridType)(TypeMAC | TypeVec3);
}
- static int _W_38(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ static int _W_44(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
PbClass *obj = Pb::objFromPy(_self);
if (obj)
@@ -1333,7 +1495,8 @@ class MACGrid : public Grid<Vec3> {
FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock);
bool show = _args.getOpt<bool>("show", 1, true, &_lock);
bool sparse = _args.getOpt<bool>("sparse", 2, false, &_lock);
- obj = new MACGrid(parent, show, sparse);
+ bool offload = _args.getOpt<bool>("offload", 3, false, &_lock);
+ obj = new MACGrid(parent, show, sparse, offload);
obj->registerObject(_self, &_args);
_args.check();
}
@@ -1392,17 +1555,16 @@ class MACGrid : public Grid<Vec3> {
case 1:
return interpolComponent<comp>(mData, mSize, mStrideZ, pos);
case 2:
- return interpolCubicMAC(mData, mSize, mStrideZ, pos)[comp]; // warning - not yet optimized
- default:
- assertMsg(false, "Unknown interpolation order " << order);
- }
+ return interpolCubicMAC(mData, mSize, mStrideZ, pos)[comp];
+ } // warning - not yet optimized
+ // default: assertMsg(false, "Unknown interpolation order "<<order); }
return 0.; // should never be reached, just to prevent compiler warnings
}
//! set all boundary cells of a MAC grid to certain value (Dirchlet). Respects staggered grid
//! locations optionally, only set normal components
void setBoundMAC(Vec3 value, int boundaryWidth, bool normalOnly = false);
- static PyObject *_W_39(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ static PyObject *_W_45(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
@@ -1439,12 +1601,16 @@ class MACGrid : public Grid<Vec3> {
//! Special functions for FlagGrid
class FlagGrid : public Grid<int> {
public:
- FlagGrid(FluidSolver *parent, int dim = 3, bool show = true, bool sparse = false)
- : Grid<int>(parent, show, sparse)
+ FlagGrid(FluidSolver *parent,
+ int dim = 3,
+ bool show = true,
+ bool sparse = false,
+ bool offload = false)
+ : Grid<int>(parent, show, sparse, offload)
{
mType = (GridType)(TypeFlags | TypeInt);
}
- static int _W_40(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ static int _W_46(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
PbClass *obj = Pb::objFromPy(_self);
if (obj)
@@ -1459,7 +1625,8 @@ class FlagGrid : public Grid<int> {
int dim = _args.getOpt<int>("dim", 1, 3, &_lock);
bool show = _args.getOpt<bool>("show", 2, true, &_lock);
bool sparse = _args.getOpt<bool>("sparse", 3, false, &_lock);
- obj = new FlagGrid(parent, dim, show, sparse);
+ bool offload = _args.getOpt<bool>("offload", 4, false, &_lock);
+ obj = new FlagGrid(parent, dim, show, sparse, offload);
obj->registerObject(_self, &_args);
_args.check();
}
@@ -1625,7 +1792,7 @@ class FlagGrid : public Grid<int> {
const std::string &inflow = " ",
const std::string &outflow = " ",
Grid<Real> *phiWalls = 0x00);
- static PyObject *_W_41(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ static PyObject *_W_47(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
@@ -1659,7 +1826,7 @@ class FlagGrid : public Grid<int> {
//! set fluid flags inside levelset (liquids)
void updateFromLevelset(LevelsetGrid &levelset);
- static PyObject *_W_42(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ static PyObject *_W_48(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
@@ -1686,7 +1853,7 @@ class FlagGrid : public Grid<int> {
//! set all cells (except obs/in/outflow) to type (fluid by default)
void fillGrid(int type = TypeFluid);
- static PyObject *_W_43(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ static PyObject *_W_49(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
@@ -1715,7 +1882,7 @@ class FlagGrid : public Grid<int> {
//! warning for large grids! only regular int returned (due to python interface)
//! optionally creates mask in RealGrid (1 where flag matches, 0 otherwise)
int countCells(int flag, int bnd = 0, Grid<Real> *mask = nullptr);
- static PyObject *_W_44(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+ static PyObject *_W_50(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
@@ -1868,7 +2035,7 @@ template<class T, class S> struct gridAdd : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const
+ inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other)
{
me[idx] += other[idx];
}
@@ -1882,21 +2049,17 @@ template<class T, class S> struct gridAdd : public KernelBase {
return other;
}
typedef Grid<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel gridAdd ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid<T> &me;
const Grid<S> &other;
@@ -1907,7 +2070,7 @@ template<class T, class S> struct gridSub : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const
+ inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other)
{
me[idx] -= other[idx];
}
@@ -1921,21 +2084,17 @@ template<class T, class S> struct gridSub : public KernelBase {
return other;
}
typedef Grid<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel gridSub ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid<T> &me;
const Grid<S> &other;
@@ -1946,7 +2105,7 @@ template<class T, class S> struct gridMult : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const
+ inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other)
{
me[idx] *= other[idx];
}
@@ -1960,21 +2119,17 @@ template<class T, class S> struct gridMult : public KernelBase {
return other;
}
typedef Grid<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel gridMult ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid<T> &me;
const Grid<S> &other;
@@ -1985,7 +2140,7 @@ template<class T, class S> struct gridDiv : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const
+ inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other)
{
me[idx] /= other[idx];
}
@@ -1999,21 +2154,17 @@ template<class T, class S> struct gridDiv : public KernelBase {
return other;
}
typedef Grid<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel gridDiv ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid<T> &me;
const Grid<S> &other;
@@ -2024,7 +2175,7 @@ template<class T, class S> struct gridAddScalar : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, const S &other) const
+ inline void op(IndexInt idx, Grid<T> &me, const S &other)
{
me[idx] += other;
}
@@ -2038,21 +2189,17 @@ template<class T, class S> struct gridAddScalar : public KernelBase {
return other;
}
typedef S type1;
- void runMessage()
- {
- debMsg("Executing kernel gridAddScalar ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid<T> &me;
const S &other;
@@ -2063,7 +2210,7 @@ template<class T, class S> struct gridMultScalar : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, const S &other) const
+ inline void op(IndexInt idx, Grid<T> &me, const S &other)
{
me[idx] *= other;
}
@@ -2077,25 +2224,22 @@ template<class T, class S> struct gridMultScalar : public KernelBase {
return other;
}
typedef S type1;
- void runMessage()
- {
- debMsg("Executing kernel gridMultScalar ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid<T> &me;
const S &other;
};
+
template<class T, class S> struct gridScaledAdd : public KernelBase {
gridScaledAdd(Grid<T> &me, const Grid<T> &other, const S &factor)
: KernelBase(&me, 0), me(me), other(other), factor(factor)
@@ -2103,8 +2247,9 @@ template<class T, class S> struct gridScaledAdd : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &me, const Grid<T> &other, const S &factor) const
+ inline void op(int i, int j, int k, Grid<T> &me, const Grid<T> &other, const S &factor)
{
+ const IndexInt idx = me.index(i, j, k);
me[idx] += factor * other[idx];
}
inline Grid<T> &getArg0()
@@ -2122,21 +2267,35 @@ template<class T, class S> struct gridScaledAdd : public KernelBase {
return factor;
}
typedef S type2;
- void runMessage()
- {
- debMsg("Executing kernel gridScaledAdd ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other, factor);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ // Calls op(i, j, k, ...) for every cell in [0,maxX) x [0,maxY) x [minZ,maxZ) inside an
+ // OpenMP target region; when maxZ <= 1 only the k = 0 slice is visited.
+ const int _maxX = maxX;
+ const int _maxY = maxY;
+ if (maxZ > 1) {
+ Grid<T> &me = getArg0();
+ const Grid<T> &other = getArg1();
+ const S &factor = getArg2();
+ // The loop directive must be followed directly by the collapsed for-nest (no '{' block).
+#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, me, other, factor);
+ }
+ else {
+ const int k = 0;
+ Grid<T> &me = getArg0();
+ const Grid<T> &other = getArg1();
+ const S &factor = getArg2();
+ // The loop directive must be followed directly by the collapsed for-nest (no '{' block).
+#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, me, other, factor);
+ }
}
Grid<T> &me;
const Grid<T> &other;
@@ -2149,7 +2308,7 @@ template<class T> struct gridSetConst : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<T> &grid, T value) const
+ inline void op(IndexInt idx, Grid<T> &grid, T value)
{
grid[idx] = value;
}
@@ -2163,21 +2322,17 @@ template<class T> struct gridSetConst : public KernelBase {
return value;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel gridSetConst ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, grid, value);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, grid, value);
+ }
}
Grid<T> &grid;
T value;
@@ -2279,7 +2434,7 @@ template<class S> struct knInterpolateGridTempl : public KernelBase {
const Grid<S> &source,
const Vec3 &sourceFactor,
Vec3 offset,
- int orderSpace = 1) const
+ int orderSpace = 1)
{
Vec3 pos = Vec3(i, j, k) * sourceFactor + offset;
if (!source.is3D())
@@ -2311,37 +2466,35 @@ template<class S> struct knInterpolateGridTempl : public KernelBase {
return orderSpace;
}
typedef int type4;
- void runMessage()
- {
- debMsg("Executing kernel knInterpolateGridTempl ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, target, source, sourceFactor, offset, orderSpace);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, target, source, sourceFactor, offset, orderSpace);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, target, source, sourceFactor, offset, orderSpace);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, target, source, sourceFactor, offset, orderSpace);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<S> &target;
const Grid<S> &source;
const Vec3 &sourceFactor;
diff --git a/extern/mantaflow/preprocessed/grid.h.reg.cpp b/extern/mantaflow/preprocessed/grid.h.reg.cpp
index 2c95c002dad..1f3a80903a4 100644
--- a/extern/mantaflow/preprocessed/grid.h.reg.cpp
+++ b/extern/mantaflow/preprocessed/grid.h.reg.cpp
@@ -8,11 +8,11 @@ namespace Manta {
#ifdef _C_FlagGrid
static const Pb::Register _R_26("FlagGrid", "FlagGrid", "Grid<int>");
template<> const char *Namify<FlagGrid>::S = "FlagGrid";
-static const Pb::Register _R_27("FlagGrid", "FlagGrid", FlagGrid::_W_40);
-static const Pb::Register _R_28("FlagGrid", "initDomain", FlagGrid::_W_41);
-static const Pb::Register _R_29("FlagGrid", "updateFromLevelset", FlagGrid::_W_42);
-static const Pb::Register _R_30("FlagGrid", "fillGrid", FlagGrid::_W_43);
-static const Pb::Register _R_31("FlagGrid", "countCells", FlagGrid::_W_44);
+static const Pb::Register _R_27("FlagGrid", "FlagGrid", FlagGrid::_W_46);
+static const Pb::Register _R_28("FlagGrid", "initDomain", FlagGrid::_W_47);
+static const Pb::Register _R_29("FlagGrid", "updateFromLevelset", FlagGrid::_W_48);
+static const Pb::Register _R_30("FlagGrid", "fillGrid", FlagGrid::_W_49);
+static const Pb::Register _R_31("FlagGrid", "countCells", FlagGrid::_W_50);
#endif
#ifdef _C_Grid
static const Pb::Register _R_32("Grid<int>", "Grid<int>", "GridBase");
@@ -45,86 +45,104 @@ static const Pb::Register _R_57("Grid<int>", "setBound", Grid<int>::_W_34);
static const Pb::Register _R_58("Grid<int>", "setBoundNeumann", Grid<int>::_W_35);
static const Pb::Register _R_59("Grid<int>", "getDataPointer", Grid<int>::_W_36);
static const Pb::Register _R_60("Grid<int>", "printGrid", Grid<int>::_W_37);
-static const Pb::Register _R_61("Grid<Real>", "Grid<Real>", "GridBase");
+static const Pb::Register _R_61("Grid<int>", "mapToOmp", Grid<int>::_W_38);
+static const Pb::Register _R_62("Grid<int>", "mapFromOmp", Grid<int>::_W_39);
+static const Pb::Register _R_63("Grid<int>", "mapAllocOmp", Grid<int>::_W_40);
+static const Pb::Register _R_64("Grid<int>", "mapDeleteOmp", Grid<int>::_W_41);
+static const Pb::Register _R_65("Grid<int>", "updateToOmp", Grid<int>::_W_42);
+static const Pb::Register _R_66("Grid<int>", "updateFromOmp", Grid<int>::_W_43);
+static const Pb::Register _R_67("Grid<Real>", "Grid<Real>", "GridBase");
template<> const char *Namify<Grid<Real>>::S = "Grid<Real>";
-static const Pb::Register _R_62("Grid<Real>", "Grid", Grid<Real>::_W_10);
-static const Pb::Register _R_63("Grid<Real>", "save", Grid<Real>::_W_11);
-static const Pb::Register _R_64("Grid<Real>", "load", Grid<Real>::_W_12);
-static const Pb::Register _R_65("Grid<Real>", "clear", Grid<Real>::_W_13);
-static const Pb::Register _R_66("Grid<Real>", "copyFrom", Grid<Real>::_W_14);
-static const Pb::Register _R_67("Grid<Real>", "getGridType", Grid<Real>::_W_15);
-static const Pb::Register _R_68("Grid<Real>", "add", Grid<Real>::_W_16);
-static const Pb::Register _R_69("Grid<Real>", "sub", Grid<Real>::_W_17);
-static const Pb::Register _R_70("Grid<Real>", "setConst", Grid<Real>::_W_18);
-static const Pb::Register _R_71("Grid<Real>", "addConst", Grid<Real>::_W_19);
-static const Pb::Register _R_72("Grid<Real>", "addScaled", Grid<Real>::_W_20);
-static const Pb::Register _R_73("Grid<Real>", "mult", Grid<Real>::_W_21);
-static const Pb::Register _R_74("Grid<Real>", "multConst", Grid<Real>::_W_22);
-static const Pb::Register _R_75("Grid<Real>", "safeDivide", Grid<Real>::_W_23);
-static const Pb::Register _R_76("Grid<Real>", "clamp", Grid<Real>::_W_24);
-static const Pb::Register _R_77("Grid<Real>", "stomp", Grid<Real>::_W_25);
-static const Pb::Register _R_78("Grid<Real>", "permuteAxes", Grid<Real>::_W_26);
-static const Pb::Register _R_79("Grid<Real>", "permuteAxesCopyToGrid", Grid<Real>::_W_27);
-static const Pb::Register _R_80("Grid<Real>", "join", Grid<Real>::_W_28);
-static const Pb::Register _R_81("Grid<Real>", "getMaxAbs", Grid<Real>::_W_29);
-static const Pb::Register _R_82("Grid<Real>", "getMax", Grid<Real>::_W_30);
-static const Pb::Register _R_83("Grid<Real>", "getMin", Grid<Real>::_W_31);
-static const Pb::Register _R_84("Grid<Real>", "getL1", Grid<Real>::_W_32);
-static const Pb::Register _R_85("Grid<Real>", "getL2", Grid<Real>::_W_33);
-static const Pb::Register _R_86("Grid<Real>", "setBound", Grid<Real>::_W_34);
-static const Pb::Register _R_87("Grid<Real>", "setBoundNeumann", Grid<Real>::_W_35);
-static const Pb::Register _R_88("Grid<Real>", "getDataPointer", Grid<Real>::_W_36);
-static const Pb::Register _R_89("Grid<Real>", "printGrid", Grid<Real>::_W_37);
-static const Pb::Register _R_90("Grid<Vec3>", "Grid<Vec3>", "GridBase");
+static const Pb::Register _R_68("Grid<Real>", "Grid", Grid<Real>::_W_10);
+static const Pb::Register _R_69("Grid<Real>", "save", Grid<Real>::_W_11);
+static const Pb::Register _R_70("Grid<Real>", "load", Grid<Real>::_W_12);
+static const Pb::Register _R_71("Grid<Real>", "clear", Grid<Real>::_W_13);
+static const Pb::Register _R_72("Grid<Real>", "copyFrom", Grid<Real>::_W_14);
+static const Pb::Register _R_73("Grid<Real>", "getGridType", Grid<Real>::_W_15);
+static const Pb::Register _R_74("Grid<Real>", "add", Grid<Real>::_W_16);
+static const Pb::Register _R_75("Grid<Real>", "sub", Grid<Real>::_W_17);
+static const Pb::Register _R_76("Grid<Real>", "setConst", Grid<Real>::_W_18);
+static const Pb::Register _R_77("Grid<Real>", "addConst", Grid<Real>::_W_19);
+static const Pb::Register _R_78("Grid<Real>", "addScaled", Grid<Real>::_W_20);
+static const Pb::Register _R_79("Grid<Real>", "mult", Grid<Real>::_W_21);
+static const Pb::Register _R_80("Grid<Real>", "multConst", Grid<Real>::_W_22);
+static const Pb::Register _R_81("Grid<Real>", "safeDivide", Grid<Real>::_W_23);
+static const Pb::Register _R_82("Grid<Real>", "clamp", Grid<Real>::_W_24);
+static const Pb::Register _R_83("Grid<Real>", "stomp", Grid<Real>::_W_25);
+static const Pb::Register _R_84("Grid<Real>", "permuteAxes", Grid<Real>::_W_26);
+static const Pb::Register _R_85("Grid<Real>", "permuteAxesCopyToGrid", Grid<Real>::_W_27);
+static const Pb::Register _R_86("Grid<Real>", "join", Grid<Real>::_W_28);
+static const Pb::Register _R_87("Grid<Real>", "getMaxAbs", Grid<Real>::_W_29);
+static const Pb::Register _R_88("Grid<Real>", "getMax", Grid<Real>::_W_30);
+static const Pb::Register _R_89("Grid<Real>", "getMin", Grid<Real>::_W_31);
+static const Pb::Register _R_90("Grid<Real>", "getL1", Grid<Real>::_W_32);
+static const Pb::Register _R_91("Grid<Real>", "getL2", Grid<Real>::_W_33);
+static const Pb::Register _R_92("Grid<Real>", "setBound", Grid<Real>::_W_34);
+static const Pb::Register _R_93("Grid<Real>", "setBoundNeumann", Grid<Real>::_W_35);
+static const Pb::Register _R_94("Grid<Real>", "getDataPointer", Grid<Real>::_W_36);
+static const Pb::Register _R_95("Grid<Real>", "printGrid", Grid<Real>::_W_37);
+static const Pb::Register _R_96("Grid<Real>", "mapToOmp", Grid<Real>::_W_38);
+static const Pb::Register _R_97("Grid<Real>", "mapFromOmp", Grid<Real>::_W_39);
+static const Pb::Register _R_98("Grid<Real>", "mapAllocOmp", Grid<Real>::_W_40);
+static const Pb::Register _R_99("Grid<Real>", "mapDeleteOmp", Grid<Real>::_W_41);
+static const Pb::Register _R_100("Grid<Real>", "updateToOmp", Grid<Real>::_W_42);
+static const Pb::Register _R_101("Grid<Real>", "updateFromOmp", Grid<Real>::_W_43);
+static const Pb::Register _R_102("Grid<Vec3>", "Grid<Vec3>", "GridBase");
template<> const char *Namify<Grid<Vec3>>::S = "Grid<Vec3>";
-static const Pb::Register _R_91("Grid<Vec3>", "Grid", Grid<Vec3>::_W_10);
-static const Pb::Register _R_92("Grid<Vec3>", "save", Grid<Vec3>::_W_11);
-static const Pb::Register _R_93("Grid<Vec3>", "load", Grid<Vec3>::_W_12);
-static const Pb::Register _R_94("Grid<Vec3>", "clear", Grid<Vec3>::_W_13);
-static const Pb::Register _R_95("Grid<Vec3>", "copyFrom", Grid<Vec3>::_W_14);
-static const Pb::Register _R_96("Grid<Vec3>", "getGridType", Grid<Vec3>::_W_15);
-static const Pb::Register _R_97("Grid<Vec3>", "add", Grid<Vec3>::_W_16);
-static const Pb::Register _R_98("Grid<Vec3>", "sub", Grid<Vec3>::_W_17);
-static const Pb::Register _R_99("Grid<Vec3>", "setConst", Grid<Vec3>::_W_18);
-static const Pb::Register _R_100("Grid<Vec3>", "addConst", Grid<Vec3>::_W_19);
-static const Pb::Register _R_101("Grid<Vec3>", "addScaled", Grid<Vec3>::_W_20);
-static const Pb::Register _R_102("Grid<Vec3>", "mult", Grid<Vec3>::_W_21);
-static const Pb::Register _R_103("Grid<Vec3>", "multConst", Grid<Vec3>::_W_22);
-static const Pb::Register _R_104("Grid<Vec3>", "safeDivide", Grid<Vec3>::_W_23);
-static const Pb::Register _R_105("Grid<Vec3>", "clamp", Grid<Vec3>::_W_24);
-static const Pb::Register _R_106("Grid<Vec3>", "stomp", Grid<Vec3>::_W_25);
-static const Pb::Register _R_107("Grid<Vec3>", "permuteAxes", Grid<Vec3>::_W_26);
-static const Pb::Register _R_108("Grid<Vec3>", "permuteAxesCopyToGrid", Grid<Vec3>::_W_27);
-static const Pb::Register _R_109("Grid<Vec3>", "join", Grid<Vec3>::_W_28);
-static const Pb::Register _R_110("Grid<Vec3>", "getMaxAbs", Grid<Vec3>::_W_29);
-static const Pb::Register _R_111("Grid<Vec3>", "getMax", Grid<Vec3>::_W_30);
-static const Pb::Register _R_112("Grid<Vec3>", "getMin", Grid<Vec3>::_W_31);
-static const Pb::Register _R_113("Grid<Vec3>", "getL1", Grid<Vec3>::_W_32);
-static const Pb::Register _R_114("Grid<Vec3>", "getL2", Grid<Vec3>::_W_33);
-static const Pb::Register _R_115("Grid<Vec3>", "setBound", Grid<Vec3>::_W_34);
-static const Pb::Register _R_116("Grid<Vec3>", "setBoundNeumann", Grid<Vec3>::_W_35);
-static const Pb::Register _R_117("Grid<Vec3>", "getDataPointer", Grid<Vec3>::_W_36);
-static const Pb::Register _R_118("Grid<Vec3>", "printGrid", Grid<Vec3>::_W_37);
+static const Pb::Register _R_103("Grid<Vec3>", "Grid", Grid<Vec3>::_W_10);
+static const Pb::Register _R_104("Grid<Vec3>", "save", Grid<Vec3>::_W_11);
+static const Pb::Register _R_105("Grid<Vec3>", "load", Grid<Vec3>::_W_12);
+static const Pb::Register _R_106("Grid<Vec3>", "clear", Grid<Vec3>::_W_13);
+static const Pb::Register _R_107("Grid<Vec3>", "copyFrom", Grid<Vec3>::_W_14);
+static const Pb::Register _R_108("Grid<Vec3>", "getGridType", Grid<Vec3>::_W_15);
+static const Pb::Register _R_109("Grid<Vec3>", "add", Grid<Vec3>::_W_16);
+static const Pb::Register _R_110("Grid<Vec3>", "sub", Grid<Vec3>::_W_17);
+static const Pb::Register _R_111("Grid<Vec3>", "setConst", Grid<Vec3>::_W_18);
+static const Pb::Register _R_112("Grid<Vec3>", "addConst", Grid<Vec3>::_W_19);
+static const Pb::Register _R_113("Grid<Vec3>", "addScaled", Grid<Vec3>::_W_20);
+static const Pb::Register _R_114("Grid<Vec3>", "mult", Grid<Vec3>::_W_21);
+static const Pb::Register _R_115("Grid<Vec3>", "multConst", Grid<Vec3>::_W_22);
+static const Pb::Register _R_116("Grid<Vec3>", "safeDivide", Grid<Vec3>::_W_23);
+static const Pb::Register _R_117("Grid<Vec3>", "clamp", Grid<Vec3>::_W_24);
+static const Pb::Register _R_118("Grid<Vec3>", "stomp", Grid<Vec3>::_W_25);
+static const Pb::Register _R_119("Grid<Vec3>", "permuteAxes", Grid<Vec3>::_W_26);
+static const Pb::Register _R_120("Grid<Vec3>", "permuteAxesCopyToGrid", Grid<Vec3>::_W_27);
+static const Pb::Register _R_121("Grid<Vec3>", "join", Grid<Vec3>::_W_28);
+static const Pb::Register _R_122("Grid<Vec3>", "getMaxAbs", Grid<Vec3>::_W_29);
+static const Pb::Register _R_123("Grid<Vec3>", "getMax", Grid<Vec3>::_W_30);
+static const Pb::Register _R_124("Grid<Vec3>", "getMin", Grid<Vec3>::_W_31);
+static const Pb::Register _R_125("Grid<Vec3>", "getL1", Grid<Vec3>::_W_32);
+static const Pb::Register _R_126("Grid<Vec3>", "getL2", Grid<Vec3>::_W_33);
+static const Pb::Register _R_127("Grid<Vec3>", "setBound", Grid<Vec3>::_W_34);
+static const Pb::Register _R_128("Grid<Vec3>", "setBoundNeumann", Grid<Vec3>::_W_35);
+static const Pb::Register _R_129("Grid<Vec3>", "getDataPointer", Grid<Vec3>::_W_36);
+static const Pb::Register _R_130("Grid<Vec3>", "printGrid", Grid<Vec3>::_W_37);
+static const Pb::Register _R_131("Grid<Vec3>", "mapToOmp", Grid<Vec3>::_W_38);
+static const Pb::Register _R_132("Grid<Vec3>", "mapFromOmp", Grid<Vec3>::_W_39);
+static const Pb::Register _R_133("Grid<Vec3>", "mapAllocOmp", Grid<Vec3>::_W_40);
+static const Pb::Register _R_134("Grid<Vec3>", "mapDeleteOmp", Grid<Vec3>::_W_41);
+static const Pb::Register _R_135("Grid<Vec3>", "updateToOmp", Grid<Vec3>::_W_42);
+static const Pb::Register _R_136("Grid<Vec3>", "updateFromOmp", Grid<Vec3>::_W_43);
#endif
#ifdef _C_GridBase
-static const Pb::Register _R_119("GridBase", "GridBase", "PbClass");
+static const Pb::Register _R_137("GridBase", "GridBase", "PbClass");
template<> const char *Namify<GridBase>::S = "GridBase";
-static const Pb::Register _R_120("GridBase", "GridBase", GridBase::_W_0);
-static const Pb::Register _R_121("GridBase", "getSizeX", GridBase::_W_1);
-static const Pb::Register _R_122("GridBase", "getSizeY", GridBase::_W_2);
-static const Pb::Register _R_123("GridBase", "getSizeZ", GridBase::_W_3);
-static const Pb::Register _R_124("GridBase", "getSize", GridBase::_W_4);
-static const Pb::Register _R_125("GridBase", "is3D", GridBase::_W_5);
-static const Pb::Register _R_126("GridBase", "is4D", GridBase::_W_6);
-static const Pb::Register _R_127("GridBase", "getSizeT", GridBase::_W_7);
-static const Pb::Register _R_128("GridBase", "getStrideT", GridBase::_W_8);
-static const Pb::Register _R_129("GridBase", "setName", GridBase::_W_9);
+static const Pb::Register _R_138("GridBase", "GridBase", GridBase::_W_0);
+static const Pb::Register _R_139("GridBase", "getSizeX", GridBase::_W_1);
+static const Pb::Register _R_140("GridBase", "getSizeY", GridBase::_W_2);
+static const Pb::Register _R_141("GridBase", "getSizeZ", GridBase::_W_3);
+static const Pb::Register _R_142("GridBase", "getSize", GridBase::_W_4);
+static const Pb::Register _R_143("GridBase", "is3D", GridBase::_W_5);
+static const Pb::Register _R_144("GridBase", "is4D", GridBase::_W_6);
+static const Pb::Register _R_145("GridBase", "getSizeT", GridBase::_W_7);
+static const Pb::Register _R_146("GridBase", "getStrideT", GridBase::_W_8);
+static const Pb::Register _R_147("GridBase", "setName", GridBase::_W_9);
#endif
#ifdef _C_MACGrid
-static const Pb::Register _R_130("MACGrid", "MACGrid", "Grid<Vec3>");
+static const Pb::Register _R_148("MACGrid", "MACGrid", "Grid<Vec3>");
template<> const char *Namify<MACGrid>::S = "MACGrid";
-static const Pb::Register _R_131("MACGrid", "MACGrid", MACGrid::_W_38);
-static const Pb::Register _R_132("MACGrid", "setBoundMAC", MACGrid::_W_39);
+static const Pb::Register _R_149("MACGrid", "MACGrid", MACGrid::_W_44);
+static const Pb::Register _R_150("MACGrid", "setBoundMAC", MACGrid::_W_45);
#endif
static const Pb::Register _R_7("GridType_TypeNone", 0);
static const Pb::Register _R_8("GridType_TypeReal", 1);
@@ -255,6 +273,24 @@ void PbRegister_file_7()
KEEP_UNUSED(_R_130);
KEEP_UNUSED(_R_131);
KEEP_UNUSED(_R_132);
+ KEEP_UNUSED(_R_133);
+ KEEP_UNUSED(_R_134);
+ KEEP_UNUSED(_R_135);
+ KEEP_UNUSED(_R_136);
+ KEEP_UNUSED(_R_137);
+ KEEP_UNUSED(_R_138);
+ KEEP_UNUSED(_R_139);
+ KEEP_UNUSED(_R_140);
+ KEEP_UNUSED(_R_141);
+ KEEP_UNUSED(_R_142);
+ KEEP_UNUSED(_R_143);
+ KEEP_UNUSED(_R_144);
+ KEEP_UNUSED(_R_145);
+ KEEP_UNUSED(_R_146);
+ KEEP_UNUSED(_R_147);
+ KEEP_UNUSED(_R_148);
+ KEEP_UNUSED(_R_149);
+ KEEP_UNUSED(_R_150);
}
}
} // namespace Manta \ No newline at end of file
diff --git a/extern/mantaflow/preprocessed/grid4d.cpp b/extern/mantaflow/preprocessed/grid4d.cpp
index 72bd3a6fe50..a896b003736 100644
--- a/extern/mantaflow/preprocessed/grid4d.cpp
+++ b/extern/mantaflow/preprocessed/grid4d.cpp
@@ -179,29 +179,21 @@ struct kn4dMinReal : public KernelBase {
return val;
}
typedef Grid4d<Real> type0;
- void runMessage()
- {
- debMsg("Executing kernel kn4dMinReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, minVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- kn4dMinReal(kn4dMinReal &o, tbb::split)
- : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max())
- {
- }
- void join(const kn4dMinReal &o)
- {
- minVal = min(minVal, o.minVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real minVal = std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, minVal);
+#pragma omp critical
+ {
+ this->minVal = min(minVal, this->minVal);
+ }
+ }
}
Grid4d<Real> &val;
Real minVal;
@@ -234,29 +226,21 @@ struct kn4dMaxReal : public KernelBase {
return val;
}
typedef Grid4d<Real> type0;
- void runMessage()
- {
- debMsg("Executing kernel kn4dMaxReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, maxVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- kn4dMaxReal(kn4dMaxReal &o, tbb::split)
- : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max())
- {
- }
- void join(const kn4dMaxReal &o)
- {
- maxVal = max(maxVal, o.maxVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real maxVal = -std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, maxVal);
+#pragma omp critical
+ {
+ this->maxVal = max(maxVal, this->maxVal);
+ }
+ }
}
Grid4d<Real> &val;
Real maxVal;
@@ -289,29 +273,21 @@ struct kn4dMinInt : public KernelBase {
return val;
}
typedef Grid4d<int> type0;
- void runMessage()
- {
- debMsg("Executing kernel kn4dMinInt ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, minVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- kn4dMinInt(kn4dMinInt &o, tbb::split)
- : KernelBase(o), val(o.val), minVal(std::numeric_limits<int>::max())
- {
- }
- void join(const kn4dMinInt &o)
- {
- minVal = min(minVal, o.minVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ int minVal = std::numeric_limits<int>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, minVal);
+#pragma omp critical
+ {
+ this->minVal = min(minVal, this->minVal);
+ }
+ }
}
Grid4d<int> &val;
int minVal;
@@ -344,29 +320,21 @@ struct kn4dMaxInt : public KernelBase {
return val;
}
typedef Grid4d<int> type0;
- void runMessage()
- {
- debMsg("Executing kernel kn4dMaxInt ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, maxVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- kn4dMaxInt(kn4dMaxInt &o, tbb::split)
- : KernelBase(o), val(o.val), maxVal(std::numeric_limits<int>::min())
- {
- }
- void join(const kn4dMaxInt &o)
- {
- maxVal = max(maxVal, o.maxVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ int maxVal = std::numeric_limits<int>::min();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, maxVal);
+#pragma omp critical
+ {
+ this->maxVal = max(maxVal, this->maxVal);
+ }
+ }
}
Grid4d<int> &val;
int maxVal;
@@ -400,29 +368,21 @@ template<class VEC> struct kn4dMinVec : public KernelBase {
return val;
}
typedef Grid4d<VEC> type0;
- void runMessage()
- {
- debMsg("Executing kernel kn4dMinVec ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, minVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- kn4dMinVec(kn4dMinVec &o, tbb::split)
- : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max())
- {
- }
- void join(const kn4dMinVec &o)
- {
- minVal = min(minVal, o.minVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real minVal = std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, minVal);
+#pragma omp critical
+ {
+ this->minVal = min(minVal, this->minVal);
+ }
+ }
}
Grid4d<VEC> &val;
Real minVal;
@@ -456,29 +416,21 @@ template<class VEC> struct kn4dMaxVec : public KernelBase {
return val;
}
typedef Grid4d<VEC> type0;
- void runMessage()
- {
- debMsg("Executing kernel kn4dMaxVec ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, maxVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- kn4dMaxVec(kn4dMaxVec &o, tbb::split)
- : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max())
- {
- }
- void join(const kn4dMaxVec &o)
- {
- maxVal = max(maxVal, o.maxVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real maxVal = -std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, maxVal);
+#pragma omp critical
+ {
+ this->maxVal = max(maxVal, this->maxVal);
+ }
+ }
}
Grid4d<VEC> &val;
Real maxVal;
@@ -507,7 +459,7 @@ template<class T> struct kn4dSetConstReal : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, T val) const
+ inline void op(IndexInt idx, Grid4d<T> &me, T val)
{
me[idx] = val;
}
@@ -521,21 +473,17 @@ template<class T> struct kn4dSetConstReal : public KernelBase {
return val;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel kn4dSetConstReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, val);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, val);
+ }
}
Grid4d<T> &me;
T val;
@@ -546,7 +494,7 @@ template<class T> struct kn4dAddConstReal : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, T val) const
+ inline void op(IndexInt idx, Grid4d<T> &me, T val)
{
me[idx] += val;
}
@@ -560,21 +508,17 @@ template<class T> struct kn4dAddConstReal : public KernelBase {
return val;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel kn4dAddConstReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, val);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, val);
+ }
}
Grid4d<T> &me;
T val;
@@ -585,7 +529,7 @@ template<class T> struct kn4dMultConst : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, T val) const
+ inline void op(IndexInt idx, Grid4d<T> &me, T val)
{
me[idx] *= val;
}
@@ -599,21 +543,17 @@ template<class T> struct kn4dMultConst : public KernelBase {
return val;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel kn4dMultConst ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, val);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, val);
+ }
}
Grid4d<T> &me;
T val;
@@ -624,7 +564,7 @@ template<class T> struct kn4dClamp : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, T min, T max) const
+ inline void op(IndexInt idx, Grid4d<T> &me, T min, T max)
{
me[idx] = clamp(me[idx], min, max);
}
@@ -643,21 +583,17 @@ template<class T> struct kn4dClamp : public KernelBase {
return max;
}
typedef T type2;
- void runMessage()
- {
- debMsg("Executing kernel kn4dClamp ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, min, max);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, min, max);
+ }
}
Grid4d<T> &me;
T min;
@@ -788,7 +724,7 @@ struct knGetComp4d : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, const Grid4d<Vec4> &src, Grid4d<Real> &dst, int c) const
+ inline void op(IndexInt idx, const Grid4d<Vec4> &src, Grid4d<Real> &dst, int c)
{
dst[idx] = src[idx][c];
}
@@ -807,21 +743,17 @@ struct knGetComp4d : public KernelBase {
return c;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel knGetComp4d ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, src, dst, c);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, src, dst, c);
+ }
}
const Grid4d<Vec4> &src;
Grid4d<Real> &dst;
@@ -835,7 +767,7 @@ struct knSetComp4d : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, const Grid4d<Real> &src, Grid4d<Vec4> &dst, int c) const
+ inline void op(IndexInt idx, const Grid4d<Real> &src, Grid4d<Vec4> &dst, int c)
{
dst[idx][c] = src[idx];
}
@@ -854,21 +786,17 @@ struct knSetComp4d : public KernelBase {
return c;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel knSetComp4d ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, src, dst, c);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, src, dst, c);
+ }
}
const Grid4d<Real> &src;
Grid4d<Vec4> &dst;
@@ -959,7 +887,7 @@ template<class T> struct knSetBnd4d : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, int t, Grid4d<T> &grid, T value, int w) const
+ inline void op(int i, int j, int k, int t, Grid4d<T> &grid, T value, int w)
{
bool bnd = (i <= w || i >= grid.getSizeX() - 1 - w || j <= w || j >= grid.getSizeY() - 1 - w ||
k <= w || k >= grid.getSizeZ() - 1 - w || t <= w || t >= grid.getSizeT() - 1 - w);
@@ -981,50 +909,47 @@ template<class T> struct knSetBnd4d : public KernelBase {
return w;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel knSetBnd4d ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ
- << " "
- " t "
- << minT << " - " << maxT,
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
+ const int _maxX = maxX;
+ const int _maxY = maxY;
if (maxT > 1) {
- for (int t = __r.begin(); t != (int)__r.end(); t++)
- for (int k = 0; k < maxZ; k++)
- for (int j = 0; j < maxY; j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, grid, value, w);
+ const int _maxZ = maxZ;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int t = 0; t < maxT; t++)
+ for (int k = 0; k < _maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, grid, value, w);
+ }
}
else if (maxZ > 1) {
const int t = 0;
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < maxY; j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, grid, value, w);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, grid, value, w);
+ }
}
else {
const int t = 0;
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, grid, value, w);
- }
- }
- void run()
- {
- if (maxT > 1) {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this);
- }
- else if (maxZ > 1) {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- }
- else {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, grid, value, w);
+ }
}
}
Grid4d<T> &grid;
@@ -1043,7 +968,7 @@ template<class T> struct knSetBnd4dNeumann : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, int t, Grid4d<T> &grid, int w) const
+ inline void op(int i, int j, int k, int t, Grid4d<T> &grid, int w)
{
bool set = false;
int si = i, sj = j, sk = k, st = t;
@@ -1092,50 +1017,47 @@ template<class T> struct knSetBnd4dNeumann : public KernelBase {
return w;
}
typedef int type1;
- void runMessage()
- {
- debMsg("Executing kernel knSetBnd4dNeumann ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ
- << " "
- " t "
- << minT << " - " << maxT,
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
+ const int _maxX = maxX;
+ const int _maxY = maxY;
if (maxT > 1) {
- for (int t = __r.begin(); t != (int)__r.end(); t++)
- for (int k = 0; k < maxZ; k++)
- for (int j = 0; j < maxY; j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, grid, w);
+ const int _maxZ = maxZ;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int t = 0; t < maxT; t++)
+ for (int k = 0; k < _maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, grid, w);
+ }
}
else if (maxZ > 1) {
const int t = 0;
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < maxY; j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, grid, w);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, grid, w);
+ }
}
else {
const int t = 0;
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, grid, w);
- }
- }
- void run()
- {
- if (maxT > 1) {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this);
- }
- else if (maxZ > 1) {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- }
- else {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, grid, w);
+ }
}
}
Grid4d<T> &grid;
@@ -1329,7 +1251,7 @@ template<class S> struct knSetRegion4d : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, int t, Grid4d<S> &dst, Vec4 start, Vec4 end, S value) const
+ inline void op(int i, int j, int k, int t, Grid4d<S> &dst, Vec4 start, Vec4 end, S value)
{
Vec4 p(i, j, k, t);
for (int c = 0; c < 4; ++c)
@@ -1357,50 +1279,47 @@ template<class S> struct knSetRegion4d : public KernelBase {
return value;
}
typedef S type3;
- void runMessage()
- {
- debMsg("Executing kernel knSetRegion4d ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ
- << " "
- " t "
- << minT << " - " << maxT,
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
+ const int _maxX = maxX;
+ const int _maxY = maxY;
if (maxT > 1) {
- for (int t = __r.begin(); t != (int)__r.end(); t++)
- for (int k = 0; k < maxZ; k++)
- for (int j = 0; j < maxY; j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, dst, start, end, value);
+ const int _maxZ = maxZ;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int t = 0; t < maxT; t++)
+ for (int k = 0; k < _maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, dst, start, end, value);
+ }
}
else if (maxZ > 1) {
const int t = 0;
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < maxY; j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, dst, start, end, value);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, dst, start, end, value);
+ }
}
else {
const int t = 0;
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, dst, start, end, value);
- }
- }
- void run()
- {
- if (maxT > 1) {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this);
- }
- else if (maxZ > 1) {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- }
- else {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, dst, start, end, value);
+ }
}
}
Grid4d<S> &dst;
@@ -1619,7 +1538,7 @@ template<class S> struct knInterpol4d : public KernelBase {
Grid4d<S> &target,
Grid4d<S> &source,
const Vec4 &srcFac,
- const Vec4 &offset) const
+ const Vec4 &offset)
{
Vec4 pos = Vec4(i, j, k, t) * srcFac + offset;
target(i, j, k, t) = source.getInterpolated(pos);
@@ -1644,50 +1563,47 @@ template<class S> struct knInterpol4d : public KernelBase {
return offset;
}
typedef Vec4 type3;
- void runMessage()
- {
- debMsg("Executing kernel knInterpol4d ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ
- << " "
- " t "
- << minT << " - " << maxT,
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
+ const int _maxX = maxX;
+ const int _maxY = maxY;
if (maxT > 1) {
- for (int t = __r.begin(); t != (int)__r.end(); t++)
- for (int k = 0; k < maxZ; k++)
- for (int j = 0; j < maxY; j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, target, source, srcFac, offset);
+ const int _maxZ = maxZ;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int t = 0; t < maxT; t++)
+ for (int k = 0; k < _maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, target, source, srcFac, offset);
+ }
}
else if (maxZ > 1) {
const int t = 0;
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < maxY; j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, target, source, srcFac, offset);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, target, source, srcFac, offset);
+ }
}
else {
const int t = 0;
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, target, source, srcFac, offset);
- }
- }
- void run()
- {
- if (maxT > 1) {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this);
- }
- else if (maxZ > 1) {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- }
- else {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, target, source, srcFac, offset);
+ }
}
}
Grid4d<S> &target;
diff --git a/extern/mantaflow/preprocessed/grid4d.h b/extern/mantaflow/preprocessed/grid4d.h
index 1741db590b7..2a2abad1e24 100644
--- a/extern/mantaflow/preprocessed/grid4d.h
+++ b/extern/mantaflow/preprocessed/grid4d.h
@@ -326,7 +326,6 @@ class Grid4dBase : public PbClass {
// precomputed Z,T shift: to ensure 2D compatibility, always use this instead of sx*sy !
IndexInt mStrideZ;
IndexInt mStrideT;
-
public:
PbArgs _args;
}
@@ -951,7 +950,6 @@ template<class T> class Grid4d : public Grid4dBase {
protected:
T *mData;
-
public:
PbArgs _args;
}
@@ -1027,7 +1025,7 @@ template<class T, class S> struct Grid4dAdd : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const
+ inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other)
{
me[idx] += other[idx];
}
@@ -1041,21 +1039,17 @@ template<class T, class S> struct Grid4dAdd : public KernelBase {
return other;
}
typedef Grid4d<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel Grid4dAdd ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid4d<T> &me;
const Grid4d<S> &other;
@@ -1066,7 +1060,7 @@ template<class T, class S> struct Grid4dSub : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const
+ inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other)
{
me[idx] -= other[idx];
}
@@ -1080,21 +1074,17 @@ template<class T, class S> struct Grid4dSub : public KernelBase {
return other;
}
typedef Grid4d<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel Grid4dSub ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid4d<T> &me;
const Grid4d<S> &other;
@@ -1105,7 +1095,7 @@ template<class T, class S> struct Grid4dMult : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const
+ inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other)
{
me[idx] *= other[idx];
}
@@ -1119,21 +1109,17 @@ template<class T, class S> struct Grid4dMult : public KernelBase {
return other;
}
typedef Grid4d<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel Grid4dMult ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid4d<T> &me;
const Grid4d<S> &other;
@@ -1144,7 +1130,7 @@ template<class T, class S> struct Grid4dDiv : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const
+ inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other)
{
me[idx] /= other[idx];
}
@@ -1158,21 +1144,17 @@ template<class T, class S> struct Grid4dDiv : public KernelBase {
return other;
}
typedef Grid4d<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel Grid4dDiv ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid4d<T> &me;
const Grid4d<S> &other;
@@ -1183,7 +1165,7 @@ template<class T, class S> struct Grid4dAddScalar : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, const S &other) const
+ inline void op(IndexInt idx, Grid4d<T> &me, const S &other)
{
me[idx] += other;
}
@@ -1197,21 +1179,17 @@ template<class T, class S> struct Grid4dAddScalar : public KernelBase {
return other;
}
typedef S type1;
- void runMessage()
- {
- debMsg("Executing kernel Grid4dAddScalar ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid4d<T> &me;
const S &other;
@@ -1222,7 +1200,7 @@ template<class T, class S> struct Grid4dMultScalar : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, const S &other) const
+ inline void op(IndexInt idx, Grid4d<T> &me, const S &other)
{
me[idx] *= other;
}
@@ -1236,21 +1214,17 @@ template<class T, class S> struct Grid4dMultScalar : public KernelBase {
return other;
}
typedef S type1;
- void runMessage()
- {
- debMsg("Executing kernel Grid4dMultScalar ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid4d<T> &me;
const S &other;
@@ -1262,7 +1236,7 @@ template<class T, class S> struct Grid4dScaledAdd : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other, const S &factor) const
+ inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other, const S &factor)
{
me[idx] += factor * other[idx];
}
@@ -1281,21 +1255,17 @@ template<class T, class S> struct Grid4dScaledAdd : public KernelBase {
return factor;
}
typedef S type2;
- void runMessage()
- {
- debMsg("Executing kernel Grid4dScaledAdd ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other, factor);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other, factor);
+ }
}
Grid4d<T> &me;
const Grid4d<T> &other;
@@ -1308,7 +1278,7 @@ template<class T> struct Grid4dSafeDiv : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other) const
+ inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other)
{
me[idx] = safeDivide(me[idx], other[idx]);
}
@@ -1322,21 +1292,17 @@ template<class T> struct Grid4dSafeDiv : public KernelBase {
return other;
}
typedef Grid4d<T> type1;
- void runMessage()
- {
- debMsg("Executing kernel Grid4dSafeDiv ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
Grid4d<T> &me;
const Grid4d<T> &other;
@@ -1347,7 +1313,7 @@ template<class T> struct Grid4dSetConst : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid4d<T> &me, T value) const
+ inline void op(IndexInt idx, Grid4d<T> &me, T value)
{
me[idx] = value;
}
@@ -1361,21 +1327,17 @@ template<class T> struct Grid4dSetConst : public KernelBase {
return value;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel Grid4dSetConst ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, value);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, value);
+ }
}
Grid4d<T> &me;
T value;
@@ -1473,7 +1435,7 @@ template<class S> struct KnInterpolateGrid4dTempl : public KernelBase {
Grid4d<S> &target,
Grid4d<S> &source,
const Vec4 &sourceFactor,
- Vec4 offset) const
+ Vec4 offset)
{
Vec4 pos = Vec4(i, j, k, t) * sourceFactor + offset;
if (!source.is3D())
@@ -1502,50 +1464,47 @@ template<class S> struct KnInterpolateGrid4dTempl : public KernelBase {
return offset;
}
typedef Vec4 type3;
- void runMessage()
- {
- debMsg("Executing kernel KnInterpolateGrid4dTempl ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ
- << " "
- " t "
- << minT << " - " << maxT,
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
+ const int _maxX = maxX;
+ const int _maxY = maxY;
if (maxT > 1) {
- for (int t = __r.begin(); t != (int)__r.end(); t++)
- for (int k = 0; k < maxZ; k++)
- for (int j = 0; j < maxY; j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, target, source, sourceFactor, offset);
+ const int _maxZ = maxZ;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int t = 0; t < maxT; t++)
+ for (int k = 0; k < _maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, target, source, sourceFactor, offset);
+ }
}
else if (maxZ > 1) {
const int t = 0;
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < maxY; j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, target, source, sourceFactor, offset);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, target, source, sourceFactor, offset);
+ }
}
else {
const int t = 0;
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < maxX; i++)
- op(i, j, k, t, target, source, sourceFactor, offset);
- }
- }
- void run()
- {
- if (maxT > 1) {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this);
- }
- else if (maxZ > 1) {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- }
- else {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, t, target, source, sourceFactor, offset);
+ }
}
}
Grid4d<S> &target;
diff --git a/extern/mantaflow/preprocessed/kernel.h b/extern/mantaflow/preprocessed/kernel.h
index dbcc2342a11..90e30cd21e1 100644
--- a/extern/mantaflow/preprocessed/kernel.h
+++ b/extern/mantaflow/preprocessed/kernel.h
@@ -71,19 +71,6 @@ class ParticleBase;
for (int j = bnd; j < (grid).getSizeY() - bnd; ++j) \
for (int i = bnd; i < (grid).getSizeX() - bnd; ++i)
-#define FOR_NEIGHBORS_BND(grid, radius, bnd) \
- for (int zj = ((grid).is3D() ? std::max(bnd, k - radius) : 0); \
- zj <= ((grid).is3D() ? std::min(k + radius, (grid).getSizeZ() - 1 - bnd) : 0); \
- zj++) \
- for (int yj = std::max(bnd, j - radius); \
- yj <= std::min(j + radius, (grid).getSizeY() - 1 - bnd); \
- yj++) \
- for (int xj = std::max(bnd, i - radius); \
- xj <= std::min(i + radius, (grid).getSizeX() - 1 - bnd); \
- xj++)
-
-#define FOR_NEIGHBORS(grid, radius) FOR_NEIGHBORS_BND(grid, radius, 0)
-
//! Basic data structure for kernel data, initialized based on kernel type (e.g. single, idx, etc).
struct KernelBase {
int maxX, maxY, maxZ, minZ, maxT, minT;
diff --git a/extern/mantaflow/preprocessed/levelset.cpp b/extern/mantaflow/preprocessed/levelset.cpp
index c4fecace7ca..1ce18fab3f6 100644
--- a/extern/mantaflow/preprocessed/levelset.cpp
+++ b/extern/mantaflow/preprocessed/levelset.cpp
@@ -62,7 +62,7 @@ struct InitFmIn : public KernelBase {
Grid<int> &fmFlags,
Grid<Real> &phi,
bool ignoreWalls,
- int obstacleType) const
+ int obstacleType)
{
const IndexInt idx = flags.index(i, j, k);
const Real v = phi[idx];
@@ -104,37 +104,35 @@ struct InitFmIn : public KernelBase {
return obstacleType;
}
typedef int type4;
- void runMessage()
- {
- debMsg("Executing kernel InitFmIn ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
Grid<int> &fmFlags;
Grid<Real> &phi;
@@ -165,7 +163,7 @@ struct InitFmOut : public KernelBase {
Grid<int> &fmFlags,
Grid<Real> &phi,
bool ignoreWalls,
- int obstacleType) const
+ int obstacleType)
{
const IndexInt idx = flags.index(i, j, k);
const Real v = phi[idx];
@@ -205,37 +203,35 @@ struct InitFmOut : public KernelBase {
return obstacleType;
}
typedef int type4;
- void runMessage()
- {
- debMsg("Executing kernel InitFmOut ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
Grid<int> &fmFlags;
Grid<Real> &phi;
@@ -269,7 +265,7 @@ struct SetUninitialized : public KernelBase {
Grid<Real> &phi,
const Real val,
int ignoreWalls,
- int obstacleType) const
+ int obstacleType)
{
if (ignoreWalls) {
if ((fmFlags(i, j, k) != FlagInited) && ((flags(i, j, k) & obstacleType) == 0)) {
@@ -311,37 +307,35 @@ struct SetUninitialized : public KernelBase {
return obstacleType;
}
typedef int type5;
- void runMessage()
- {
- debMsg("Executing kernel SetUninitialized ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const Grid<int> &flags;
Grid<int> &fmFlags;
Grid<Real> &phi;
@@ -371,7 +365,8 @@ inline bool isAtInterface(const Grid<int> &fmFlags, Grid<Real> &phi, const Vec3i
//************************************************************************
// Levelset class def
-LevelsetGrid::LevelsetGrid(FluidSolver *parent, bool show) : Grid<Real>(parent, show)
+LevelsetGrid::LevelsetGrid(FluidSolver *parent, bool show, bool offload)
+ : Grid<Real>(parent, show, false /*sparse*/, offload)
{
mType = (GridType)(TypeLevelset | TypeReal);
}
@@ -394,7 +389,7 @@ struct KnJoin : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<Real> &a, const Grid<Real> &b) const
+ inline void op(IndexInt idx, Grid<Real> &a, const Grid<Real> &b)
{
a[idx] = min(a[idx], b[idx]);
}
@@ -408,21 +403,17 @@ struct KnJoin : public KernelBase {
return b;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel KnJoin ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, a, b);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, a, b);
+ }
}
Grid<Real> &a;
const Grid<Real> &b;
@@ -440,11 +431,8 @@ struct KnSubtract : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx,
- Grid<Real> &a,
- const Grid<Real> &b,
- const FlagGrid *flags,
- int subtractType) const
+ inline void op(
+ IndexInt idx, Grid<Real> &a, const Grid<Real> &b, const FlagGrid *flags, int subtractType)
{
if (flags && ((*flags)(idx)&subtractType) == 0)
return;
@@ -471,21 +459,17 @@ struct KnSubtract : public KernelBase {
return subtractType;
}
typedef int type3;
- void runMessage()
- {
- debMsg("Executing kernel KnSubtract ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, a, b, flags, subtractType);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, a, b, flags, subtractType);
+ }
}
Grid<Real> &a;
const Grid<Real> &b;
@@ -668,7 +652,7 @@ struct KnFillApply : public KernelBase {
Grid<int> &visited,
const Real value,
const int boundaryWidth,
- const bool outside) const
+ const bool outside)
{
if (visited(i, j, k) == ID_VISITED)
@@ -706,37 +690,35 @@ struct KnFillApply : public KernelBase {
return outside;
}
typedef bool type4;
- void runMessage()
- {
- debMsg("Executing kernel KnFillApply ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = boundaryWidth; j < _maxY; j++)
- for (int i = boundaryWidth; i < _maxX; i++)
- op(i, j, k, target, visited, value, boundaryWidth, outside);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = boundaryWidth; j < _maxY; j++)
+ for (int i = boundaryWidth; i < _maxX; i++)
+ op(i, j, k, target, visited, value, boundaryWidth, outside);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = boundaryWidth; i < _maxX; i++)
- op(i, j, k, target, visited, value, boundaryWidth, outside);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = boundaryWidth; j < _maxY; j++)
+ for (int i = boundaryWidth; i < _maxX; i++)
+ op(i, j, k, target, visited, value, boundaryWidth, outside);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(boundaryWidth, maxY), *this);
- }
Grid<Real> &target;
Grid<int> &visited;
const Real value;
diff --git a/extern/mantaflow/preprocessed/levelset.h b/extern/mantaflow/preprocessed/levelset.h
index ae162f73c3d..eea604ca2fd 100644
--- a/extern/mantaflow/preprocessed/levelset.h
+++ b/extern/mantaflow/preprocessed/levelset.h
@@ -27,7 +27,7 @@ class Mesh;
//! Special function for levelsets
class LevelsetGrid : public Grid<Real> {
public:
- LevelsetGrid(FluidSolver *parent, bool show = true);
+ LevelsetGrid(FluidSolver *parent, bool show = true, bool offload = false);
static int _W_0(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
PbClass *obj = Pb::objFromPy(_self);
@@ -41,7 +41,8 @@ class LevelsetGrid : public Grid<Real> {
ArgLocker _lock;
FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock);
bool show = _args.getOpt<bool>("show", 1, true, &_lock);
- obj = new LevelsetGrid(parent, show);
+ bool offload = _args.getOpt<bool>("offload", 2, false, &_lock);
+ obj = new LevelsetGrid(parent, show, offload);
obj->registerObject(_self, &_args);
_args.check();
}
@@ -266,7 +267,6 @@ class LevelsetGrid : public Grid<Real> {
}
static Real invalidTimeValue();
-
public:
PbArgs _args;
}
diff --git a/extern/mantaflow/preprocessed/mesh.cpp b/extern/mantaflow/preprocessed/mesh.cpp
index 79ddc4bd879..7eb69438e7f 100644
--- a/extern/mantaflow/preprocessed/mesh.cpp
+++ b/extern/mantaflow/preprocessed/mesh.cpp
@@ -344,22 +344,19 @@ void Mesh::rebuildChannels()
mNodeChannels[i]->resize(mNodes.size());
}
-struct _KnAdvectMeshInGrid : public KernelBase {
- _KnAdvectMeshInGrid(const KernelBase &base,
- vector<Node> &nodes,
- const FlagGrid &flags,
- const MACGrid &vel,
- const Real dt,
- vector<Vec3> &u)
- : KernelBase(base), nodes(nodes), flags(flags), vel(vel), dt(dt), u(u)
+struct KnAdvectMeshInGrid : public KernelBase {
+ KnAdvectMeshInGrid(vector<Node> &nodes, const FlagGrid &flags, const MACGrid &vel, const Real dt)
+ : KernelBase(nodes.size()), nodes(nodes), flags(flags), vel(vel), dt(dt), u((size))
{
+ runMessage();
+ run();
}
inline void op(IndexInt idx,
vector<Node> &nodes,
const FlagGrid &flags,
const MACGrid &vel,
const Real dt,
- vector<Vec3> &u) const
+ vector<Vec3> &u)
{
if (nodes[idx].flags & Mesh::NfFixed)
u[idx] = 0.0;
@@ -368,38 +365,6 @@ struct _KnAdvectMeshInGrid : public KernelBase {
else
u[idx] = vel.getInterpolated(nodes[idx].pos) * dt;
}
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, nodes, flags, vel, dt, u);
- }
- void run()
- {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- vector<Node> &nodes;
- const FlagGrid &flags;
- const MACGrid &vel;
- const Real dt;
- vector<Vec3> &u;
-};
-struct KnAdvectMeshInGrid : public KernelBase {
- KnAdvectMeshInGrid(vector<Node> &nodes, const FlagGrid &flags, const MACGrid &vel, const Real dt)
- : KernelBase(nodes.size()),
- _inner(KernelBase(nodes.size()), nodes, flags, vel, dt, u),
- nodes(nodes),
- flags(flags),
- vel(vel),
- dt(dt),
- u((size))
- {
- runMessage();
- run();
- }
- void run()
- {
- _inner.run();
- }
inline operator vector<Vec3>()
{
return u;
@@ -428,14 +393,18 @@ struct KnAdvectMeshInGrid : public KernelBase {
return dt;
}
typedef Real type3;
- void runMessage()
+ void runMessage(){};
+ void run()
{
- debMsg("Executing kernel KnAdvectMeshInGrid ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- _KnAdvectMeshInGrid _inner;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, nodes, flags, vel, dt, u);
+ }
+ }
vector<Node> &nodes;
const FlagGrid &flags;
const MACGrid &vel;
@@ -1012,7 +981,7 @@ template<class T> struct ApplyMeshToGrid : public KernelBase {
run();
}
inline void op(
- int i, int j, int k, Grid<T> *grid, Grid<Real> &sdf, T value, FlagGrid *respectFlags) const
+ int i, int j, int k, Grid<T> *grid, Grid<Real> &sdf, T value, FlagGrid *respectFlags)
{
if (respectFlags && respectFlags->isObstacle(i, j, k))
return;
@@ -1040,37 +1009,35 @@ template<class T> struct ApplyMeshToGrid : public KernelBase {
return respectFlags;
}
typedef FlagGrid type3;
- void runMessage()
- {
- debMsg("Executing kernel ApplyMeshToGrid ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, sdf, value, respectFlags);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, sdf, value, respectFlags);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, sdf, value, respectFlags);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, sdf, value, respectFlags);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<T> *grid;
Grid<Real> &sdf;
T value;
@@ -1431,7 +1398,7 @@ template<class T> struct knSetMdataConst : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &mdata, T value) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &mdata, T value)
{
mdata[idx] = value;
}
@@ -1445,21 +1412,17 @@ template<class T> struct knSetMdataConst : public KernelBase {
return value;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel knSetMdataConst ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, mdata, value);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, mdata, value);
+ }
}
MeshDataImpl<T> &mdata;
T value;
@@ -1472,7 +1435,7 @@ template<class T, class S> struct knMdataSet : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other)
{
me[idx] += other[idx];
}
@@ -1486,21 +1449,17 @@ template<class T, class S> struct knMdataSet : public KernelBase {
return other;
}
typedef MeshDataImpl<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataSet ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
MeshDataImpl<T> &me;
const MeshDataImpl<S> &other;
@@ -1512,7 +1471,7 @@ template<class T, class S> struct knMdataAdd : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other)
{
me[idx] += other[idx];
}
@@ -1526,21 +1485,17 @@ template<class T, class S> struct knMdataAdd : public KernelBase {
return other;
}
typedef MeshDataImpl<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataAdd ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
MeshDataImpl<T> &me;
const MeshDataImpl<S> &other;
@@ -1552,7 +1507,7 @@ template<class T, class S> struct knMdataSub : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other)
{
me[idx] -= other[idx];
}
@@ -1566,21 +1521,17 @@ template<class T, class S> struct knMdataSub : public KernelBase {
return other;
}
typedef MeshDataImpl<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataSub ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
MeshDataImpl<T> &me;
const MeshDataImpl<S> &other;
@@ -1592,7 +1543,7 @@ template<class T, class S> struct knMdataMult : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other)
{
me[idx] *= other[idx];
}
@@ -1606,21 +1557,17 @@ template<class T, class S> struct knMdataMult : public KernelBase {
return other;
}
typedef MeshDataImpl<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataMult ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
MeshDataImpl<T> &me;
const MeshDataImpl<S> &other;
@@ -1632,7 +1579,7 @@ template<class T, class S> struct knMdataDiv : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<S> &other)
{
me[idx] /= other[idx];
}
@@ -1646,21 +1593,17 @@ template<class T, class S> struct knMdataDiv : public KernelBase {
return other;
}
typedef MeshDataImpl<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataDiv ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
MeshDataImpl<T> &me;
const MeshDataImpl<S> &other;
@@ -1673,7 +1616,7 @@ template<class T, class S> struct knMdataSetScalar : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other)
{
me[idx] = other;
}
@@ -1687,21 +1630,17 @@ template<class T, class S> struct knMdataSetScalar : public KernelBase {
return other;
}
typedef S type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataSetScalar ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
MeshDataImpl<T> &me;
const S &other;
@@ -1713,7 +1652,7 @@ template<class T, class S> struct knMdataAddScalar : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other)
{
me[idx] += other;
}
@@ -1727,21 +1666,17 @@ template<class T, class S> struct knMdataAddScalar : public KernelBase {
return other;
}
typedef S type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataAddScalar ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
MeshDataImpl<T> &me;
const S &other;
@@ -1753,7 +1688,7 @@ template<class T, class S> struct knMdataMultScalar : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const S &other)
{
me[idx] *= other;
}
@@ -1767,21 +1702,17 @@ template<class T, class S> struct knMdataMultScalar : public KernelBase {
return other;
}
typedef S type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataMultScalar ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
MeshDataImpl<T> &me;
const S &other;
@@ -1793,10 +1724,7 @@ template<class T, class S> struct knMdataScaledAdd : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx,
- MeshDataImpl<T> &me,
- const MeshDataImpl<T> &other,
- const S &factor) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<T> &other, const S &factor)
{
me[idx] += factor * other[idx];
}
@@ -1815,21 +1743,17 @@ template<class T, class S> struct knMdataScaledAdd : public KernelBase {
return factor;
}
typedef S type2;
- void runMessage()
- {
- debMsg("Executing kernel knMdataScaledAdd ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other, factor);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other, factor);
+ }
}
MeshDataImpl<T> &me;
const MeshDataImpl<T> &other;
@@ -1843,7 +1767,7 @@ template<class T> struct knMdataSafeDiv : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<T> &other) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const MeshDataImpl<T> &other)
{
me[idx] = safeDivide(me[idx], other[idx]);
}
@@ -1857,21 +1781,17 @@ template<class T> struct knMdataSafeDiv : public KernelBase {
return other;
}
typedef MeshDataImpl<T> type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataSafeDiv ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
MeshDataImpl<T> &me;
const MeshDataImpl<T> &other;
@@ -1883,7 +1803,7 @@ template<class T> struct knMdataSetConst : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &mdata, T value) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &mdata, T value)
{
mdata[idx] = value;
}
@@ -1897,21 +1817,17 @@ template<class T> struct knMdataSetConst : public KernelBase {
return value;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataSetConst ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, mdata, value);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, mdata, value);
+ }
}
MeshDataImpl<T> &mdata;
T value;
@@ -1924,7 +1840,7 @@ template<class T> struct knMdataClamp : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, T min, T max) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, T min, T max)
{
me[idx] = clamp(me[idx], min, max);
}
@@ -1943,21 +1859,17 @@ template<class T> struct knMdataClamp : public KernelBase {
return max;
}
typedef T type2;
- void runMessage()
- {
- debMsg("Executing kernel knMdataClamp ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, min, max);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, min, max);
+ }
}
MeshDataImpl<T> &me;
T min;
@@ -1969,7 +1881,7 @@ template<class T> struct knMdataClampMin : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, const T vmin) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const T vmin)
{
me[idx] = std::max(vmin, me[idx]);
}
@@ -1983,21 +1895,17 @@ template<class T> struct knMdataClampMin : public KernelBase {
return vmin;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataClampMin ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, vmin);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, vmin);
+ }
}
MeshDataImpl<T> &me;
const T vmin;
@@ -2008,7 +1916,7 @@ template<class T> struct knMdataClampMax : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<T> &me, const T vmax) const
+ inline void op(IndexInt idx, MeshDataImpl<T> &me, const T vmax)
{
me[idx] = std::min(vmax, me[idx]);
}
@@ -2022,21 +1930,17 @@ template<class T> struct knMdataClampMax : public KernelBase {
return vmax;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataClampMax ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, vmax);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, vmax);
+ }
}
MeshDataImpl<T> &me;
const T vmax;
@@ -2048,7 +1952,7 @@ struct knMdataClampMinVec3 : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<Vec3> &me, const Real vmin) const
+ inline void op(IndexInt idx, MeshDataImpl<Vec3> &me, const Real vmin)
{
me[idx].x = std::max(vmin, me[idx].x);
me[idx].y = std::max(vmin, me[idx].y);
@@ -2064,21 +1968,17 @@ struct knMdataClampMinVec3 : public KernelBase {
return vmin;
}
typedef Real type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataClampMinVec3 ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, vmin);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, vmin);
+ }
}
MeshDataImpl<Vec3> &me;
const Real vmin;
@@ -2090,7 +1990,7 @@ struct knMdataClampMaxVec3 : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, MeshDataImpl<Vec3> &me, const Real vmax) const
+ inline void op(IndexInt idx, MeshDataImpl<Vec3> &me, const Real vmax)
{
me[idx].x = std::min(vmax, me[idx].x);
me[idx].y = std::min(vmax, me[idx].y);
@@ -2106,21 +2006,17 @@ struct knMdataClampMaxVec3 : public KernelBase {
return vmax;
}
typedef Real type1;
- void runMessage()
- {
- debMsg("Executing kernel knMdataClampMaxVec3 ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, vmax);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, vmax);
+ }
}
MeshDataImpl<Vec3> &me;
const Real vmax;
@@ -2162,7 +2058,7 @@ template<class T, class S> struct knMdataSetScalarIntFlag : public KernelBase {
MeshDataImpl<T> &me,
const S &other,
const MeshDataImpl<int> &t,
- const int itype) const
+ const int itype)
{
if (t[idx] & itype)
me[idx] = other;
@@ -2187,21 +2083,17 @@ template<class T, class S> struct knMdataSetScalarIntFlag : public KernelBase {
return itype;
}
typedef int type3;
- void runMessage()
- {
- debMsg("Executing kernel knMdataSetScalarIntFlag ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other, t, itype);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other, t, itype);
+ }
}
MeshDataImpl<T> &me;
const S &other;
@@ -2311,29 +2203,21 @@ template<typename T> struct KnPtsSum : public KernelBase {
return itype;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel KnPtsSum ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, t, itype, result);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- KnPtsSum(KnPtsSum &o, tbb::split)
- : KernelBase(o), val(o.val), t(o.t), itype(o.itype), result(T(0.))
- {
- }
- void join(const KnPtsSum &o)
- {
- result += o.result;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ T result = T(0.);
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, t, itype, result);
+#pragma omp critical
+ {
+ this->result += result;
+ }
+ }
}
const MeshDataImpl<T> &val;
const MeshDataImpl<int> *t;
@@ -2363,28 +2247,21 @@ template<typename T> struct KnPtsSumSquare : public KernelBase {
return val;
}
typedef MeshDataImpl<T> type0;
- void runMessage()
- {
- debMsg("Executing kernel KnPtsSumSquare ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, result);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- KnPtsSumSquare(KnPtsSumSquare &o, tbb::split) : KernelBase(o), val(o.val), result(0.)
- {
- }
- void join(const KnPtsSumSquare &o)
- {
- result += o.result;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real result = 0.;
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, result);
+#pragma omp critical
+ {
+ this->result += result;
+ }
+ }
}
const MeshDataImpl<T> &val;
Real result;
@@ -2412,28 +2289,21 @@ template<typename T> struct KnPtsSumMagnitude : public KernelBase {
return val;
}
typedef MeshDataImpl<T> type0;
- void runMessage()
- {
- debMsg("Executing kernel KnPtsSumMagnitude ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, result);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- KnPtsSumMagnitude(KnPtsSumMagnitude &o, tbb::split) : KernelBase(o), val(o.val), result(0.)
- {
- }
- void join(const KnPtsSumMagnitude &o)
- {
- result += o.result;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real result = 0.;
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, result);
+#pragma omp critical
+ {
+ this->result += result;
+ }
+ }
}
const MeshDataImpl<T> &val;
Real result;
@@ -2479,29 +2349,21 @@ struct CompMdata_Min : public KernelBase {
return val;
}
typedef MeshDataImpl<T> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompMdata_Min ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, minVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompMdata_Min(CompMdata_Min &o, tbb::split)
- : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max())
- {
- }
- void join(const CompMdata_Min &o)
- {
- minVal = min(minVal, o.minVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real minVal = std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, minVal);
+#pragma omp critical
+ {
+ this->minVal = min(minVal, this->minVal);
+ }
+ }
}
const MeshDataImpl<T> &val;
Real minVal;
@@ -2534,29 +2396,21 @@ struct CompMdata_Max : public KernelBase {
return val;
}
typedef MeshDataImpl<T> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompMdata_Max ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, maxVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompMdata_Max(CompMdata_Max &o, tbb::split)
- : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max())
- {
- }
- void join(const CompMdata_Max &o)
- {
- maxVal = max(maxVal, o.maxVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real maxVal = -std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, maxVal);
+#pragma omp critical
+ {
+ this->maxVal = max(maxVal, this->maxVal);
+ }
+ }
}
const MeshDataImpl<T> &val;
Real maxVal;
@@ -2632,29 +2486,21 @@ struct CompMdata_MinVec3 : public KernelBase {
return val;
}
typedef MeshDataImpl<Vec3> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompMdata_MinVec3 ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, minVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompMdata_MinVec3(CompMdata_MinVec3 &o, tbb::split)
- : KernelBase(o), val(o.val), minVal(-std::numeric_limits<Real>::max())
- {
- }
- void join(const CompMdata_MinVec3 &o)
- {
- minVal = min(minVal, o.minVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real minVal = -std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, minVal);
+#pragma omp critical
+ {
+ this->minVal = min(minVal, this->minVal);
+ }
+ }
}
const MeshDataImpl<Vec3> &val;
Real minVal;
@@ -2686,29 +2532,21 @@ struct CompMdata_MaxVec3 : public KernelBase {
return val;
}
typedef MeshDataImpl<Vec3> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompMdata_MaxVec3 ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, maxVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompMdata_MaxVec3(CompMdata_MaxVec3 &o, tbb::split)
- : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::min())
- {
- }
- void join(const CompMdata_MaxVec3 &o)
- {
- maxVal = max(maxVal, o.maxVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real maxVal = -std::numeric_limits<Real>::min();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, maxVal);
+#pragma omp critical
+ {
+ this->maxVal = max(maxVal, this->maxVal);
+ }
+ }
}
const MeshDataImpl<Vec3> &val;
Real maxVal;
diff --git a/extern/mantaflow/preprocessed/mesh.h b/extern/mantaflow/preprocessed/mesh.h
index b5de66ce095..d3a69abc4ea 100644
--- a/extern/mantaflow/preprocessed/mesh.h
+++ b/extern/mantaflow/preprocessed/mesh.h
@@ -796,7 +796,6 @@ class Mesh : public PbClass {
std::vector<MeshDataImpl<int> *>
mMdataInt; //! indicate that mdata of this mesh is copied, and needs to be freed
bool mFreeMdata;
-
public:
PbArgs _args;
}
@@ -882,7 +881,6 @@ class MeshDataBase : public PbClass {
protected:
Mesh *mMesh;
-
public:
PbArgs _args;
}
@@ -1647,7 +1645,6 @@ template<class T> class MeshDataImpl : public MeshDataBase {
//! optionally , we might have an associated grid from which to grab new data
Grid<T> *mpGridSource; //! unfortunately , we need to distinguish mac vs regular vec3
bool mGridSourceMAC;
-
public:
PbArgs _args;
}
diff --git a/extern/mantaflow/preprocessed/movingobs.h b/extern/mantaflow/preprocessed/movingobs.h
index 83ef6ed0c9f..0661ddf5b37 100644
--- a/extern/mantaflow/preprocessed/movingobs.h
+++ b/extern/mantaflow/preprocessed/movingobs.h
@@ -154,7 +154,6 @@ class MovingObstacle : public PbClass {
int mEmptyType;
int mID;
static int sIDcnt;
-
public:
PbArgs _args;
}
diff --git a/extern/mantaflow/preprocessed/multigrid.cpp b/extern/mantaflow/preprocessed/multigrid.cpp
index 9e35c6f9368..763eede540d 100644
--- a/extern/mantaflow/preprocessed/multigrid.cpp
+++ b/extern/mantaflow/preprocessed/multigrid.cpp
@@ -428,7 +428,7 @@ struct knCopyA : public KernelBase {
const Grid<Real> *pA0,
const Grid<Real> *pAi,
const Grid<Real> *pAj,
- const Grid<Real> *pAk) const
+ const Grid<Real> *pAk)
{
A0[idx * stencilSize0 + 0] = (*pA0)[idx];
A0[idx * stencilSize0 + 1] = (*pAi)[idx];
@@ -476,21 +476,17 @@ struct knCopyA : public KernelBase {
return pAk;
}
typedef Grid<Real> type7;
- void runMessage()
- {
- debMsg("Executing kernel knCopyA ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, sizeRef, A0, stencilSize0, is3D, pA0, pAi, pAj, pAk);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, sizeRef, A0, stencilSize0, is3D, pA0, pAi, pAj, pAk);
+ }
}
std::vector<Real> &sizeRef;
std::vector<Real> &A0;
@@ -523,7 +519,7 @@ struct knActivateVertices : public KernelBase {
std::vector<Real> &A0,
bool &nonZeroStencilSumFound,
bool &trivialEquationsFound,
- const GridMg &mg) const
+ const GridMg &mg)
{
// active vertices on level 0 are vertices with non-zero diagonal entry in A
type_0[idx] = GridMg::vtInactive;
@@ -572,21 +568,17 @@ struct knActivateVertices : public KernelBase {
return mg;
}
typedef GridMg type4;
- void runMessage()
- {
- debMsg("Executing kernel knActivateVertices ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, type_0, A0, nonZeroStencilSumFound, trivialEquationsFound, mg);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, type_0, A0, nonZeroStencilSumFound, trivialEquationsFound, mg);
+ }
}
std::vector<GridMg::VertexType> &type_0;
std::vector<Real> &A0;
@@ -642,7 +634,7 @@ struct knSetRhs : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, std::vector<Real> &b, const Grid<Real> &rhs, const GridMg &mg) const
+ inline void op(IndexInt idx, std::vector<Real> &b, const Grid<Real> &rhs, const GridMg &mg)
{
b[idx] = rhs[idx];
@@ -666,21 +658,17 @@ struct knSetRhs : public KernelBase {
return mg;
}
typedef GridMg type2;
- void runMessage()
- {
- debMsg("Executing kernel knSetRhs ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, b, rhs, mg);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, b, rhs, mg);
+ }
}
std::vector<Real> &b;
const Grid<Real> &rhs;
@@ -702,7 +690,7 @@ template<class T> struct knSet : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, std::vector<T> &data, T value) const
+ inline void op(IndexInt idx, std::vector<T> &data, T value)
{
data[idx] = value;
}
@@ -716,21 +704,17 @@ template<class T> struct knSet : public KernelBase {
return value;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel knSet ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, data, value);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, data, value);
+ }
}
std::vector<T> &data;
T value;
@@ -743,7 +727,7 @@ template<class T> struct knCopyToVector : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, std::vector<T> &dst, const Grid<T> &src) const
+ inline void op(IndexInt idx, std::vector<T> &dst, const Grid<T> &src)
{
dst[idx] = src[idx];
}
@@ -757,21 +741,17 @@ template<class T> struct knCopyToVector : public KernelBase {
return src;
}
typedef Grid<T> type1;
- void runMessage()
- {
- debMsg("Executing kernel knCopyToVector ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, dst, src);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, dst, src);
+ }
}
std::vector<T> &dst;
const Grid<T> &src;
@@ -784,7 +764,7 @@ template<class T> struct knCopyToGrid : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, const std::vector<T> &src, Grid<T> &dst) const
+ inline void op(IndexInt idx, const std::vector<T> &src, Grid<T> &dst)
{
dst[idx] = src[idx];
}
@@ -798,21 +778,17 @@ template<class T> struct knCopyToGrid : public KernelBase {
return dst;
}
typedef Grid<T> type1;
- void runMessage()
- {
- debMsg("Executing kernel knCopyToGrid ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, src, dst);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, src, dst);
+ }
}
const std::vector<T> &src;
Grid<T> &dst;
@@ -825,7 +801,7 @@ template<class T> struct knAddAssign : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, std::vector<T> &dst, const std::vector<T> &src) const
+ inline void op(IndexInt idx, std::vector<T> &dst, const std::vector<T> &src)
{
dst[idx] += src[idx];
}
@@ -839,21 +815,17 @@ template<class T> struct knAddAssign : public KernelBase {
return src;
}
typedef std::vector<T> type1;
- void runMessage()
- {
- debMsg("Executing kernel knAddAssign ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, dst, src);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, dst, src);
+ }
}
std::vector<T> &dst;
const std::vector<T> &src;
@@ -930,7 +902,7 @@ struct knActivateCoarseVertices : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, std::vector<GridMg::VertexType> &type, int unused) const
+ inline void op(IndexInt idx, std::vector<GridMg::VertexType> &type, int unused)
{
// set all remaining 'free' vertices to 'removed',
if (type[idx] == GridMg::vtFree)
@@ -952,21 +924,17 @@ struct knActivateCoarseVertices : public KernelBase {
return unused;
}
typedef int type1;
- void runMessage()
- {
- debMsg("Executing kernel knActivateCoarseVertices ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, type, unused);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, type, unused);
+ }
}
std::vector<GridMg::VertexType> &type;
int unused;
@@ -1052,11 +1020,8 @@ struct knGenCoarseGridOperator : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx,
- std::vector<Real> &sizeRef,
- std::vector<Real> &A,
- int l,
- const GridMg &mg) const
+ inline void op(
+ IndexInt idx, std::vector<Real> &sizeRef, std::vector<Real> &A, int l, const GridMg &mg)
{
if (mg.mType[l][idx] == GridMg::vtInactive)
return;
@@ -1178,21 +1143,17 @@ struct knGenCoarseGridOperator : public KernelBase {
return mg;
}
typedef GridMg type3;
- void runMessage()
- {
- debMsg("Executing kernel knGenCoarseGridOperator ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, sizeRef, A, l, mg);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for schedule(static, 1)
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, sizeRef, A, l, mg);
+ }
}
std::vector<Real> &sizeRef;
std::vector<Real> &A;
@@ -1232,7 +1193,7 @@ struct knSmoothColor : public KernelBase {
const Vec3i &blockSize,
const std::vector<Vec3i> &colorOffs,
int l,
- const GridMg &mg) const
+ const GridMg &mg)
{
Vec3i blockOff(int(idx) % blockSize.x,
(int(idx) % (blockSize.x * blockSize.y)) / blockSize.x,
@@ -1318,21 +1279,17 @@ struct knSmoothColor : public KernelBase {
return mg;
}
typedef GridMg type5;
- void runMessage()
- {
- debMsg("Executing kernel knSmoothColor ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, numBlocks, x, blockSize, colorOffs, l, mg);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for schedule(static, 1)
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, numBlocks, x, blockSize, colorOffs, l, mg);
+ }
}
ThreadSize &numBlocks;
std::vector<Real> &x;
@@ -1386,7 +1343,7 @@ struct knCalcResidual : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, std::vector<Real> &r, int l, const GridMg &mg) const
+ inline void op(IndexInt idx, std::vector<Real> &r, int l, const GridMg &mg)
{
if (mg.mType[l][idx] == GridMg::vtInactive)
return;
@@ -1443,21 +1400,17 @@ struct knCalcResidual : public KernelBase {
return mg;
}
typedef GridMg type2;
- void runMessage()
- {
- debMsg("Executing kernel knCalcResidual ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, r, l, mg);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for schedule(static, 1)
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, r, l, mg);
+ }
}
std::vector<Real> &r;
int l;
@@ -1506,29 +1459,21 @@ struct knResidualNormSumSqr : public KernelBase {
return mg;
}
typedef GridMg type2;
- void runMessage()
- {
- debMsg("Executing kernel knResidualNormSumSqr ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, r, l, mg, result);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- knResidualNormSumSqr(knResidualNormSumSqr &o, tbb::split)
- : KernelBase(o), r(o.r), l(o.l), mg(o.mg), result(Real(0))
- {
- }
- void join(const knResidualNormSumSqr &o)
- {
- result += o.result;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real result = Real(0);
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, r, l, mg, result);
+#pragma omp critical
+ {
+ this->result += result;
+ }
+ }
}
const vector<Real> &r;
int l;
@@ -1700,7 +1645,7 @@ struct knRestrict : public KernelBase {
std::vector<Real> &dst,
const std::vector<Real> &src,
int l_dst,
- const GridMg &mg) const
+ const GridMg &mg)
{
if (mg.mType[l_dst][idx] == GridMg::vtInactive)
return;
@@ -1746,21 +1691,17 @@ struct knRestrict : public KernelBase {
return mg;
}
typedef GridMg type3;
- void runMessage()
- {
- debMsg("Executing kernel knRestrict ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, dst, src, l_dst, mg);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for schedule(static, 1)
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, dst, src, l_dst, mg);
+ }
}
std::vector<Real> &dst;
const std::vector<Real> &src;
@@ -1784,7 +1725,7 @@ struct knInterpolate : public KernelBase {
std::vector<Real> &dst,
const std::vector<Real> &src,
int l_dst,
- const GridMg &mg) const
+ const GridMg &mg)
{
if (mg.mType[l_dst][idx] == GridMg::vtInactive)
return;
@@ -1827,21 +1768,17 @@ struct knInterpolate : public KernelBase {
return mg;
}
typedef GridMg type3;
- void runMessage()
- {
- debMsg("Executing kernel knInterpolate ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, dst, src, l_dst, mg);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for schedule(static, 1)
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, dst, src, l_dst, mg);
+ }
}
std::vector<Real> &dst;
const std::vector<Real> &src;
diff --git a/extern/mantaflow/preprocessed/noisefield.h b/extern/mantaflow/preprocessed/noisefield.h
index 6ed8ac0012d..73c9de779ef 100644
--- a/extern/mantaflow/preprocessed/noisefield.h
+++ b/extern/mantaflow/preprocessed/noisefield.h
@@ -236,7 +236,6 @@ class WaveletNoiseField : public PbClass {
static int randomSeed;
// global reference count for noise tile
static std::atomic<int> mNoiseReferenceCount;
-
public:
PbArgs _args;
}
diff --git a/extern/mantaflow/preprocessed/particle.cpp b/extern/mantaflow/preprocessed/particle.cpp
index ad1c344d307..45234389cfe 100644
--- a/extern/mantaflow/preprocessed/particle.cpp
+++ b/extern/mantaflow/preprocessed/particle.cpp
@@ -455,7 +455,7 @@ template<class T, class S> struct knPdataAdd : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const
+ inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other)
{
me[idx] += other[idx];
}
@@ -469,21 +469,17 @@ template<class T, class S> struct knPdataAdd : public KernelBase {
return other;
}
typedef ParticleDataImpl<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataAdd ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
ParticleDataImpl<T> &me;
const ParticleDataImpl<S> &other;
@@ -495,7 +491,7 @@ template<class T, class S> struct knPdataSub : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const
+ inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other)
{
me[idx] -= other[idx];
}
@@ -509,21 +505,17 @@ template<class T, class S> struct knPdataSub : public KernelBase {
return other;
}
typedef ParticleDataImpl<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataSub ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
ParticleDataImpl<T> &me;
const ParticleDataImpl<S> &other;
@@ -535,7 +527,7 @@ template<class T, class S> struct knPdataMult : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const
+ inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other)
{
me[idx] *= other[idx];
}
@@ -549,21 +541,17 @@ template<class T, class S> struct knPdataMult : public KernelBase {
return other;
}
typedef ParticleDataImpl<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataMult ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
ParticleDataImpl<T> &me;
const ParticleDataImpl<S> &other;
@@ -575,7 +563,7 @@ template<class T, class S> struct knPdataDiv : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const
+ inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other)
{
me[idx] /= other[idx];
}
@@ -589,21 +577,17 @@ template<class T, class S> struct knPdataDiv : public KernelBase {
return other;
}
typedef ParticleDataImpl<S> type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataDiv ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
ParticleDataImpl<T> &me;
const ParticleDataImpl<S> &other;
@@ -615,7 +599,7 @@ template<class T> struct knPdataSafeDiv : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<T> &other) const
+ inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<T> &other)
{
me[idx] = safeDivide(me[idx], other[idx]);
}
@@ -629,21 +613,17 @@ template<class T> struct knPdataSafeDiv : public KernelBase {
return other;
}
typedef ParticleDataImpl<T> type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataSafeDiv ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
ParticleDataImpl<T> &me;
const ParticleDataImpl<T> &other;
@@ -656,7 +636,7 @@ template<class T, class S> struct knPdataSetScalar : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) const
+ inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other)
{
me[idx] = other;
}
@@ -670,21 +650,17 @@ template<class T, class S> struct knPdataSetScalar : public KernelBase {
return other;
}
typedef S type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataSetScalar ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
ParticleDataImpl<T> &me;
const S &other;
@@ -696,7 +672,7 @@ template<class T, class S> struct knPdataAddScalar : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) const
+ inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other)
{
me[idx] += other;
}
@@ -710,21 +686,17 @@ template<class T, class S> struct knPdataAddScalar : public KernelBase {
return other;
}
typedef S type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataAddScalar ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
ParticleDataImpl<T> &me;
const S &other;
@@ -736,7 +708,7 @@ template<class T, class S> struct knPdataMultScalar : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) const
+ inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other)
{
me[idx] *= other;
}
@@ -750,21 +722,17 @@ template<class T, class S> struct knPdataMultScalar : public KernelBase {
return other;
}
typedef S type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataMultScalar ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other);
+ }
}
ParticleDataImpl<T> &me;
const S &other;
@@ -779,7 +747,7 @@ template<class T, class S> struct knPdataScaledAdd : public KernelBase {
inline void op(IndexInt idx,
ParticleDataImpl<T> &me,
const ParticleDataImpl<T> &other,
- const S &factor) const
+ const S &factor)
{
me[idx] += factor * other[idx];
}
@@ -798,21 +766,17 @@ template<class T, class S> struct knPdataScaledAdd : public KernelBase {
return factor;
}
typedef S type2;
- void runMessage()
- {
- debMsg("Executing kernel knPdataScaledAdd ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other, factor);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other, factor);
+ }
}
ParticleDataImpl<T> &me;
const ParticleDataImpl<T> &other;
@@ -826,7 +790,7 @@ template<class T> struct knPdataClamp : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin, const T vmax) const
+ inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin, const T vmax)
{
me[idx] = clamp(me[idx], vmin, vmax);
}
@@ -845,21 +809,17 @@ template<class T> struct knPdataClamp : public KernelBase {
return vmax;
}
typedef T type2;
- void runMessage()
- {
- debMsg("Executing kernel knPdataClamp ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, vmin, vmax);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, vmin, vmax);
+ }
}
ParticleDataImpl<T> &me;
const T vmin;
@@ -872,7 +832,7 @@ template<class T> struct knPdataClampMin : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin) const
+ inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin)
{
me[idx] = std::max(vmin, me[idx]);
}
@@ -886,21 +846,17 @@ template<class T> struct knPdataClampMin : public KernelBase {
return vmin;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataClampMin ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, vmin);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, vmin);
+ }
}
ParticleDataImpl<T> &me;
const T vmin;
@@ -912,7 +868,7 @@ template<class T> struct knPdataClampMax : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmax) const
+ inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmax)
{
me[idx] = std::min(vmax, me[idx]);
}
@@ -926,21 +882,17 @@ template<class T> struct knPdataClampMax : public KernelBase {
return vmax;
}
typedef T type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataClampMax ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, vmax);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, vmax);
+ }
}
ParticleDataImpl<T> &me;
const T vmax;
@@ -953,7 +905,7 @@ struct knPdataClampMinVec3 : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmin) const
+ inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmin)
{
me[idx].x = std::max(vmin, me[idx].x);
me[idx].y = std::max(vmin, me[idx].y);
@@ -969,21 +921,17 @@ struct knPdataClampMinVec3 : public KernelBase {
return vmin;
}
typedef Real type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataClampMinVec3 ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, vmin);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, vmin);
+ }
}
ParticleDataImpl<Vec3> &me;
const Real vmin;
@@ -996,7 +944,7 @@ struct knPdataClampMaxVec3 : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmax) const
+ inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmax)
{
me[idx].x = std::min(vmax, me[idx].x);
me[idx].y = std::min(vmax, me[idx].y);
@@ -1012,21 +960,17 @@ struct knPdataClampMaxVec3 : public KernelBase {
return vmax;
}
typedef Real type1;
- void runMessage()
- {
- debMsg("Executing kernel knPdataClampMaxVec3 ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, vmax);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, vmax);
+ }
}
ParticleDataImpl<Vec3> &me;
const Real vmax;
@@ -1071,7 +1015,7 @@ template<class T, class S> struct knPdataSetScalarIntFlag : public KernelBase {
ParticleDataImpl<T> &me,
const S &other,
const ParticleDataImpl<int> &t,
- const int itype) const
+ const int itype)
{
if (t[idx] & itype)
me[idx] = other;
@@ -1096,21 +1040,17 @@ template<class T, class S> struct knPdataSetScalarIntFlag : public KernelBase {
return itype;
}
typedef int type3;
- void runMessage()
- {
- debMsg("Executing kernel knPdataSetScalarIntFlag ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other, t, itype);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other, t, itype);
+ }
}
ParticleDataImpl<T> &me;
const S &other;
@@ -1223,29 +1163,21 @@ template<typename T> struct KnPtsSum : public KernelBase {
return itype;
}
typedef int type2;
- void runMessage()
- {
- debMsg("Executing kernel KnPtsSum ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, t, itype, result);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- KnPtsSum(KnPtsSum &o, tbb::split)
- : KernelBase(o), val(o.val), t(o.t), itype(o.itype), result(T(0.))
- {
- }
- void join(const KnPtsSum &o)
- {
- result += o.result;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ T result = T(0.);
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, t, itype, result);
+#pragma omp critical
+ {
+ this->result += result;
+ }
+ }
}
const ParticleDataImpl<T> &val;
const ParticleDataImpl<int> *t;
@@ -1275,28 +1207,21 @@ template<typename T> struct KnPtsSumSquare : public KernelBase {
return val;
}
typedef ParticleDataImpl<T> type0;
- void runMessage()
- {
- debMsg("Executing kernel KnPtsSumSquare ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, result);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- KnPtsSumSquare(KnPtsSumSquare &o, tbb::split) : KernelBase(o), val(o.val), result(0.)
- {
- }
- void join(const KnPtsSumSquare &o)
- {
- result += o.result;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real result = 0.;
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, result);
+#pragma omp critical
+ {
+ this->result += result;
+ }
+ }
}
const ParticleDataImpl<T> &val;
Real result;
@@ -1324,28 +1249,21 @@ template<typename T> struct KnPtsSumMagnitude : public KernelBase {
return val;
}
typedef ParticleDataImpl<T> type0;
- void runMessage()
- {
- debMsg("Executing kernel KnPtsSumMagnitude ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, result);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- KnPtsSumMagnitude(KnPtsSumMagnitude &o, tbb::split) : KernelBase(o), val(o.val), result(0.)
- {
- }
- void join(const KnPtsSumMagnitude &o)
- {
- result += o.result;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real result = 0.;
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, result);
+#pragma omp critical
+ {
+ this->result += result;
+ }
+ }
}
const ParticleDataImpl<T> &val;
Real result;
@@ -1392,29 +1310,21 @@ struct CompPdata_Min : public KernelBase {
return val;
}
typedef ParticleDataImpl<T> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompPdata_Min ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, minVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompPdata_Min(CompPdata_Min &o, tbb::split)
- : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max())
- {
- }
- void join(const CompPdata_Min &o)
- {
- minVal = min(minVal, o.minVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real minVal = std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, minVal);
+#pragma omp critical
+ {
+ this->minVal = min(minVal, this->minVal);
+ }
+ }
}
const ParticleDataImpl<T> &val;
Real minVal;
@@ -1447,29 +1357,21 @@ struct CompPdata_Max : public KernelBase {
return val;
}
typedef ParticleDataImpl<T> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompPdata_Max ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, maxVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompPdata_Max(CompPdata_Max &o, tbb::split)
- : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max())
- {
- }
- void join(const CompPdata_Max &o)
- {
- maxVal = max(maxVal, o.maxVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real maxVal = -std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, maxVal);
+#pragma omp critical
+ {
+ this->maxVal = max(maxVal, this->maxVal);
+ }
+ }
}
const ParticleDataImpl<T> &val;
Real maxVal;
@@ -1545,29 +1447,21 @@ struct CompPdata_MinVec3 : public KernelBase {
return val;
}
typedef ParticleDataImpl<Vec3> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompPdata_MinVec3 ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, minVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompPdata_MinVec3(CompPdata_MinVec3 &o, tbb::split)
- : KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max())
- {
- }
- void join(const CompPdata_MinVec3 &o)
- {
- minVal = min(minVal, o.minVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real minVal = std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, minVal);
+#pragma omp critical
+ {
+ this->minVal = min(minVal, this->minVal);
+ }
+ }
}
const ParticleDataImpl<Vec3> &val;
Real minVal;
@@ -1599,29 +1493,21 @@ struct CompPdata_MaxVec3 : public KernelBase {
return val;
}
typedef ParticleDataImpl<Vec3> type0;
- void runMessage()
- {
- debMsg("Executing kernel CompPdata_MaxVec3 ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, val, maxVal);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CompPdata_MaxVec3(CompPdata_MaxVec3 &o, tbb::split)
- : KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max())
- {
- }
- void join(const CompPdata_MaxVec3 &o)
- {
- maxVal = max(maxVal, o.maxVal);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ Real maxVal = -std::numeric_limits<Real>::max();
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, val, maxVal);
+#pragma omp critical
+ {
+ this->maxVal = max(maxVal, this->maxVal);
+ }
+ }
}
const ParticleDataImpl<Vec3> &val;
Real maxVal;
diff --git a/extern/mantaflow/preprocessed/particle.h b/extern/mantaflow/preprocessed/particle.h
index 7e0c64e6d03..1e3abcaca20 100644
--- a/extern/mantaflow/preprocessed/particle.h
+++ b/extern/mantaflow/preprocessed/particle.h
@@ -205,7 +205,6 @@ class ParticleBase : public PbClass {
//! custom seed for particle systems, used by plugins
int mSeed; //! fix global random seed storage, used mainly by functions in this class
static int globalSeed;
-
public:
PbArgs _args;
}
@@ -629,7 +628,6 @@ template<class S> class ParticleSystem : public ParticleBase {
std::vector<S> mData;
//! reduce storage , called by doCompress
virtual void compress();
-
public:
PbArgs _args;
}
@@ -920,7 +918,6 @@ class ParticleIndexSystem : public ParticleSystem<ParticleIndexData> {
return -1;
}
};
-
public:
PbArgs _args;
}
@@ -985,7 +982,6 @@ template<class DATA, class CON> class ConnectedParticleSystem : public ParticleS
protected:
std::vector<CON> mSegments;
virtual void compress();
-
public:
PbArgs _args;
}
@@ -1075,7 +1071,6 @@ class ParticleDataBase : public PbClass {
protected:
ParticleBase *mpParticleSys;
-
public:
PbArgs _args;
}
@@ -1848,7 +1843,6 @@ template<class T> class ParticleDataImpl : public ParticleDataBase {
//! optionally , we might have an associated grid from which to grab new data
Grid<T> *mpGridSource; //! unfortunately , we need to distinguish mac vs regular vec3
bool mGridSourceMAC;
-
public:
PbArgs _args;
}
@@ -1912,19 +1906,17 @@ template<class S> void ParticleSystem<S>::transformPositions(Vec3i dimOld, Vec3i
// check for deletion/invalid position, otherwise return velocity
-template<class S> struct _GridAdvectKernel : public KernelBase {
- _GridAdvectKernel(const KernelBase &base,
- std::vector<S> &p,
- const MACGrid &vel,
- const FlagGrid &flags,
- const Real dt,
- const bool deleteInObstacle,
- const bool stopInObstacle,
- const bool skipNew,
- const ParticleDataImpl<int> *ptype,
- const int exclude,
- std::vector<Vec3> &u)
- : KernelBase(base),
+template<class S> struct GridAdvectKernel : public KernelBase {
+ GridAdvectKernel(std::vector<S> &p,
+ const MACGrid &vel,
+ const FlagGrid &flags,
+ const Real dt,
+ const bool deleteInObstacle,
+ const bool stopInObstacle,
+ const bool skipNew,
+ const ParticleDataImpl<int> *ptype,
+ const int exclude)
+ : KernelBase(p.size()),
p(p),
vel(vel),
flags(flags),
@@ -1934,8 +1926,10 @@ template<class S> struct _GridAdvectKernel : public KernelBase {
skipNew(skipNew),
ptype(ptype),
exclude(exclude),
- u(u)
+ u((size))
{
+ runMessage();
+ run();
}
inline void op(IndexInt idx,
std::vector<S> &p,
@@ -1947,7 +1941,7 @@ template<class S> struct _GridAdvectKernel : public KernelBase {
const bool skipNew,
const ParticleDataImpl<int> *ptype,
const int exclude,
- std::vector<Vec3> &u) const
+ std::vector<Vec3> &u)
{
if ((p[idx].flag & ParticleBase::PDELETE) || (ptype && ((*ptype)[idx] & exclude)) ||
(skipNew && (p[idx].flag & ParticleBase::PNEW))) {
@@ -1968,66 +1962,6 @@ template<class S> struct _GridAdvectKernel : public KernelBase {
}
u[idx] = vel.getInterpolated(p[idx].pos) * dt;
}
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, p, vel, flags, dt, deleteInObstacle, stopInObstacle, skipNew, ptype, exclude, u);
- }
- void run()
- {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- std::vector<S> &p;
- const MACGrid &vel;
- const FlagGrid &flags;
- const Real dt;
- const bool deleteInObstacle;
- const bool stopInObstacle;
- const bool skipNew;
- const ParticleDataImpl<int> *ptype;
- const int exclude;
- std::vector<Vec3> &u;
-};
-template<class S> struct GridAdvectKernel : public KernelBase {
- GridAdvectKernel(std::vector<S> &p,
- const MACGrid &vel,
- const FlagGrid &flags,
- const Real dt,
- const bool deleteInObstacle,
- const bool stopInObstacle,
- const bool skipNew,
- const ParticleDataImpl<int> *ptype,
- const int exclude)
- : KernelBase(p.size()),
- _inner(KernelBase(p.size()),
- p,
- vel,
- flags,
- dt,
- deleteInObstacle,
- stopInObstacle,
- skipNew,
- ptype,
- exclude,
- u),
- p(p),
- vel(vel),
- flags(flags),
- dt(dt),
- deleteInObstacle(deleteInObstacle),
- stopInObstacle(stopInObstacle),
- skipNew(skipNew),
- ptype(ptype),
- exclude(exclude),
- u((size))
- {
- runMessage();
- run();
- }
- void run()
- {
- _inner.run();
- }
inline operator std::vector<Vec3>()
{
return u;
@@ -2081,14 +2015,18 @@ template<class S> struct GridAdvectKernel : public KernelBase {
return exclude;
}
typedef int type8;
- void runMessage()
+ void runMessage(){};
+ void run()
{
- debMsg("Executing kernel GridAdvectKernel ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- _GridAdvectKernel<S> _inner;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, p, vel, flags, dt, deleteInObstacle, stopInObstacle, skipNew, ptype, exclude, u);
+ }
+ }
std::vector<S> &p;
const MACGrid &vel;
const FlagGrid &flags;
@@ -2112,7 +2050,7 @@ template<class S> struct KnDeleteInObstacle : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, std::vector<S> &p, const FlagGrid &flags) const
+ inline void op(IndexInt idx, std::vector<S> &p, const FlagGrid &flags)
{
if (p[idx].flag & ParticleBase::PDELETE)
return;
@@ -2130,21 +2068,17 @@ template<class S> struct KnDeleteInObstacle : public KernelBase {
return flags;
}
typedef FlagGrid type1;
- void runMessage()
- {
- debMsg("Executing kernel KnDeleteInObstacle ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, p, flags);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, p, flags);
+ }
}
std::vector<S> &p;
const FlagGrid &flags;
@@ -2189,7 +2123,7 @@ template<class S> struct KnClampPositions : public KernelBase {
ParticleDataImpl<Vec3> *posOld = nullptr,
bool stopInObstacle = true,
const ParticleDataImpl<int> *ptype = nullptr,
- const int exclude = 0) const
+ const int exclude = 0)
{
if (p[idx].flag & ParticleBase::PDELETE)
return;
@@ -2235,21 +2169,17 @@ template<class S> struct KnClampPositions : public KernelBase {
return exclude;
}
typedef int type5;
- void runMessage()
- {
- debMsg("Executing kernel KnClampPositions ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, p, flags, posOld, stopInObstacle, ptype, exclude);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, p, flags, posOld, stopInObstacle, ptype, exclude);
+ }
}
std::vector<S> &p;
const FlagGrid &flags;
@@ -2341,13 +2271,7 @@ template<class S> struct KnProjectParticles : public KernelBase {
return rand;
}
typedef RandomStream type2;
- void runMessage()
- {
- debMsg("Executing kernel KnProjectParticles ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
+ void runMessage(){};
void run()
{
const IndexInt _sz = size;
@@ -2389,7 +2313,7 @@ template<class S> struct KnProjectOutOfBnd : public KernelBase {
const Real bnd,
const bool *axis,
const ParticleDataImpl<int> *ptype,
- const int exclude) const
+ const int exclude)
{
if (!part.isActive(idx) || (ptype && ((*ptype)[idx] & exclude)))
return;
@@ -2438,21 +2362,17 @@ template<class S> struct KnProjectOutOfBnd : public KernelBase {
return exclude;
}
typedef int type5;
- void runMessage()
- {
- debMsg("Executing kernel KnProjectOutOfBnd ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, part, flags, bnd, axis, ptype, exclude);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, part, flags, bnd, axis, ptype, exclude);
+ }
}
ParticleSystem<S> &part;
const FlagGrid &flags;
diff --git a/extern/mantaflow/preprocessed/plugin/advection.cpp b/extern/mantaflow/preprocessed/plugin/advection.cpp
index dd891e22088..6a548841bef 100644
--- a/extern/mantaflow/preprocessed/plugin/advection.cpp
+++ b/extern/mantaflow/preprocessed/plugin/advection.cpp
@@ -59,7 +59,7 @@ template<class T> struct SemiLagrange : public KernelBase {
Real dt,
bool isLevelset,
int orderSpace,
- int orderTrace) const
+ int orderTrace)
{
if (orderTrace == 1) {
// traceback position
@@ -117,37 +117,35 @@ template<class T> struct SemiLagrange : public KernelBase {
return orderTrace;
}
typedef int type7;
- void runMessage()
- {
- debMsg("Executing kernel SemiLagrange ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
const MACGrid &vel;
Grid<T> &dst;
@@ -189,7 +187,7 @@ struct SemiLagrangeMAC : public KernelBase {
const MACGrid &src,
Real dt,
int orderSpace,
- int orderTrace) const
+ int orderTrace)
{
if (orderTrace == 1) {
// get currect velocity at MAC position
@@ -259,37 +257,35 @@ struct SemiLagrangeMAC : public KernelBase {
return orderTrace;
}
typedef int type6;
- void runMessage()
- {
- debMsg("Executing kernel SemiLagrangeMAC ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
const MACGrid &vel;
MACGrid &dst;
@@ -331,7 +327,7 @@ template<class T> struct MacCormackCorrect : public KernelBase {
const Grid<T> &bwd,
Real strength,
bool isLevelSet,
- bool isMAC = false) const
+ bool isMAC = false)
{
dst[idx] = fwd[idx];
@@ -380,21 +376,17 @@ template<class T> struct MacCormackCorrect : public KernelBase {
return isMAC;
}
typedef bool type7;
- void runMessage()
- {
- debMsg("Executing kernel MacCormackCorrect ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
+ }
}
const FlagGrid &flags;
Grid<T> &dst;
@@ -440,7 +432,7 @@ template<class T> struct MacCormackCorrectMAC : public KernelBase {
const Grid<T> &bwd,
Real strength,
bool isLevelSet,
- bool isMAC = false) const
+ bool isMAC = false)
{
bool skip[3] = {false, false, false};
@@ -505,37 +497,35 @@ template<class T> struct MacCormackCorrectMAC : public KernelBase {
return isMAC;
}
typedef bool type7;
- void runMessage()
- {
- debMsg("Executing kernel MacCormackCorrectMAC ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
Grid<T> &dst;
const Grid<T> &old;
@@ -762,7 +752,7 @@ template<class T> struct MacCormackClamp : public KernelBase {
const Grid<T> &orig,
const Grid<T> &fwd,
Real dt,
- const int clampMode) const
+ const int clampMode)
{
T dval = dst(i, j, k);
Vec3i gridUpper = flags.getSize() - 1;
@@ -830,37 +820,35 @@ template<class T> struct MacCormackClamp : public KernelBase {
return clampMode;
}
typedef int type6;
- void runMessage()
- {
- debMsg("Executing kernel MacCormackClamp ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
const MACGrid &vel;
Grid<T> &dst;
@@ -901,7 +889,7 @@ struct MacCormackClampMAC : public KernelBase {
const MACGrid &orig,
const MACGrid &fwd,
Real dt,
- const int clampMode) const
+ const int clampMode)
{
Vec3 pos(i, j, k);
Vec3 dval = dst(i, j, k);
@@ -957,37 +945,35 @@ struct MacCormackClampMAC : public KernelBase {
return clampMode;
}
typedef int type6;
- void runMessage()
- {
- debMsg("Executing kernel MacCormackClampMAC ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
const MACGrid &vel;
MACGrid &dst;
@@ -1016,27 +1002,39 @@ void fnAdvectSemiLagrange(FluidSolver *parent,
bool levelset = orig.getType() & GridBase::TypeLevelset;
// forward step
- GridType fwd(parent);
- SemiLagrange<T>(flags, vel, fwd, orig, dt, levelset, orderSpace, orderTrace);
+ GridType *fwd = new GridType(parent, true, false, false);
+ SemiLagrange<T>(flags, vel, *fwd, orig, dt, levelset, orderSpace, orderTrace);
if (order == 1) {
- orig.swap(fwd);
+#if OPENMP && OPENMP_OFFLOAD
+ orig.copyFrom(*fwd, true, false);
+#else
+ orig.swap(*fwd);
+#endif
}
else if (order == 2) { // MacCormack
GridType bwd(parent);
- GridType newGrid(parent);
+ GridType *newGrid = new GridType(parent, true, false, false);
// bwd <- backwards step
- SemiLagrange<T>(flags, vel, bwd, fwd, -dt, levelset, orderSpace, orderTrace);
+ SemiLagrange<T>(flags, vel, bwd, *fwd, -dt, levelset, orderSpace, orderTrace);
// newGrid <- compute correction
- MacCormackCorrect<T>(flags, newGrid, orig, fwd, bwd, strength, levelset);
+ MacCormackCorrect<T>(flags, *newGrid, orig, *fwd, bwd, strength, levelset);
// clamp values
- MacCormackClamp<T>(flags, vel, newGrid, orig, fwd, dt, clampMode);
-
- orig.swap(newGrid);
- }
+ MacCormackClamp<T>(flags, vel, *newGrid, orig, *fwd, dt, clampMode);
+
+#if OPENMP && OPENMP_OFFLOAD
+ orig.copyFrom(*newGrid, true, false);
+#else
+ orig.swap(*newGrid);
+#endif
+ if (newGrid)
+ delete newGrid;
+ }
+ if (fwd)
+ delete fwd;
}
// outflow functions
@@ -1087,7 +1085,7 @@ struct extrapolateVelConvectiveBC : public KernelBase {
const MACGrid &vel,
MACGrid &velDst,
const MACGrid &velPrev,
- Real timeStep) const
+ Real timeStep)
{
if (flags.isOutflow(i, j, k)) {
const Vec3 bulkVel = getBulkVel(flags, vel, i, j, k);
@@ -1154,37 +1152,35 @@ struct extrapolateVelConvectiveBC : public KernelBase {
return timeStep;
}
typedef Real type4;
- void runMessage()
- {
- debMsg("Executing kernel extrapolateVelConvectiveBC ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, velDst, velPrev, timeStep);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, velDst, velPrev, timeStep);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, velDst, velPrev, timeStep);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, velDst, velPrev, timeStep);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
const MACGrid &vel;
MACGrid &velDst;
@@ -1200,8 +1196,7 @@ struct copyChangedVels : public KernelBase {
runMessage();
run();
}
- inline void op(
- int i, int j, int k, const FlagGrid &flags, const MACGrid &velDst, MACGrid &vel) const
+ inline void op(int i, int j, int k, const FlagGrid &flags, const MACGrid &velDst, MACGrid &vel)
{
if (flags.isOutflow(i, j, k))
vel(i, j, k) = velDst(i, j, k);
@@ -1221,37 +1216,35 @@ struct copyChangedVels : public KernelBase {
return vel;
}
typedef MACGrid type2;
- void runMessage()
- {
- debMsg("Executing kernel copyChangedVels ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, velDst, vel);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, velDst, vel);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, velDst, vel);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, velDst, vel);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
const MACGrid &velDst;
MACGrid &vel;
@@ -1275,7 +1268,7 @@ struct knResetPhiInObs : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const FlagGrid &flags, Grid<Real> &sdf) const
+ inline void op(int i, int j, int k, const FlagGrid &flags, Grid<Real> &sdf)
{
if (flags.isObstacle(i, j, k) && (sdf(i, j, k) < 0.)) {
sdf(i, j, k) = 0.1;
@@ -1291,37 +1284,35 @@ struct knResetPhiInObs : public KernelBase {
return sdf;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel knResetPhiInObs ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, sdf);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, sdf);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, sdf);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, sdf);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
Grid<Real> &sdf;
};
@@ -1378,33 +1369,45 @@ void fnAdvectSemiLagrange<MACGrid>(FluidSolver *parent,
Real dt = parent->getDt();
// forward step
- MACGrid fwd(parent);
- SemiLagrangeMAC(flags, vel, fwd, orig, dt, orderSpace, orderTrace);
+ MACGrid *fwd = new MACGrid(parent, true, false, false);
+ SemiLagrangeMAC(flags, vel, *fwd, orig, dt, orderSpace, orderTrace);
if (orderSpace != 1) {
debMsg("Warning higher order for MAC grids not yet implemented...", 1);
}
if (order == 1) {
- applyOutflowBC(flags, fwd, orig, dt);
- orig.swap(fwd);
+ applyOutflowBC(flags, *fwd, orig, dt);
+#if OPENMP && OPENMP_OFFLOAD
+ orig.copyFrom(*fwd, true, false);
+#else
+ orig.swap(*fwd);
+#endif
}
else if (order == 2) { // MacCormack
MACGrid bwd(parent);
- MACGrid newGrid(parent);
+ MACGrid *newGrid = new MACGrid(parent, true, false, false);
// bwd <- backwards step
- SemiLagrangeMAC(flags, vel, bwd, fwd, -dt, orderSpace, orderTrace);
+ SemiLagrangeMAC(flags, vel, bwd, *fwd, -dt, orderSpace, orderTrace);
// newGrid <- compute correction
- MacCormackCorrectMAC<Vec3>(flags, newGrid, orig, fwd, bwd, strength, false, true);
+ MacCormackCorrectMAC<Vec3>(flags, *newGrid, orig, *fwd, bwd, strength, false, true);
// clamp values
- MacCormackClampMAC(flags, vel, newGrid, orig, fwd, dt, clampMode);
-
- applyOutflowBC(flags, newGrid, orig, dt);
- orig.swap(newGrid);
- }
+ MacCormackClampMAC(flags, vel, *newGrid, orig, *fwd, dt, clampMode);
+
+ applyOutflowBC(flags, *newGrid, orig, dt);
+#if OPENMP && OPENMP_OFFLOAD
+ orig.copyFrom(*newGrid, true, false);
+#else
+ orig.swap(*newGrid);
+#endif
+ if (newGrid)
+ delete newGrid;
+ }
+ if (fwd)
+ delete fwd;
}
//! Perform semi-lagrangian advection of target Real- or Vec3 grid
diff --git a/extern/mantaflow/preprocessed/plugin/apic.cpp b/extern/mantaflow/preprocessed/plugin/apic.cpp
index 9df6b597e05..5da507bb44b 100644
--- a/extern/mantaflow/preprocessed/plugin/apic.cpp
+++ b/extern/mantaflow/preprocessed/plugin/apic.cpp
@@ -239,13 +239,7 @@ struct knApicMapLinearVec3ToMACGrid : public KernelBase {
return boundaryWidth;
}
typedef int type9;
- void runMessage()
- {
- debMsg("Executing kernel knApicMapLinearVec3ToMACGrid ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
+ void runMessage(){};
void run()
{
const IndexInt _sz = size;
@@ -370,7 +364,7 @@ struct knApicMapLinearMACGridToVec3 : public KernelBase {
const FlagGrid &flags,
const ParticleDataImpl<int> *ptype,
const int exclude,
- const int boundaryWidth) const
+ const int boundaryWidth)
{
if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude)))
return;
@@ -509,21 +503,17 @@ struct knApicMapLinearMACGridToVec3 : public KernelBase {
return boundaryWidth;
}
typedef int type9;
- void runMessage()
- {
- debMsg("Executing kernel knApicMapLinearMACGridToVec3 ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, vp, cpx, cpy, cpz, p, vg, flags, ptype, exclude, boundaryWidth);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, vp, cpx, cpy, cpz, p, vg, flags, ptype, exclude, boundaryWidth);
+ }
}
ParticleDataImpl<Vec3> &vp;
ParticleDataImpl<Vec3> &cpx;
diff --git a/extern/mantaflow/preprocessed/plugin/extforces.cpp b/extern/mantaflow/preprocessed/plugin/extforces.cpp
index 88935fa7ae9..a0fb4659aa2 100644
--- a/extern/mantaflow/preprocessed/plugin/extforces.cpp
+++ b/extern/mantaflow/preprocessed/plugin/extforces.cpp
@@ -52,7 +52,7 @@ struct KnApplyForceField : public KernelBase {
const Grid<Vec3> &force,
const Grid<Real> *include,
bool additive,
- bool isMAC) const
+ bool isMAC)
{
bool curFluid = flags.isFluid(i, j, k);
bool curEmpty = flags.isEmpty(i, j, k);
@@ -105,37 +105,35 @@ struct KnApplyForceField : public KernelBase {
return isMAC;
}
typedef bool type5;
- void runMessage()
- {
- debMsg("Executing kernel KnApplyForceField ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, force, include, additive, isMAC);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, force, include, additive, isMAC);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, force, include, additive, isMAC);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, force, include, additive, isMAC);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
MACGrid &vel;
const Grid<Vec3> &force;
@@ -165,7 +163,7 @@ struct KnApplyForce : public KernelBase {
MACGrid &vel,
Vec3 force,
const Grid<Real> *exclude,
- bool additive) const
+ bool additive)
{
bool curFluid = flags.isFluid(i, j, k);
bool curEmpty = flags.isEmpty(i, j, k);
@@ -206,37 +204,35 @@ struct KnApplyForce : public KernelBase {
return additive;
}
typedef bool type4;
- void runMessage()
- {
- debMsg("Executing kernel KnApplyForce ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, force, exclude, additive);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, force, exclude, additive);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, force, exclude, additive);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, force, exclude, additive);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
MACGrid &vel;
Vec3 force;
@@ -346,7 +342,7 @@ struct KnAddBuoyancy : public KernelBase {
const FlagGrid &flags,
const Grid<Real> &factor,
MACGrid &vel,
- Vec3 strength) const
+ Vec3 strength)
{
if (!flags.isFluid(i, j, k))
return;
@@ -377,37 +373,35 @@ struct KnAddBuoyancy : public KernelBase {
return strength;
}
typedef Vec3 type3;
- void runMessage()
- {
- debMsg("Executing kernel KnAddBuoyancy ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, factor, vel, strength);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, factor, vel, strength);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, factor, vel, strength);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, factor, vel, strength);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
const Grid<Real> &factor;
MACGrid &vel;
@@ -662,7 +656,7 @@ struct KnSetInflow : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, MACGrid &vel, int dim, int p0, const Vec3 &val) const
+ inline void op(int i, int j, int k, MACGrid &vel, int dim, int p0, const Vec3 &val)
{
Vec3i p(i, j, k);
if (p[dim] == p0 || p[dim] == p0 + 1)
@@ -688,37 +682,35 @@ struct KnSetInflow : public KernelBase {
return val;
}
typedef Vec3 type3;
- void runMessage()
- {
- debMsg("Executing kernel KnSetInflow ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, vel, dim, p0, val);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, vel, dim, p0, val);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, vel, dim, p0, val);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, vel, dim, p0, val);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
MACGrid &vel;
int dim;
int p0;
@@ -784,8 +776,7 @@ struct KnSetWallBcs : public KernelBase {
runMessage();
run();
}
- inline void op(
- int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const MACGrid *obvel) const
+ inline void op(int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const MACGrid *obvel)
{
bool curFluid = flags.isFluid(i, j, k);
@@ -848,37 +839,35 @@ struct KnSetWallBcs : public KernelBase {
return obvel;
}
typedef MACGrid type2;
- void runMessage()
- {
- debMsg("Executing kernel KnSetWallBcs ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, obvel);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, obvel);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, obvel);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, obvel);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
MACGrid &vel;
const MACGrid *obvel;
@@ -912,7 +901,7 @@ struct KnSetWallBcsFrac : public KernelBase {
MACGrid &velTarget,
const MACGrid *obvel,
const Grid<Real> *phiObs,
- const int &boundaryWidth = 0) const
+ const int &boundaryWidth = 0)
{
bool curFluid = flags.isFluid(i, j, k);
bool curObs = flags.isObstacle(i, j, k);
@@ -1025,37 +1014,35 @@ struct KnSetWallBcsFrac : public KernelBase {
return boundaryWidth;
}
typedef int type5;
- void runMessage()
- {
- debMsg("Executing kernel KnSetWallBcsFrac ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
const MACGrid &vel;
MACGrid &velTarget;
@@ -1127,8 +1114,7 @@ struct KnAddForceIfLower : public KernelBase {
runMessage();
run();
}
- inline void op(
- int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Vec3> &force) const
+ inline void op(int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Vec3> &force)
{
bool curFluid = flags.isFluid(i, j, k);
bool curEmpty = flags.isEmpty(i, j, k);
@@ -1173,37 +1159,35 @@ struct KnAddForceIfLower : public KernelBase {
return force;
}
typedef Grid<Vec3> type2;
- void runMessage()
- {
- debMsg("Executing kernel KnAddForceIfLower ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, force);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, force);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, force);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, force);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
MACGrid &vel;
const Grid<Vec3> &force;
@@ -1266,7 +1250,7 @@ struct KnConfForce : public KernelBase {
const Grid<Real> &grid,
const Grid<Vec3> &curl,
Real str,
- const Grid<Real> *strGrid) const
+ const Grid<Real> *strGrid)
{
Vec3 grad = 0.5 * Vec3(grid(i + 1, j, k) - grid(i - 1, j, k),
grid(i, j + 1, k) - grid(i, j - 1, k),
@@ -1303,37 +1287,35 @@ struct KnConfForce : public KernelBase {
return strGrid;
}
typedef Grid<Real> type4;
- void runMessage()
- {
- debMsg("Executing kernel KnConfForce ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, force, grid, curl, str, strGrid);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, force, grid, curl, str, strGrid);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, force, grid, curl, str, strGrid);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, force, grid, curl, str, strGrid);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<Vec3> &force;
const Grid<Real> &grid;
const Grid<Vec3> &curl;
@@ -1514,7 +1496,7 @@ struct KnDissolveSmoke : public KernelBase {
int speed,
bool logFalloff,
float dydx,
- float fac) const
+ float fac)
{
bool curFluid = flags.isFluid(i, j, k);
@@ -1602,37 +1584,35 @@ struct KnDissolveSmoke : public KernelBase {
return fac;
}
typedef float type9;
- void runMessage()
- {
- debMsg("Executing kernel KnDissolveSmoke ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
Grid<Real> &density;
Grid<Real> *heat;
diff --git a/extern/mantaflow/preprocessed/plugin/fire.cpp b/extern/mantaflow/preprocessed/plugin/fire.cpp
index f907bdf0504..8f924c57ddc 100644
--- a/extern/mantaflow/preprocessed/plugin/fire.cpp
+++ b/extern/mantaflow/preprocessed/plugin/fire.cpp
@@ -71,7 +71,7 @@ struct KnProcessBurn : public KernelBase {
Real ignitionTemp,
Real maxTemp,
Real dt,
- Vec3 flameSmokeColor) const
+ Vec3 flameSmokeColor)
{
// Save initial values
Real origFuel = fuel(i, j, k);
@@ -179,19 +179,44 @@ struct KnProcessBurn : public KernelBase {
return flameSmokeColor;
}
typedef Vec3 type12;
- void runMessage()
- {
- debMsg("Executing kernel KnProcessBurn ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i,
+ j,
+ k,
+ fuel,
+ density,
+ react,
+ red,
+ green,
+ blue,
+ heat,
+ burningRate,
+ flameSmoke,
+ ignitionTemp,
+ maxTemp,
+ dt,
+ flameSmokeColor);
+ }
+ }
+ else {
+ const int k = 0;
+#pragma omp parallel
+ {
+
+#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i,
@@ -210,35 +235,8 @@ struct KnProcessBurn : public KernelBase {
maxTemp,
dt,
flameSmokeColor);
+ }
}
- else {
- const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i,
- j,
- k,
- fuel,
- density,
- react,
- red,
- green,
- blue,
- heat,
- burningRate,
- flameSmoke,
- ignitionTemp,
- maxTemp,
- dt,
- flameSmokeColor);
- }
- }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
Grid<Real> &fuel;
Grid<Real> &density;
@@ -344,7 +342,7 @@ struct KnUpdateFlame : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const Grid<Real> &react, Grid<Real> &flame) const
+ inline void op(int i, int j, int k, const Grid<Real> &react, Grid<Real> &flame)
{
if (react(i, j, k) > 0.0f)
flame(i, j, k) = pow(react(i, j, k), 0.5f);
@@ -361,37 +359,35 @@ struct KnUpdateFlame : public KernelBase {
return flame;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel KnUpdateFlame ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, react, flame);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, react, flame);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, react, flame);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, react, flame);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const Grid<Real> &react;
Grid<Real> &flame;
};
diff --git a/extern/mantaflow/preprocessed/plugin/flip.cpp b/extern/mantaflow/preprocessed/plugin/flip.cpp
index 8757958d4b0..e63d8236f28 100644
--- a/extern/mantaflow/preprocessed/plugin/flip.cpp
+++ b/extern/mantaflow/preprocessed/plugin/flip.cpp
@@ -283,7 +283,7 @@ struct knClearFluidFlags : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, FlagGrid &flags, int dummy = 0) const
+ inline void op(int i, int j, int k, FlagGrid &flags, int dummy = 0)
{
if (flags.isFluid(i, j, k)) {
flags(i, j, k) = (flags(i, j, k) | FlagGrid::TypeEmpty) & ~FlagGrid::TypeFluid;
@@ -299,37 +299,35 @@ struct knClearFluidFlags : public KernelBase {
return dummy;
}
typedef int type1;
- void runMessage()
- {
- debMsg("Executing kernel knClearFluidFlags ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, dummy);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, dummy);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, dummy);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, dummy);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
FlagGrid &flags;
int dummy;
};
@@ -342,7 +340,7 @@ struct knSetNbObstacle : public KernelBase {
run();
}
inline void op(
- int i, int j, int k, FlagGrid &nflags, const FlagGrid &flags, const Grid<Real> &phiObs) const
+ int i, int j, int k, FlagGrid &nflags, const FlagGrid &flags, const Grid<Real> &phiObs)
{
if (phiObs(i, j, k) > 0.)
return;
@@ -381,37 +379,35 @@ struct knSetNbObstacle : public KernelBase {
return phiObs;
}
typedef Grid<Real> type2;
- void runMessage()
- {
- debMsg("Executing kernel knSetNbObstacle ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, nflags, flags, phiObs);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, nflags, flags, phiObs);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, nflags, flags, phiObs);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, nflags, flags, phiObs);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
FlagGrid &nflags;
const FlagGrid &flags;
const Grid<Real> &phiObs;
@@ -438,7 +434,11 @@ void markFluidCells(const BasicParticleSystem &parts,
if (phiObs) {
FlagGrid tmp(flags);
knSetNbObstacle(tmp, flags, *phiObs);
+#if OPENMP && OPENMP_OFFLOAD
+ flags.copyFrom(tmp, true, false);
+#else
flags.swap(tmp);
+#endif
}
}
static PyObject *_W_3(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
@@ -818,33 +818,37 @@ struct ComputeUnionLevelsetPindex : public KernelBase {
LevelsetGrid &phi,
const Real radius,
const ParticleDataImpl<int> *ptype,
- const int exclude) const
+ const int exclude)
{
const Vec3 gridPos = Vec3(i, j, k) + Vec3(0.5); // shifted by half cell
Real phiv = radius * 1.0; // outside
- const int r = int(radius) + 1;
- FOR_NEIGHBORS(phi, r)
- {
+ int r = int(radius) + 1;
+ int rZ = phi.is3D() ? r : 0;
+ for (int zj = k - rZ; zj <= k + rZ; zj++)
+ for (int yj = j - r; yj <= j + r; yj++)
+ for (int xj = i - r; xj <= i + r; xj++) {
+ if (!phi.isInBounds(Vec3i(xj, yj, zj)))
+ continue;
- // note, for the particle indices in indexSys the access is periodic (ie, dont skip for eg
- // inBounds(sx,10,10)
- IndexInt isysIdxS = index.index(xj, yj, zj);
- IndexInt pStart = index(isysIdxS), pEnd = 0;
- if (phi.isInBounds(isysIdxS + 1))
- pEnd = index(isysIdxS + 1);
- else
- pEnd = indexSys.size();
-
- // now loop over particles in cell
- for (IndexInt p = pStart; p < pEnd; ++p) {
- const int psrc = indexSys[p].sourceIndex;
- if (ptype && ((*ptype)[psrc] & exclude))
- continue;
- const Vec3 pos = parts[psrc].pos;
- phiv = std::min(phiv, fabs(norm(gridPos - pos)) - radius);
- }
- }
+ // note, for the particle indices in indexSys the access is periodic (ie, dont skip for
+ // eg inBounds(sx,10,10)
+ IndexInt isysIdxS = index.index(xj, yj, zj);
+ IndexInt pStart = index(isysIdxS), pEnd = 0;
+ if (phi.isInBounds(isysIdxS + 1))
+ pEnd = index(isysIdxS + 1);
+ else
+ pEnd = indexSys.size();
+
+ // now loop over particles in cell
+ for (IndexInt p = pStart; p < pEnd; ++p) {
+ const int psrc = indexSys[p].sourceIndex;
+ if (ptype && ((*ptype)[psrc] & exclude))
+ continue;
+ const Vec3 pos = parts[psrc].pos;
+ phiv = std::min(phiv, fabs(norm(gridPos - pos)) - radius);
+ }
+ }
phi(i, j, k) = phiv;
}
inline const Grid<int> &getArg0()
@@ -882,37 +886,35 @@ struct ComputeUnionLevelsetPindex : public KernelBase {
return exclude;
}
typedef int type6;
- void runMessage()
- {
- debMsg("Executing kernel ComputeUnionLevelsetPindex ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const Grid<int> &index;
const BasicParticleSystem &parts;
const ParticleIndexSystem &indexSys;
@@ -1015,42 +1017,46 @@ struct ComputeAveragedLevelsetWeight : public KernelBase {
const ParticleDataImpl<int> *ptype,
const int exclude,
Grid<Vec3> *save_pAcc = nullptr,
- Grid<Real> *save_rAcc = nullptr) const
+ Grid<Real> *save_rAcc = nullptr)
{
const Vec3 gridPos = Vec3(i, j, k) + Vec3(0.5); // shifted by half cell
Real phiv = radius * 1.0; // outside
// loop over neighborhood, similar to ComputeUnionLevelsetPindex
const Real sradiusInv = 1. / (4. * radius * radius);
- const int r = int(radius) + 1;
+ int r = int(1. * radius) + 1;
+ int rZ = phi.is3D() ? r : 0;
// accumulators
Real wacc = 0.;
Vec3 pacc = Vec3(0.);
Real racc = 0.;
- FOR_NEIGHBORS(phi, r)
- {
+ for (int zj = k - rZ; zj <= k + rZ; zj++)
+ for (int yj = j - r; yj <= j + r; yj++)
+ for (int xj = i - r; xj <= i + r; xj++) {
+ if (!phi.isInBounds(Vec3i(xj, yj, zj)))
+ continue;
- IndexInt isysIdxS = index.index(xj, yj, zj);
- IndexInt pStart = index(isysIdxS), pEnd = 0;
- if (phi.isInBounds(isysIdxS + 1))
- pEnd = index(isysIdxS + 1);
- else
- pEnd = indexSys.size();
- for (IndexInt p = pStart; p < pEnd; ++p) {
- IndexInt psrc = indexSys[p].sourceIndex;
- if (ptype && ((*ptype)[psrc] & exclude))
- continue;
+ IndexInt isysIdxS = index.index(xj, yj, zj);
+ IndexInt pStart = index(isysIdxS), pEnd = 0;
+ if (phi.isInBounds(isysIdxS + 1))
+ pEnd = index(isysIdxS + 1);
+ else
+ pEnd = indexSys.size();
+ for (IndexInt p = pStart; p < pEnd; ++p) {
+ IndexInt psrc = indexSys[p].sourceIndex;
+ if (ptype && ((*ptype)[psrc] & exclude))
+ continue;
- Vec3 pos = parts[psrc].pos;
- Real s = normSquare(gridPos - pos) * sradiusInv;
- // Real w = std::max(0., cubed(1.-s) );
- Real w = std::max(0., (1. - s)); // a bit smoother
- wacc += w;
- racc += radius * w;
- pacc += pos * w;
- }
- }
+ Vec3 pos = parts[psrc].pos;
+ Real s = normSquare(gridPos - pos) * sradiusInv;
+ // Real w = std::max(0., cubed(1.-s) );
+ Real w = std::max(0., (1. - s)); // a bit smoother
+ wacc += w;
+ racc += radius * w;
+ pacc += pos * w;
+ }
+ }
if (wacc > VECTOR_EPSILON) {
racc /= wacc;
@@ -1109,37 +1115,46 @@ struct ComputeAveragedLevelsetWeight : public KernelBase {
return save_rAcc;
}
typedef Grid<Real> type8;
- void runMessage()
- {
- debMsg("Executing kernel ComputeAveragedLevelsetWeight ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, parts, index, indexSys, phi, radius, ptype, exclude, save_pAcc, save_rAcc);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i,
+ j,
+ k,
+ parts,
+ index,
+ indexSys,
+ phi,
+ radius,
+ ptype,
+ exclude,
+ save_pAcc,
+ save_rAcc);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, parts, index, indexSys, phi, radius, ptype, exclude, save_pAcc, save_rAcc);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, parts, index, indexSys, phi, radius, ptype, exclude, save_pAcc, save_rAcc);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const BasicParticleSystem &parts;
const Grid<int> &index;
const ParticleIndexSystem &indexSys;
@@ -1165,7 +1180,7 @@ template<class T> struct knSmoothGrid : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor) const
+ inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor)
{
T val = me(i, j, k) + me(i + 1, j, k) + me(i - 1, j, k) + me(i, j + 1, k) + me(i, j - 1, k);
if (me.is3D()) {
@@ -1188,37 +1203,35 @@ template<class T> struct knSmoothGrid : public KernelBase {
return factor;
}
typedef Real type2;
- void runMessage()
- {
- debMsg("Executing kernel knSmoothGrid ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, me, tmp, factor);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, me, tmp, factor);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, me, tmp, factor);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, me, tmp, factor);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const Grid<T> &me;
Grid<T> &tmp;
Real factor;
@@ -1231,7 +1244,7 @@ template<class T> struct knSmoothGridNeg : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor) const
+ inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor)
{
T val = me(i, j, k) + me(i + 1, j, k) + me(i - 1, j, k) + me(i, j + 1, k) + me(i, j - 1, k);
if (me.is3D()) {
@@ -1258,37 +1271,35 @@ template<class T> struct knSmoothGridNeg : public KernelBase {
return factor;
}
typedef Real type2;
- void runMessage()
- {
- debMsg("Executing kernel knSmoothGridNeg ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, me, tmp, factor);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, me, tmp, factor);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, me, tmp, factor);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, me, tmp, factor);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const Grid<T> &me;
Grid<T> &tmp;
Real factor;
@@ -1396,7 +1407,7 @@ struct correctLevelset : public KernelBase {
const Grid<Real> &rAcc,
const Real radius,
const Real t_low,
- const Real t_high) const
+ const Real t_high)
{
if (rAcc(i, j, k) <= VECTOR_EPSILON)
return; // outside nothing happens
@@ -1463,37 +1474,35 @@ struct correctLevelset : public KernelBase {
return t_high;
}
typedef Real type5;
- void runMessage()
- {
- debMsg("Executing kernel correctLevelset ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
LevelsetGrid &phi;
const Grid<Vec3> &pAcc;
const Grid<Real> &rAcc;
@@ -1624,7 +1633,7 @@ struct knPushOutofObs : public KernelBase {
const Real shift,
const Real thresh,
const ParticleDataImpl<int> *ptype,
- const int exclude) const
+ const int exclude)
{
if (!parts.isActive(idx) || (ptype && ((*ptype)[idx] & exclude)))
return;
@@ -1675,21 +1684,17 @@ struct knPushOutofObs : public KernelBase {
return exclude;
}
typedef int type6;
- void runMessage()
- {
- debMsg("Executing kernel knPushOutofObs ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, parts, flags, phiObs, shift, thresh, ptype, exclude);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, parts, flags, phiObs, shift, thresh, ptype, exclude);
+ }
}
BasicParticleSystem &parts;
const FlagGrid &flags;
@@ -1759,10 +1764,7 @@ template<class T> struct knSafeDivReal : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx,
- Grid<T> &me,
- const Grid<Real> &other,
- Real cutoff = VECTOR_EPSILON) const
+ inline void op(IndexInt idx, Grid<T> &me, const Grid<Real> &other, Real cutoff = VECTOR_EPSILON)
{
if (other[idx] < cutoff) {
me[idx] = 0.;
@@ -1787,21 +1789,17 @@ template<class T> struct knSafeDivReal : public KernelBase {
return cutoff;
}
typedef Real type2;
- void runMessage()
- {
- debMsg("Executing kernel knSafeDivReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, me, other, cutoff);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, me, other, cutoff);
+ }
}
Grid<T> &me;
const Grid<Real> &other;
@@ -1879,13 +1877,7 @@ struct knMapLinearVec3ToMACGrid : public KernelBase {
return exclude;
}
typedef int type6;
- void runMessage()
- {
- debMsg("Executing kernel knMapLinearVec3ToMACGrid ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
+ void runMessage(){};
void run()
{
const IndexInt _sz = size;
@@ -2022,13 +2014,7 @@ template<class T> struct knMapLinear : public KernelBase {
return psource;
}
typedef ParticleDataImpl<T> type4;
- void runMessage()
- {
- debMsg("Executing kernel knMapLinear ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
+ void runMessage(){};
void run()
{
const IndexInt _sz = size;
@@ -2153,7 +2139,7 @@ template<class T> struct knMapFromGrid : public KernelBase {
inline void op(IndexInt idx,
const BasicParticleSystem &p,
const Grid<T> &gsrc,
- ParticleDataImpl<T> &target) const
+ ParticleDataImpl<T> &target)
{
if (!p.isActive(idx))
return;
@@ -2174,21 +2160,17 @@ template<class T> struct knMapFromGrid : public KernelBase {
return target;
}
typedef ParticleDataImpl<T> type2;
- void runMessage()
- {
- debMsg("Executing kernel knMapFromGrid ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, p, gsrc, target);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, p, gsrc, target);
+ }
}
const BasicParticleSystem &p;
const Grid<T> &gsrc;
@@ -2298,7 +2280,7 @@ struct knMapLinearMACGridToVec3_PIC : public KernelBase {
const MACGrid &vel,
ParticleDataImpl<Vec3> &pvel,
const ParticleDataImpl<int> *ptype,
- const int exclude) const
+ const int exclude)
{
if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude)))
return;
@@ -2335,21 +2317,17 @@ struct knMapLinearMACGridToVec3_PIC : public KernelBase {
return exclude;
}
typedef int type5;
- void runMessage()
- {
- debMsg("Executing kernel knMapLinearMACGridToVec3_PIC ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, p, flags, vel, pvel, ptype, exclude);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, p, flags, vel, pvel, ptype, exclude);
+ }
}
const BasicParticleSystem &p;
const FlagGrid &flags;
@@ -2438,7 +2416,7 @@ struct knMapLinearMACGridToVec3_FLIP : public KernelBase {
ParticleDataImpl<Vec3> &pvel,
const Real flipRatio,
const ParticleDataImpl<int> *ptype,
- const int exclude) const
+ const int exclude)
{
if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude)))
return;
@@ -2486,21 +2464,17 @@ struct knMapLinearMACGridToVec3_FLIP : public KernelBase {
return exclude;
}
typedef int type7;
- void runMessage()
- {
- debMsg("Executing kernel knMapLinearMACGridToVec3_FLIP ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, p, flags, vel, oldVel, pvel, flipRatio, ptype, exclude);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, p, flags, vel, oldVel, pvel, flipRatio, ptype, exclude);
+ }
}
const BasicParticleSystem &p;
const FlagGrid &flags;
@@ -2592,7 +2566,7 @@ struct knCombineVels : public KernelBase {
MACGrid &combineVel,
const LevelsetGrid *phi,
Real narrowBand,
- Real thresh) const
+ Real thresh)
{
int idx = vel.index(i, j, k);
@@ -2648,37 +2622,35 @@ struct knCombineVels : public KernelBase {
return thresh;
}
typedef Real type5;
- void runMessage()
- {
- debMsg("Executing kernel knCombineVels ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
MACGrid &vel;
const Grid<Vec3> &w;
MACGrid &combineVel;
diff --git a/extern/mantaflow/preprocessed/plugin/fluidguiding.cpp b/extern/mantaflow/preprocessed/plugin/fluidguiding.cpp
index de881840a2e..2b11f3a2557 100644
--- a/extern/mantaflow/preprocessed/plugin/fluidguiding.cpp
+++ b/extern/mantaflow/preprocessed/plugin/fluidguiding.cpp
@@ -62,7 +62,7 @@ struct apply1DKernelDirX : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) const
+ inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel)
{
int nx = in.getSizeX();
int kn = kernel.n;
@@ -91,37 +91,35 @@ struct apply1DKernelDirX : public KernelBase {
return kernel;
}
typedef Matrix type2;
- void runMessage()
- {
- debMsg("Executing kernel apply1DKernelDirX ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, in, out, kernel);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, in, out, kernel);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, in, out, kernel);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, in, out, kernel);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const MACGrid &in;
MACGrid &out;
const Matrix &kernel;
@@ -136,7 +134,7 @@ struct apply1DKernelDirY : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) const
+ inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel)
{
int ny = in.getSizeY();
int kn = kernel.n;
@@ -165,37 +163,35 @@ struct apply1DKernelDirY : public KernelBase {
return kernel;
}
typedef Matrix type2;
- void runMessage()
- {
- debMsg("Executing kernel apply1DKernelDirY ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, in, out, kernel);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, in, out, kernel);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, in, out, kernel);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, in, out, kernel);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const MACGrid &in;
MACGrid &out;
const Matrix &kernel;
@@ -210,7 +206,7 @@ struct apply1DKernelDirZ : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) const
+ inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel)
{
int nz = in.getSizeZ();
int kn = kernel.n;
@@ -239,37 +235,35 @@ struct apply1DKernelDirZ : public KernelBase {
return kernel;
}
typedef Matrix type2;
- void runMessage()
- {
- debMsg("Executing kernel apply1DKernelDirZ ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, in, out, kernel);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, in, out, kernel);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, in, out, kernel);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, in, out, kernel);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const MACGrid &in;
MACGrid &out;
const Matrix &kernel;
@@ -569,197 +563,88 @@ void prox_f(MACGrid &v,
// *****************************************************************************
-// re-uses main pressure solve from pressure.cpp
-void solvePressure(MACGrid &vel,
- Grid<Real> &pressure,
- const FlagGrid &flags,
- Real cgAccuracy = 1e-3,
- const Grid<Real> *phi = nullptr,
- const Grid<Real> *perCellCorr = nullptr,
- const MACGrid *fractions = nullptr,
- const MACGrid *obvel = nullptr,
- Real gfClamp = 1e-04,
- Real cgMaxIterFac = 1.5,
- bool precondition = true,
- int preconditioner = 1,
- bool enforceCompatibility = false,
- bool useL2Norm = false,
- bool zeroPressureFixing = false,
- const Grid<Real> *curv = nullptr,
- const Real surfTens = 0.0,
- Grid<Real> *retRhs = nullptr);
-
-//! Main function for fluid guiding , includes "regular" pressure solve
-
-void PD_fluid_guiding(MACGrid &vel,
- MACGrid &velT,
- Grid<Real> &pressure,
- FlagGrid &flags,
- Grid<Real> &weight,
- int blurRadius = 5,
- Real theta = 1.0,
- Real tau = 1.0,
- Real sigma = 1.0,
- Real epsRel = 1e-3,
- Real epsAbs = 1e-3,
- int maxIters = 200,
- Grid<Real> *phi = nullptr,
- Grid<Real> *perCellCorr = nullptr,
- MACGrid *fractions = nullptr,
- MACGrid *obvel = nullptr,
- Real gfClamp = 1e-04,
- Real cgMaxIterFac = 1.5,
- Real cgAccuracy = 1e-3,
- int preconditioner = 1,
- bool zeroPressureFixing = false,
- const Grid<Real> *curv = nullptr,
- const Real surfTens = 0.)
-{
- FluidSolver *parent = vel.getParent();
-
- // initialize dual/slack variables
- MACGrid velC = MACGrid(parent);
- velC.copyFrom(vel);
- MACGrid x = MACGrid(parent);
- MACGrid y = MACGrid(parent);
- MACGrid z = MACGrid(parent);
- MACGrid x0 = MACGrid(parent);
- MACGrid z0 = MACGrid(parent);
-
- // precomputation
- ADMM_precompute_Separable(blurRadius);
- MACGrid Q = MACGrid(parent);
- precomputeQ(Q, flags, velT, velC, gBlurKernel, sigma);
- MACGrid invA = MACGrid(parent);
- precomputeInvA(invA, weight, sigma);
-
- // loop
- int iter = 0;
- for (iter = 0; iter < maxIters; iter++) {
- // x-update
- x0.copyFrom(x);
- x.multConst(1.0 / sigma);
- x.add(y);
- prox_f(x, flags, Q, velC, sigma, invA);
- x.multConst(-sigma);
- x.addScaled(y, sigma);
- x.add(x0);
-
- // z-update
- z0.copyFrom(z);
- z.addScaled(x, -tau);
- Real cgAccuracyAdaptive = cgAccuracy;
-
- solvePressure(z,
- pressure,
- flags,
- cgAccuracyAdaptive,
- phi,
- perCellCorr,
- fractions,
- obvel,
- gfClamp,
- cgMaxIterFac,
- true,
- preconditioner,
- false,
- false,
- zeroPressureFixing,
- curv,
- surfTens);
-
- // y-update
- y.copyFrom(z);
- y.sub(z0);
- y.multConst(theta);
- y.add(z);
-
- // stopping criterion
- bool stop = (iter > 0 && getRNorm(z, z0) < getEpsDual(epsAbs, epsRel, z));
-
- if (stop || (iter == maxIters - 1))
- break;
- }
-
- // vel_new = z
- vel.copyFrom(z);
-
- debMsg("PD_fluid_guiding iterations:" << iter, 1);
-}
-static PyObject *_W_2(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
-{
- try {
- PbArgs _args(_linargs, _kwds);
- FluidSolver *parent = _args.obtainParent();
- bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
- pbPreparePlugin(parent, "PD_fluid_guiding", !noTiming);
- PyObject *_retval = nullptr;
- {
- ArgLocker _lock;
- MACGrid &vel = *_args.getPtr<MACGrid>("vel", 0, &_lock);
- MACGrid &velT = *_args.getPtr<MACGrid>("velT", 1, &_lock);
- Grid<Real> &pressure = *_args.getPtr<Grid<Real>>("pressure", 2, &_lock);
- FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 3, &_lock);
- Grid<Real> &weight = *_args.getPtr<Grid<Real>>("weight", 4, &_lock);
- int blurRadius = _args.getOpt<int>("blurRadius", 5, 5, &_lock);
- Real theta = _args.getOpt<Real>("theta", 6, 1.0, &_lock);
- Real tau = _args.getOpt<Real>("tau", 7, 1.0, &_lock);
- Real sigma = _args.getOpt<Real>("sigma", 8, 1.0, &_lock);
- Real epsRel = _args.getOpt<Real>("epsRel", 9, 1e-3, &_lock);
- Real epsAbs = _args.getOpt<Real>("epsAbs", 10, 1e-3, &_lock);
- int maxIters = _args.getOpt<int>("maxIters", 11, 200, &_lock);
- Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 12, nullptr, &_lock);
- Grid<Real> *perCellCorr = _args.getPtrOpt<Grid<Real>>("perCellCorr", 13, nullptr, &_lock);
- MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 14, nullptr, &_lock);
- MACGrid *obvel = _args.getPtrOpt<MACGrid>("obvel", 15, nullptr, &_lock);
- Real gfClamp = _args.getOpt<Real>("gfClamp", 16, 1e-04, &_lock);
- Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 17, 1.5, &_lock);
- Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 18, 1e-3, &_lock);
- int preconditioner = _args.getOpt<int>("preconditioner", 19, 1, &_lock);
- bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 20, false, &_lock);
- const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 21, nullptr, &_lock);
- const Real surfTens = _args.getOpt<Real>("surfTens", 22, 0., &_lock);
- _retval = getPyNone();
- PD_fluid_guiding(vel,
- velT,
- pressure,
- flags,
- weight,
- blurRadius,
- theta,
- tau,
- sigma,
- epsRel,
- epsAbs,
- maxIters,
- phi,
- perCellCorr,
- fractions,
- obvel,
- gfClamp,
- cgMaxIterFac,
- cgAccuracy,
- preconditioner,
- zeroPressureFixing,
- curv,
- surfTens);
- _args.check();
- }
- pbFinalizePlugin(parent, "PD_fluid_guiding", !noTiming);
- return _retval;
- }
- catch (std::exception &e) {
- pbSetError("PD_fluid_guiding", e.what());
- return 0;
- }
-}
-static const Pb::Register _RP_PD_fluid_guiding("", "PD_fluid_guiding", _W_2);
-extern "C" {
-void PbRegister_PD_fluid_guiding()
-{
- KEEP_UNUSED(_RP_PD_fluid_guiding);
-}
-}
+// TODO (sebbas): Disabled for now
+// // re-uses main pressure solve from pressure.cpp
+// void solvePressure(
+// MACGrid& vel, Grid<Real>& pressure, const FlagGrid& flags, Real cgAccuracy = 1e-3,
+// const Grid<Real>* phi = nullptr,
+// const Grid<Real>* perCellCorr = nullptr,
+// const MACGrid* fractions = nullptr,
+// const MACGrid* obvel = nullptr,
+// Real gfClamp = 1e-04,
+// Real cgMaxIterFac = 1.5,
+// bool precondition = true,
+// int preconditioner = 1,
+// bool enforceCompatibility = false,
+// bool useL2Norm = false,
+// bool zeroPressureFixing = false,
+// const Grid<Real> *curv = nullptr,
+// const Real surfTens = 0.0,
+// Grid<Real>* retRhs = nullptr );
+
+// //! Main function for fluid guiding , includes "regular" pressure solve
+// PYTHON() void PD_fluid_guiding(MACGrid& vel, MACGrid& velT,
+// Grid<Real>& pressure, FlagGrid& flags, Grid<Real>& weight, int blurRadius = 5,
+// Real theta = 1.0, Real tau = 1.0, Real sigma = 1.0,
+// Real epsRel = 1e-3, Real epsAbs = 1e-3, int maxIters = 200,
+// // duplicated for pressure solve
+// Grid<Real>* phi = nullptr, Grid<Real>* perCellCorr = nullptr, MACGrid* fractions = nullptr,
+// MACGrid* obvel = nullptr, Real gfClamp = 1e-04, Real cgMaxIterFac = 1.5, Real cgAccuracy = 1e-3,
+// int preconditioner = 1, bool zeroPressureFixing = false, const Grid<Real> *curv = nullptr,
+// const Real surfTens = 0.)
+// {
+// FluidSolver* parent = vel.getParent();
+
+// // initialize dual/slack variables
+// MACGrid velC = MACGrid(parent); velC.copyFrom(vel);
+// MACGrid x = MACGrid(parent);
+// MACGrid y = MACGrid(parent);
+// MACGrid z = MACGrid(parent);
+// MACGrid x0 = MACGrid(parent);
+// MACGrid z0 = MACGrid(parent);
+
+// // precomputation
+// ADMM_precompute_Separable(blurRadius);
+// MACGrid Q = MACGrid(parent);
+// precomputeQ(Q, flags, velT, velC, gBlurKernel, sigma);
+// MACGrid invA = MACGrid(parent);
+// precomputeInvA(invA, weight, sigma);
+
+// // loop
+// int iter = 0;
+// for (iter = 0; iter < maxIters; iter++) {
+// // x-update
+// x0.copyFrom(x);
+// x.multConst(1.0 / sigma);
+// x.add(y);
+// prox_f(x, flags, Q, velC, sigma, invA);
+// x.multConst(-sigma); x.addScaled(y, sigma); x.add(x0);
+
+// // z-update
+// z0.copyFrom(z);
+// z.addScaled(x, -tau);
+// Real cgAccuracyAdaptive = cgAccuracy;
+
+// solvePressure (z, pressure, flags, cgAccuracyAdaptive, phi, perCellCorr, fractions, obvel,
+// gfClamp, cgMaxIterFac, true, preconditioner, false, false, zeroPressureFixing, curv, surfTens );
+
+// // y-update
+// y.copyFrom(z);
+// y.sub(z0);
+// y.multConst(theta);
+// y.add(z);
+
+// // stopping criterion
+// bool stop = (iter > 0 && getRNorm(z, z0) < getEpsDual(epsAbs, epsRel, z));
+
+// if (stop || (iter == maxIters - 1)) break;
+// }
+
+// // vel_new = z
+// vel.copyFrom(z);
+
+// debMsg("PD_fluid_guiding iterations:" << iter, 1);
+// }
//! reset precomputation
void releaseBlurPrecomp()
@@ -768,7 +653,7 @@ void releaseBlurPrecomp()
gBlurKernelRadius = -1;
gBlurKernel = 0.f;
}
-static PyObject *_W_3(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
+static PyObject *_W_2(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
@@ -790,7 +675,7 @@ static PyObject *_W_3(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
return 0;
}
}
-static const Pb::Register _RP_releaseBlurPrecomp("", "releaseBlurPrecomp", _W_3);
+static const Pb::Register _RP_releaseBlurPrecomp("", "releaseBlurPrecomp", _W_2);
extern "C" {
void PbRegister_releaseBlurPrecomp()
{
diff --git a/extern/mantaflow/preprocessed/plugin/initplugins.cpp b/extern/mantaflow/preprocessed/plugin/initplugins.cpp
index a0cc2761dab..db507642f68 100644
--- a/extern/mantaflow/preprocessed/plugin/initplugins.cpp
+++ b/extern/mantaflow/preprocessed/plugin/initplugins.cpp
@@ -56,7 +56,7 @@ struct KnApplyNoiseInfl : public KernelBase {
const WaveletNoiseField &noise,
const Grid<Real> &sdf,
Real scale,
- Real sigma) const
+ Real sigma)
{
if (!flags.isFluid(i, j, k) || sdf(i, j, k) > sigma)
return;
@@ -96,37 +96,35 @@ struct KnApplyNoiseInfl : public KernelBase {
return sigma;
}
typedef Real type5;
- void runMessage()
- {
- debMsg("Executing kernel KnApplyNoiseInfl ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, density, noise, sdf, scale, sigma);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, density, noise, sdf, scale, sigma);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, density, noise, sdf, scale, sigma);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, density, noise, sdf, scale, sigma);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
Grid<Real> &density;
const WaveletNoiseField &noise;
@@ -202,7 +200,7 @@ struct KnAddNoise : public KernelBase {
Grid<Real> &density,
const WaveletNoiseField &noise,
const Grid<Real> *sdf,
- Real scale) const
+ Real scale)
{
if (!flags.isFluid(i, j, k) || (sdf && (*sdf)(i, j, k) > 0.))
return;
@@ -233,37 +231,35 @@ struct KnAddNoise : public KernelBase {
return scale;
}
typedef Real type4;
- void runMessage()
- {
- debMsg("Executing kernel KnAddNoise ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, density, noise, sdf, scale);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, density, noise, sdf, scale);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, density, noise, sdf, scale);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, density, noise, sdf, scale);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
Grid<Real> &density;
const WaveletNoiseField &noise;
@@ -329,7 +325,7 @@ template<class T> struct knSetPdataNoise : public KernelBase {
const BasicParticleSystem &parts,
ParticleDataImpl<T> &pdata,
const WaveletNoiseField &noise,
- Real scale) const
+ Real scale)
{
pdata[idx] = noise.evaluate(parts.getPos(idx)) * scale;
}
@@ -353,21 +349,17 @@ template<class T> struct knSetPdataNoise : public KernelBase {
return scale;
}
typedef Real type3;
- void runMessage()
- {
- debMsg("Executing kernel knSetPdataNoise ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, parts, pdata, noise, scale);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, parts, pdata, noise, scale);
+ }
}
const BasicParticleSystem &parts;
ParticleDataImpl<T> &pdata;
@@ -389,7 +381,7 @@ template<class T> struct knSetPdataNoiseVec : public KernelBase {
const BasicParticleSystem &parts,
ParticleDataImpl<T> &pdata,
const WaveletNoiseField &noise,
- Real scale) const
+ Real scale)
{
pdata[idx] = noise.evaluateVec(parts.getPos(idx)) * scale;
}
@@ -413,21 +405,17 @@ template<class T> struct knSetPdataNoiseVec : public KernelBase {
return scale;
}
typedef Real type3;
- void runMessage()
- {
- debMsg("Executing kernel knSetPdataNoiseVec ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, parts, pdata, noise, scale);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, parts, pdata, noise, scale);
+ }
}
const BasicParticleSystem &parts;
ParticleDataImpl<T> &pdata;
@@ -688,7 +676,7 @@ struct KnApplyEmission : public KernelBase {
const Grid<Real> &source,
const Grid<Real> *emissionTexture,
bool isAbsolute,
- int type) const
+ int type)
{
// if type is given, only apply emission when celltype matches type from flaggrid
// and if emission texture is given, only apply emission when some emission is present at cell
@@ -733,37 +721,35 @@ struct KnApplyEmission : public KernelBase {
return type;
}
typedef int type5;
- void runMessage()
- {
- debMsg("Executing kernel KnApplyEmission ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
Grid<Real> &target;
const Grid<Real> &source;
@@ -837,7 +823,7 @@ struct KnApplyDensity : public KernelBase {
Grid<Real> &density,
const Grid<Real> &sdf,
Real value,
- Real sigma) const
+ Real sigma)
{
if (!flags.isFluid(i, j, k) || sdf(i, j, k) > sigma)
return;
@@ -868,37 +854,35 @@ struct KnApplyDensity : public KernelBase {
return sigma;
}
typedef Real type4;
- void runMessage()
- {
- debMsg("Executing kernel KnApplyDensity ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, density, sdf, value, sigma);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, density, sdf, value, sigma);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, density, sdf, value, sigma);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, density, sdf, value, sigma);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
Grid<Real> &density;
const Grid<Real> &sdf;
@@ -1041,7 +1025,7 @@ struct KnResetInObstacle : public KernelBase {
Grid<Real> *red,
Grid<Real> *green,
Grid<Real> *blue,
- Real resetValue) const
+ Real resetValue)
{
if (!flags.isObstacle(i, j, k))
return;
@@ -1115,37 +1099,35 @@ struct KnResetInObstacle : public KernelBase {
return resetValue;
}
typedef Real type9;
- void runMessage()
- {
- debMsg("Executing kernel KnResetInObstacle ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
FlagGrid &flags;
MACGrid &vel;
Grid<Real> *density;
@@ -1692,7 +1674,7 @@ struct KnUpdateFractions : public KernelBase {
const Grid<Real> &phiObs,
MACGrid &fractions,
const int &boundaryWidth,
- const Real fracThreshold) const
+ const Real fracThreshold)
{
// walls at domain bounds and inner objects
@@ -1787,37 +1769,35 @@ struct KnUpdateFractions : public KernelBase {
return fracThreshold;
}
typedef Real type4;
- void runMessage()
- {
- debMsg("Executing kernel KnUpdateFractions ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
const Grid<Real> &phiObs;
MACGrid &fractions;
@@ -1896,7 +1876,7 @@ struct KnUpdateFlagsObs : public KernelBase {
const Grid<Real> &phiObs,
const Grid<Real> *phiOut,
const Grid<Real> *phiIn,
- int boundaryWidth) const
+ int boundaryWidth)
{
bool isObs = false;
@@ -1964,37 +1944,35 @@ struct KnUpdateFlagsObs : public KernelBase {
return boundaryWidth;
}
typedef int type5;
- void runMessage()
- {
- debMsg("Executing kernel KnUpdateFlagsObs ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = boundaryWidth; j < _maxY; j++)
- for (int i = boundaryWidth; i < _maxX; i++)
- op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = boundaryWidth; j < _maxY; j++)
+ for (int i = boundaryWidth; i < _maxX; i++)
+ op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = boundaryWidth; i < _maxX; i++)
- op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = boundaryWidth; j < _maxY; j++)
+ for (int i = boundaryWidth; i < _maxX; i++)
+ op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(boundaryWidth, maxY), *this);
- }
FlagGrid &flags;
const MACGrid *fractions;
const Grid<Real> &phiObs;
@@ -2067,7 +2045,7 @@ struct kninitVortexVelocity : public KernelBase {
const Grid<Real> &phiObs,
MACGrid &vel,
const Vec3 &center,
- const Real &radius) const
+ const Real &radius)
{
if (phiObs(i, j, k) >= -1.) {
@@ -2115,37 +2093,35 @@ struct kninitVortexVelocity : public KernelBase {
return radius;
}
typedef Real type3;
- void runMessage()
- {
- debMsg("Executing kernel kninitVortexVelocity ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, phiObs, vel, center, radius);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, phiObs, vel, center, radius);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, phiObs, vel, center, radius);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, phiObs, vel, center, radius);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const Grid<Real> &phiObs;
MACGrid &vel;
const Vec3 &center;
@@ -2318,7 +2294,7 @@ template<class T> struct knBlurGrid : public KernelBase {
Grid<T> &originGrid,
Grid<T> &targetGrid,
GaussianKernelCreator &gkSigma,
- int cdir) const
+ int cdir)
{
targetGrid(i, j, k) = convolveGrid<T>(originGrid, gkSigma, Vec3(i, j, k), cdir);
}
@@ -2342,37 +2318,35 @@ template<class T> struct knBlurGrid : public KernelBase {
return cdir;
}
typedef int type3;
- void runMessage()
- {
- debMsg("Executing kernel knBlurGrid ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, originGrid, targetGrid, gkSigma, cdir);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, originGrid, targetGrid, gkSigma, cdir);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, originGrid, targetGrid, gkSigma, cdir);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, originGrid, targetGrid, gkSigma, cdir);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<T> &originGrid;
Grid<T> &targetGrid;
GaussianKernelCreator &gkSigma;
@@ -2412,7 +2386,7 @@ struct KnBlurMACGridGauss : public KernelBase {
MACGrid &originGrid,
MACGrid &target,
GaussianKernelCreator &gkSigma,
- int cdir) const
+ int cdir)
{
Vec3 pos(i, j, k);
Vec3 step(1.0, 0.0, 0.0);
@@ -2462,37 +2436,35 @@ struct KnBlurMACGridGauss : public KernelBase {
return cdir;
}
typedef int type3;
- void runMessage()
- {
- debMsg("Executing kernel KnBlurMACGridGauss ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, originGrid, target, gkSigma, cdir);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, originGrid, target, gkSigma, cdir);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, originGrid, target, gkSigma, cdir);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, originGrid, target, gkSigma, cdir);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
MACGrid &originGrid;
MACGrid &target;
GaussianKernelCreator &gkSigma;
diff --git a/extern/mantaflow/preprocessed/plugin/kepsilon.cpp b/extern/mantaflow/preprocessed/plugin/kepsilon.cpp
index 32425a5756e..c5711b95242 100644
--- a/extern/mantaflow/preprocessed/plugin/kepsilon.cpp
+++ b/extern/mantaflow/preprocessed/plugin/kepsilon.cpp
@@ -61,7 +61,7 @@ struct KnTurbulenceClamp : public KernelBase {
Real minK,
Real maxK,
Real minNu,
- Real maxNu) const
+ Real maxNu)
{
Real eps = egrid[idx];
Real ke = clamp(kgrid[idx], minK, maxK);
@@ -104,21 +104,17 @@ struct KnTurbulenceClamp : public KernelBase {
return maxNu;
}
typedef Real type5;
- void runMessage()
- {
- debMsg("Executing kernel KnTurbulenceClamp ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, kgrid, egrid, minK, maxK, minNu, maxNu);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, kgrid, egrid, minK, maxK, minNu, maxNu);
+ }
}
Grid<Real> &kgrid;
Grid<Real> &egrid;
@@ -163,7 +159,7 @@ struct KnComputeProduction : public KernelBase {
Grid<Real> &prod,
Grid<Real> &nuT,
Grid<Real> *strain,
- Real pscale = 1.0f) const
+ Real pscale = 1.0f)
{
Real curEps = eps(i, j, k);
if (curEps > 0) {
@@ -234,37 +230,35 @@ struct KnComputeProduction : public KernelBase {
return pscale;
}
typedef Real type7;
- void runMessage()
- {
- debMsg("Executing kernel KnComputeProduction ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const MACGrid &vel;
const Grid<Vec3> &velCenter;
const Grid<Real> &ke;
@@ -345,7 +339,7 @@ struct KnAddTurbulenceSource : public KernelBase {
run();
}
inline void op(
- IndexInt idx, Grid<Real> &kgrid, Grid<Real> &egrid, const Grid<Real> &pgrid, Real dt) const
+ IndexInt idx, Grid<Real> &kgrid, Grid<Real> &egrid, const Grid<Real> &pgrid, Real dt)
{
Real eps = egrid[idx], prod = pgrid[idx], ke = kgrid[idx];
if (ke <= 0)
@@ -379,21 +373,17 @@ struct KnAddTurbulenceSource : public KernelBase {
return dt;
}
typedef Real type3;
- void runMessage()
- {
- debMsg("Executing kernel KnAddTurbulenceSource ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, kgrid, egrid, pgrid, dt);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, kgrid, egrid, pgrid, dt);
+ }
}
Grid<Real> &kgrid;
Grid<Real> &egrid;
diff --git a/extern/mantaflow/preprocessed/plugin/pressure.cpp b/extern/mantaflow/preprocessed/plugin/pressure.cpp
index 593aeb16859..4674bf4a7bf 100644
--- a/extern/mantaflow/preprocessed/plugin/pressure.cpp
+++ b/extern/mantaflow/preprocessed/plugin/pressure.cpp
@@ -138,9 +138,10 @@ struct MakeRhs : public KernelBase {
}
}
+ // TODO (sebbas): Disabled for now
// per cell divergence correction (optional)
- if (perCellCorr)
- set += perCellCorr->get(i, j, k);
+ // if(perCellCorr)
+ // set += perCellCorr->get(i,j,k);
// obtain sum, cell count
sum += set;
@@ -198,19 +199,61 @@ struct MakeRhs : public KernelBase {
return gfClamp;
}
typedef Real type9;
- void runMessage()
- {
- debMsg("Executing kernel MakeRhs ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
+ const FlagGrid &flags = getArg0();
+ Grid<Real> &rhs = getArg1();
+ const MACGrid &vel = getArg2();
+ const Grid<Real> *perCellCorr = getArg3();
+ const MACGrid *fractions = getArg4();
+ const MACGrid *obvel = getArg5();
+ const Grid<Real> *phi = getArg6();
+ const Grid<Real> *curv = getArg7();
+ const Real &surfTens = getArg8();
+ const Real &gfClamp = getArg9();
+#pragma omp target teams distribute parallel for reduction(+:cnt, sum) collapse(3) schedule(static,1)
+ {
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i,
+ j,
+ k,
+ flags,
+ rhs,
+ vel,
+ perCellCorr,
+ fractions,
+ obvel,
+ phi,
+ curv,
+ surfTens,
+ gfClamp,
+ cnt,
+ sum);
+ }
+ {
+ this->sum = sum;
+ }
+ }
+ else {
+ const int k = 0;
+ const FlagGrid &flags = getArg0();
+ Grid<Real> &rhs = getArg1();
+ const MACGrid &vel = getArg2();
+ const Grid<Real> *perCellCorr = getArg3();
+ const MACGrid *fractions = getArg4();
+ const MACGrid *obvel = getArg5();
+ const Grid<Real> *phi = getArg6();
+ const Grid<Real> *curv = getArg7();
+ const Real &surfTens = getArg8();
+ const Real &gfClamp = getArg9();
+#pragma omp target teams distribute parallel for reduction(+:cnt, sum) collapse(2) schedule(static,1)
+ {
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i,
@@ -228,55 +271,11 @@ struct MakeRhs : public KernelBase {
gfClamp,
cnt,
sum);
+ }
+ {
+ this->sum = sum;
+ }
}
- else {
- const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i,
- j,
- k,
- flags,
- rhs,
- vel,
- perCellCorr,
- fractions,
- obvel,
- phi,
- curv,
- surfTens,
- gfClamp,
- cnt,
- sum);
- }
- }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
- MakeRhs(MakeRhs &o, tbb::split)
- : KernelBase(o),
- flags(o.flags),
- rhs(o.rhs),
- vel(o.vel),
- perCellCorr(o.perCellCorr),
- fractions(o.fractions),
- obvel(o.obvel),
- phi(o.phi),
- curv(o.curv),
- surfTens(o.surfTens),
- gfClamp(o.gfClamp),
- cnt(0),
- sum(0)
- {
- }
- void join(const MakeRhs &o)
- {
- cnt += o.cnt;
- sum += o.sum;
}
const FlagGrid &flags;
Grid<Real> &rhs;
@@ -302,7 +301,7 @@ struct knCorrectVelocity : public KernelBase {
run();
}
inline void op(
- int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Real> &pressure) const
+ int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Real> &pressure)
{
const IndexInt idx = flags.index(i, j, k);
if (flags.isFluid(idx)) {
@@ -353,37 +352,36 @@ struct knCorrectVelocity : public KernelBase {
return pressure;
}
typedef Grid<Real> type2;
- void runMessage()
- {
- debMsg("Executing kernel knCorrectVelocity ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, pressure);
+ const FlagGrid &flags = getArg0();
+ MACGrid &vel = getArg1();
+ const Grid<Real> &pressure = getArg2();
+#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
+ {
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, pressure);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, vel, pressure);
+ const FlagGrid &flags = getArg0();
+ MACGrid &vel = getArg1();
+ const Grid<Real> &pressure = getArg2();
+#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
+ {
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, vel, pressure);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
MACGrid &vel;
const Grid<Real> &pressure;
@@ -441,7 +439,7 @@ struct ApplyGhostFluidDiagonal : public KernelBase {
Grid<Real> &A0,
const FlagGrid &flags,
const Grid<Real> &phi,
- const Real gfClamp) const
+ const Real gfClamp)
{
const int X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ();
const IndexInt idx = flags.index(i, j, k);
@@ -483,37 +481,35 @@ struct ApplyGhostFluidDiagonal : public KernelBase {
return gfClamp;
}
typedef Real type3;
- void runMessage()
- {
- debMsg("Executing kernel ApplyGhostFluidDiagonal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, A0, flags, phi, gfClamp);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, A0, flags, phi, gfClamp);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, A0, flags, phi, gfClamp);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, A0, flags, phi, gfClamp);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<Real> &A0;
const FlagGrid &flags;
const Grid<Real> &phi;
@@ -551,7 +547,7 @@ struct knCorrectVelocityGhostFluid : public KernelBase {
const Grid<Real> &phi,
Real gfClamp,
const Grid<Real> *curv,
- const Real surfTens) const
+ const Real surfTens)
{
const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ();
const IndexInt idx = flags.index(i, j, k);
@@ -640,37 +636,35 @@ struct knCorrectVelocityGhostFluid : public KernelBase {
return surfTens;
}
typedef Real type6;
- void runMessage()
- {
- debMsg("Executing kernel knCorrectVelocityGhostFluid ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
MACGrid &vel;
const FlagGrid &flags;
const Grid<Real> &pressure;
@@ -710,7 +704,7 @@ struct knReplaceClampedGhostFluidVels : public KernelBase {
const FlagGrid &flags,
const Grid<Real> &pressure,
const Grid<Real> &phi,
- Real gfClamp) const
+ Real gfClamp)
{
const IndexInt idx = flags.index(i, j, k);
const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ();
@@ -758,37 +752,35 @@ struct knReplaceClampedGhostFluidVels : public KernelBase {
return gfClamp;
}
typedef Real type4;
- void runMessage()
- {
- debMsg("Executing kernel knReplaceClampedGhostFluidVels ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, flags, pressure, phi, gfClamp);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, flags, pressure, phi, gfClamp);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, flags, pressure, phi, gfClamp);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, flags, pressure, phi, gfClamp);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
MACGrid &vel;
const FlagGrid &flags;
const Grid<Real> &pressure;
@@ -822,28 +814,21 @@ struct CountEmptyCells : public KernelBase {
return flags;
}
typedef FlagGrid type0;
- void runMessage()
- {
- debMsg("Executing kernel CountEmptyCells ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, flags, numEmpty);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- CountEmptyCells(CountEmptyCells &o, tbb::split) : KernelBase(o), flags(o.flags), numEmpty(0)
- {
- }
- void join(const CountEmptyCells &o)
- {
- numEmpty += o.numEmpty;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ int numEmpty = 0;
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, flags, numEmpty);
+#pragma omp critical
+ {
+ this->numEmpty += numEmpty;
+ }
+ }
}
const FlagGrid &flags;
int numEmpty;
@@ -964,11 +949,19 @@ void computePressureRhs(Grid<Real> &rhs,
const Real surfTens = 0.)
{
// compute divergence and init right hand side
- MakeRhs kernMakeRhs(
- flags, rhs, vel, perCellCorr, fractions, obvel, phi, curv, surfTens, gfClamp);
-
- if (enforceCompatibility)
- rhs += (Real)(-kernMakeRhs.sum / (Real)kernMakeRhs.cnt);
+ // auto kernMakeRhs = new MakeRhs(flags, rhs, vel, perCellCorr, fractions, obvel, phi, curv,
+ // surfTens, gfClamp );
+ printf("pressure = %p, flags = %p, rhs = %p, vel = %p\n",
+ pressure.mData,
+ flags.mData,
+ rhs.mData,
+ vel.mData);
+ MakeRhs(flags, rhs, vel, perCellCorr, fractions, obvel, phi, curv, surfTens, gfClamp);
+
+ // TODO (sebbas): Disabled for now
+ // if(enforceCompatibility)
+ // rhs += (Real)(-kernMakeRhs->sum / (Real)kernMakeRhs->cnt);
+ // delete kernMakeRhs;
}
static PyObject *_W_1(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
@@ -1050,6 +1043,13 @@ void solvePressureSystem(Grid<Real> &rhs,
MACGrid &vel,
Grid<Real> &pressure,
const FlagGrid &flags,
+ Grid<Real> *residual = nullptr,
+ Grid<Real> *search = nullptr,
+ Grid<Real> *A0 = nullptr,
+ Grid<Real> *Ai = nullptr,
+ Grid<Real> *Aj = nullptr,
+ Grid<Real> *Ak = nullptr,
+ Grid<Real> *tmp = nullptr,
Real cgAccuracy = 1e-3,
const Grid<Real> *phi = nullptr,
const Grid<Real> *perCellCorr = nullptr,
@@ -1069,19 +1069,37 @@ void solvePressureSystem(Grid<Real> &rhs,
// reserve temp grids
FluidSolver *parent = flags.getParent();
- Grid<Real> residual(parent);
- Grid<Real> search(parent);
- Grid<Real> A0(parent);
- Grid<Real> Ai(parent);
- Grid<Real> Aj(parent);
- Grid<Real> Ak(parent);
- Grid<Real> tmp(parent);
+
+ bool cleanUp = false;
+ if (!residual) {
+ residual = new Grid<Real>(parent, true, false, true);
+ search = new Grid<Real>(parent, true, false, true);
+ A0 = new Grid<Real>(parent, true, false, true);
+ Ai = new Grid<Real>(parent, true, false, true);
+ Aj = new Grid<Real>(parent, true, false, true);
+ Ak = new Grid<Real>(parent, true, false, true);
+ tmp = new Grid<Real>(parent, true, false, true);
+ cleanUp = true;
+ }
+ else {
+ residual->clear(true);
+ search->clear(true);
+ A0->clear(true);
+ Ai->clear(true);
+ Aj->clear(true);
+ Ak->clear(true);
+ tmp->clear(true);
+ }
+
+ std::cout << "HERE 5" << std::endl;
// setup matrix and boundaries
- MakeLaplaceMatrix(flags, A0, Ai, Aj, Ak, fractions);
+ MakeLaplaceMatrix(flags, *A0, *Ai, *Aj, *Ak, fractions);
+ // MakeLaplaceMatrix(flags, A0, Ai, Aj, Ak, fractions);
+ // TODO (sebbas): Disabled for now
if (phi) {
- ApplyGhostFluidDiagonal(A0, flags, *phi, gfClamp);
+ ApplyGhostFluidDiagonal(*A0, flags, *phi, gfClamp);
}
// check whether we need to fix some pressure value...
@@ -1125,7 +1143,8 @@ void solvePressureSystem(Grid<Real> &rhs,
// debMsg("No empty cells! Fixing pressure of cell "<<fixPidx<<" to zero",1);
}
if (fixPidx >= 0) {
- fixPressure(fixPidx, Real(0), rhs, A0, Ai, Aj, Ak);
+ fixPressure(fixPidx, Real(0), rhs, *A0, *Ai, *Aj, *Ak);
+ // fixPressure(fixPidx, Real(0), rhs, A0, Ai, Aj, Ak);
static bool msgOnce = false;
if (!msgOnce) {
debMsg("Pinning pressure of cell " << fixPidx << " to zero", 2);
@@ -1133,20 +1152,21 @@ void solvePressureSystem(Grid<Real> &rhs,
}
}
}
+ std::cout << "HERE 6" << std::endl;
// CG setup
// note: the last factor increases the max iterations for 2d, which right now can't use a
// preconditioner
GridCgInterface *gcg;
- vector<Grid<Real> *> matA{&A0, &Ai, &Aj};
-
- if (vel.is3D()) {
- matA.push_back(&Ak);
- gcg = new GridCg<ApplyMatrix>(pressure, rhs, residual, search, flags, tmp, matA);
- }
- else {
- gcg = new GridCg<ApplyMatrix2D>(pressure, rhs, residual, search, flags, tmp, matA);
- }
+ if (vel.is3D())
+ gcg = new GridCg<ApplyMatrix>(pressure, rhs, *residual, *search, flags, *tmp, A0, Ai, Aj, Ak);
+ // gcg = new GridCg<ApplyMatrix> (pressure, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj,
+ // &Ak);
+ else
+ gcg = new GridCg<ApplyMatrix2D>(
+ pressure, rhs, *residual, *search, flags, *tmp, A0, Ai, Aj, Ak);
+ // gcg = new GridCg<ApplyMatrix2D>(pressure, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj,
+ // &Ak);
gcg->setAccuracy(cgAccuracy);
gcg->setUseL2Norm(useL2Norm);
@@ -1155,6 +1175,7 @@ void solvePressureSystem(Grid<Real> &rhs,
Grid<Real> *pca0 = nullptr, *pca1 = nullptr, *pca2 = nullptr, *pca3 = nullptr;
GridMg *pmg = nullptr;
+ std::cout << "HERE 7" << std::endl;
// optional preconditioning
if (preconditioner == PcMIC) {
@@ -1180,10 +1201,12 @@ void solvePressureSystem(Grid<Real> &rhs,
gcg->setMGPreconditioner(GridCgInterface::PC_MGP, pmg);
}
+ std::cout << "HERE 8" << std::endl;
// CG solve
+ Real time = 0;
for (int iter = 0; iter < maxIter; iter++) {
- if (!gcg->iterate())
+ if (!gcg->iterate(time))
iter = maxIter;
if (iter < maxIter)
debMsg("FluidSolver::solvePressure iteration " << iter
@@ -1193,8 +1216,26 @@ void solvePressureSystem(Grid<Real> &rhs,
debMsg("FluidSolver::solvePressure done. Iterations:" << gcg->getIterations()
<< ", residual:" << gcg->getResNorm(),
2);
+ // std::cout << "TIME: " << time << std::endl;
// Cleanup
+ if (cleanUp) {
+ if (residual)
+ delete residual;
+ if (search)
+ delete search;
+ if (A0)
+ delete A0;
+ if (Ai)
+ delete Ai;
+ if (Aj)
+ delete Aj;
+ if (Ak)
+ delete Ak;
+ if (tmp)
+ delete tmp;
+ }
+
if (gcg)
delete gcg;
if (pca0)
@@ -1225,26 +1266,40 @@ static PyObject *_W_2(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
MACGrid &vel = *_args.getPtr<MACGrid>("vel", 1, &_lock);
Grid<Real> &pressure = *_args.getPtr<Grid<Real>>("pressure", 2, &_lock);
const FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 3, &_lock);
- Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 4, 1e-3, &_lock);
- const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 5, nullptr, &_lock);
+ Grid<Real> *residual = _args.getPtrOpt<Grid<Real>>("residual", 4, nullptr, &_lock);
+ Grid<Real> *search = _args.getPtrOpt<Grid<Real>>("search", 5, nullptr, &_lock);
+ Grid<Real> *A0 = _args.getPtrOpt<Grid<Real>>("A0", 6, nullptr, &_lock);
+ Grid<Real> *Ai = _args.getPtrOpt<Grid<Real>>("Ai", 7, nullptr, &_lock);
+ Grid<Real> *Aj = _args.getPtrOpt<Grid<Real>>("Aj", 8, nullptr, &_lock);
+ Grid<Real> *Ak = _args.getPtrOpt<Grid<Real>>("Ak", 9, nullptr, &_lock);
+ Grid<Real> *tmp = _args.getPtrOpt<Grid<Real>>("tmp", 10, nullptr, &_lock);
+ Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 11, 1e-3, &_lock);
+ const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 12, nullptr, &_lock);
const Grid<Real> *perCellCorr = _args.getPtrOpt<Grid<Real>>(
- "perCellCorr", 6, nullptr, &_lock);
- const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 7, nullptr, &_lock);
- Real gfClamp = _args.getOpt<Real>("gfClamp", 8, 1e-04, &_lock);
- Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 9, 1.5, &_lock);
- bool precondition = _args.getOpt<bool>("precondition", 10, true, &_lock);
- int preconditioner = _args.getOpt<int>("preconditioner", 11, PcMIC, &_lock);
+ "perCellCorr", 13, nullptr, &_lock);
+ const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 14, nullptr, &_lock);
+ Real gfClamp = _args.getOpt<Real>("gfClamp", 15, 1e-04, &_lock);
+ Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 16, 1.5, &_lock);
+ bool precondition = _args.getOpt<bool>("precondition", 17, true, &_lock);
+ int preconditioner = _args.getOpt<int>("preconditioner", 18, PcMIC, &_lock);
const bool enforceCompatibility = _args.getOpt<bool>(
- "enforceCompatibility", 12, false, &_lock);
- const bool useL2Norm = _args.getOpt<bool>("useL2Norm", 13, false, &_lock);
- const bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 14, false, &_lock);
- const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 15, nullptr, &_lock);
- const Real surfTens = _args.getOpt<Real>("surfTens", 16, 0., &_lock);
+ "enforceCompatibility", 19, false, &_lock);
+ const bool useL2Norm = _args.getOpt<bool>("useL2Norm", 20, false, &_lock);
+ const bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 21, false, &_lock);
+ const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 22, nullptr, &_lock);
+ const Real surfTens = _args.getOpt<Real>("surfTens", 23, 0., &_lock);
_retval = getPyNone();
solvePressureSystem(rhs,
vel,
pressure,
flags,
+ residual,
+ search,
+ A0,
+ Ai,
+ Aj,
+ Ak,
+ tmp,
cgAccuracy,
phi,
perCellCorr,
@@ -1370,6 +1425,13 @@ void PbRegister_correctVelocity()
void solvePressure(MACGrid &vel,
Grid<Real> &pressure,
const FlagGrid &flags,
+ Grid<Real> *residual = nullptr,
+ Grid<Real> *search = nullptr,
+ Grid<Real> *A0 = nullptr,
+ Grid<Real> *Ai = nullptr,
+ Grid<Real> *Aj = nullptr,
+ Grid<Real> *Ak = nullptr,
+ Grid<Real> *tmp = nullptr,
Real cgAccuracy = 1e-3,
const Grid<Real> *phi = nullptr,
const Grid<Real> *perCellCorr = nullptr,
@@ -1411,6 +1473,13 @@ void solvePressure(MACGrid &vel,
vel,
pressure,
flags,
+ residual,
+ search,
+ A0,
+ Ai,
+ Aj,
+ Ak,
+ tmp,
cgAccuracy,
phi,
perCellCorr,
@@ -1442,10 +1511,11 @@ void solvePressure(MACGrid &vel,
curv,
surfTens);
+ // TODO (sebbas): Disabled for now
// optionally , return RHS
- if (retRhs) {
- retRhs->copyFrom(rhs);
- }
+ // if(retRhs) {
+ // retRhs->copyFrom(rhs);
+ // }
}
static PyObject *_W_4(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
@@ -1460,26 +1530,40 @@ static PyObject *_W_4(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
MACGrid &vel = *_args.getPtr<MACGrid>("vel", 0, &_lock);
Grid<Real> &pressure = *_args.getPtr<Grid<Real>>("pressure", 1, &_lock);
const FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 2, &_lock);
- Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 3, 1e-3, &_lock);
- const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 4, nullptr, &_lock);
+ Grid<Real> *residual = _args.getPtrOpt<Grid<Real>>("residual", 3, nullptr, &_lock);
+ Grid<Real> *search = _args.getPtrOpt<Grid<Real>>("search", 4, nullptr, &_lock);
+ Grid<Real> *A0 = _args.getPtrOpt<Grid<Real>>("A0", 5, nullptr, &_lock);
+ Grid<Real> *Ai = _args.getPtrOpt<Grid<Real>>("Ai", 6, nullptr, &_lock);
+ Grid<Real> *Aj = _args.getPtrOpt<Grid<Real>>("Aj", 7, nullptr, &_lock);
+ Grid<Real> *Ak = _args.getPtrOpt<Grid<Real>>("Ak", 8, nullptr, &_lock);
+ Grid<Real> *tmp = _args.getPtrOpt<Grid<Real>>("tmp", 9, nullptr, &_lock);
+ Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 10, 1e-3, &_lock);
+ const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 11, nullptr, &_lock);
const Grid<Real> *perCellCorr = _args.getPtrOpt<Grid<Real>>(
- "perCellCorr", 5, nullptr, &_lock);
- const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 6, nullptr, &_lock);
- const MACGrid *obvel = _args.getPtrOpt<MACGrid>("obvel", 7, nullptr, &_lock);
- Real gfClamp = _args.getOpt<Real>("gfClamp", 8, 1e-04, &_lock);
- Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 9, 1.5, &_lock);
- bool precondition = _args.getOpt<bool>("precondition", 10, true, &_lock);
- int preconditioner = _args.getOpt<int>("preconditioner", 11, PcMIC, &_lock);
- bool enforceCompatibility = _args.getOpt<bool>("enforceCompatibility", 12, false, &_lock);
- bool useL2Norm = _args.getOpt<bool>("useL2Norm", 13, false, &_lock);
- bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 14, false, &_lock);
- const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 15, nullptr, &_lock);
- const Real surfTens = _args.getOpt<Real>("surfTens", 16, 0., &_lock);
- Grid<Real> *retRhs = _args.getPtrOpt<Grid<Real>>("retRhs", 17, nullptr, &_lock);
+ "perCellCorr", 12, nullptr, &_lock);
+ const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 13, nullptr, &_lock);
+ const MACGrid *obvel = _args.getPtrOpt<MACGrid>("obvel", 14, nullptr, &_lock);
+ Real gfClamp = _args.getOpt<Real>("gfClamp", 15, 1e-04, &_lock);
+ Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 16, 1.5, &_lock);
+ bool precondition = _args.getOpt<bool>("precondition", 17, true, &_lock);
+ int preconditioner = _args.getOpt<int>("preconditioner", 18, PcMIC, &_lock);
+ bool enforceCompatibility = _args.getOpt<bool>("enforceCompatibility", 19, false, &_lock);
+ bool useL2Norm = _args.getOpt<bool>("useL2Norm", 20, false, &_lock);
+ bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 21, false, &_lock);
+ const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 22, nullptr, &_lock);
+ const Real surfTens = _args.getOpt<Real>("surfTens", 23, 0., &_lock);
+ Grid<Real> *retRhs = _args.getPtrOpt<Grid<Real>>("retRhs", 24, nullptr, &_lock);
_retval = getPyNone();
solvePressure(vel,
pressure,
flags,
+ residual,
+ search,
+ A0,
+ Ai,
+ Aj,
+ Ak,
+ tmp,
cgAccuracy,
phi,
perCellCorr,
diff --git a/extern/mantaflow/preprocessed/plugin/ptsplugins.cpp b/extern/mantaflow/preprocessed/plugin/ptsplugins.cpp
index 7b2aedb694e..6d75e220d7a 100644
--- a/extern/mantaflow/preprocessed/plugin/ptsplugins.cpp
+++ b/extern/mantaflow/preprocessed/plugin/ptsplugins.cpp
@@ -34,7 +34,7 @@ struct KnAddForcePvel : public KernelBase {
ParticleDataImpl<Vec3> &v,
const Vec3 &da,
const ParticleDataImpl<int> *ptype,
- const int exclude) const
+ const int exclude)
{
if (ptype && ((*ptype)[idx] & exclude))
return;
@@ -60,21 +60,17 @@ struct KnAddForcePvel : public KernelBase {
return exclude;
}
typedef int type3;
- void runMessage()
- {
- debMsg("Executing kernel KnAddForcePvel ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, v, da, ptype, exclude);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, v, da, ptype, exclude);
+ }
}
ParticleDataImpl<Vec3> &v;
const Vec3 &da;
@@ -150,7 +146,7 @@ struct KnUpdateVelocityFromDeltaPos : public KernelBase {
const ParticleDataImpl<Vec3> &x_prev,
const Real over_dt,
const ParticleDataImpl<int> *ptype,
- const int exclude) const
+ const int exclude)
{
if (ptype && ((*ptype)[idx] & exclude))
return;
@@ -186,21 +182,17 @@ struct KnUpdateVelocityFromDeltaPos : public KernelBase {
return exclude;
}
typedef int type5;
- void runMessage()
- {
- debMsg("Executing kernel KnUpdateVelocityFromDeltaPos ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, p, v, x_prev, over_dt, ptype, exclude);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, p, v, x_prev, over_dt, ptype, exclude);
+ }
}
const BasicParticleSystem &p;
ParticleDataImpl<Vec3> &v;
@@ -273,7 +265,7 @@ struct KnStepEuler : public KernelBase {
const ParticleDataImpl<Vec3> &v,
const Real dt,
const ParticleDataImpl<int> *ptype,
- const int exclude) const
+ const int exclude)
{
if (ptype && ((*ptype)[idx] & exclude))
return;
@@ -304,21 +296,17 @@ struct KnStepEuler : public KernelBase {
return exclude;
}
typedef int type4;
- void runMessage()
- {
- debMsg("Executing kernel KnStepEuler ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, p, v, dt, ptype, exclude);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, p, v, dt, ptype, exclude);
+ }
}
BasicParticleSystem &p;
const ParticleDataImpl<Vec3> &v;
@@ -393,7 +381,7 @@ struct KnSetPartType : public KernelBase {
const int mark,
const int stype,
const FlagGrid &flags,
- const int cflag) const
+ const int cflag)
{
if (flags.isInBounds(part.getPos(idx), 0) && (flags.getAt(part.getPos(idx)) & cflag) &&
(ptype[idx] & stype))
@@ -429,21 +417,17 @@ struct KnSetPartType : public KernelBase {
return cflag;
}
typedef int type5;
- void runMessage()
- {
- debMsg("Executing kernel KnSetPartType ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, ptype, part, mark, stype, flags, cflag);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, ptype, part, mark, stype, flags, cflag);
+ }
}
ParticleDataImpl<int> &ptype;
const BasicParticleSystem &part;
diff --git a/extern/mantaflow/preprocessed/plugin/secondaryparticles.cpp b/extern/mantaflow/preprocessed/plugin/secondaryparticles.cpp
index 7a1d8224d94..5d519710296 100644
--- a/extern/mantaflow/preprocessed/plugin/secondaryparticles.cpp
+++ b/extern/mantaflow/preprocessed/plugin/secondaryparticles.cpp
@@ -99,7 +99,7 @@ struct knFlipComputeSecondaryParticlePotentials : public KernelBase {
const Real scaleFromManta,
const int itype = FlagGrid::TypeFluid,
const int jtype = FlagGrid::TypeObstacle | FlagGrid::TypeOutflow |
- FlagGrid::TypeInflow) const
+ FlagGrid::TypeInflow)
{
if (!(flags(i, j, k) & itype))
@@ -253,19 +253,48 @@ struct knFlipComputeSecondaryParticlePotentials : public KernelBase {
return jtype;
}
typedef int type16;
- void runMessage()
- {
- debMsg("Executing kernel knFlipComputeSecondaryParticlePotentials ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = radius; j < _maxY; j++)
+ for (int i = radius; i < _maxX; i++)
+ op(i,
+ j,
+ k,
+ potTA,
+ potWC,
+ potKE,
+ neighborRatio,
+ flags,
+ v,
+ normal,
+ radius,
+ tauMinTA,
+ tauMaxTA,
+ tauMinWC,
+ tauMaxWC,
+ tauMinKE,
+ tauMaxKE,
+ scaleFromManta,
+ itype,
+ jtype);
+ }
+ }
+ else {
+ const int k = 0;
+#pragma omp parallel
+ {
+
+#pragma omp for
for (int j = radius; j < _maxY; j++)
for (int i = radius; i < _maxX; i++)
op(i,
@@ -288,40 +317,9 @@ struct knFlipComputeSecondaryParticlePotentials : public KernelBase {
scaleFromManta,
itype,
jtype);
- }
- else {
- const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = radius; i < _maxX; i++)
- op(i,
- j,
- k,
- potTA,
- potWC,
- potKE,
- neighborRatio,
- flags,
- v,
- normal,
- radius,
- tauMinTA,
- tauMaxTA,
- tauMinWC,
- tauMaxWC,
- tauMinKE,
- tauMaxKE,
- scaleFromManta,
- itype,
- jtype);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(radius, maxY), *this);
- }
Grid<Real> &potTA;
Grid<Real> &potWC;
Grid<Real> &potKE;
@@ -670,13 +668,7 @@ struct knFlipSampleSecondaryParticlesMoreCylinders : public KernelBase {
return rand;
}
typedef RandomStream type17;
- void runMessage()
- {
- debMsg("Executing kernel knFlipSampleSecondaryParticlesMoreCylinders ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
+ void runMessage(){};
void run()
{
const int _maxX = maxX;
@@ -930,13 +922,7 @@ struct knFlipSampleSecondaryParticles : public KernelBase {
return rand;
}
typedef RandomStream type17;
- void runMessage()
- {
- debMsg("Executing kernel knFlipSampleSecondaryParticles ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
+ void runMessage(){};
void run()
{
const int _maxX = maxX;
@@ -1192,7 +1178,7 @@ struct knFlipUpdateSecondaryParticlesLinear : public KernelBase {
const Real c_b,
const Real dt,
const int exclude,
- const int antitunneling) const
+ const int antitunneling)
{
if (!pts_sec.isActive(idx) || pts_sec[idx].flag & exclude)
@@ -1342,36 +1328,32 @@ struct knFlipUpdateSecondaryParticlesLinear : public KernelBase {
return antitunneling;
}
typedef int type14;
- void runMessage()
- {
- debMsg("Executing kernel knFlipUpdateSecondaryParticlesLinear ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx,
- pts_sec,
- v_sec,
- l_sec,
- f_sec,
- flags,
- v,
- neighborRatio,
- gravity,
- k_b,
- k_d,
- c_s,
- c_b,
- dt,
- exclude,
- antitunneling);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i,
+ pts_sec,
+ v_sec,
+ l_sec,
+ f_sec,
+ flags,
+ v,
+ neighborRatio,
+ gravity,
+ k_b,
+ k_d,
+ c_s,
+ c_b,
+ dt,
+ exclude,
+ antitunneling);
+ }
}
BasicParticleSystem &pts_sec;
ParticleDataImpl<Vec3> &v_sec;
@@ -1449,7 +1431,7 @@ struct knFlipUpdateSecondaryParticlesCubic : public KernelBase {
const Real dt,
const int exclude,
const int antitunneling,
- const int itype) const
+ const int itype)
{
if (!pts_sec.isActive(idx) || pts_sec[idx].flag & exclude)
@@ -1655,38 +1637,34 @@ struct knFlipUpdateSecondaryParticlesCubic : public KernelBase {
return itype;
}
typedef int type16;
- void runMessage()
- {
- debMsg("Executing kernel knFlipUpdateSecondaryParticlesCubic ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx,
- pts_sec,
- v_sec,
- l_sec,
- f_sec,
- flags,
- v,
- neighborRatio,
- radius,
- gravity,
- k_b,
- k_d,
- c_s,
- c_b,
- dt,
- exclude,
- antitunneling,
- itype);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i,
+ pts_sec,
+ v_sec,
+ l_sec,
+ f_sec,
+ flags,
+ v,
+ neighborRatio,
+ radius,
+ gravity,
+ k_b,
+ k_d,
+ c_s,
+ c_b,
+ dt,
+ exclude,
+ antitunneling,
+ itype);
+ }
}
BasicParticleSystem &pts_sec;
ParticleDataImpl<Vec3> &v_sec;
@@ -1856,7 +1834,7 @@ struct knFlipDeleteParticlesInObstacle : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, BasicParticleSystem &pts, const FlagGrid &flags) const
+ inline void op(IndexInt idx, BasicParticleSystem &pts, const FlagGrid &flags)
{
if (!pts.isActive(idx))
@@ -1885,21 +1863,17 @@ struct knFlipDeleteParticlesInObstacle : public KernelBase {
return flags;
}
typedef FlagGrid type1;
- void runMessage()
- {
- debMsg("Executing kernel knFlipDeleteParticlesInObstacle ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, pts, flags);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, pts, flags);
+ }
}
BasicParticleSystem &pts;
const FlagGrid &flags;
@@ -2031,7 +2005,7 @@ struct knSetFlagsFromLevelset : public KernelBase {
FlagGrid &flags,
const Grid<Real> &phi,
const int exclude = FlagGrid::TypeObstacle,
- const int itype = FlagGrid::TypeFluid) const
+ const int itype = FlagGrid::TypeFluid)
{
if (phi(idx) < 0 && !(flags(idx) & exclude))
flags(idx) = itype;
@@ -2056,21 +2030,17 @@ struct knSetFlagsFromLevelset : public KernelBase {
return itype;
}
typedef int type3;
- void runMessage()
- {
- debMsg("Executing kernel knSetFlagsFromLevelset ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, flags, phi, exclude, itype);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, flags, phi, exclude, itype);
+ }
}
FlagGrid &flags;
const Grid<Real> &phi;
@@ -2126,7 +2096,7 @@ struct knSetMACFromLevelset : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, MACGrid &v, const Grid<Real> &phi, const Vec3 c) const
+ inline void op(int i, int j, int k, MACGrid &v, const Grid<Real> &phi, const Vec3 c)
{
if (phi.getInterpolated(Vec3(i, j, k)) > 0)
v(i, j, k) = c;
@@ -2146,37 +2116,35 @@ struct knSetMACFromLevelset : public KernelBase {
return c;
}
typedef Vec3 type2;
- void runMessage()
- {
- debMsg("Executing kernel knSetMACFromLevelset ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, v, phi, c);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, v, phi, c);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, v, phi, c);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, v, phi, c);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
MACGrid &v;
const Grid<Real> &phi;
const Vec3 c;
@@ -2268,7 +2236,7 @@ struct knFlipComputePotentialTrappedAir : public KernelBase {
const Real tauMax,
const Real scaleFromManta,
const int itype = FlagGrid::TypeFluid,
- const int jtype = FlagGrid::TypeFluid) const
+ const int jtype = FlagGrid::TypeFluid)
{
if (!(flags(i, j, k) & itype))
@@ -2342,37 +2310,35 @@ struct knFlipComputePotentialTrappedAir : public KernelBase {
return jtype;
}
typedef int type8;
- void runMessage()
- {
- debMsg("Executing kernel knFlipComputePotentialTrappedAir ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<Real> &pot;
const FlagGrid &flags;
const MACGrid &v;
@@ -2472,7 +2438,7 @@ struct knFlipComputePotentialKineticEnergy : public KernelBase {
const Real tauMin,
const Real tauMax,
const Real scaleFromManta,
- const int itype = FlagGrid::TypeFluid) const
+ const int itype = FlagGrid::TypeFluid)
{
if (!(flags(i, j, k) & itype))
@@ -2520,37 +2486,35 @@ struct knFlipComputePotentialKineticEnergy : public KernelBase {
return itype;
}
typedef int type6;
- void runMessage()
- {
- debMsg("Executing kernel knFlipComputePotentialKineticEnergy ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<Real> &pot;
const FlagGrid &flags;
const MACGrid &v;
@@ -2650,7 +2614,7 @@ struct knFlipComputePotentialWaveCrest : public KernelBase {
const Real tauMax,
const Real scaleFromManta,
const int itype = FlagGrid::TypeFluid,
- const int jtype = FlagGrid::TypeFluid) const
+ const int jtype = FlagGrid::TypeFluid)
{
if (!(flags(i, j, k) & itype))
@@ -2736,19 +2700,41 @@ struct knFlipComputePotentialWaveCrest : public KernelBase {
return jtype;
}
typedef int type9;
- void runMessage()
- {
- debMsg("Executing kernel knFlipComputePotentialWaveCrest ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i,
+ j,
+ k,
+ pot,
+ flags,
+ v,
+ radius,
+ normal,
+ tauMin,
+ tauMax,
+ scaleFromManta,
+ itype,
+ jtype);
+ }
+ }
+ else {
+ const int k = 0;
+#pragma omp parallel
+ {
+
+#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i,
@@ -2764,21 +2750,9 @@ struct knFlipComputePotentialWaveCrest : public KernelBase {
scaleFromManta,
itype,
jtype);
- }
- else {
- const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, pot, flags, v, radius, normal, tauMin, tauMax, scaleFromManta, itype, jtype);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
Grid<Real> &pot;
const FlagGrid &flags;
const MACGrid &v;
@@ -2860,7 +2834,7 @@ struct knFlipComputeSurfaceNormals : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, Grid<Vec3> &normal, const Grid<Real> &phi) const
+ inline void op(IndexInt idx, Grid<Vec3> &normal, const Grid<Real> &phi)
{
normal[idx] = getNormalized(normal[idx]);
}
@@ -2874,21 +2848,17 @@ struct knFlipComputeSurfaceNormals : public KernelBase {
return phi;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel knFlipComputeSurfaceNormals ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, normal, phi);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, normal, phi);
+ }
}
Grid<Vec3> &normal;
const Grid<Real> &phi;
@@ -2958,7 +2928,7 @@ struct knFlipUpdateNeighborRatio : public KernelBase {
Grid<Real> &neighborRatio,
const int radius,
const int itype = FlagGrid::TypeFluid,
- const int jtype = FlagGrid::TypeObstacle) const
+ const int jtype = FlagGrid::TypeObstacle)
{
if (!(flags(i, j, k) & itype))
@@ -3008,37 +2978,35 @@ struct knFlipUpdateNeighborRatio : public KernelBase {
return jtype;
}
typedef int type4;
- void runMessage()
- {
- debMsg("Executing kernel knFlipUpdateNeighborRatio ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, neighborRatio, radius, itype, jtype);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, neighborRatio, radius, itype, jtype);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, neighborRatio, radius, itype, jtype);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, neighborRatio, radius, itype, jtype);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
Grid<Real> &neighborRatio;
const int radius;
diff --git a/extern/mantaflow/preprocessed/plugin/surfaceturbulence.cpp b/extern/mantaflow/preprocessed/plugin/surfaceturbulence.cpp
index e5aa09117ea..c4be7ab3ea5 100644
--- a/extern/mantaflow/preprocessed/plugin/surfaceturbulence.cpp
+++ b/extern/mantaflow/preprocessed/plugin/surfaceturbulence.cpp
@@ -569,7 +569,7 @@ struct advectSurfacePoints : public KernelBase {
inline void op(IndexInt idx,
BasicParticleSystemWrapper &surfacePoints,
const BasicParticleSystemWrapper &coarseParticles,
- const ParticleDataImplVec3Wrapper &coarseParticlesPrevPos) const
+ const ParticleDataImplVec3Wrapper &coarseParticlesPrevPos)
{
if (surfacePoints.isActive(idx)) {
Vec3 avgDisplacement(0, 0, 0);
@@ -606,21 +606,17 @@ struct advectSurfacePoints : public KernelBase {
return coarseParticlesPrevPos;
}
typedef ParticleDataImplVec3Wrapper type2;
- void runMessage()
- {
- debMsg("Executing kernel advectSurfacePoints ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, coarseParticles, coarseParticlesPrevPos);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, coarseParticles, coarseParticlesPrevPos);
+ }
}
BasicParticleSystemWrapper &surfacePoints;
const BasicParticleSystemWrapper &coarseParticles;
@@ -673,7 +669,7 @@ struct computeSurfaceNormals : public KernelBase {
inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints,
const BasicParticleSystemWrapper &coarseParticles,
- ParticleDataImpl<Vec3> &surfaceNormals) const
+ ParticleDataImpl<Vec3> &surfaceNormals)
{
Vec3 pos = surfacePoints.getPos(idx);
@@ -743,21 +739,17 @@ struct computeSurfaceNormals : public KernelBase {
return surfaceNormals;
}
typedef ParticleDataImpl<Vec3> type2;
- void runMessage()
- {
- debMsg("Executing kernel computeSurfaceNormals ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, coarseParticles, surfaceNormals);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, coarseParticles, surfaceNormals);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
const BasicParticleSystemWrapper &coarseParticles;
@@ -780,7 +772,7 @@ struct computeAveragedNormals : public KernelBase {
}
inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints,
- const ParticleDataImpl<Vec3> &surfaceNormals) const
+ const ParticleDataImpl<Vec3> &surfaceNormals)
{
Vec3 pos = surfacePoints.getPos(idx);
Vec3 newNormal = Vec3(0, 0, 0);
@@ -800,21 +792,17 @@ struct computeAveragedNormals : public KernelBase {
return surfaceNormals;
}
typedef ParticleDataImpl<Vec3> type1;
- void runMessage()
- {
- debMsg("Executing kernel computeAveragedNormals ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, surfaceNormals);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, surfaceNormals);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
const ParticleDataImpl<Vec3> &surfaceNormals;
@@ -832,7 +820,7 @@ struct assignNormals : public KernelBase {
}
inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints,
- ParticleDataImpl<Vec3> &surfaceNormals) const
+ ParticleDataImpl<Vec3> &surfaceNormals)
{
surfaceNormals[idx] = tempSurfaceVec3[idx];
}
@@ -846,21 +834,17 @@ struct assignNormals : public KernelBase {
return surfaceNormals;
}
typedef ParticleDataImpl<Vec3> type1;
- void runMessage()
- {
- debMsg("Executing kernel assignNormals ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, surfaceNormals);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, surfaceNormals);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Vec3> &surfaceNormals;
@@ -963,7 +947,7 @@ struct computeSurfaceDensities : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, void *dummy) const
+ inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, void *dummy)
{
Vec3 pos = surfacePoints.getPos(idx);
Real density = 0;
@@ -984,21 +968,17 @@ struct computeSurfaceDensities : public KernelBase {
return dummy;
}
typedef void type1;
- void runMessage()
- {
- debMsg("Executing kernel computeSurfaceDensities ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, dummy);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, dummy);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
void *dummy;
@@ -1016,7 +996,7 @@ struct computeSurfaceDisplacements : public KernelBase {
}
inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints,
- const ParticleDataImpl<Vec3> &surfaceNormals) const
+ const ParticleDataImpl<Vec3> &surfaceNormals)
{
Vec3 pos = surfacePoints.getPos(idx);
Vec3 normal = surfaceNormals[idx];
@@ -1068,21 +1048,17 @@ struct computeSurfaceDisplacements : public KernelBase {
return surfaceNormals;
}
typedef ParticleDataImpl<Vec3> type1;
- void runMessage()
- {
- debMsg("Executing kernel computeSurfaceDisplacements ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, surfaceNormals);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, surfaceNormals);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
const ParticleDataImpl<Vec3> &surfaceNormals;
@@ -1095,7 +1071,7 @@ struct applySurfaceDisplacements : public KernelBase {
runMessage();
run();
}
- inline void op(IndexInt idx, BasicParticleSystemWrapper &surfacePoints, void *dummy) const
+ inline void op(IndexInt idx, BasicParticleSystemWrapper &surfacePoints, void *dummy)
{
surfacePoints.setPos(idx, surfacePoints.getPos(idx) + tempSurfaceVec3[idx]);
}
@@ -1109,21 +1085,17 @@ struct applySurfaceDisplacements : public KernelBase {
return dummy;
}
typedef void type1;
- void runMessage()
- {
- debMsg("Executing kernel applySurfaceDisplacements ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, dummy);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, dummy);
+ }
}
BasicParticleSystemWrapper &surfacePoints;
void *dummy;
@@ -1152,7 +1124,7 @@ struct constrainSurface : public KernelBase {
}
inline void op(IndexInt idx,
BasicParticleSystemWrapper &surfacePoints,
- const BasicParticleSystemWrapper &coarseParticles) const
+ const BasicParticleSystemWrapper &coarseParticles)
{
Vec3 pos = surfacePoints.getPos(idx);
Real level = computeConstraintLevel(coarseParticles, surfacePoints.getPos(idx));
@@ -1179,21 +1151,17 @@ struct constrainSurface : public KernelBase {
return coarseParticles;
}
typedef BasicParticleSystemWrapper type1;
- void runMessage()
- {
- debMsg("Executing kernel constrainSurface ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, coarseParticles);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, coarseParticles);
+ }
}
BasicParticleSystemWrapper &surfacePoints;
const BasicParticleSystemWrapper &coarseParticles;
@@ -1220,7 +1188,7 @@ struct interpolateNewWaveData : public KernelBase {
ParticleDataImpl<Real> &surfaceWaveH,
ParticleDataImpl<Real> &surfaceWaveDtH,
ParticleDataImpl<Real> &surfaceWaveSeed,
- ParticleDataImpl<Real> &surfaceWaveSeedAmplitude) const
+ ParticleDataImpl<Real> &surfaceWaveSeedAmplitude)
{
if (surfacePoints.getStatus(idx) & ParticleBase::PNEW) {
Vec3 pos = surfacePoints.getPos(idx);
@@ -1270,26 +1238,22 @@ struct interpolateNewWaveData : public KernelBase {
return surfaceWaveSeedAmplitude;
}
typedef ParticleDataImpl<Real> type4;
- void runMessage()
- {
- debMsg("Executing kernel interpolateNewWaveData ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx,
- surfacePoints,
- surfaceWaveH,
- surfaceWaveDtH,
- surfaceWaveSeed,
- surfaceWaveSeedAmplitude);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i,
+ surfacePoints,
+ surfaceWaveH,
+ surfaceWaveDtH,
+ surfaceWaveSeed,
+ surfaceWaveSeedAmplitude);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Real> &surfaceWaveH;
@@ -1345,7 +1309,7 @@ struct addSeed : public KernelBase {
inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints,
ParticleDataImpl<Real> &surfaceWaveH,
- const ParticleDataImpl<Real> &surfaceWaveSeed) const
+ const ParticleDataImpl<Real> &surfaceWaveSeed)
{
surfaceWaveH[idx] += surfaceWaveSeed[idx];
}
@@ -1364,21 +1328,17 @@ struct addSeed : public KernelBase {
return surfaceWaveSeed;
}
typedef ParticleDataImpl<Real> type2;
- void runMessage()
- {
- debMsg("Executing kernel addSeed ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, surfaceWaveH, surfaceWaveSeed);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, surfaceWaveH, surfaceWaveSeed);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Real> &surfaceWaveH;
@@ -1400,7 +1360,7 @@ struct computeSurfaceWaveNormal : public KernelBase {
inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints,
const ParticleDataImpl<Vec3> &surfaceNormals,
- const ParticleDataImpl<Real> &surfaceWaveH) const
+ const ParticleDataImpl<Real> &surfaceWaveH)
{
Vec3 pos = surfacePoints.getPos(idx);
@@ -1464,21 +1424,17 @@ struct computeSurfaceWaveNormal : public KernelBase {
return surfaceWaveH;
}
typedef ParticleDataImpl<Real> type2;
- void runMessage()
- {
- debMsg("Executing kernel computeSurfaceWaveNormal ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, surfaceNormals, surfaceWaveH);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, surfaceNormals, surfaceWaveH);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
const ParticleDataImpl<Vec3> &surfaceNormals;
@@ -1500,7 +1456,7 @@ struct computeSurfaceWaveLaplacians : public KernelBase {
inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints,
const ParticleDataImpl<Vec3> &surfaceNormals,
- const ParticleDataImpl<Real> &surfaceWaveH) const
+ const ParticleDataImpl<Real> &surfaceWaveH)
{
Real laplacian = 0;
Real wTotal = 0;
@@ -1561,21 +1517,17 @@ struct computeSurfaceWaveLaplacians : public KernelBase {
return surfaceWaveH;
}
typedef ParticleDataImpl<Real> type2;
- void runMessage()
- {
- debMsg("Executing kernel computeSurfaceWaveLaplacians ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, surfaceNormals, surfaceWaveH);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, surfaceNormals, surfaceWaveH);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
const ParticleDataImpl<Vec3> &surfaceNormals;
@@ -1600,7 +1552,7 @@ struct evolveWave : public KernelBase {
const BasicParticleSystemWrapper &surfacePoints,
ParticleDataImpl<Real> &surfaceWaveH,
ParticleDataImpl<Real> &surfaceWaveDtH,
- const ParticleDataImpl<Real> &surfaceWaveSeed) const
+ const ParticleDataImpl<Real> &surfaceWaveSeed)
{
surfaceWaveDtH[idx] += params.waveSpeed * params.waveSpeed * params.dt * tempSurfaceFloat[idx];
surfaceWaveDtH[idx] /= (1 + params.dt * params.waveDamping);
@@ -1635,21 +1587,17 @@ struct evolveWave : public KernelBase {
return surfaceWaveSeed;
}
typedef ParticleDataImpl<Real> type3;
- void runMessage()
- {
- debMsg("Executing kernel evolveWave ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, surfaceWaveH, surfaceWaveDtH, surfaceWaveSeed);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, surfaceWaveH, surfaceWaveDtH, surfaceWaveSeed);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Real> &surfaceWaveH;
@@ -1669,7 +1617,7 @@ struct computeSurfaceCurvature : public KernelBase {
}
inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints,
- const ParticleDataImpl<Vec3> &surfaceNormals) const
+ const ParticleDataImpl<Vec3> &surfaceNormals)
{
Vec3 pPos = surfacePoints.getPos(idx);
Real wTotal = 0;
@@ -1710,21 +1658,17 @@ struct computeSurfaceCurvature : public KernelBase {
return surfaceNormals;
}
typedef ParticleDataImpl<Vec3> type1;
- void runMessage()
- {
- debMsg("Executing kernel computeSurfaceCurvature ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, surfaceNormals);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, surfaceNormals);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
const ParticleDataImpl<Vec3> &surfaceNormals;
@@ -1742,7 +1686,7 @@ struct smoothCurvature : public KernelBase {
}
inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints,
- ParticleDataImpl<Real> &surfaceWaveSource) const
+ ParticleDataImpl<Real> &surfaceWaveSource)
{
Vec3 pPos = surfacePoints.getPos(idx);
Real curv = 0;
@@ -1768,21 +1712,17 @@ struct smoothCurvature : public KernelBase {
return surfaceWaveSource;
}
typedef ParticleDataImpl<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel smoothCurvature ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, surfaceWaveSource);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, surfaceWaveSource);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Real> &surfaceWaveSource;
@@ -1806,7 +1746,7 @@ struct seedWaves : public KernelBase {
const BasicParticleSystemWrapper &surfacePoints,
ParticleDataImpl<Real> &surfaceWaveSeed,
ParticleDataImpl<Real> &surfaceWaveSeedAmplitude,
- ParticleDataImpl<Real> &surfaceWaveSource) const
+ ParticleDataImpl<Real> &surfaceWaveSource)
{
Real source = smoothstep(params.waveSeedingCurvatureThresholdRegionCenter -
params.waveSeedingCurvatureThresholdRegionRadius,
@@ -1850,21 +1790,17 @@ struct seedWaves : public KernelBase {
return surfaceWaveSource;
}
typedef ParticleDataImpl<Real> type3;
- void runMessage()
- {
- debMsg("Executing kernel seedWaves ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, surfacePoints, surfaceWaveSeed, surfaceWaveSeedAmplitude, surfaceWaveSource);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, surfacePoints, surfaceWaveSeed, surfaceWaveSeedAmplitude, surfaceWaveSource);
+ }
}
const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Real> &surfaceWaveSeed;
diff --git a/extern/mantaflow/preprocessed/plugin/viscosity.cpp b/extern/mantaflow/preprocessed/plugin/viscosity.cpp
deleted file mode 100644
index a9e1985336e..00000000000
--- a/extern/mantaflow/preprocessed/plugin/viscosity.cpp
+++ /dev/null
@@ -1,1428 +0,0 @@
-
-
-// DO NOT EDIT !
-// This file is generated using the MantaFlow preprocessor (prep generate).
-
-/******************************************************************************
- *
- * MantaFlow fluid solver framework
- * Copyright 2020 Sebastian Barschkis, Nils Thuerey
- *
- * This program is free software, distributed under the terms of the
- * Apache License, Version 2.0
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Accurate Viscous Free Surfaces for Buckling, Coiling, and Rotating Liquids
- * Batty et al., SCA 2008
- *
- ******************************************************************************/
-
-#include "conjugategrad.h"
-#include "general.h"
-#include "grid.h"
-#include "vectorbase.h"
-
-#include <chrono>
-
-#if OPENMP == 1 || TBB == 1
-# define ENABLE_PARALLEL 0
-#endif
-
-#if ENABLE_PARALLEL == 1
-# include <thread>
-# include <algorithm>
-
-static const int manta_num_threads = std::thread::hardware_concurrency();
-
-# define parallel_block \
- { \
- std::vector<std::thread> threads; \
- {
-
-# define do_parallel threads.push_back( std::thread([&]() {
-# define do_end \
- } ) );
-
-# define block_end \
- } \
- for (auto &thread : threads) { \
- thread.join(); \
- } \
- }
-
-#endif
-
-#define FOR_INT_IJK(num) \
- for (int k_off = 0; k_off < num; ++k_off) \
- for (int j_off = 0; j_off < num; ++j_off) \
- for (int i_off = 0; i_off < num; ++i_off)
-
-using namespace std;
-
-namespace Manta {
-
-//! Assumes phi0<0 and phi1>=0, phi2>=0, and phi3>=0 or vice versa.
-//! In particular, phi0 must not equal any of phi1, phi2 or phi3.
-static Real sortedTetFraction(Real phi0, Real phi1, Real phi2, Real phi3)
-{
- return phi0 * phi0 * phi0 / ((phi0 - phi1) * (phi0 - phi2) * (phi0 - phi3));
-}
-
-//! Assumes phi0<0, phi1<0, and phi2>=0, and phi3>=0 or vice versa.
-//! In particular, phi0 and phi1 must not equal any of phi2 and phi3.
-static Real sortedPrismFraction(Real phi0, Real phi1, Real phi2, Real phi3)
-{
- Real a = phi0 / (phi0 - phi2);
- Real b = phi0 / (phi0 - phi3);
- Real c = phi1 / (phi1 - phi3);
- Real d = phi1 / (phi1 - phi2);
- return a * b * (1 - d) + b * (1 - c) * d + c * d;
-}
-
-Real volumeFraction(Real phi0, Real phi1, Real phi2, Real phi3)
-{
- sort(phi0, phi1, phi2, phi3);
- if (phi3 <= 0)
- return 1;
- else if (phi2 <= 0)
- return 1 - sortedTetFraction(phi3, phi2, phi1, phi0);
- else if (phi1 <= 0)
- return sortedPrismFraction(phi0, phi1, phi2, phi3);
- else if (phi0 <= 0)
- return sortedTetFraction(phi0, phi1, phi2, phi3);
- else
- return 0;
-}
-
-//! The average of the two possible decompositions of the cube into five tetrahedra.
-Real volumeFraction(Real phi000,
- Real phi100,
- Real phi010,
- Real phi110,
- Real phi001,
- Real phi101,
- Real phi011,
- Real phi111)
-{
- return (volumeFraction(phi000, phi001, phi101, phi011) +
- volumeFraction(phi000, phi101, phi100, phi110) +
- volumeFraction(phi000, phi010, phi011, phi110) +
- volumeFraction(phi101, phi011, phi111, phi110) +
- 2 * volumeFraction(phi000, phi011, phi101, phi110) +
- volumeFraction(phi100, phi101, phi001, phi111) +
- volumeFraction(phi100, phi001, phi000, phi010) +
- volumeFraction(phi100, phi110, phi111, phi010) +
- volumeFraction(phi001, phi111, phi011, phi010) +
- 2 * volumeFraction(phi100, phi111, phi001, phi010)) /
- 12;
-}
-
-//! Kernel loop over grid with 2x base resolution!
-
-struct KnEstimateVolumeFraction : public KernelBase {
- KnEstimateVolumeFraction(Grid<Real> &volumes,
- const Grid<Real> &phi,
- const Vec3 &startCentre,
- const Real dx)
- : KernelBase(&volumes, 0), volumes(volumes), phi(phi), startCentre(startCentre), dx(dx)
- {
- runMessage();
- run();
- }
- inline void op(int i,
- int j,
- int k,
- Grid<Real> &volumes,
- const Grid<Real> &phi,
- const Vec3 &startCentre,
- const Real dx) const
- {
- const Vec3 centre = startCentre + Vec3(i, j, k) * 0.5;
- const Real offset = 0.5 * dx;
- const int order = 1; // is sufficient
-
- Real phi000 = phi.getInterpolatedHi(centre + Vec3(-offset, -offset, -offset), order);
- Real phi001 = phi.getInterpolatedHi(centre + Vec3(-offset, -offset, +offset), order);
- Real phi010 = phi.getInterpolatedHi(centre + Vec3(-offset, +offset, -offset), order);
- Real phi011 = phi.getInterpolatedHi(centre + Vec3(-offset, +offset, +offset), order);
- Real phi100 = phi.getInterpolatedHi(centre + Vec3(+offset, -offset, -offset), order);
- Real phi101 = phi.getInterpolatedHi(centre + Vec3(+offset, -offset, +offset), order);
- Real phi110 = phi.getInterpolatedHi(centre + Vec3(+offset, +offset, -offset), order);
- Real phi111 = phi.getInterpolatedHi(centre + Vec3(+offset, +offset, +offset), order);
-
- volumes(i, j, k) = volumeFraction(
- phi000, phi100, phi010, phi110, phi001, phi101, phi011, phi111);
- }
- inline Grid<Real> &getArg0()
- {
- return volumes;
- }
- typedef Grid<Real> type0;
- inline const Grid<Real> &getArg1()
- {
- return phi;
- }
- typedef Grid<Real> type1;
- inline const Vec3 &getArg2()
- {
- return startCentre;
- }
- typedef Vec3 type2;
- inline const Real &getArg3()
- {
- return dx;
- }
- typedef Real type3;
- void runMessage()
- {
- debMsg("Executing kernel KnEstimateVolumeFraction ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- const int _maxX = maxX;
- const int _maxY = maxY;
- if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, volumes, phi, startCentre, dx);
- }
- else {
- const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, volumes, phi, startCentre, dx);
- }
- }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
- Grid<Real> &volumes;
- const Grid<Real> &phi;
- const Vec3 &startCentre;
- const Real dx;
-};
-
-struct KnUpdateVolumeGrid : public KernelBase {
- KnUpdateVolumeGrid(Grid<Real> &cVolLiquid,
- Grid<Real> &uVolLiquid,
- Grid<Real> &vVolLiquid,
- Grid<Real> &wVolLiquid,
- Grid<Real> &exVolLiquid,
- Grid<Real> &eyVolLiquid,
- Grid<Real> &ezVolLiquid,
- const Grid<Real> &src)
- : KernelBase(&cVolLiquid, 0),
- cVolLiquid(cVolLiquid),
- uVolLiquid(uVolLiquid),
- vVolLiquid(vVolLiquid),
- wVolLiquid(wVolLiquid),
- exVolLiquid(exVolLiquid),
- eyVolLiquid(eyVolLiquid),
- ezVolLiquid(ezVolLiquid),
- src(src)
- {
- runMessage();
- run();
- }
- inline void op(int i,
- int j,
- int k,
- Grid<Real> &cVolLiquid,
- Grid<Real> &uVolLiquid,
- Grid<Real> &vVolLiquid,
- Grid<Real> &wVolLiquid,
- Grid<Real> &exVolLiquid,
- Grid<Real> &eyVolLiquid,
- Grid<Real> &ezVolLiquid,
- const Grid<Real> &src) const
- {
- // Work out c
- cVolLiquid(i, j, k) = 0;
- FOR_INT_IJK(2)
- {
- cVolLiquid(i, j, k) += src(2 * i + i_off, 2 * j + j_off, 2 * k + k_off);
- }
- cVolLiquid(i, j, k) /= 8;
-
- // Work out u
- if (i >= 1) {
- uVolLiquid(i, j, k) = 0;
- int base_i = 2 * i - 1;
- int base_j = 2 * j;
- int base_k = 2 * k;
- FOR_INT_IJK(2)
- {
- uVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off);
- }
- uVolLiquid(i, j, k) /= 8;
- }
-
- // v
- if (j >= 1) {
- vVolLiquid(i, j, k) = 0;
- int base_i = 2 * i;
- int base_j = 2 * j - 1;
- int base_k = 2 * k;
- FOR_INT_IJK(2)
- {
- vVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off);
- }
- vVolLiquid(i, j, k) /= 8;
- }
-
- // w
- if (k >= 1) {
- wVolLiquid(i, j, k) = 0;
- int base_i = 2 * i;
- int base_j = 2 * j;
- int base_k = 2 * k - 1;
- FOR_INT_IJK(2)
- {
- wVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off);
- }
- wVolLiquid(i, j, k) /= 8;
- }
-
- // e-x
- if (j >= 1 && k >= 1) {
- exVolLiquid(i, j, k) = 0;
- int base_i = 2 * i;
- int base_j = 2 * j - 1;
- int base_k = 2 * k - 1;
- FOR_INT_IJK(2)
- {
- exVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off);
- }
- exVolLiquid(i, j, k) /= 8;
- }
-
- // e-y
- if (i >= 1 && k >= 1) {
- eyVolLiquid(i, j, k) = 0;
- int base_i = 2 * i - 1;
- int base_j = 2 * j;
- int base_k = 2 * k - 1;
- FOR_INT_IJK(2)
- {
- eyVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off);
- }
- eyVolLiquid(i, j, k) /= 8;
- }
-
- // e-z
- if (i >= 1 && j >= 1) {
- ezVolLiquid(i, j, k) = 0;
- int base_i = 2 * i - 1;
- int base_j = 2 * j - 1;
- int base_k = 2 * k;
- FOR_INT_IJK(2)
- {
- ezVolLiquid(i, j, k) += src(base_i + i_off, base_j + j_off, base_k + k_off);
- }
- ezVolLiquid(i, j, k) /= 8;
- }
- }
- inline Grid<Real> &getArg0()
- {
- return cVolLiquid;
- }
- typedef Grid<Real> type0;
- inline Grid<Real> &getArg1()
- {
- return uVolLiquid;
- }
- typedef Grid<Real> type1;
- inline Grid<Real> &getArg2()
- {
- return vVolLiquid;
- }
- typedef Grid<Real> type2;
- inline Grid<Real> &getArg3()
- {
- return wVolLiquid;
- }
- typedef Grid<Real> type3;
- inline Grid<Real> &getArg4()
- {
- return exVolLiquid;
- }
- typedef Grid<Real> type4;
- inline Grid<Real> &getArg5()
- {
- return eyVolLiquid;
- }
- typedef Grid<Real> type5;
- inline Grid<Real> &getArg6()
- {
- return ezVolLiquid;
- }
- typedef Grid<Real> type6;
- inline const Grid<Real> &getArg7()
- {
- return src;
- }
- typedef Grid<Real> type7;
- void runMessage()
- {
- debMsg("Executing kernel KnUpdateVolumeGrid ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- const int _maxX = maxX;
- const int _maxY = maxY;
- if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i,
- j,
- k,
- cVolLiquid,
- uVolLiquid,
- vVolLiquid,
- wVolLiquid,
- exVolLiquid,
- eyVolLiquid,
- ezVolLiquid,
- src);
- }
- else {
- const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i,
- j,
- k,
- cVolLiquid,
- uVolLiquid,
- vVolLiquid,
- wVolLiquid,
- exVolLiquid,
- eyVolLiquid,
- ezVolLiquid,
- src);
- }
- }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
- Grid<Real> &cVolLiquid;
- Grid<Real> &uVolLiquid;
- Grid<Real> &vVolLiquid;
- Grid<Real> &wVolLiquid;
- Grid<Real> &exVolLiquid;
- Grid<Real> &eyVolLiquid;
- Grid<Real> &ezVolLiquid;
- const Grid<Real> &src;
-};
-
-void computeWeights(const Grid<Real> &phi,
- Grid<Real> &doubleSized,
- Grid<Real> &cVolLiquid,
- Grid<Real> &uVolLiquid,
- Grid<Real> &vVolLiquid,
- Grid<Real> &wVolLiquid,
- Grid<Real> &exVolLiquid,
- Grid<Real> &eyVolLiquid,
- Grid<Real> &ezVolLiquid,
- Real dx)
-{
- KnEstimateVolumeFraction(doubleSized, phi, Vec3(0.25 * dx, 0.25 * dx, 0.25 * dx), 0.5 * dx);
- KnUpdateVolumeGrid(cVolLiquid,
- uVolLiquid,
- vVolLiquid,
- wVolLiquid,
- exVolLiquid,
- eyVolLiquid,
- ezVolLiquid,
- doubleSized);
-}
-
-struct KnUpdateFaceStates : public KernelBase {
- KnUpdateFaceStates(const FlagGrid &flags,
- Grid<int> &uState,
- Grid<int> &vState,
- Grid<int> &wState)
- : KernelBase(&flags, 0), flags(flags), uState(uState), vState(vState), wState(wState)
- {
- runMessage();
- run();
- }
- inline void op(int i,
- int j,
- int k,
- const FlagGrid &flags,
- Grid<int> &uState,
- Grid<int> &vState,
- Grid<int> &wState) const
- {
- bool curObs = flags.isObstacle(i, j, k);
- uState(i, j, k) = (i > 0 && !flags.isObstacle(i - 1, j, k) && !curObs) ?
- FlagGrid::TypeFluid :
- FlagGrid::TypeObstacle;
- vState(i, j, k) = (j > 0 && !flags.isObstacle(i, j - 1, k) && !curObs) ?
- FlagGrid::TypeFluid :
- FlagGrid::TypeObstacle;
- wState(i, j, k) = (k > 0 && !flags.isObstacle(i, j, k - 1) && !curObs) ?
- FlagGrid::TypeFluid :
- FlagGrid::TypeObstacle;
- }
- inline const FlagGrid &getArg0()
- {
- return flags;
- }
- typedef FlagGrid type0;
- inline Grid<int> &getArg1()
- {
- return uState;
- }
- typedef Grid<int> type1;
- inline Grid<int> &getArg2()
- {
- return vState;
- }
- typedef Grid<int> type2;
- inline Grid<int> &getArg3()
- {
- return wState;
- }
- typedef Grid<int> type3;
- void runMessage()
- {
- debMsg("Executing kernel KnUpdateFaceStates ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- const int _maxX = maxX;
- const int _maxY = maxY;
- if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, uState, vState, wState);
- }
- else {
- const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, uState, vState, wState);
- }
- }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
- const FlagGrid &flags;
- Grid<int> &uState;
- Grid<int> &vState;
- Grid<int> &wState;
-};
-
-struct KnApplyVelocities : public KernelBase {
- KnApplyVelocities(MACGrid &dst,
- const Grid<int> &uState,
- const Grid<int> &vState,
- const Grid<int> &wState,
- Grid<Real> &srcU,
- Grid<Real> &srcV,
- Grid<Real> &srcW)
- : KernelBase(&dst, 0),
- dst(dst),
- uState(uState),
- vState(vState),
- wState(wState),
- srcU(srcU),
- srcV(srcV),
- srcW(srcW)
- {
- runMessage();
- run();
- }
- inline void op(int i,
- int j,
- int k,
- MACGrid &dst,
- const Grid<int> &uState,
- const Grid<int> &vState,
- const Grid<int> &wState,
- Grid<Real> &srcU,
- Grid<Real> &srcV,
- Grid<Real> &srcW) const
- {
- dst(i, j, k).x = (uState(i, j, k) == FlagGrid::TypeFluid) ? srcU(i, j, k) : 0;
- dst(i, j, k).y = (vState(i, j, k) == FlagGrid::TypeFluid) ? srcV(i, j, k) : 0;
- if (dst.is3D())
- dst(i, j, k).z = (wState(i, j, k) == FlagGrid::TypeFluid) ? srcW(i, j, k) : 0;
- }
- inline MACGrid &getArg0()
- {
- return dst;
- }
- typedef MACGrid type0;
- inline const Grid<int> &getArg1()
- {
- return uState;
- }
- typedef Grid<int> type1;
- inline const Grid<int> &getArg2()
- {
- return vState;
- }
- typedef Grid<int> type2;
- inline const Grid<int> &getArg3()
- {
- return wState;
- }
- typedef Grid<int> type3;
- inline Grid<Real> &getArg4()
- {
- return srcU;
- }
- typedef Grid<Real> type4;
- inline Grid<Real> &getArg5()
- {
- return srcV;
- }
- typedef Grid<Real> type5;
- inline Grid<Real> &getArg6()
- {
- return srcW;
- }
- typedef Grid<Real> type6;
- void runMessage()
- {
- debMsg("Executing kernel KnApplyVelocities ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- const int _maxX = maxX;
- const int _maxY = maxY;
- if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, dst, uState, vState, wState, srcU, srcV, srcW);
- }
- else {
- const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, dst, uState, vState, wState, srcU, srcV, srcW);
- }
- }
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
- MACGrid &dst;
- const Grid<int> &uState;
- const Grid<int> &vState;
- const Grid<int> &wState;
- Grid<Real> &srcU;
- Grid<Real> &srcV;
- Grid<Real> &srcW;
-};
-
-void solveViscosity(const FlagGrid &flags,
- MACGrid &vel,
- Grid<Real> &cVolLiquid,
- Grid<Real> &uVolLiquid,
- Grid<Real> &vVolLiquid,
- Grid<Real> &wVolLiquid,
- Grid<Real> &exVolLiquid,
- Grid<Real> &eyVolLiquid,
- Grid<Real> &ezVolLiquid,
- Grid<Real> &viscosity,
- const Real dt,
- const Real dx,
- const Real cgAccuracy,
- const Real cgMaxIterFac)
-{
- const Real factor = dt * square(1.0 / dx);
- const int maxIter = (int)(cgMaxIterFac * flags.getSize().max()) * (flags.is3D() ? 1 : 4);
- GridCg<ApplyMatrixViscosityU> *uGcg;
- GridCg<ApplyMatrixViscosityV> *vGcg;
- GridCg<ApplyMatrixViscosityW> *wGcg;
-
- // Tmp grids for CG solve in U, V, W dimensions
- FluidSolver *parent = flags.getParent();
- Grid<Real> uResidual(parent);
- Grid<Real> vResidual(parent);
- Grid<Real> wResidual(parent);
- Grid<Real> uSearch(parent);
- Grid<Real> vSearch(parent);
- Grid<Real> wSearch(parent);
- Grid<Real> uTmp(parent);
- Grid<Real> vTmp(parent);
- Grid<Real> wTmp(parent);
- Grid<Real> uRhs(parent);
- Grid<Real> vRhs(parent);
- Grid<Real> wRhs(parent);
-
- // A matrix U grids
- Grid<Real> uA0(parent); // diagonal elements in A matrix
- Grid<Real> uAplusi(parent); // neighbor at i+1
- Grid<Real> uAplusj(parent); // neighbor at j+1
- Grid<Real> uAplusk(parent); // neighbor at k+1
- Grid<Real> uAminusi(parent); // neighbor at i-1
- Grid<Real> uAminusj(parent); // neighbor at j-1
- Grid<Real> uAminusk(parent); // neighbor at k-1
- Grid<Real> uAhelper1(parent); // additional helper grids for off diagonal elements
- Grid<Real> uAhelper2(parent);
- Grid<Real> uAhelper3(parent);
- Grid<Real> uAhelper4(parent);
- Grid<Real> uAhelper5(parent);
- Grid<Real> uAhelper6(parent);
- Grid<Real> uAhelper7(parent);
- Grid<Real> uAhelper8(parent);
-
- // A matrix V grids
- Grid<Real> vA0(parent);
- Grid<Real> vAplusi(parent);
- Grid<Real> vAplusj(parent);
- Grid<Real> vAplusk(parent);
- Grid<Real> vAminusi(parent);
- Grid<Real> vAminusj(parent);
- Grid<Real> vAminusk(parent);
- Grid<Real> vAhelper1(parent);
- Grid<Real> vAhelper2(parent);
- Grid<Real> vAhelper3(parent);
- Grid<Real> vAhelper4(parent);
- Grid<Real> vAhelper5(parent);
- Grid<Real> vAhelper6(parent);
- Grid<Real> vAhelper7(parent);
- Grid<Real> vAhelper8(parent);
-
- // A matrix W grids
- Grid<Real> wA0(parent);
- Grid<Real> wAplusi(parent);
- Grid<Real> wAplusj(parent);
- Grid<Real> wAplusk(parent);
- Grid<Real> wAminusi(parent);
- Grid<Real> wAminusj(parent);
- Grid<Real> wAminusk(parent);
- Grid<Real> wAhelper1(parent);
- Grid<Real> wAhelper2(parent);
- Grid<Real> wAhelper3(parent);
- Grid<Real> wAhelper4(parent);
- Grid<Real> wAhelper5(parent);
- Grid<Real> wAhelper6(parent);
- Grid<Real> wAhelper7(parent);
- Grid<Real> wAhelper8(parent);
-
- // Solution grids for CG solvers
- Grid<Real> uSolution(parent);
- Grid<Real> vSolution(parent);
- Grid<Real> wSolution(parent);
-
- // Save state of voxel face (fluid or obstacle)
- Grid<int> uState(parent);
- Grid<int> vState(parent);
- Grid<int> wState(parent);
-
- // Save state of voxel face (fluid or obstacle)
- KnUpdateFaceStates(flags, uState, vState, wState);
-
- // Shorter names for flags, we will use them often
- int isFluid = FlagGrid::TypeFluid;
- int isObstacle = FlagGrid::TypeObstacle;
-
- // Main viscosity loop: construct A matrices and rhs's in all dimensions
- FOR_IJK_BND(flags, 1)
- {
-
- // U-terms: 2u_xx+ v_xy +uyy + u_zz + w_xz
- if (uState(i, j, k) == isFluid) {
-
- uRhs(i, j, k) = uVolLiquid(i, j, k) * vel(i, j, k).x;
- uA0(i, j, k) = uVolLiquid(i, j, k);
-
- Real viscRight = viscosity(i, j, k);
- Real viscLeft = viscosity(i - 1, j, k);
- Real volRight = cVolLiquid(i, j, k);
- Real volLeft = cVolLiquid(i - 1, j, k);
-
- Real viscTop = 0.25 * (viscosity(i - 1, j + 1, k) + viscosity(i - 1, j, k) +
- viscosity(i, j + 1, k) + viscosity(i, j, k));
- Real viscBottom = 0.25 * (viscosity(i - 1, j, k) + viscosity(i - 1, j - 1, k) +
- viscosity(i, j, k) + viscosity(i, j - 1, k));
- Real volTop = ezVolLiquid(i, j + 1, k);
- Real volBottom = ezVolLiquid(i, j, k);
-
- Real viscFront = 0.25 * (viscosity(i - 1, j, k + 1) + viscosity(i - 1, j, k) +
- viscosity(i, j, k + 1) + viscosity(i, j, k));
- Real viscBack = 0.25 * (viscosity(i - 1, j, k) + viscosity(i - 1, j, k - 1) +
- viscosity(i, j, k) + viscosity(i, j, k - 1));
- Real volFront = eyVolLiquid(i, j, k + 1);
- Real volBack = eyVolLiquid(i, j, k);
-
- Real factorRight = 2 * factor * viscRight * volRight;
- Real factorLeft = 2 * factor * viscLeft * volLeft;
- Real factorTop = factor * viscTop * volTop;
- Real factorBottom = factor * viscBottom * volBottom;
- Real factorFront = factor * viscFront * volFront;
- Real factorBack = factor * viscBack * volBack;
-
- // u_x_right
- uA0(i, j, k) += factorRight;
- if (uState(i + 1, j, k) == isFluid) {
- uAplusi(i, j, k) += -factorRight;
- }
- else if (uState(i + 1, j, k) == isObstacle) {
- uRhs(i, j, k) -= -vel(i + 1, j, k).x * factorRight;
- }
-
- // u_x_left
- uA0(i, j, k) += factorLeft;
- if (uState(i - 1, j, k) == isFluid) {
- uAminusi(i, j, k) += -factorLeft;
- }
- else if (uState(i - 1, j, k) == isObstacle) {
- uRhs(i, j, k) -= -vel(i - 1, j, k).x * factorLeft;
- }
-
- // u_y_top
- uA0(i, j, k) += factorTop;
- if (uState(i, j + 1, k) == isFluid) {
- uAplusj(i, j, k) += -factorTop;
- }
- else if (uState(i, j + 1, k) == isObstacle) {
- uRhs(i, j, k) -= -vel(i, j + 1, k).x * factorTop;
- }
-
- // u_y_bottom
- uA0(i, j, k) += factorBottom;
- if (uState(i, j - 1, k) == isFluid) {
- uAminusj(i, j, k) += -factorBottom;
- }
- else if (uState(i, j - 1, k) == isObstacle) {
- uRhs(i, j, k) -= -vel(i, j - 1, k).x * factorBottom;
- }
-
- // u_z_front
- uA0(i, j, k) += factorFront;
- if (uState(i, j, k + 1) == isFluid) {
- uAplusk(i, j, k) += -factorFront;
- }
- else if (uState(i, j, k + 1) == isObstacle) {
- uRhs(i, j, k) -= -vel(i, j, k + 1).x * factorFront;
- }
-
- // u_z_back
- uA0(i, j, k) += factorBack;
- if (uState(i, j, k - 1) == isFluid) {
- uAminusk(i, j, k) += -factorBack;
- }
- else if (uState(i, j, k - 1) == isObstacle) {
- uRhs(i, j, k) -= -vel(i, j, k - 1).x * factorBack;
- }
-
- // v_x_top
- if (vState(i, j + 1, k) == isFluid) {
- uAhelper1(i, j, k) += -factorTop;
- }
- else if (vState(i, j + 1, k) == isObstacle) {
- uRhs(i, j, k) -= -vel(i, j + 1, k).y * factorTop;
- }
-
- if (vState(i - 1, j + 1, k) == isFluid) {
- uAhelper2(i, j, k) += factorTop;
- }
- else if (vState(i - 1, j + 1, k) == isObstacle) {
- uRhs(i, j, k) -= vel(i - 1, j + 1, k).y * factorTop;
- }
-
- // v_x_bottom
- if (vState(i, j, k) == isFluid) {
- uAhelper3(i, j, k) += factorBottom;
- }
- else if (vState(i, j, k) == isObstacle) {
- uRhs(i, j, k) -= vel(i, j, k).y * factorBottom;
- }
-
- if (vState(i - 1, j, k) == isFluid) {
- uAhelper4(i, j, k) += -factorBottom;
- }
- else if (vState(i - 1, j, k) == isObstacle) {
- uRhs(i, j, k) -= -vel(i - 1, j, k).y * factorBottom;
- }
-
- // w_x_front
- if (wState(i, j, k + 1) == isFluid) {
- uAhelper5(i, j, k) += -factorFront;
- }
- else if (wState(i, j, k + 1) == isObstacle) {
- uRhs(i, j, k) -= -vel(i, j, k + 1).z * factorFront;
- }
-
- if (wState(i - 1, j, k + 1) == isFluid) {
- uAhelper6(i, j, k) += factorFront;
- }
- else if (wState(i - 1, j, k + 1) == isObstacle) {
- uRhs(i, j, k) -= vel(i - 1, j, k + 1).z * factorFront;
- }
-
- // w_x_back
- if (wState(i, j, k) == isFluid) {
- uAhelper7(i, j, k) += factorBack;
- }
- else if (wState(i, j, k) == isObstacle) {
- uRhs(i, j, k) -= vel(i, j, k).z * factorBack;
- }
-
- if (wState(i - 1, j, k) == isFluid) {
- uAhelper8(i, j, k) += -factorBack;
- }
- else if (wState(i - 1, j, k) == isObstacle) {
- uRhs(i, j, k) -= -vel(i - 1, j, k).z * factorBack;
- }
- }
-
- // V-terms: vxx + 2vyy + vzz + u_yx + w_yz
- if (vState(i, j, k) == isFluid) {
-
- vRhs(i, j, k) = vVolLiquid(i, j, k) * vel(i, j, k).y;
- vA0(i, j, k) = vVolLiquid(i, j, k);
-
- Real viscRight = 0.25 * (viscosity(i, j - 1, k) + viscosity(i + 1, j - 1, k) +
- viscosity(i, j, k) + viscosity(i + 1, j, k));
- Real viscLeft = 0.25 * (viscosity(i, j - 1, k) + viscosity(i - 1, j - 1, k) +
- viscosity(i, j, k) + viscosity(i - 1, j, k));
- Real volRight = ezVolLiquid(i + 1, j, k);
- Real volLeft = ezVolLiquid(i, j, k);
-
- Real viscTop = viscosity(i, j, k);
- Real viscBottom = viscosity(i, j - 1, k);
- Real volTop = cVolLiquid(i, j, k);
- Real volBottom = cVolLiquid(i, j - 1, k);
-
- Real viscFront = 0.25 * (viscosity(i, j - 1, k) + viscosity(i, j - 1, k + 1) +
- viscosity(i, j, k) + viscosity(i, j, k + 1));
- Real viscBack = 0.25 * (viscosity(i, j - 1, k) + viscosity(i, j - 1, k - 1) +
- viscosity(i, j, k) + viscosity(i, j, k - 1));
- Real volFront = exVolLiquid(i, j, k + 1);
- Real volBack = exVolLiquid(i, j, k);
-
- Real factorRight = factor * viscRight * volRight;
- Real factorLeft = factor * viscLeft * volLeft;
- Real factorTop = 2 * factor * viscTop * volTop;
- Real factorBottom = 2 * factor * viscBottom * volBottom;
- Real factorFront = factor * viscFront * volFront;
- Real factorBack = factor * viscBack * volBack;
-
- // v_x_right
- vA0(i, j, k) += factorRight;
- if (vState(i + 1, j, k) == isFluid) {
- vAplusi(i, j, k) += -factorRight;
- }
- else if (vState(i + 1, j, k) == isObstacle) {
- vRhs(i, j, k) -= -vel(i + 1, j, k).y * factorRight;
- }
-
- // v_x_left
- vA0(i, j, k) += factorLeft;
- if (vState(i - 1, j, k) == isFluid) {
- vAminusi(i, j, k) += -factorLeft;
- }
- else if (vState(i - 1, j, k) == isObstacle) {
- vRhs(i, j, k) -= -vel(i - 1, j, k).y * factorLeft;
- }
-
- // vy_top
- vA0(i, j, k) += factorTop;
- if (vState(i, j + 1, k) == isFluid) {
- vAplusj(i, j, k) += -factorTop;
- }
- else if (vState(i, j + 1, k) == isObstacle) {
- vRhs(i, j, k) -= -vel(i, j + 1, k).y * factorTop;
- }
-
- // vy_bottom
- vA0(i, j, k) += factorBottom;
- if (vState(i, j - 1, k) == isFluid) {
- vAminusj(i, j, k) += -factorBottom;
- }
- else if (vState(i, j - 1, k) == isObstacle) {
- vRhs(i, j, k) -= -vel(i, j - 1, k).y * factorBottom;
- }
-
- // v_z_front
- vA0(i, j, k) += factorFront;
- if (vState(i, j, k + 1) == isFluid) {
- vAplusk(i, j, k) += -factorFront;
- }
- else if (vState(i, j, k + 1) == isObstacle) {
- vRhs(i, j, k) -= -vel(i, j, k + 1).y * factorFront;
- }
-
- // v_z_back
- vA0(i, j, k) += factorBack;
- if (vState(i, j, k - 1) == isFluid) {
- vAminusk(i, j, k) += -factorBack;
- }
- else if (vState(i, j, k - 1) == isObstacle) {
- vRhs(i, j, k) -= -vel(i, j, k - 1).y * factorBack;
- }
-
- // u_y_right
- if (uState(i + 1, j, k) == isFluid) {
- vAhelper1(i, j, k) += -factorRight;
- }
- else if (uState(i + 1, j, k) == isObstacle) {
- vRhs(i, j, k) -= -vel(i + 1, j, k).x * factorRight;
- }
-
- if (uState(i + 1, j - 1, k) == isFluid) {
- vAhelper2(i, j, k) += factorRight;
- }
- else if (uState(i + 1, j - 1, k) == isObstacle) {
- vRhs(i, j, k) -= vel(i + 1, j - 1, k).x * factorRight;
- }
-
- // u_y_left
- if (uState(i, j, k) == isFluid) {
- vAhelper3(i, j, k) += factorLeft;
- }
- else if (uState(i, j, k) == isObstacle) {
- vRhs(i, j, k) -= vel(i, j, k).x * factorLeft;
- }
-
- if (uState(i, j - 1, k) == isFluid) {
- vAhelper4(i, j, k) += -factorLeft;
- }
- else if (uState(i, j - 1, k) == isObstacle) {
- vRhs(i, j, k) -= -vel(i, j - 1, k).x * factorLeft;
- }
-
- // w_y_front
- if (wState(i, j, k + 1) == isFluid) {
- vAhelper5(i, j, k) += -factorFront;
- }
- else if (wState(i, j, k + 1) == isObstacle) {
- vRhs(i, j, k) -= -vel(i, j, k + 1).z * factorFront;
- }
-
- if (wState(i, j - 1, k + 1) == isFluid) {
- vAhelper6(i, j, k) += factorFront;
- }
- else if (wState(i, j - 1, k + 1) == isObstacle) {
- vRhs(i, j, k) -= vel(i, j - 1, k + 1).z * factorFront;
- }
-
- // w_y_back
- if (wState(i, j, k) == isFluid) {
- vAhelper7(i, j, k) += factorBack;
- }
- else if (wState(i, j, k) == isObstacle) {
- vRhs(i, j, k) -= vel(i, j, k).z * factorBack;
- }
-
- if (wState(i, j - 1, k) == isFluid) {
- vAhelper8(i, j, k) += -factorBack;
- }
- else if (wState(i, j - 1, k) == isObstacle) {
- vRhs(i, j, k) -= -vel(i, j - 1, k).z * factorBack;
- }
- }
-
- // W-terms: wxx+ wyy+ 2wzz + u_zx + v_zy
- if (wState(i, j, k) == isFluid) {
-
- wRhs(i, j, k) = wVolLiquid(i, j, k) * vel(i, j, k).z;
- wA0(i, j, k) = wVolLiquid(i, j, k);
-
- Real viscRight = 0.25 * (viscosity(i, j, k) + viscosity(i, j, k - 1) +
- viscosity(i + 1, j, k) + viscosity(i + 1, j, k - 1));
- Real viscLeft = 0.25 * (viscosity(i, j, k) + viscosity(i, j, k - 1) +
- viscosity(i - 1, j, k) + viscosity(i - 1, j, k - 1));
- Real volRight = eyVolLiquid(i + 1, j, k);
- Real volLeft = eyVolLiquid(i, j, k);
-
- Real viscTop = 0.25 * (viscosity(i, j, k) + viscosity(i, j, k - 1) + viscosity(i, j + 1, k) +
- viscosity(i, j + 1, k - 1));
- Real viscBottom = 0.25 * (viscosity(i, j, k) + viscosity(i, j, k - 1) +
- viscosity(i, j - 1, k) + viscosity(i, j - 1, k - 1));
- Real volTop = exVolLiquid(i, j + 1, k);
- Real volBottom = exVolLiquid(i, j, k);
-
- Real viscFront = viscosity(i, j, k);
- Real viscBack = viscosity(i, j, k - 1);
- Real volFront = cVolLiquid(i, j, k);
- Real volBack = cVolLiquid(i, j, k - 1);
-
- Real factorRight = factor * viscRight * volRight;
- Real factorLeft = factor * viscLeft * volLeft;
- Real factorTop = factor * viscTop * volTop;
- Real factorBottom = factor * viscBottom * volBottom;
- Real factorFront = 2 * factor * viscFront * volFront;
- Real factorBack = 2 * factor * viscBack * volBack;
-
- // w_x_right
- wA0(i, j, k) += factorRight;
- if (wState(i + 1, j, k) == isFluid) {
- wAplusi(i, j, k) += -factorRight;
- }
- else if (wState(i + 1, j, k) == isObstacle) {
- wRhs(i, j, k) -= -vel(i + 1, j, k).z * factorRight;
- }
-
- // w_x_left
- wA0(i, j, k) += factorLeft;
- if (wState(i - 1, j, k) == isFluid) {
- wAminusi(i, j, k) += -factorLeft;
- }
- else if (wState(i - 1, j, k) == isObstacle) {
- wRhs(i, j, k) -= -vel(i - 1, j, k).z * factorLeft;
- }
-
- // w_y_top
- wA0(i, j, k) += factorTop;
- if (wState(i, j + 1, k) == isFluid) {
- wAplusj(i, j, k) += -factorTop;
- }
- else if (wState(i, j + 1, k) == isObstacle) {
- wRhs(i, j, k) -= -vel(i, j + 1, k).z * factorTop;
- }
-
- // w_y_bottom
- wA0(i, j, k) += factorBottom;
- if (wState(i, j - 1, k) == isFluid) {
- wAminusj(i, j, k) += -factorBottom;
- }
- else if (wState(i, j - 1, k) == isObstacle) {
- wRhs(i, j, k) -= -vel(i, j - 1, k).z * factorBottom;
- }
-
- // w_z_front
- wA0(i, j, k) += factorFront;
- if (wState(i, j, k + 1) == isFluid) {
- wAplusk(i, j, k) += -factorFront;
- }
- else if (wState(i, j, k + 1) == isObstacle) {
- wRhs(i, j, k) -= -vel(i, j, k + 1).z * factorFront;
- }
-
- // w_z_back
- wA0(i, j, k) += factorBack;
- if (wState(i, j, k - 1) == isFluid) {
- wAminusk(i, j, k) += -factorBack;
- }
- else if (wState(i, j, k - 1) == isObstacle) {
- wRhs(i, j, k) -= -vel(i, j, k - 1).z * factorBack;
- }
-
- // u_z_right
- if (uState(i + 1, j, k) == isFluid) {
- wAhelper1(i, j, k) += -factorRight;
- }
- else if (uState(i + 1, j, k) == isObstacle) {
- wRhs(i, j, k) -= -vel(i + 1, j, k).x * factorRight;
- }
-
- if (uState(i + 1, j, k - 1) == isFluid) {
- wAhelper2(i, j, k) += factorRight;
- }
- else if (uState(i + 1, j, k - 1) == isObstacle) {
- wRhs(i, j, k) -= vel(i + 1, j, k - 1).x * factorRight;
- }
-
- // u_z_left
- if (uState(i, j, k) == isFluid) {
- wAhelper3(i, j, k) += factorLeft;
- }
- else if (uState(i, j, k) == isObstacle) {
- wRhs(i, j, k) -= vel(i, j, k).x * factorLeft;
- }
-
- if (uState(i, j, k - 1) == isFluid) {
- wAhelper4(i, j, k) += -factorLeft;
- }
- else if (uState(i, j, k - 1) == isObstacle) {
- wRhs(i, j, k) -= -vel(i, j, k - 1).x * factorLeft;
- }
-
- // v_z_top
- if (vState(i, j + 1, k) == isFluid) {
- wAhelper5(i, j, k) += -factorTop;
- }
- else if (vState(i, j + 1, k) == isObstacle) {
- wRhs(i, j, k) -= -vel(i, j + 1, k).y * factorTop;
- }
-
- if (vState(i, j + 1, k - 1) == isFluid) {
- wAhelper6(i, j, k) += factorTop;
- }
- else if (vState(i, j + 1, k - 1) == isObstacle) {
- wRhs(i, j, k) -= vel(i, j + 1, k - 1).y * factorTop;
- }
-
- // v_z_bottom
- if (vState(i, j, k) == isFluid) {
- wAhelper7(i, j, k) += factorBottom;
- }
- else if (vState(i, j, k) == isObstacle) {
- wRhs(i, j, k) -= vel(i, j, k).y * factorBottom;
- }
-
- if (vState(i, j, k - 1) == isFluid) {
- wAhelper8(i, j, k) += -factorBottom;
- }
- else if (vState(i, j, k - 1) == isObstacle) {
- wRhs(i, j, k) -= -vel(i, j, k - 1).y * factorBottom;
- }
- }
- }
-
- // CG solver for U
- if (flags.is3D()) {
- vector<Grid<Real> *> uMatA{&uA0,
- &uAplusi,
- &uAplusj,
- &uAplusk,
- &uAminusi,
- &uAminusj,
- &uAminusk,
- &uAhelper1,
- &uAhelper2,
- &uAhelper3,
- &uAhelper4,
- &uAhelper5,
- &uAhelper6,
- &uAhelper7,
- &uAhelper8};
- vector<Grid<Real> *> uVecRhs{&vRhs, &wRhs};
- uGcg = new GridCg<ApplyMatrixViscosityU>(
- uSolution, uRhs, uResidual, uSearch, flags, uTmp, uMatA, uVecRhs);
- }
- else {
- errMsg("Viscosity: 2D Matrix application not yet supported in viscosity solver");
- }
-
- // CG solver for V
- if (flags.is3D()) {
- vector<Grid<Real> *> vMatA{&vA0,
- &vAplusi,
- &vAplusj,
- &vAplusk,
- &vAminusi,
- &vAminusj,
- &vAminusk,
- &vAhelper1,
- &vAhelper2,
- &vAhelper3,
- &vAhelper4,
- &vAhelper5,
- &vAhelper6,
- &vAhelper7,
- &vAhelper8};
- vector<Grid<Real> *> vVecRhs{&uRhs, &wRhs};
- vGcg = new GridCg<ApplyMatrixViscosityV>(
- vSolution, vRhs, vResidual, vSearch, flags, vTmp, vMatA, vVecRhs);
- }
- else {
- errMsg("Viscosity: 2D Matrix application not yet supported in viscosity solver");
- }
-
- // CG solver for W
- if (flags.is3D()) {
- vector<Grid<Real> *> wMatA{&wA0,
- &wAplusi,
- &wAplusj,
- &wAplusk,
- &wAminusi,
- &wAminusj,
- &wAminusk,
- &wAhelper1,
- &wAhelper2,
- &wAhelper3,
- &wAhelper4,
- &wAhelper5,
- &wAhelper6,
- &wAhelper7,
- &wAhelper8};
- vector<Grid<Real> *> wVecRhs{&uRhs, &vRhs};
- wGcg = new GridCg<ApplyMatrixViscosityW>(
- wSolution, wRhs, wResidual, wSearch, flags, wTmp, wMatA, wVecRhs);
- }
- else {
- errMsg("Viscosity: 2D Matrix application not yet supported in viscosity solver");
- }
-
- // Same accuracy for all dimensions
- uGcg->setAccuracy(cgAccuracy);
- vGcg->setAccuracy(cgAccuracy);
- wGcg->setAccuracy(cgAccuracy);
-
- // CG solve. Preconditioning not supported yet. Instead, U, V, W can optionally be solved in
- // parallel.
- for (int uIter = 0, vIter = 0, wIter = 0; uIter < maxIter || vIter < maxIter || wIter < maxIter;
- uIter++, vIter++, wIter++) {
-#if ENABLE_PARALLEL == 1
- parallel_block do_parallel
-#endif
- if (uIter < maxIter && !uGcg->iterate()) uIter = maxIter;
-#if ENABLE_PARALLEL == 1
- do_end do_parallel
-#endif
- if (vIter < maxIter && !vGcg->iterate()) vIter = maxIter;
-#if ENABLE_PARALLEL == 1
- do_end do_parallel
-#endif
- if (wIter < maxIter && !wGcg->iterate()) wIter = maxIter;
-#if ENABLE_PARALLEL == 1
- do_end block_end
-#endif
-
- // Make sure that next CG iteration has updated rhs grids
- uRhs.copyFrom(uSearch);
- vRhs.copyFrom(vSearch);
- wRhs.copyFrom(wSearch);
- }
- debMsg(
- "Viscosity: solveViscosity() done. "
- "Iterations (u,v,w): ("
- << uGcg->getIterations() << "," << vGcg->getIterations() << "," << wGcg->getIterations()
- << "), "
- "Residual (u,v,w): ("
- << uGcg->getResNorm() << "," << vGcg->getResNorm() << "," << wGcg->getResNorm() << ")",
- 2);
-
- delete uGcg;
- delete vGcg;
- delete wGcg;
-
- // Apply solutions to global velocity grid
- KnApplyVelocities(vel, uState, vState, wState, uSolution, vSolution, wSolution);
-}
-
-//! To use the viscosity plugin, scenes must call this function before solving pressure.
-//! Note that the 'volumes' grid uses 2x the base resolution
-
-void applyViscosity(const FlagGrid &flags,
- const Grid<Real> &phi,
- MACGrid &vel,
- Grid<Real> &volumes,
- Grid<Real> &viscosity,
- const Real cgAccuracy = 1e-9,
- const Real cgMaxIterFac = 1.5)
-{
- const Real dx = flags.getParent()->getDx();
- const Real dt = flags.getParent()->getDt();
-
- // Reserve temp grids for volume weight calculation
- FluidSolver *parent = flags.getParent();
- Grid<Real> cVolLiquid(parent);
- Grid<Real> uVolLiquid(parent);
- Grid<Real> vVolLiquid(parent);
- Grid<Real> wVolLiquid(parent);
- Grid<Real> exVolLiquid(parent);
- Grid<Real> eyVolLiquid(parent);
- Grid<Real> ezVolLiquid(parent);
-
- // Ensure final weight grid gets cleared at every step
- volumes.clear();
-
- // Save viscous fluid volume in double-sized volumes grid
- computeWeights(phi,
- volumes,
- cVolLiquid,
- uVolLiquid,
- vVolLiquid,
- wVolLiquid,
- exVolLiquid,
- eyVolLiquid,
- ezVolLiquid,
- dx);
-
- // Set up A matrix and rhs. Solve with CG. Update velocity grid.
- solveViscosity(flags,
- vel,
- cVolLiquid,
- uVolLiquid,
- vVolLiquid,
- wVolLiquid,
- exVolLiquid,
- eyVolLiquid,
- ezVolLiquid,
- viscosity,
- dt,
- dx,
- cgAccuracy,
- cgMaxIterFac);
-}
-static PyObject *_W_0(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
-{
- try {
- PbArgs _args(_linargs, _kwds);
- FluidSolver *parent = _args.obtainParent();
- bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
- pbPreparePlugin(parent, "applyViscosity", !noTiming);
- PyObject *_retval = nullptr;
- {
- ArgLocker _lock;
- const FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 0, &_lock);
- const Grid<Real> &phi = *_args.getPtr<Grid<Real>>("phi", 1, &_lock);
- MACGrid &vel = *_args.getPtr<MACGrid>("vel", 2, &_lock);
- Grid<Real> &volumes = *_args.getPtr<Grid<Real>>("volumes", 3, &_lock);
- Grid<Real> &viscosity = *_args.getPtr<Grid<Real>>("viscosity", 4, &_lock);
- const Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 5, 1e-9, &_lock);
- const Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 6, 1.5, &_lock);
- _retval = getPyNone();
- applyViscosity(flags, phi, vel, volumes, viscosity, cgAccuracy, cgMaxIterFac);
- _args.check();
- }
- pbFinalizePlugin(parent, "applyViscosity", !noTiming);
- return _retval;
- }
- catch (std::exception &e) {
- pbSetError("applyViscosity", e.what());
- return 0;
- }
-}
-static const Pb::Register _RP_applyViscosity("", "applyViscosity", _W_0);
-extern "C" {
-void PbRegister_applyViscosity()
-{
- KEEP_UNUSED(_RP_applyViscosity);
-}
-}
-
-} // namespace Manta
-
-#if ENABLE_PARALLEL == 1
-
-# undef parallel_block
-# undef do_parallel
-# undef do_end
-# undef block_end
-# undef parallel_for
-# undef parallel_end
-
-#endif
diff --git a/extern/mantaflow/preprocessed/plugin/vortexplugins.cpp b/extern/mantaflow/preprocessed/plugin/vortexplugins.cpp
index 18222c4ccda..6386e835447 100644
--- a/extern/mantaflow/preprocessed/plugin/vortexplugins.cpp
+++ b/extern/mantaflow/preprocessed/plugin/vortexplugins.cpp
@@ -188,8 +188,7 @@ struct KnAcceleration : public KernelBase {
runMessage();
run();
}
- inline void op(
- IndexInt idx, MACGrid &a, const MACGrid &v1, const MACGrid &v0, const Real idt) const
+ inline void op(IndexInt idx, MACGrid &a, const MACGrid &v1, const MACGrid &v0, const Real idt)
{
a[idx] = (v1[idx] - v0[idx]) * idt;
}
@@ -213,21 +212,17 @@ struct KnAcceleration : public KernelBase {
return idt;
}
typedef Real type3;
- void runMessage()
- {
- debMsg("Executing kernel KnAcceleration ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, a, v1, v0, idt);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, a, v1, v0, idt);
+ }
}
MACGrid &a;
const MACGrid &v1;
@@ -576,18 +571,17 @@ void VICintegration(VortexSheetMesh &mesh,
// prepare CG solver
const int maxIter = (int)(cgMaxIterFac * vel.getSize().max());
- vector<Grid<Real> *> matA{&A0, &Ai, &Aj, &Ak};
-
GridCgInterface *gcg = new GridCg<ApplyMatrix>(
- solution, rhs, residual, search, flags, temp1, matA);
+ solution, rhs, residual, search, flags, temp1, &A0, &Ai, &Aj, &Ak);
gcg->setAccuracy(cgAccuracy);
gcg->setUseL2Norm(true);
gcg->setICPreconditioner(
(GridCgInterface::PreconditionType)precondition, &pca0, &pca1, &pca2, &pca3);
// iterations
+ Real time = 0;
for (int iter = 0; iter < maxIter; iter++) {
- if (!gcg->iterate())
+ if (!gcg->iterate(time))
iter = maxIter;
}
debMsg("VICintegration CG iterations:" << gcg->getIterations() << ", res:" << gcg->getSigma(),
diff --git a/extern/mantaflow/preprocessed/plugin/waveletturbulence.cpp b/extern/mantaflow/preprocessed/plugin/waveletturbulence.cpp
index 7d867542132..20e6a098d72 100644
--- a/extern/mantaflow/preprocessed/plugin/waveletturbulence.cpp
+++ b/extern/mantaflow/preprocessed/plugin/waveletturbulence.cpp
@@ -170,7 +170,7 @@ struct KnInterpolateMACGrid : public KernelBase {
const MACGrid &source,
const Vec3 &sourceFactor,
const Vec3 &off,
- int orderSpace) const
+ int orderSpace)
{
Vec3 pos = Vec3(i, j, k) * sourceFactor + off;
@@ -207,37 +207,35 @@ struct KnInterpolateMACGrid : public KernelBase {
return orderSpace;
}
typedef int type4;
- void runMessage()
- {
- debMsg("Executing kernel KnInterpolateMACGrid ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, target, source, sourceFactor, off, orderSpace);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, target, source, sourceFactor, off, orderSpace);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, target, source, sourceFactor, off, orderSpace);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, target, source, sourceFactor, off, orderSpace);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
MACGrid &target;
const MACGrid &source;
const Vec3 &sourceFactor;
@@ -319,7 +317,7 @@ struct knApplySimpleNoiseVec3 : public KernelBase {
Grid<Vec3> &target,
const WaveletNoiseField &noise,
Real scale,
- const Grid<Real> *weight) const
+ const Grid<Real> *weight)
{
if (!flags.isFluid(i, j, k))
return;
@@ -353,37 +351,35 @@ struct knApplySimpleNoiseVec3 : public KernelBase {
return weight;
}
typedef Grid<Real> type4;
- void runMessage()
- {
- debMsg("Executing kernel knApplySimpleNoiseVec3 ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, target, noise, scale, weight);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, target, noise, scale, weight);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, target, noise, scale, weight);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, target, noise, scale, weight);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
Grid<Vec3> &target;
const WaveletNoiseField &noise;
@@ -461,7 +457,7 @@ struct knApplySimpleNoiseReal : public KernelBase {
Grid<Real> &target,
const WaveletNoiseField &noise,
Real scale,
- const Grid<Real> *weight) const
+ const Grid<Real> *weight)
{
if (!flags.isFluid(i, j, k))
return;
@@ -495,37 +491,35 @@ struct knApplySimpleNoiseReal : public KernelBase {
return weight;
}
typedef Grid<Real> type4;
- void runMessage()
- {
- debMsg("Executing kernel knApplySimpleNoiseReal ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, target, noise, scale, weight);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, target, noise, scale, weight);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, target, noise, scale, weight);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, target, noise, scale, weight);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
Grid<Real> &target;
const WaveletNoiseField &noise;
@@ -615,7 +609,7 @@ struct knApplyNoiseVec3 : public KernelBase {
const Grid<Real> *weight,
const Grid<Vec3> *uv,
bool uvInterpol,
- const Vec3 &sourceFactor) const
+ const Vec3 &sourceFactor)
{
if (!flags.isFluid(i, j, k))
return;
@@ -694,19 +688,40 @@ struct knApplyNoiseVec3 : public KernelBase {
return sourceFactor;
}
typedef Vec3 type8;
- void runMessage()
- {
- debMsg("Executing kernel knApplyNoiseVec3 ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i,
+ j,
+ k,
+ flags,
+ target,
+ noise,
+ scale,
+ scaleSpatial,
+ weight,
+ uv,
+ uvInterpol,
+ sourceFactor);
+ }
+ }
+ else {
+ const int k = 0;
+#pragma omp parallel
+ {
+
+#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i,
@@ -721,32 +736,9 @@ struct knApplyNoiseVec3 : public KernelBase {
uv,
uvInterpol,
sourceFactor);
- }
- else {
- const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i,
- j,
- k,
- flags,
- target,
- noise,
- scale,
- scaleSpatial,
- weight,
- uv,
- uvInterpol,
- sourceFactor);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
Grid<Vec3> &target;
const WaveletNoiseField &noise;
@@ -834,7 +826,7 @@ struct KnApplyComputeEnergy : public KernelBase {
run();
}
inline void op(
- int i, int j, int k, const FlagGrid &flags, const MACGrid &vel, Grid<Real> &energy) const
+ int i, int j, int k, const FlagGrid &flags, const MACGrid &vel, Grid<Real> &energy)
{
Real e = 0.f;
if (flags.isFluid(i, j, k)) {
@@ -858,37 +850,35 @@ struct KnApplyComputeEnergy : public KernelBase {
return energy;
}
typedef Grid<Real> type2;
- void runMessage()
- {
- debMsg("Executing kernel KnApplyComputeEnergy ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, energy);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, energy);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, flags, vel, energy);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, flags, vel, energy);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const FlagGrid &flags;
const MACGrid &vel;
Grid<Real> &energy;
@@ -1019,7 +1009,7 @@ struct KnComputeStrainRateMag : public KernelBase {
run();
}
inline void op(
- int i, int j, int k, const MACGrid &vel, const Grid<Vec3> &velCenter, Grid<Real> &prod) const
+ int i, int j, int k, const MACGrid &vel, const Grid<Vec3> &velCenter, Grid<Real> &prod)
{
// compute Sij = 1/2 * (dU_i/dx_j + dU_j/dx_i)
Vec3 diag = Vec3(vel(i + 1, j, k).x, vel(i, j + 1, k).y, 0.) - vel(i, j, k);
@@ -1056,37 +1046,35 @@ struct KnComputeStrainRateMag : public KernelBase {
return prod;
}
typedef Grid<Real> type2;
- void runMessage()
- {
- debMsg("Executing kernel KnComputeStrainRateMag ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, velCenter, prod);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, velCenter, prod);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, vel, velCenter, prod);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, vel, velCenter, prod);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const MACGrid &vel;
const Grid<Vec3> &velCenter;
Grid<Real> &prod;
diff --git a/extern/mantaflow/preprocessed/plugin/waves.cpp b/extern/mantaflow/preprocessed/plugin/waves.cpp
index 53c56b8c506..8becb5f6341 100644
--- a/extern/mantaflow/preprocessed/plugin/waves.cpp
+++ b/extern/mantaflow/preprocessed/plugin/waves.cpp
@@ -38,7 +38,7 @@ struct knCalcSecDeriv2d : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, const Grid<Real> &v, Grid<Real> &ret) const
+ inline void op(int i, int j, int k, const Grid<Real> &v, Grid<Real> &ret)
{
ret(i, j, k) = (-4. * v(i, j, k) + v(i - 1, j, k) + v(i + 1, j, k) + v(i, j - 1, k) +
v(i, j + 1, k));
@@ -53,37 +53,35 @@ struct knCalcSecDeriv2d : public KernelBase {
return ret;
}
typedef Grid<Real> type1;
- void runMessage()
- {
- debMsg("Executing kernel knCalcSecDeriv2d ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, v, ret);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, v, ret);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, v, ret);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, v, ret);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const Grid<Real> &v;
Grid<Real> &ret;
};
@@ -151,44 +149,43 @@ struct knTotalSum : public KernelBase {
return h;
}
typedef Grid<Real> type0;
- void runMessage()
- {
- debMsg("Executing kernel knTotalSum ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, h, sum);
+
+#pragma omp parallel
+ {
+ double sum = 0;
+#pragma omp for nowait
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, h, sum);
+#pragma omp critical
+ {
+ this->sum += sum;
+ }
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, h, sum);
+#pragma omp parallel
+ {
+ double sum = 0;
+#pragma omp for nowait
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, h, sum);
+#pragma omp critical
+ {
+ this->sum += sum;
+ }
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
- knTotalSum(knTotalSum &o, tbb::split) : KernelBase(o), h(o.h), sum(0)
- {
- }
- void join(const knTotalSum &o)
- {
- sum += o.sum;
- }
Grid<Real> &h;
double sum;
};
@@ -296,7 +293,7 @@ struct MakeRhsWE : public KernelBase {
const Grid<Real> &ut,
const Grid<Real> &utm1,
Real s,
- bool crankNic = false) const
+ bool crankNic = false)
{
rhs(i, j, k) = (2. * ut(i, j, k) - utm1(i, j, k));
if (crankNic) {
@@ -334,37 +331,35 @@ struct MakeRhsWE : public KernelBase {
return crankNic;
}
typedef bool type5;
- void runMessage()
- {
- debMsg("Executing kernel MakeRhsWE ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 1; j < _maxY; j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, rhs, ut, utm1, s, crankNic);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, rhs, ut, utm1, s, crankNic);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 1; i < _maxX; i++)
- op(i, j, k, flags, rhs, ut, utm1, s, crankNic);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 1; j < _maxY; j++)
+ for (int i = 1; i < _maxX; i++)
+ op(i, j, k, flags, rhs, ut, utm1, s, crankNic);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
- }
const FlagGrid &flags;
Grid<Real> &rhs;
const Grid<Real> &ut;
@@ -423,21 +418,17 @@ void cgSolveWE(const FlagGrid &flags,
const int maxIter = (int)(cgMaxIterFac * flags.getSize().max()) * (flags.is3D() ? 1 : 4);
GridCgInterface *gcg;
- vector<Grid<Real> *> matA{&A0, &Ai, &Aj};
-
- if (flags.is3D()) {
- matA.push_back(&Ak);
- gcg = new GridCg<ApplyMatrix>(out, rhs, residual, search, flags, tmp, matA);
- }
- else {
- gcg = new GridCg<ApplyMatrix2D>(out, rhs, residual, search, flags, tmp, matA);
- }
+ if (flags.is3D())
+ gcg = new GridCg<ApplyMatrix>(out, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
+ else
+ gcg = new GridCg<ApplyMatrix2D>(out, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
gcg->setAccuracy(cgAccuracy);
// no preconditioning for now...
+ Real time = 0;
for (int iter = 0; iter < maxIter; iter++) {
- if (!gcg->iterate())
+ if (!gcg->iterate(time))
iter = maxIter;
}
debMsg("cgSolveWaveEq iterations:" << gcg->getIterations() << ", res:" << gcg->getSigma(), 1);
diff --git a/extern/mantaflow/preprocessed/registration.cpp b/extern/mantaflow/preprocessed/registration.cpp
index fd32463b95f..dfbd4074d23 100644
--- a/extern/mantaflow/preprocessed/registration.cpp
+++ b/extern/mantaflow/preprocessed/registration.cpp
@@ -89,7 +89,6 @@ extern void PbRegister_processBurn();
extern void PbRegister_updateFlame();
extern void PbRegister_getSpiralVelocity();
extern void PbRegister_setGradientYWeight();
-extern void PbRegister_PD_fluid_guiding();
extern void PbRegister_releaseBlurPrecomp();
extern void PbRegister_KEpsilonComputeProduction();
extern void PbRegister_KEpsilonSources();
@@ -145,7 +144,6 @@ extern void PbRegister_flipComputeSurfaceNormals();
extern void PbRegister_flipUpdateNeighborRatio();
extern void PbRegister_particleSurfaceTurbulence();
extern void PbRegister_debugCheckParts();
-extern void PbRegister_applyViscosity();
extern void PbRegister_markAsFixed();
extern void PbRegister_texcoordInflow();
extern void PbRegister_meshSmokeInflow();
@@ -287,7 +285,6 @@ void MantaEnsureRegistration()
PbRegister_updateFlame();
PbRegister_getSpiralVelocity();
PbRegister_setGradientYWeight();
- PbRegister_PD_fluid_guiding();
PbRegister_releaseBlurPrecomp();
PbRegister_KEpsilonComputeProduction();
PbRegister_KEpsilonSources();
@@ -343,7 +340,6 @@ void MantaEnsureRegistration()
PbRegister_flipUpdateNeighborRatio();
PbRegister_particleSurfaceTurbulence();
PbRegister_debugCheckParts();
- PbRegister_applyViscosity();
PbRegister_markAsFixed();
PbRegister_texcoordInflow();
PbRegister_meshSmokeInflow();
diff --git a/extern/mantaflow/preprocessed/shapes.cpp b/extern/mantaflow/preprocessed/shapes.cpp
index 4095758cbc0..546d39a7ed9 100644
--- a/extern/mantaflow/preprocessed/shapes.cpp
+++ b/extern/mantaflow/preprocessed/shapes.cpp
@@ -52,8 +52,7 @@ template<class T> struct ApplyShapeToGrid : public KernelBase {
runMessage();
run();
}
- inline void op(
- int i, int j, int k, Grid<T> *grid, Shape *shape, T value, FlagGrid *respectFlags) const
+ inline void op(int i, int j, int k, Grid<T> *grid, Shape *shape, T value, FlagGrid *respectFlags)
{
if (respectFlags && respectFlags->isObstacle(i, j, k))
return;
@@ -80,37 +79,35 @@ template<class T> struct ApplyShapeToGrid : public KernelBase {
return respectFlags;
}
typedef FlagGrid type3;
- void runMessage()
- {
- debMsg("Executing kernel ApplyShapeToGrid ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, shape, value, respectFlags);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, shape, value, respectFlags);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, shape, value, respectFlags);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, shape, value, respectFlags);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<T> *grid;
Shape *shape;
T value;
@@ -141,7 +138,7 @@ template<class T> struct ApplyShapeToGridSmooth : public KernelBase {
Real sigma,
Real shift,
T value,
- FlagGrid *respectFlags) const
+ FlagGrid *respectFlags)
{
if (respectFlags && respectFlags->isObstacle(i, j, k))
return;
@@ -181,37 +178,35 @@ template<class T> struct ApplyShapeToGridSmooth : public KernelBase {
return respectFlags;
}
typedef FlagGrid type5;
- void runMessage()
- {
- debMsg("Executing kernel ApplyShapeToGridSmooth ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, phi, sigma, shift, value, respectFlags);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, phi, sigma, shift, value, respectFlags);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, phi, sigma, shift, value, respectFlags);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, phi, sigma, shift, value, respectFlags);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<T> *grid;
Grid<Real> &phi;
Real sigma;
@@ -230,7 +225,7 @@ struct ApplyShapeToMACGrid : public KernelBase {
run();
}
inline void op(
- int i, int j, int k, MACGrid *grid, Shape *shape, Vec3 value, FlagGrid *respectFlags) const
+ int i, int j, int k, MACGrid *grid, Shape *shape, Vec3 value, FlagGrid *respectFlags)
{
if (respectFlags && respectFlags->isObstacle(i, j, k))
return;
@@ -261,37 +256,35 @@ struct ApplyShapeToMACGrid : public KernelBase {
return respectFlags;
}
typedef FlagGrid type3;
- void runMessage()
- {
- debMsg("Executing kernel ApplyShapeToMACGrid ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, shape, value, respectFlags);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, shape, value, respectFlags);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, grid, shape, value, respectFlags);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, grid, shape, value, respectFlags);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
MACGrid *grid;
Shape *shape;
Vec3 value;
@@ -429,7 +422,7 @@ struct BoxSDF : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Real> &phi, const Vec3 &p1, const Vec3 &p2) const
+ inline void op(int i, int j, int k, Grid<Real> &phi, const Vec3 &p1, const Vec3 &p2)
{
const Vec3 p(i + 0.5, j + 0.5, k + 0.5);
if (p.x <= p2.x && p.x >= p1.x && p.y <= p2.y && p.y >= p1.y && p.z <= p2.z && p.z >= p1.z) {
@@ -505,37 +498,35 @@ struct BoxSDF : public KernelBase {
return p2;
}
typedef Vec3 type2;
- void runMessage()
- {
- debMsg("Executing kernel BoxSDF ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, phi, p1, p2);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, phi, p1, p2);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, phi, p1, p2);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, phi, p1, p2);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<Real> &phi;
const Vec3 &p1;
const Vec3 &p2;
@@ -647,7 +638,7 @@ struct SphereSDF : public KernelBase {
runMessage();
run();
}
- inline void op(int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 scale) const
+ inline void op(int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 scale)
{
phi(i, j, k) = norm((Vec3(i + 0.5, j + 0.5, k + 0.5) - center) / scale) - radius;
}
@@ -671,37 +662,35 @@ struct SphereSDF : public KernelBase {
return scale;
}
typedef Vec3 type3;
- void runMessage()
- {
- debMsg("Executing kernel SphereSDF ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, phi, center, radius, scale);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, phi, center, radius, scale);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, phi, center, radius, scale);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, phi, center, radius, scale);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<Real> &phi;
Vec3 center;
Real radius;
@@ -774,7 +763,7 @@ struct CylinderSDF : public KernelBase {
run();
}
inline void op(
- int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 zaxis, Real maxz) const
+ int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 zaxis, Real maxz)
{
Vec3 p = Vec3(i + 0.5, j + 0.5, k + 0.5) - center;
Real z = fabs(dot(p, zaxis));
@@ -820,37 +809,35 @@ struct CylinderSDF : public KernelBase {
return maxz;
}
typedef Real type4;
- void runMessage()
- {
- debMsg("Executing kernel CylinderSDF ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, phi, center, radius, zaxis, maxz);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, phi, center, radius, zaxis, maxz);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, phi, center, radius, zaxis, maxz);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, phi, center, radius, zaxis, maxz);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
Grid<Real> &phi;
Vec3 center;
Real radius;
@@ -920,13 +907,8 @@ struct SlopeSDF : public KernelBase {
runMessage();
run();
}
- inline void op(int i,
- int j,
- int k,
- const Vec3 &n,
- Grid<Real> &phiObs,
- const Real &fac,
- const Real &origin) const
+ inline void op(
+ int i, int j, int k, const Vec3 &n, Grid<Real> &phiObs, const Real &fac, const Real &origin)
{
phiObs(i, j, k) = (n.x * (double)i + n.y * (double)j + n.z * (double)k - origin) * fac;
@@ -951,37 +933,35 @@ struct SlopeSDF : public KernelBase {
return origin;
}
typedef Real type3;
- void runMessage()
- {
- debMsg("Executing kernel SlopeSDF ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
+ void runMessage(){};
+ void run()
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
- for (int k = __r.begin(); k != (int)__r.end(); k++)
- for (int j = 0; j < _maxY; j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, n, phiObs, fac, origin);
+
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int k = minZ; k < maxZ; k++)
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, n, phiObs, fac, origin);
+ }
}
else {
const int k = 0;
- for (int j = __r.begin(); j != (int)__r.end(); j++)
- for (int i = 0; i < _maxX; i++)
- op(i, j, k, n, phiObs, fac, origin);
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (int j = 0; j < _maxY; j++)
+ for (int i = 0; i < _maxX; i++)
+ op(i, j, k, n, phiObs, fac, origin);
+ }
}
}
- void run()
- {
- if (maxZ > 1)
- tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
- else
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
- }
const Vec3 &n;
Grid<Real> &phiObs;
const Real &fac;
diff --git a/extern/mantaflow/preprocessed/shapes.h b/extern/mantaflow/preprocessed/shapes.h
index 5a400eaed09..fa645389bfe 100644
--- a/extern/mantaflow/preprocessed/shapes.h
+++ b/extern/mantaflow/preprocessed/shapes.h
@@ -269,7 +269,6 @@ class Shape : public PbClass {
protected:
GridType mType;
-
public:
PbArgs _args;
}
@@ -320,7 +319,6 @@ class NullShape : public Shape {
{
gridSetConst<Real>(phi, 1000.0f);
}
-
public:
PbArgs _args;
}
@@ -396,7 +394,6 @@ class Box : public Shape {
protected:
Vec3 mP0, mP1;
-
public:
PbArgs _args;
}
@@ -458,7 +455,6 @@ class Sphere : public Shape {
protected:
Vec3 mCenter, mScale;
Real mRadius;
-
public:
PbArgs _args;
}
@@ -583,7 +579,6 @@ class Cylinder : public Shape {
protected:
Vec3 mCenter, mZDir;
Real mRadius, mZ;
-
public:
PbArgs _args;
}
@@ -660,7 +655,6 @@ class Slope : public Shape {
Real mAnglexy, mAngleyz;
Real mOrigin;
Vec3 mGs;
-
public:
PbArgs _args;
}
diff --git a/extern/mantaflow/preprocessed/test.cpp b/extern/mantaflow/preprocessed/test.cpp
index b90c886efe7..ae9b533a7ba 100644
--- a/extern/mantaflow/preprocessed/test.cpp
+++ b/extern/mantaflow/preprocessed/test.cpp
@@ -50,28 +50,21 @@ struct reductionTest : public KernelBase {
return v;
}
typedef Grid<Real> type0;
- void runMessage()
- {
- debMsg("Executing kernel reductionTest ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, v, sum);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- reductionTest(reductionTest &o, tbb::split) : KernelBase(o), v(o.v), sum(0)
- {
- }
- void join(const reductionTest &o)
- {
- sum += o.sum;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ double sum = 0;
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, v, sum);
+#pragma omp critical
+ {
+ this->sum += sum;
+ }
+ }
}
const Grid<Real> &v;
double sum;
@@ -101,28 +94,21 @@ struct minReduction : public KernelBase {
return v;
}
typedef Grid<Real> type0;
- void runMessage()
- {
- debMsg("Executing kernel minReduction ", 3);
- debMsg("Kernel range"
- << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r)
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, v, sum);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- minReduction(minReduction &o, tbb::split) : KernelBase(o), v(o.v), sum(0)
- {
- }
- void join(const minReduction &o)
- {
- sum = min(sum, o.sum);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+ double sum = 0;
+#pragma omp for nowait
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, v, sum);
+#pragma omp critical
+ {
+ this->sum = min(sum, this->sum);
+ }
+ }
}
const Grid<Real> &v;
double sum;
diff --git a/extern/mantaflow/preprocessed/turbulencepart.cpp b/extern/mantaflow/preprocessed/turbulencepart.cpp
index 168ae9cc2f2..0dddf8f9cc6 100644
--- a/extern/mantaflow/preprocessed/turbulencepart.cpp
+++ b/extern/mantaflow/preprocessed/turbulencepart.cpp
@@ -136,7 +136,7 @@ struct KnSynthesizeTurbulence : public KernelBase {
int octaves,
Real scale,
Real invL0,
- Real kmin) const
+ Real kmin)
{
const Real PERSISTENCE = 0.56123f;
@@ -217,21 +217,17 @@ struct KnSynthesizeTurbulence : public KernelBase {
return kmin;
}
typedef Real type9;
- void runMessage()
- {
- debMsg("Executing kernel KnSynthesizeTurbulence ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, p, flags, noise, kGrid, alpha, dt, octaves, scale, invL0, kmin);
- }
+ void runMessage(){};
void run()
{
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, p, flags, noise, kGrid, alpha, dt, octaves, scale, invL0, kmin);
+ }
}
TurbulenceParticleSystem &p;
FlagGrid &flags;
diff --git a/extern/mantaflow/preprocessed/turbulencepart.h b/extern/mantaflow/preprocessed/turbulencepart.h
index 5177aeb2d96..81c94d77722 100644
--- a/extern/mantaflow/preprocessed/turbulencepart.h
+++ b/extern/mantaflow/preprocessed/turbulencepart.h
@@ -199,7 +199,6 @@ class TurbulenceParticleSystem : public ParticleSystem<TurbulenceParticleData> {
private:
WaveletNoiseField &noise;
-
public:
PbArgs _args;
}
diff --git a/extern/mantaflow/preprocessed/vortexpart.cpp b/extern/mantaflow/preprocessed/vortexpart.cpp
index 0eba2743ee8..db22ff85208 100644
--- a/extern/mantaflow/preprocessed/vortexpart.cpp
+++ b/extern/mantaflow/preprocessed/vortexpart.cpp
@@ -60,56 +60,24 @@ inline Vec3 VortexKernel(const Vec3 &p, const vector<VortexParticleData> &vp, Re
return u;
}
-struct _KnVpAdvectMesh : public KernelBase {
- _KnVpAdvectMesh(const KernelBase &base,
- vector<Node> &nodes,
- const vector<VortexParticleData> &vp,
- Real scale,
- vector<Vec3> &u)
- : KernelBase(base), nodes(nodes), vp(vp), scale(scale), u(u)
+struct KnVpAdvectMesh : public KernelBase {
+ KnVpAdvectMesh(vector<Node> &nodes, const vector<VortexParticleData> &vp, Real scale)
+ : KernelBase(nodes.size()), nodes(nodes), vp(vp), scale(scale), u((size))
{
+ runMessage();
+ run();
}
inline void op(IndexInt idx,
vector<Node> &nodes,
const vector<VortexParticleData> &vp,
Real scale,
- vector<Vec3> &u) const
+ vector<Vec3> &u)
{
if (nodes[idx].flags & Mesh::NfFixed)
u[idx] = 0.0;
else
u[idx] = VortexKernel(nodes[idx].pos, vp, scale);
}
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, nodes, vp, scale, u);
- }
- void run()
- {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- vector<Node> &nodes;
- const vector<VortexParticleData> &vp;
- Real scale;
- vector<Vec3> &u;
-};
-struct KnVpAdvectMesh : public KernelBase {
- KnVpAdvectMesh(vector<Node> &nodes, const vector<VortexParticleData> &vp, Real scale)
- : KernelBase(nodes.size()),
- _inner(KernelBase(nodes.size()), nodes, vp, scale, u),
- nodes(nodes),
- vp(vp),
- scale(scale),
- u((size))
- {
- runMessage();
- run();
- }
- void run()
- {
- _inner.run();
- }
inline operator vector<Vec3>()
{
return u;
@@ -133,62 +101,37 @@ struct KnVpAdvectMesh : public KernelBase {
return scale;
}
typedef Real type2;
- void runMessage()
+ void runMessage(){};
+ void run()
{
- debMsg("Executing kernel KnVpAdvectMesh ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- _KnVpAdvectMesh _inner;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, nodes, vp, scale, u);
+ }
+ }
vector<Node> &nodes;
const vector<VortexParticleData> &vp;
Real scale;
vector<Vec3> u;
};
-struct _KnVpAdvectSelf : public KernelBase {
- _KnVpAdvectSelf(const KernelBase &base,
- vector<VortexParticleData> &vp,
- Real scale,
- vector<Vec3> &u)
- : KernelBase(base), vp(vp), scale(scale), u(u)
- {
- }
- inline void op(IndexInt idx, vector<VortexParticleData> &vp, Real scale, vector<Vec3> &u) const
- {
- if (vp[idx].flag & ParticleBase::PDELETE)
- u[idx] = 0.0;
- else
- u[idx] = VortexKernel(vp[idx].pos, vp, scale);
- }
- void operator()(const tbb::blocked_range<IndexInt> &__r) const
- {
- for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
- op(idx, vp, scale, u);
- }
- void run()
- {
- tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
- }
- vector<VortexParticleData> &vp;
- Real scale;
- vector<Vec3> &u;
-};
struct KnVpAdvectSelf : public KernelBase {
KnVpAdvectSelf(vector<VortexParticleData> &vp, Real scale)
- : KernelBase(vp.size()),
- _inner(KernelBase(vp.size()), vp, scale, u),
- vp(vp),
- scale(scale),
- u((size))
+ : KernelBase(vp.size()), vp(vp), scale(scale), u((size))
{
runMessage();
run();
}
- void run()
+ inline void op(IndexInt idx, vector<VortexParticleData> &vp, Real scale, vector<Vec3> &u)
{
- _inner.run();
+ if (vp[idx].flag & ParticleBase::PDELETE)
+ u[idx] = 0.0;
+ else
+ u[idx] = VortexKernel(vp[idx].pos, vp, scale);
}
inline operator vector<Vec3>()
{
@@ -208,14 +151,18 @@ struct KnVpAdvectSelf : public KernelBase {
return scale;
}
typedef Real type1;
- void runMessage()
+ void runMessage(){};
+ void run()
{
- debMsg("Executing kernel KnVpAdvectSelf ", 3);
- debMsg("Kernel range"
- << " size " << size << " ",
- 4);
- };
- _KnVpAdvectSelf _inner;
+ const IndexInt _sz = size;
+#pragma omp parallel
+ {
+
+#pragma omp for
+ for (IndexInt i = 0; i < _sz; i++)
+ op(i, vp, scale, u);
+ }
+ }
vector<VortexParticleData> &vp;
Real scale;
vector<Vec3> u;
diff --git a/extern/mantaflow/preprocessed/vortexpart.h b/extern/mantaflow/preprocessed/vortexpart.h
index 8f80cf910eb..e48fbc7f507 100644
--- a/extern/mantaflow/preprocessed/vortexpart.h
+++ b/extern/mantaflow/preprocessed/vortexpart.h
@@ -127,7 +127,6 @@ class VortexParticleSystem : public ParticleSystem<VortexParticleData> {
}
virtual ParticleBase *clone();
-
public:
PbArgs _args;
}
diff --git a/extern/mantaflow/preprocessed/vortexsheet.h b/extern/mantaflow/preprocessed/vortexsheet.h
index 0fc0f3a1258..01c32e4e806 100644
--- a/extern/mantaflow/preprocessed/vortexsheet.h
+++ b/extern/mantaflow/preprocessed/vortexsheet.h
@@ -240,7 +240,6 @@ class VortexSheetMesh : public Mesh {
VorticityChannel mVorticity;
TexCoord3Channel mTex1, mTex2;
TurbulenceChannel mTurb;
-
public:
PbArgs _args;
}
diff --git a/intern/mantaflow/intern/MANTA_main.cpp b/intern/mantaflow/intern/MANTA_main.cpp
index 6c8e45ceeb4..0959e017352 100644
--- a/intern/mantaflow/intern/MANTA_main.cpp
+++ b/intern/mantaflow/intern/MANTA_main.cpp
@@ -59,8 +59,8 @@ int MANTA::with_debug(0);
MANTA::MANTA(int *res, FluidModifierData *fmd)
: mCurrentID(++solverID), mMaxRes(fmd->domain->maxres)
{
- if (with_debug)
- cout << "FLUID: " << mCurrentID << " with res(" << res[0] << ", " << res[1] << ", " << res[2]
+  if (with_debug) /* Only announce the new solver when debug output is enabled. */
+    cout << "FLUID: " << mCurrentID << " with res(" << res[0] << ", " << res[1] << ", " << res[2]
<< ")" << endl;
FluidDomainSettings *fds = fmd->domain;
@@ -279,6 +279,7 @@ MANTA::MANTA(int *res, FluidModifierData *fmd)
}
/* All requested initializations must not fail in constructor. */
BLI_assert(initSuccess);
+ UNUSED_VARS(initSuccess);
updatePointers(fmd);
}
diff --git a/intern/mantaflow/intern/strings/fluid_script.h b/intern/mantaflow/intern/strings/fluid_script.h
index 3bf8e66c110..eb29b95affb 100644
--- a/intern/mantaflow/intern/strings/fluid_script.h
+++ b/intern/mantaflow/intern/strings/fluid_script.h
@@ -273,13 +273,32 @@ def fluid_adapt_time_step_$ID$():\n\
const std::string fluid_alloc =
"\n\
mantaMsg('Fluid alloc data')\n\
-flags_s$ID$ = s$ID$.create(FlagGrid, name='$NAME_FLAGS$')\n\
-vel_s$ID$ = s$ID$.create(MACGrid, name='$NAME_VELOCITY$', sparse=True)\n\
+flags_s$ID$ = s$ID$.create(FlagGrid, name='$NAME_FLAGS$', offload=True)\n\
+mantaMsg('Fluid alloc data vel')\n\
+vel_s$ID$ = s$ID$.create(MACGrid, name='$NAME_VELOCITY$', sparse=True, offload=True)\n\
velTmp_s$ID$ = s$ID$.create(MACGrid, name='$NAME_VELOCITYTMP$', sparse=True)\n\
x_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_X$')\n\
y_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_Y$')\n\
z_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_Z$')\n\
-pressure_s$ID$ = s$ID$.create(RealGrid, name='$NAME_PRESSURE$')\n\
+mantaMsg('Fluid alloc data pressure')\n\
+pressure_s$ID$ = s$ID$.create(RealGrid, name='$NAME_PRESSURE$', offload=True)\n\
+mantaMsg('Fluid alloc data rhs')\n\
+rhs_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
+mantaMsg('Fluid alloc data A0')\n\
+A0_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
+mantaMsg('Fluid alloc data Ai')\n\
+Ai_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
+mantaMsg('Fluid alloc data Aj')\n\
+Aj_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
+mantaMsg('Fluid alloc data Ak')\n\
+Ak_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
+mantaMsg('Fluid alloc data search')\n\
+search_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
+mantaMsg('Fluid alloc data residual')\n\
+residual_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
+mantaMsg('Fluid alloc data tmp')\n\
+tmp_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
+mantaMsg('Fluid alloc data 6')\n\
phiObs_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHIOBS$')\n\
phiSIn_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHISIN$') # helper for static flow objects\n\
phiIn_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHIIN$')\n\
@@ -298,7 +317,8 @@ phiOut_s$ID$.setConst(9999)\n\
\n\
# Keep track of important objects in dict to load them later on\n\
fluid_data_dict_final_s$ID$ = { 'vel' : vel_s$ID$ }\n\
-fluid_data_dict_resume_s$ID$ = { 'phiObs' : phiObs_s$ID$, 'phiIn' : phiIn_s$ID$, 'phiOut' : phiOut_s$ID$, 'flags' : flags_s$ID$, 'velTmp' : velTmp_s$ID$ }\n";
+fluid_data_dict_resume_s$ID$ = { 'phiObs' : phiObs_s$ID$, 'phiIn' : phiIn_s$ID$, 'phiOut' : phiOut_s$ID$, 'flags' : flags_s$ID$, 'velTmp' : velTmp_s$ID$ }\n\
+mantaMsg('Fluid alloc DONE')\n";
const std::string fluid_alloc_obstacle =
"\n\
@@ -477,7 +497,8 @@ mantaMsg('Delete guiding solver')\n\
if 'sg$ID$' in globals(): del sg$ID$\n\
\n\
# Release unreferenced memory (if there is some left)\n\
-gc.collect()\n";
+gc.collect()\n\
+mantaMsg('Done deleting')\n";
//////////////////////////////////////////////////////////////////////
// BAKE
diff --git a/intern/mantaflow/intern/strings/liquid_script.h b/intern/mantaflow/intern/strings/liquid_script.h
index c44727bd47e..1773d4869aa 100644
--- a/intern/mantaflow/intern/strings/liquid_script.h
+++ b/intern/mantaflow/intern/strings/liquid_script.h
@@ -274,9 +274,14 @@ def liquid_step_$ID$():\n\
velTmp_s$ID$.copyFrom(vel_s$ID$)\n\
\n\
mantaMsg('Advecting phi')\n\
+ #phi_s$ID$.updateToOmp()\n\
+ #vel_s$ID$.updateToOmp()\n\
advectSemiLagrange(flags=flags_s$ID$, vel=vel_s$ID$, grid=phi_s$ID$, order=1) # first order is usually enough\n\
+ \n\
mantaMsg('Advecting velocity')\n\
advectSemiLagrange(flags=flags_s$ID$, vel=vel_s$ID$, grid=vel_s$ID$, order=2)\n\
+ #phi_s$ID$.updateFromOmp()\n\
+ #vel_s$ID$.updateFromOmp()\n\
\n\
# create level set of particles\n\
gridParticleIndex(parts=pp_s$ID$, flags=flags_s$ID$, indexSys=pindex_s$ID$, index=gpi_s$ID$)\n\
@@ -323,21 +328,45 @@ def liquid_step_$ID$():\n\
getLaplacian(laplacian=curvature_s$ID$, grid=phi_s$ID$)\n\
curvature_s$ID$.clamp(-1.0, 1.0)\n\
\n\
+ #vel_s$ID$.updateToOmp()\n\
setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\
+ #vel_s$ID$.updateFromOmp()\n\
+ \n\
if using_viscosity_s$ID$:\n\
viscosity_s$ID$.setConst(viscosityValue_s$ID$)\n\
applyViscosity(flags=flags_s$ID$, phi=phi_s$ID$, vel=vel_s$ID$, volumes=volumes_s$ID$, viscosity=viscosity_s$ID$)\n\
\n\
+ #vel_s$ID$.updateToOmp()\n\
setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\
+ #vel_s$ID$.updateFromOmp()\n\
+ \n\
if using_guiding_s$ID$:\n\
mantaMsg('Guiding and pressure')\n\
PD_fluid_guiding(vel=vel_s$ID$, velT=velT_s$ID$, flags=flags_s$ID$, phi=phi_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, weight=weightGuide_s$ID$, blurRadius=beta_sg$ID$, pressure=pressure_s$ID$, tau=tau_sg$ID$, sigma=sigma_sg$ID$, theta=theta_sg$ID$, zeroPressureFixing=domainClosed_s$ID$)\n\
else:\n\
- mantaMsg('Pressure')\n\
- solvePressure(flags=flags_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, obvel=obvel_s$ID$ if using_fractions_s$ID$ else None, zeroPressureFixing=domainClosed_s$ID$)\n\
+        mantaMsg('Pressure')\n\
+        # openmp sync to device\n\
+        flags_s$ID$.updateToOmp()\n\
+        vel_s$ID$.updateToOmp()\n\
+        mantaMsg('Pressure 2')\n\
+ \n\
+        #solvePressure(flags=flags_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, obvel=obvel_s$ID$ if using_fractions_s$ID$ else None, zeroPressureFixing=domainClosed_s$ID$)\n\
+        computePressureRhs(rhs=rhs_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, flags=flags_s$ID$, preconditioner=PcNone)\n\
+        mantaMsg('Pressure 21')\n\
+        solvePressureSystem(rhs=rhs_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, flags=flags_s$ID$, useL2Norm=True, preconditioner=PcNone, residual=residual_s$ID$, search=search_s$ID$, A0=A0_s$ID$, Ai=Ai_s$ID$, Aj=Aj_s$ID$, Ak=Ak_s$ID$, tmp=tmp_s$ID$)\n\
+        mantaMsg('Pressure 22')\n\
+        correctVelocity(vel=vel_s$ID$, pressure=pressure_s$ID$, flags=flags_s$ID$, preconditioner=PcNone)\n\
+ \n\
+        mantaMsg('Pressure 3')\n\
+ # openmp sync from device\n\
+ pressure_s$ID$.updateFromOmp()\n\
+ vel_s$ID$.updateFromOmp()\n\
\n\
extrapolateMACSimple(flags=flags_s$ID$, vel=vel_s$ID$, distance=4, intoObs=True if using_fractions_s$ID$ else False)\n\
+ \n\
+ #vel_s$ID$.updateToOmp()\n\
setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\
+ #vel_s$ID$.updateFromOmp()\n\
\n\
if not using_fractions_s$ID$:\n\
extrapolateMACSimple(flags=flags_s$ID$, vel=vel_s$ID$)\n\
diff --git a/source/blender/makesdna/DNA_fluid_defaults.h b/source/blender/makesdna/DNA_fluid_defaults.h
index 4135c4d40a8..ddb1248741e 100644
--- a/source/blender/makesdna/DNA_fluid_defaults.h
+++ b/source/blender/makesdna/DNA_fluid_defaults.h
@@ -76,7 +76,7 @@
.adapt_margin = 4, \
.adapt_res = 0, \
.adapt_threshold = 0.02f, \
- .maxres = 32, \
+ .maxres = 64, \
.solver_res = 3, \
.border_collisions = 0, \
.flags = FLUID_DOMAIN_USE_DISSOLVE_LOG | FLUID_DOMAIN_USE_ADAPTIVE_TIME, \
diff --git a/source/creator/CMakeLists.txt b/source/creator/CMakeLists.txt
index 47fb2642da1..cf05bdca72f 100644
--- a/source/creator/CMakeLists.txt
+++ b/source/creator/CMakeLists.txt
@@ -688,6 +688,13 @@ if(UNIX AND NOT APPLE)
DESTINATION ${TARGETDIR_VER}/python/lib/python${PYTHON_VERSION}/site-packages
)
endif()
+  if(WITH_MOD_FLUID)
+    # Install the file built for the extern_mantaflow target into the bundled Python site-packages.
+    install(
+      PROGRAMS $<TARGET_FILE:extern_mantaflow>
+      DESTINATION ${TARGETDIR_VER}/python/lib/python${PYTHON_VERSION}/site-packages
+    )
+  endif()
elseif(WIN32)
set(BLENDER_TEXT_FILES_DESTINATION ".")