From bd2e3bb7bd06bcbb2134e4853a72ab28f5f332b9 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Thu, 18 Nov 2021 00:41:04 +0100 Subject: Fix T93045: Cycles HIP not rendering OpenVDB volumes Build HIP kernels with NanoVDB, and patch NanoVDB to work with HIP. This is a header only library so no rebuild is needed. The changes are being submitted upstream to openvdb, so this patch should be temporary. Thanks Thomas for help testing this. --- build_files/build_environment/cmake/nanovdb.cmake | 1 + build_files/build_environment/patches/nanovdb.diff | 374 +++++++++++++++++++++ 2 files changed, 375 insertions(+) create mode 100644 build_files/build_environment/patches/nanovdb.diff (limited to 'build_files') diff --git a/build_files/build_environment/cmake/nanovdb.cmake b/build_files/build_environment/cmake/nanovdb.cmake index 0baaf80c254..66bbb9f10d7 100644 --- a/build_files/build_environment/cmake/nanovdb.cmake +++ b/build_files/build_environment/cmake/nanovdb.cmake @@ -42,6 +42,7 @@ ExternalProject_Add(nanovdb URL_HASH ${NANOVDB_HASH_TYPE}=${NANOVDB_HASH} PREFIX ${BUILD_DIR}/nanovdb SOURCE_SUBDIR nanovdb + PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/nanovdb/src/nanovdb < ${PATCH_DIR}/nanovdb.diff CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/nanovdb ${DEFAULT_CMAKE_FLAGS} ${NANOVDB_EXTRA_ARGS} INSTALL_DIR ${LIBDIR}/nanovdb ) diff --git a/build_files/build_environment/patches/nanovdb.diff b/build_files/build_environment/patches/nanovdb.diff new file mode 100644 index 00000000000..fd833e61336 --- /dev/null +++ b/build_files/build_environment/patches/nanovdb.diff @@ -0,0 +1,374 @@ +Index: nanovdb/nanovdb/NanoVDB.h +=================================================================== +--- a/nanovdb/nanovdb/NanoVDB.h (revision 62751) ++++ b/nanovdb/nanovdb/NanoVDB.h (working copy) +@@ -152,8 +152,8 @@ + + #endif // __CUDACC_RTC__ + +-#ifdef __CUDACC__ +-// Only define __hostdev__ when using NVIDIA CUDA compiler ++#if defined(__CUDACC__) || defined(__HIP__) ++// Only define __hostdev__ when using NVIDIA CUDA or HIP compiler + #define __hostdev__ __host__ __device__ + #else + #define __hostdev__ +@@ -461,7 +461,7 @@ + /// Maximum floating-point values + template + struct Maximum; +-#ifdef __CUDA_ARCH__ ++#if defined(__CUDA_ARCH__) || defined(__HIP__) + template<> + struct Maximum + { +@@ -1006,10 +1006,10 @@ + using Vec3i = Vec3; + + /// @brief Return a single precision floating-point vector of this coordinate +-Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); } ++inline __hostdev__ Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); } + + /// @brief Return a double precision floating-point vector of this coordinate +-Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); } ++inline __hostdev__ Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); } + + // ----------------------------> Vec4 <-------------------------------------- + +@@ -1820,7 +1820,7 @@ + }; // Map + + template +-void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper) ++__hostdev__ void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper) + { + float * mf = mMatF, *vf = mVecF; + float* mif = mInvMatF; +@@ -2170,7 +2170,7 @@ + }; // Class Grid + + template +-int Grid::findBlindDataForSemantic(GridBlindDataSemantic semantic) const ++__hostdev__ int Grid::findBlindDataForSemantic(GridBlindDataSemantic semantic) const + { + for (uint32_t i = 0, n = blindDataCount(); i < n; ++i) + if (blindMetaData(i).mSemantic == semantic) +@@ -2328,7 +2328,7 @@ + }; // Tree class + + template +-void Tree::extrema(ValueType& min, ValueType& max) const ++__hostdev__ void Tree::extrema(ValueType& min, ValueType& max) const + { + min = this->root().valueMin(); + max = this->root().valueMax(); +@@ -2336,7 +2336,7 @@ + + template + template +-const NodeT* Tree::getNode(uint32_t i) const ++__hostdev__ const NodeT* Tree::getNode(uint32_t i) const + { + static_assert(is_same, NodeT>::value, "Tree::getNode: unvalid node type"); + NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]); +@@ -2345,7 +2345,7 @@ + + template + template +-const typename TreeNode, LEVEL>::type* Tree::getNode(uint32_t i) const ++__hostdev__ const typename TreeNode, LEVEL>::type* Tree::getNode(uint32_t i) const + { + NANOVDB_ASSERT(i < DataType::mCount[LEVEL]); + return reinterpret_cast*>(reinterpret_cast(this) + DataType::mBytes[LEVEL]) + i; +@@ -2353,7 +2353,7 @@ + + template + template +-NodeT* Tree::getNode(uint32_t i) ++__hostdev__ NodeT* Tree::getNode(uint32_t i) + { + static_assert(is_same, NodeT>::value, "Tree::getNode: invalid node type"); + NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]); +@@ -2362,7 +2362,7 @@ + + template + template +-typename TreeNode, LEVEL>::type* Tree::getNode(uint32_t i) ++__hostdev__ typename TreeNode, LEVEL>::type* Tree::getNode(uint32_t i) + { + NANOVDB_ASSERT(i < DataType::mCount[LEVEL]); + return reinterpret_cast*>(reinterpret_cast(this) + DataType::mBytes[LEVEL]) + i; +@@ -2370,7 +2370,7 @@ + + template + template +-uint32_t Tree::getNodeID(const NodeT& node) const ++__hostdev__ uint32_t Tree::getNodeID(const NodeT& node) const + { + static_assert(is_same, NodeT>::value, "Tree::getNodeID: invalid node type"); + const NodeT* first = reinterpret_cast(reinterpret_cast(this) + DataType::mBytes[NodeT::LEVEL]); +@@ -2380,7 +2380,7 @@ + + template + template +-uint32_t Tree::getLinearOffset(const NodeT& node) const ++__hostdev__ uint32_t Tree::getLinearOffset(const NodeT& node) const + { + return this->getNodeID(node) + DataType::mPFSum[NodeT::LEVEL]; + } +@@ -3366,7 +3366,7 @@ + }; // LeafNode class + + template class MaskT, uint32_t LOG2DIM> +-inline void LeafNode::updateBBox() ++inline __hostdev__ void LeafNode::updateBBox() + { + static_assert(LOG2DIM == 3, "LeafNode::updateBBox: only supports LOGDIM = 3!"); + if (!this->isActive()) return; +Index: nanovdb/nanovdb/util/SampleFromVoxels.h +=================================================================== +--- a/nanovdb/nanovdb/util/SampleFromVoxels.h (revision 62751) ++++ b/nanovdb/nanovdb/util/SampleFromVoxels.h (working copy) +@@ -22,7 +22,7 @@ + #define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED + + // Only define __hostdev__ when compiling as NVIDIA CUDA +-#ifdef __CUDACC__ ++#if defined(__CUDACC__) || defined(__HIP__) + #define __hostdev__ __host__ __device__ + #else + #include // for floor +@@ -136,7 +136,7 @@ + + template + template +-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const ++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const + { + const CoordT ijk = Round(xyz); + if (ijk != mPos) { +@@ -147,7 +147,7 @@ + } + + template +-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT& ijk) const ++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT& ijk) const + { + if (ijk != mPos) { + mPos = ijk; +@@ -158,7 +158,7 @@ + + template + template +-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const ++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const + { + return mAcc.getValue(Round(xyz)); + } +@@ -195,7 +195,7 @@ + }; // TrilinearSamplerBase + + template +-void TrilinearSampler::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const ++__hostdev__ void TrilinearSampler::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const + { + v[0][0][0] = mAcc.getValue(ijk); // i, j, k + +@@ -224,7 +224,7 @@ + + template + template class Vec3T> +-typename TreeOrAccT::ValueType TrilinearSampler::sample(const Vec3T &uvw, const ValueT (&v)[2][2][2]) ++__hostdev__ typename TreeOrAccT::ValueType TrilinearSampler::sample(const Vec3T &uvw, const ValueT (&v)[2][2][2]) + { + #if 0 + auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a +@@ -239,7 +239,7 @@ + + template + template class Vec3T> +-Vec3T TrilinearSampler::gradient(const Vec3T &uvw, const ValueT (&v)[2][2][2]) ++__hostdev__ Vec3T TrilinearSampler::gradient(const Vec3T &uvw, const ValueT (&v)[2][2][2]) + { + static_assert(std::is_floating_point::value, "TrilinearSampler::gradient requires a floating-point type"); + #if 0 +@@ -270,7 +270,7 @@ + } + + template +-bool TrilinearSampler::zeroCrossing(const ValueT (&v)[2][2][2]) ++__hostdev__ bool TrilinearSampler::zeroCrossing(const ValueT (&v)[2][2][2]) + { + static_assert(std::is_floating_point::value, "TrilinearSampler::zeroCrossing requires a floating-point type"); + const bool less = v[0][0][0] < ValueT(0); +@@ -363,7 +363,7 @@ + + template + template class Vec3T> +-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const ++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const + { + this->cache(xyz); + return BaseT::sample(xyz, mVal); +@@ -370,7 +370,7 @@ + } + + template +-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const ++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const + { + return ijk == mPos ? mVal[0][0][0] : BaseT::mAcc.getValue(ijk); + } +@@ -377,7 +377,7 @@ + + template + template class Vec3T> +-Vec3T SampleFromVoxels::gradient(Vec3T xyz) const ++__hostdev__ Vec3T SampleFromVoxels::gradient(Vec3T xyz) const + { + this->cache(xyz); + return BaseT::gradient(xyz, mVal); +@@ -393,7 +393,7 @@ + + template + template class Vec3T> +-void SampleFromVoxels::cache(Vec3T& xyz) const ++__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const + { + CoordT ijk = Floor(xyz); + if (ijk != mPos) { +@@ -406,7 +406,7 @@ + + template + template class Vec3T> +-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const ++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const + { + ValueT val[2][2][2]; + CoordT ijk = Floor(xyz); +@@ -418,7 +418,7 @@ + + template + template class Vec3T> +-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const ++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const + { + auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }; + +@@ -463,7 +463,7 @@ + + template + template class Vec3T> +-inline Vec3T SampleFromVoxels::gradient(Vec3T xyz) const ++inline __hostdev__ Vec3T SampleFromVoxels::gradient(Vec3T xyz) const + { + ValueT val[2][2][2]; + CoordT ijk = Floor(xyz); +@@ -473,7 +473,7 @@ + + template + template class Vec3T> +-bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const ++__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const + { + ValueT val[2][2][2]; + CoordT ijk = Floor(xyz); +@@ -510,7 +510,7 @@ + }; // TriquadraticSamplerBase + + template +-void TriquadraticSampler::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const ++__hostdev__ void TriquadraticSampler::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const + { + CoordT p(ijk[0] - 1, 0, 0); + for (int dx = 0; dx < 3; ++dx, ++p[0]) { +@@ -526,7 +526,7 @@ + + template + template class Vec3T> +-typename TreeOrAccT::ValueType TriquadraticSampler::sample(const Vec3T &uvw, const ValueT (&v)[3][3][3]) ++__hostdev__ typename TreeOrAccT::ValueType TriquadraticSampler::sample(const Vec3T &uvw, const ValueT (&v)[3][3][3]) + { + auto kernel = [](const ValueT* value, double weight)->ValueT { + return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) + +@@ -545,7 +545,7 @@ + } + + template +-bool TriquadraticSampler::zeroCrossing(const ValueT (&v)[3][3][3]) ++__hostdev__ bool TriquadraticSampler::zeroCrossing(const ValueT (&v)[3][3][3]) + { + static_assert(std::is_floating_point::value, "TrilinearSampler::zeroCrossing requires a floating-point type"); + const bool less = v[0][0][0] < ValueT(0); +@@ -624,7 +624,7 @@ + + template + template class Vec3T> +-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const ++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const + { + this->cache(xyz); + return BaseT::sample(xyz, mVal); +@@ -631,7 +631,7 @@ + } + + template +-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const ++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const + { + return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk); + } +@@ -646,7 +646,7 @@ + + template + template class Vec3T> +-void SampleFromVoxels::cache(Vec3T& xyz) const ++__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const + { + CoordT ijk = Floor(xyz); + if (ijk != mPos) { +@@ -657,7 +657,7 @@ + + template + template class Vec3T> +-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const ++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const + { + ValueT val[3][3][3]; + CoordT ijk = Floor(xyz); +@@ -667,7 +667,7 @@ + + template + template class Vec3T> +-bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const ++__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const + { + ValueT val[3][3][3]; + CoordT ijk = Floor(xyz); +@@ -710,7 +710,7 @@ + }; // TricubicSampler + + template +-void TricubicSampler::stencil(const CoordT& ijk, ValueT (&C)[64]) const ++__hostdev__ void TricubicSampler::stencil(const CoordT& ijk, ValueT (&C)[64]) const + { + auto fetch = [&](int i, int j, int k) -> ValueT& { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; }; + +@@ -929,7 +929,7 @@ + + template + template class Vec3T> +-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const ++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const + { + this->cache(xyz); + return BaseT::sample(xyz, mC); +@@ -937,7 +937,7 @@ + + template + template class Vec3T> +-void SampleFromVoxels::cache(Vec3T& xyz) const ++__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const + { + CoordT ijk = Floor(xyz); + if (ijk != mPos) { -- cgit v1.2.3