From bd2e3bb7bd06bcbb2134e4853a72ab28f5f332b9 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel
Date: Thu, 18 Nov 2021 00:41:04 +0100
Subject: Fix T93045: Cycles HIP not rendering OpenVDB volumes

Build HIP kernels with NanoVDB, and patch NanoVDB to work with HIP.
NanoVDB is a header-only library, so no rebuild is needed.

The changes are being submitted upstream to OpenVDB, so this patch
should be temporary.

Thanks to Thomas for help with testing.
---
 build_files/build_environment/cmake/nanovdb.cmake  |   1 +
 build_files/build_environment/patches/nanovdb.diff | 374 +++++++++++++++++++++
 intern/cycles/kernel/CMakeLists.txt                |   6 +
 3 files changed, 381 insertions(+)
 create mode 100644 build_files/build_environment/patches/nanovdb.diff

diff --git a/build_files/build_environment/cmake/nanovdb.cmake b/build_files/build_environment/cmake/nanovdb.cmake
index 0baaf80c254..66bbb9f10d7 100644
--- a/build_files/build_environment/cmake/nanovdb.cmake
+++ b/build_files/build_environment/cmake/nanovdb.cmake
@@ -42,6 +42,7 @@ ExternalProject_Add(nanovdb
   URL_HASH ${NANOVDB_HASH_TYPE}=${NANOVDB_HASH}
   PREFIX ${BUILD_DIR}/nanovdb
   SOURCE_SUBDIR nanovdb
+  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/nanovdb/src/nanovdb < ${PATCH_DIR}/nanovdb.diff
   CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/nanovdb ${DEFAULT_CMAKE_FLAGS} ${NANOVDB_EXTRA_ARGS}
   INSTALL_DIR ${LIBDIR}/nanovdb
 )
diff --git a/build_files/build_environment/patches/nanovdb.diff b/build_files/build_environment/patches/nanovdb.diff
new file mode 100644
index 00000000000..fd833e61336
--- /dev/null
+++ b/build_files/build_environment/patches/nanovdb.diff
@@ -0,0 +1,374 @@
+Index: nanovdb/nanovdb/NanoVDB.h
+===================================================================
+--- a/nanovdb/nanovdb/NanoVDB.h	(revision 62751)
++++ b/nanovdb/nanovdb/NanoVDB.h	(working copy)
+@@ -152,8 +152,8 @@
+
+ #endif // __CUDACC_RTC__
+
+-#ifdef __CUDACC__
+-// Only define __hostdev__ when using NVIDIA CUDA compiler
++#if defined(__CUDACC__) || defined(__HIP__)
++// Only define __hostdev__ when using NVIDIA CUDA or HIP compiler
+ #define __hostdev__ __host__ __device__
+ #else
+ #define __hostdev__
+@@ -461,7 +461,7 @@
+ /// Maximum floating-point values
+ template
+ struct Maximum;
+-#ifdef __CUDA_ARCH__
++#if defined(__CUDA_ARCH__) || defined(__HIP__)
+ template<>
+ struct Maximum
+ {
+@@ -1006,10 +1006,10 @@
+ using Vec3i = Vec3;
+
+ /// @brief Return a single precision floating-point vector of this coordinate
+-Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); }
++inline __hostdev__ Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); }
+
+ /// @brief Return a double precision floating-point vector of this coordinate
+-Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); }
++inline __hostdev__ Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); }
+
+ // ----------------------------> Vec4 <--------------------------------------
+
+@@ -1820,7 +1820,7 @@
+ }; // Map
+
+ template
+-void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper)
++__hostdev__ void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper)
+ {
+     float * mf = mMatF, *vf = mVecF;
+     float* mif = mInvMatF;
+@@ -2170,7 +2170,7 @@
+ }; // Class Grid
+
+ template
+-int Grid::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
++__hostdev__ int Grid::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
+ {
+     for (uint32_t i = 0, n = blindDataCount(); i < n; ++i)
+         if (blindMetaData(i).mSemantic == semantic)
+@@ -2328,7 +2328,7 @@
+ }; // Tree class
+
+ template
+-void Tree::extrema(ValueType& min, ValueType& max) const
++__hostdev__ void Tree::extrema(ValueType& min, ValueType& max) const
+ {
+     min = this->root().valueMin();
+     max = this->root().valueMax();
+@@ -2336,7 +2336,7 @@
+
+ template
+ template
+-const NodeT* Tree::getNode(uint32_t i) const
++__hostdev__ const NodeT* Tree::getNode(uint32_t i) const
+ {
+     static_assert(is_same, NodeT>::value, "Tree::getNode: unvalid node type");
+     NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]);
+@@ -2345,7 +2345,7 @@
+
+ template
+ template
+-const typename TreeNode, LEVEL>::type* Tree::getNode(uint32_t i) const
++__hostdev__ const typename TreeNode, LEVEL>::type* Tree::getNode(uint32_t i) const
+ {
+     NANOVDB_ASSERT(i < DataType::mCount[LEVEL]);
+     return reinterpret_cast*>(reinterpret_cast(this) + DataType::mBytes[LEVEL]) + i;
+@@ -2353,7 +2353,7 @@
+
+ template
+ template
+-NodeT* Tree::getNode(uint32_t i)
++__hostdev__ NodeT* Tree::getNode(uint32_t i)
+ {
+     static_assert(is_same, NodeT>::value, "Tree::getNode: invalid node type");
+     NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]);
+@@ -2362,7 +2362,7 @@
+
+ template
+ template
+-typename TreeNode, LEVEL>::type* Tree::getNode(uint32_t i)
++__hostdev__ typename TreeNode, LEVEL>::type* Tree::getNode(uint32_t i)
+ {
+     NANOVDB_ASSERT(i < DataType::mCount[LEVEL]);
+     return reinterpret_cast*>(reinterpret_cast(this) + DataType::mBytes[LEVEL]) + i;
+@@ -2370,7 +2370,7 @@
+
+ template
+ template
+-uint32_t Tree::getNodeID(const NodeT& node) const
++__hostdev__ uint32_t Tree::getNodeID(const NodeT& node) const
+ {
+     static_assert(is_same, NodeT>::value, "Tree::getNodeID: invalid node type");
+     const NodeT* first = reinterpret_cast(reinterpret_cast(this) + DataType::mBytes[NodeT::LEVEL]);
+@@ -2380,7 +2380,7 @@
+
+ template
+ template
+-uint32_t Tree::getLinearOffset(const NodeT& node) const
++__hostdev__ uint32_t Tree::getLinearOffset(const NodeT& node) const
+ {
+     return this->getNodeID(node) + DataType::mPFSum[NodeT::LEVEL];
+ }
+@@ -3366,7 +3366,7 @@
+ }; // LeafNode class
+
+ template class MaskT, uint32_t LOG2DIM>
+-inline void LeafNode::updateBBox()
++inline __hostdev__ void LeafNode::updateBBox()
+ {
+     static_assert(LOG2DIM == 3, "LeafNode::updateBBox: only supports LOGDIM = 3!");
+     if (!this->isActive()) return;
+Index: nanovdb/nanovdb/util/SampleFromVoxels.h
+===================================================================
+--- a/nanovdb/nanovdb/util/SampleFromVoxels.h	(revision 62751)
++++ b/nanovdb/nanovdb/util/SampleFromVoxels.h	(working copy)
+@@ -22,7 +22,7 @@
+ #define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
+
+ // Only define __hostdev__ when compiling as NVIDIA CUDA
+-#ifdef __CUDACC__
++#if defined(__CUDACC__) || defined(__HIP__)
+ #define __hostdev__ __host__ __device__
+ #else
+ #include // for floor
+@@ -136,7 +136,7 @@
+
+ template
+ template
+-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const
++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const
+ {
+     const CoordT ijk = Round(xyz);
+     if (ijk != mPos) {
+@@ -147,7 +147,7 @@
+ }
+
+ template
+-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT& ijk) const
++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT& ijk) const
+ {
+     if (ijk != mPos) {
+         mPos = ijk;
+@@ -158,7 +158,7 @@
+
+ template
+ template
+-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const
++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const Vec3T& xyz) const
+ {
+     return mAcc.getValue(Round(xyz));
+ }
+@@ -195,7 +195,7 @@
+ }; // TrilinearSamplerBase
+
+ template
+-void TrilinearSampler::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
++__hostdev__ void TrilinearSampler::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
+ {
+     v[0][0][0] = mAcc.getValue(ijk); // i, j, k
+
+@@ -224,7 +224,7 @@
+
+ template
+ template class Vec3T>
+-typename TreeOrAccT::ValueType TrilinearSampler::sample(const Vec3T &uvw, const ValueT (&v)[2][2][2])
++__hostdev__ typename TreeOrAccT::ValueType TrilinearSampler::sample(const Vec3T &uvw, const ValueT (&v)[2][2][2])
+ {
+ #if 0
+     auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
+@@ -239,7 +239,7 @@
+
+ template
+ template class Vec3T>
+-Vec3T TrilinearSampler::gradient(const Vec3T &uvw, const ValueT (&v)[2][2][2])
++__hostdev__ Vec3T TrilinearSampler::gradient(const Vec3T &uvw, const ValueT (&v)[2][2][2])
+ {
+     static_assert(std::is_floating_point::value, "TrilinearSampler::gradient requires a floating-point type");
+ #if 0
+@@ -270,7 +270,7 @@
+ }
+
+ template
+-bool TrilinearSampler::zeroCrossing(const ValueT (&v)[2][2][2])
++__hostdev__ bool TrilinearSampler::zeroCrossing(const ValueT (&v)[2][2][2])
+ {
+     static_assert(std::is_floating_point::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
+     const bool less = v[0][0][0] < ValueT(0);
+@@ -363,7 +363,7 @@
+
+ template
+ template class Vec3T>
+-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
+ {
+     this->cache(xyz);
+     return BaseT::sample(xyz, mVal);
+@@ -370,7 +370,7 @@
+ }
+
+ template
+-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const
++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const
+ {
+     return ijk == mPos ? mVal[0][0][0] : BaseT::mAcc.getValue(ijk);
+ }
+@@ -377,7 +377,7 @@
+
+ template
+ template class Vec3T>
+-Vec3T SampleFromVoxels::gradient(Vec3T xyz) const
++__hostdev__ Vec3T SampleFromVoxels::gradient(Vec3T xyz) const
+ {
+     this->cache(xyz);
+     return BaseT::gradient(xyz, mVal);
+@@ -393,7 +393,7 @@
+
+ template
+ template class Vec3T>
+-void SampleFromVoxels::cache(Vec3T& xyz) const
++__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const
+ {
+     CoordT ijk = Floor(xyz);
+     if (ijk != mPos) {
+@@ -406,7 +406,7 @@
+
+ template
+ template class Vec3T>
+-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
+ {
+     ValueT val[2][2][2];
+     CoordT ijk = Floor(xyz);
+@@ -418,7 +418,7 @@
+
+ template
+ template class Vec3T>
+-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
+ {
+     auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
+@@ -463,7 +463,7 @@
+
+ template
+ template class Vec3T>
+-inline Vec3T SampleFromVoxels::gradient(Vec3T xyz) const
++inline __hostdev__ Vec3T SampleFromVoxels::gradient(Vec3T xyz) const
+ {
+     ValueT val[2][2][2];
+     CoordT ijk = Floor(xyz);
+@@ -473,7 +473,7 @@
+
+ template
+ template class Vec3T>
+-bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const
++__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const
+ {
+     ValueT val[2][2][2];
+     CoordT ijk = Floor(xyz);
+@@ -510,7 +510,7 @@
+ }; // TriquadraticSamplerBase
+
+ template
+-void TriquadraticSampler::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
++__hostdev__ void TriquadraticSampler::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
+ {
+     CoordT p(ijk[0] - 1, 0, 0);
+     for (int dx = 0; dx < 3; ++dx, ++p[0]) {
+@@ -526,7 +526,7 @@
+
+ template
+ template class Vec3T>
+-typename TreeOrAccT::ValueType TriquadraticSampler::sample(const Vec3T &uvw, const ValueT (&v)[3][3][3])
++__hostdev__ typename TreeOrAccT::ValueType TriquadraticSampler::sample(const Vec3T &uvw, const ValueT (&v)[3][3][3])
+ {
+     auto kernel = [](const ValueT* value, double weight)->ValueT {
+         return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) +
+@@ -545,7 +545,7 @@
+ }
+
+ template
+-bool TriquadraticSampler::zeroCrossing(const ValueT (&v)[3][3][3])
++__hostdev__ bool TriquadraticSampler::zeroCrossing(const ValueT (&v)[3][3][3])
+ {
+     static_assert(std::is_floating_point::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
+     const bool less = v[0][0][0] < ValueT(0);
+@@ -624,7 +624,7 @@
+
+ template
+ template class Vec3T>
+-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
+ {
+     this->cache(xyz);
+     return BaseT::sample(xyz, mVal);
+@@ -631,7 +631,7 @@
+ }
+
+ template
+-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const
++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(const CoordT &ijk) const
+ {
+     return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk);
+ }
+@@ -646,7 +646,7 @@
+
+ template
+ template class Vec3T>
+-void SampleFromVoxels::cache(Vec3T& xyz) const
++__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const
+ {
+     CoordT ijk = Floor(xyz);
+     if (ijk != mPos) {
+@@ -657,7 +657,7 @@
+
+ template
+ template class Vec3T>
+-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
+ {
+     ValueT val[3][3][3];
+     CoordT ijk = Floor(xyz);
+@@ -667,7 +667,7 @@
+
+ template
+ template class Vec3T>
+-bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const
++__hostdev__ bool SampleFromVoxels::zeroCrossing(Vec3T xyz) const
+ {
+     ValueT val[3][3][3];
+     CoordT ijk = Floor(xyz);
+@@ -710,7 +710,7 @@
+ }; // TricubicSampler
+
+ template
+-void TricubicSampler::stencil(const CoordT& ijk, ValueT (&C)[64]) const
++__hostdev__ void TricubicSampler::stencil(const CoordT& ijk, ValueT (&C)[64]) const
+ {
+     auto fetch = [&](int i, int j, int k) -> ValueT& { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; };
+
+@@ -929,7 +929,7 @@
+
+ template
+ template class Vec3T>
+-typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
++__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels::operator()(Vec3T xyz) const
+ {
+     this->cache(xyz);
+     return BaseT::sample(xyz, mC);
+@@ -937,7 +937,7 @@
+
+ template
+ template class Vec3T>
+-void SampleFromVoxels::cache(Vec3T& xyz) const
++__hostdev__ void SampleFromVoxels::cache(Vec3T& xyz) const
+ {
+     CoordT ijk = Floor(xyz);
+     if (ijk != mPos) {
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 29ff69df864..1a254f5eddc 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -565,6 +565,12 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
       set(name ${name}_experimental)
     endif()
 
+    if(WITH_NANOVDB)
+      set(hip_flags ${hip_flags}
+        -D WITH_NANOVDB
+        -I "${NANOVDB_INCLUDE_DIR}")
+    endif()
+
     if(WITH_CYCLES_DEBUG)
       set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
     endif()
-- 
cgit v1.2.3
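The NanoVDB patch above is mechanical: it extends the __hostdev__ macro to also cover HIP, and adds that qualifier (plus `inline` for definitions that live outside a class body in the header) to every function the GPU kernels may call. A minimal sketch of the idiom in isolation follows; the `lerp` function is a hypothetical stand-in, not part of NanoVDB or of this patch:

    // One macro covers plain C++, NVIDIA CUDA (nvcc defines __CUDACC__)
    // and AMD HIP (the HIP compiler defines __HIP__).
    #if defined(__CUDACC__) || defined(__HIP__)
    #define __hostdev__ __host__ __device__
    #else
    #define __hostdev__
    #endif

    // Out-of-class definitions in a header also need `inline` to avoid
    // duplicate-symbol errors across translation units, which is why the
    // patch turns definitions like `Vec3f Coord::asVec3s() const` into
    // `inline __hostdev__ Vec3f Coord::asVec3s() const`.
    inline __hostdev__ float lerp(float a, float b, float w)
    {
      return a + w * (b - a);
    }

On a host-only compiler the macro expands to nothing and the same header compiles unchanged, which is what makes a temporary header-only patch like this sufficient: no library rebuild is involved.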
From 3ad2bf1327cac5f036d763e1cc690b1b2da8e1c4 Mon Sep 17 00:00:00 2001
From: Sergey Sharybin
Date: Thu, 18 Nov 2021 11:25:39 +0100
Subject: Cycles: Fix command line render overshooting time limit

The calculation based on preserving device occupancy conflicted with
the fact that the time limit requires rendering fewer samples in the
last round of render work. For example, rendering BMW27 for 30 seconds
on an i9-11900k was actually rendering for almost a minute.

Now the render time limit is respected much more closely.

Differential Revision: https://developer.blender.org/D13269
---
 intern/cycles/integrator/render_scheduler.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/intern/cycles/integrator/render_scheduler.cpp b/intern/cycles/integrator/render_scheduler.cpp
index f776d01ef67..276453f7aec 100644
--- a/intern/cycles/integrator/render_scheduler.cpp
+++ b/intern/cycles/integrator/render_scheduler.cpp
@@ -827,6 +827,26 @@ int RenderScheduler::get_num_samples_to_path_trace() const
     num_samples_to_occupy = lround(state_.occupancy_num_samples * 0.7f / state_.occupancy);
   }
 
+  /* When time limit is used clamp the calculated number of samples to keep occupancy.
+   * This is because time limit causes the last render iteration to happen with less number of
+   * samples, which conflicts with the occupancy (lower number of samples causes lower
+   * occupancy, also the calculation is based on number of previously rendered samples).
+   *
+   * When time limit is not used the number of samples per render iteration is either increasing
+   * or stays the same, so there is no need to clamp number of samples calculated for occupancy.
+   */
+  if (time_limit_ && state_.start_render_time) {
+    const double remaining_render_time = max(
+        0.0, time_limit_ - (time_dt() - state_.start_render_time));
+    const double time_per_sample_average = path_trace_time_.get_average();
+    const double predicted_render_time = num_samples_to_occupy * time_per_sample_average;
+
+    if (predicted_render_time > remaining_render_time) {
+      num_samples_to_occupy = lround(num_samples_to_occupy *
+                                     (remaining_render_time / predicted_render_time));
+    }
+  }
+
   num_samples_to_render = max(num_samples_to_render,
                               min(num_samples_to_occupy, max_num_samples_to_render));
 }
-- 
cgit v1.2.3
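The clamp added above is small enough to illustrate with concrete numbers. Below is a sketch with a hypothetical free function standing in for the scheduler state; it is not Cycles API, it just mirrors the arithmetic of the hunk:

    #include <cmath>

    // Scale the occupancy-driven sample count down so that the predicted
    // render time fits into what remains of the time limit.
    int clamp_samples_to_time_limit(int num_samples_to_occupy,
                                    double remaining_render_time,
                                    double time_per_sample_average)
    {
      const double predicted_render_time = num_samples_to_occupy * time_per_sample_average;
      if (predicted_render_time > remaining_render_time) {
        num_samples_to_occupy = static_cast<int>(std::lround(
            num_samples_to_occupy * (remaining_render_time / predicted_render_time)));
      }
      return num_samples_to_occupy;
    }

For example, if occupancy asks for 32 samples, the average cost is 0.25 s per sample, and 2 s of the limit remain, the predicted 8 s overshoots, so the scheduler requests lround(32 * 2 / 8) = 8 samples instead. Without the clamp, that last round alone would run four times longer than the remaining budget, which is the overshoot the commit message describes.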
From f71813204c405821bb2efb8e4ad65d240d390eaf Mon Sep 17 00:00:00 2001
From: Sergey Sharybin
Date: Fri, 12 Nov 2021 16:12:05 +0100
Subject: Cycles: Don't tile if image area fits into tile area

Previously the check was based on the image dimensions: if either
dimension exceeded the tile size, tiling was used. This change makes it
so that if the image does not exceed the number of pixels in a tile, no
tiling is used.

This allows rendering widescreen images without tiling.

Differential Revision: https://developer.blender.org/D13206
---
 intern/cycles/session/session.cpp | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/intern/cycles/session/session.cpp b/intern/cycles/session/session.cpp
index 65c46d0dd3c..530baa8cafb 100644
--- a/intern/cycles/session/session.cpp
+++ b/intern/cycles/session/session.cpp
@@ -367,14 +367,26 @@ void Session::draw()
 
 int2 Session::get_effective_tile_size() const
 {
+  const int image_width = buffer_params_.width;
+  const int image_height = buffer_params_.height;
+
   /* No support yet for baking with tiles. */
   if (!params.use_auto_tile || scene->bake_manager->get_baking()) {
-    return make_int2(buffer_params_.width, buffer_params_.height);
+    return make_int2(image_width, image_height);
   }
 
-  /* TODO(sergey): Take available memory into account, and if there is enough memory do not tile
-   * and prefer optimal performance. */
+  const int64_t image_area = static_cast<int64_t>(image_width) * image_height;
+
+  /* TODO(sergey): Take available memory into account, and if there is enough memory do not
+   * tile and prefer optimal performance. */
+
   const int tile_size = tile_manager_.compute_render_tile_size(params.tile_size);
+  const int64_t actual_tile_area = static_cast<int64_t>(tile_size) * tile_size;
+
+  if (actual_tile_area >= image_area) {
+    return make_int2(image_width, image_height);
+  }
+
   return make_int2(tile_size, tile_size);
 }
-- 
cgit v1.2.3
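A worked example of the new area test, assuming a 2048-pixel tile size (the actual value comes from tile_manager_.compute_render_tile_size(), and the frame size here is purely illustrative):

    #include <cstdint>

    // A 3840x1080 widescreen frame is wider than a 2048x2048 tile, but it
    // holds fewer pixels, so the area-based check renders it in one pass.
    const std::int64_t image_area = std::int64_t{3840} * 1080;  // 4,147,200 pixels
    const std::int64_t tile_area = std::int64_t{2048} * 2048;   // 4,194,304 pixels
    const bool needs_tiling = tile_area < image_area;           // false

Under the previous dimension-based check, 3840 > 2048 would have forced tiling for this frame even though the whole image fits within a single tile's pixel budget.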