7 files changed, 80 insertions, 22 deletions
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index de17efafcf2..faba411c769 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -34,7 +34,7 @@
 
 #else /* __KERNEL_GPU__ */
 
-#  ifdef __KERNEL_CUDA__
+#  if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
 
 #    define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x))
 
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index 1d598725c84..2245668d02f 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -59,12 +59,23 @@ DebugFlags::CUDA::CUDA() : adaptive_compile(false)
   reset();
 }
 
+DebugFlags::HIP::HIP() : adaptive_compile(false)
+{
+  reset(); /* Applies the CYCLES_HIP_ADAPTIVE_COMPILE environment override, if set. */
+}
+
 void DebugFlags::CUDA::reset()
 {
   if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL)
     adaptive_compile = true;
 }
 
+void DebugFlags::HIP::reset()
+{
+  /* Assign instead of only setting to true, so reset() also clears a previously enabled flag. */
+  adaptive_compile = (getenv("CYCLES_HIP_ADAPTIVE_COMPILE") != NULL);
+}
+
 DebugFlags::OptiX::OptiX()
 {
   reset();
@@ -103,6 +114,10 @@ std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags)
 
   os << "OptiX flags:\n"
      << "  Debug : " << string_from_bool(debug_flags.optix.use_debug) << "\n";
+
+  os << "HIP flags:\n"
+     << "  Adaptive Compile : " << string_from_bool(debug_flags.hip.adaptive_compile) << "\n";
+
   return os;
 }
 
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index 99e2723180c..81677201790 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -89,7 +89,18 @@ class DebugFlags {
     void reset();
 
     /* Whether adaptive feature based runtime compile is enabled or not.
-     * Requires the CUDA Toolkit and only works on Linux atm. */
+     * Requires the CUDA Toolkit and only works on Linux at the moment. */
+    bool adaptive_compile;
+  };
+
+  /* Descriptor of HIP feature-set to be used. */
+  struct HIP {
+    HIP(); /* Starts with adaptive compile disabled, then calls reset(). */
+
+    /* Reset flags; setting CYCLES_HIP_ADAPTIVE_COMPILE in the environment enables the flag. */
+    void reset();
+
+    /* Whether adaptive feature based runtime compile is enabled or not. */
     bool adaptive_compile;
   };
 
@@ -124,6 +135,9 @@ class DebugFlags {
   /* Requested OptiX flags. */
   OptiX optix;
 
+  /* Requested HIP flags. */
+  HIP hip;
+
  private:
   DebugFlags();
 
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index d9edfec5da3..f36a492a1b0 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN
 /* Half Floats */
 
 /* CUDA has its own half data type, no need to define then */
-#ifndef __KERNEL_CUDA__
+#if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__)
 /* Implementing this as a class rather than a typedef so that the compiler can tell it apart from
  * unsigned shorts. */
 class half {
@@ -59,7 +59,7 @@ struct half4 {
   half x, y, z, w;
 };
 
-#ifdef __KERNEL_CUDA__
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
 
 ccl_device_inline void float4_store_half(half *h, float4 f)
 {
@@ -73,6 +73,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f)
 
 ccl_device_inline void float4_store_half(half *h, float4 f)
 {
+
 #  ifndef __KERNEL_SSE2__
   for (int i = 0; i < 4; i++) {
     /* optimized float to half for pixels:
@@ -109,6 +110,8 @@ ccl_device_inline void float4_store_half(half *h, float4 f)
 #  endif
 }
 
+#  ifndef __KERNEL_HIP__
+
 ccl_device_inline float half_to_float(half h)
 {
   float f;
@@ -117,6 +120,23 @@ ccl_device_inline float half_to_float(half h)
 
   return f;
 }
+#  else
+
+ccl_device_inline float half_to_float(std::uint32_t a) noexcept
+{
+  /* Bit-level half to float conversion; assumes the half bits sit in the low 16 bits of `a`. */
+  std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
+  /* `u`: sign carried up into bit 31 by the add, exponent and mantissa bits start at bit 13. */
+  std::uint32_t v = __float_as_uint(__uint_as_float(u) *
+                                    __uint_as_float(0x77800000U) /*0x1.0p+112f*/) +
+                    0x38000000U;
+  /* Keep the sign-only `u` when exponent and mantissa bits are all zero (signed zero). */
+  u = (a & 0x7fff) != 0 ? v : u;
+  /* Net exponent change is +112 (x 2^112, + (112 << 23), x 2^-112): the float/half bias gap. */
+  return __uint_as_float(u) * __uint_as_float(0x07800000U) /*0x1.0p-112f*/;
+}
+
+#  endif /* __KERNEL_HIP__ */
 
 ccl_device_inline float4 half4_to_float4(half4 h)
 {
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 6d728dde679..cb1e94c838c 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -26,6 +26,10 @@
 #  include <cmath>
 #endif
 
+#ifdef __HIP__
+#  include <hip/hip_vector_types.h>
+#endif
+
 #include <float.h>
 #include <math.h>
 #include <stdio.h>
@@ -83,7 +87,8 @@ CCL_NAMESPACE_BEGIN
 
 /* Scalar */
 
-#ifdef _WIN32
+#ifndef __HIP__
+#  ifdef _WIN32
 ccl_device_inline float fmaxf(float a, float b)
 {
   return (a > b) ? a : b;
@@ -93,7 +98,9 @@ ccl_device_inline float fminf(float a, float b)
 {
   return (a < b) ? a : b;
 }
-#endif /* _WIN32 */
+
+#  endif /* _WIN32 */
+#endif   /* __HIP__ */
 
 #ifndef __KERNEL_GPU__
 using std::isfinite;
@@ -199,6 +206,7 @@ ccl_device_inline uint as_uint(float f)
   return u.i;
 }
 
+#ifndef __HIP__
 ccl_device_inline int __float_as_int(float f)
 {
   union {
@@ -238,6 +246,7 @@ ccl_device_inline float __uint_as_float(uint i)
   u.i = i;
   return u.f;
 }
+#endif
 
 ccl_device_inline int4 __float4_as_int4(float4 f)
 {
@@ -669,7 +678,7 @@ ccl_device float bits_to_01(uint bits)
 
 ccl_device_inline uint count_leading_zeros(uint x)
 {
-#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
   return __clz(x);
 #else
   assert(x != 0);
@@ -685,7 +694,7 @@ ccl_device_inline uint count_leading_zeros(uint x)
 
 ccl_device_inline uint count_trailing_zeros(uint x)
 {
-#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
   return (__ffs(x) - 1);
 #else
   assert(x != 0);
@@ -701,7 +710,7 @@ ccl_device_inline uint count_trailing_zeros(uint x)
 
 ccl_device_inline uint find_first_set(uint x)
 {
-#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
   return __ffs(x);
 #else
 #  ifdef _MSC_VER
diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h
index fa3a541eea9..fd0c9124345 100644
--- a/intern/cycles/util/util_math_intersect.h
+++ b/intern/cycles/util/util_math_intersect.h
@@ -40,7 +40,7 @@ ccl_device bool ray_sphere_intersect(float3 ray_P,
       /* Ray  points away from sphere. */
       return false;
     }
-    const float dsq = tsq - tp * tp; /* pythagoras */
+    const float dsq = tsq - tp * tp; /* Pythagoras. */
     if (dsq > radiussq) {
       /* Closest point on ray outside sphere. */
       return false;
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index dca8d3d0ab5..176ee11e1e9 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -100,7 +100,7 @@ class Progress {
     cancel = true;
   }
 
-  bool get_cancel()
+  bool get_cancel() const
   {
     if (!cancel && cancel_cb)
       cancel_cb();
@@ -108,7 +108,7 @@ class Progress {
     return cancel;
   }
 
-  string get_cancel_message()
+  string get_cancel_message() const
   {
     thread_scoped_lock lock(progress_mutex);
     return cancel_message;
@@ -130,12 +130,12 @@ class Progress {
     cancel = true;
   }
 
-  bool get_error()
+  bool get_error() const
   {
     return error;
   }
 
-  string get_error_message()
+  string get_error_message() const
   {
     thread_scoped_lock lock(progress_mutex);
     return error_message;
@@ -168,7 +168,7 @@ class Progress {
     }
   }
 
-  void get_time(double &total_time_, double &render_time_)
+  void get_time(double &total_time_, double &render_time_) const
   {
     thread_scoped_lock lock(progress_mutex);
 
@@ -200,7 +200,7 @@ class Progress {
     total_pixel_samples = total_pixel_samples_;
   }
 
-  float get_progress()
+  float get_progress() const
   {
     thread_scoped_lock lock(progress_mutex);
 
@@ -236,7 +236,7 @@ class Progress {
     }
   }
 
-  int get_current_sample()
+  int get_current_sample() const
   {
     thread_scoped_lock lock(progress_mutex);
     /* Note that the value here always belongs to the last tile that updated,
@@ -244,13 +244,13 @@ class Progress {
     return current_tile_sample;
   }
 
-  int get_rendered_tiles()
+  int get_rendered_tiles() const
   {
     thread_scoped_lock lock(progress_mutex);
     return rendered_tiles;
   }
 
-  int get_denoised_tiles()
+  int get_denoised_tiles() const
   {
     thread_scoped_lock lock(progress_mutex);
     return denoised_tiles;
@@ -300,7 +300,7 @@ class Progress {
     set_update();
   }
 
-  void get_status(string &status_, string &substatus_)
+  void get_status(string &status_, string &substatus_) const
   {
     thread_scoped_lock lock(progress_mutex);
 
@@ -330,8 +330,8 @@ class Progress {
   }
 
  protected:
-  thread_mutex progress_mutex;
-  thread_mutex update_mutex;
+  mutable thread_mutex progress_mutex;
+  mutable thread_mutex update_mutex;
   function<void()> update_cb;
   function<void()> cancel_cb;