Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/util_atomic.h2
-rw-r--r--intern/cycles/util/util_debug.cpp15
-rw-r--r--intern/cycles/util/util_debug.h14
-rw-r--r--intern/cycles/util/util_half.h24
-rw-r--r--intern/cycles/util/util_math.h19
5 files changed, 66 insertions, 8 deletions
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index de17efafcf2..faba411c769 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -34,7 +34,7 @@
#else /* __KERNEL_GPU__ */
-# ifdef __KERNEL_CUDA__
+# if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
# define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x))
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index 1d598725c84..2245668d02f 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -59,12 +59,23 @@ DebugFlags::CUDA::CUDA() : adaptive_compile(false)
reset();
}
+DebugFlags::HIP::HIP() : adaptive_compile(false)
+{
+ reset();
+}
+
void DebugFlags::CUDA::reset()
{
if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL)
adaptive_compile = true;
}
+void DebugFlags::HIP::reset()
+{
+ if (getenv("CYCLES_HIP_ADAPTIVE_COMPILE") != NULL)
+ adaptive_compile = true;
+}
+
DebugFlags::OptiX::OptiX()
{
reset();
@@ -103,6 +114,10 @@ std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags)
os << "OptiX flags:\n"
<< " Debug : " << string_from_bool(debug_flags.optix.use_debug) << "\n";
+
+ os << "HIP flags:\n"
+ << " HIP streams : " << string_from_bool(debug_flags.hip.adaptive_compile) << "\n";
+
return os;
}
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index a2acaea5675..81677201790 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -93,6 +93,17 @@ class DebugFlags {
bool adaptive_compile;
};
+ /* Descriptor of HIP feature-set to be used. */
+ struct HIP {
+ HIP();
+
+ /* Reset flags to their defaults. */
+ void reset();
+
+ /* Whether adaptive feature based runtime compile is enabled or not.*/
+ bool adaptive_compile;
+ };
+
/* Descriptor of OptiX feature-set to be used. */
struct OptiX {
OptiX();
@@ -124,6 +135,9 @@ class DebugFlags {
/* Requested OptiX flags. */
OptiX optix;
+ /* Requested HIP flags. */
+ HIP hip;
+
private:
DebugFlags();
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index d9edfec5da3..f36a492a1b0 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN
/* Half Floats */
/* CUDA has its own half data type, no need to define then */
-#ifndef __KERNEL_CUDA__
+#if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__)
/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from
* unsigned shorts. */
class half {
@@ -59,7 +59,7 @@ struct half4 {
half x, y, z, w;
};
-#ifdef __KERNEL_CUDA__
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
ccl_device_inline void float4_store_half(half *h, float4 f)
{
@@ -73,6 +73,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f)
ccl_device_inline void float4_store_half(half *h, float4 f)
{
+
# ifndef __KERNEL_SSE2__
for (int i = 0; i < 4; i++) {
/* optimized float to half for pixels:
@@ -109,6 +110,8 @@ ccl_device_inline void float4_store_half(half *h, float4 f)
# endif
}
+# ifndef __KERNEL_HIP__
+
ccl_device_inline float half_to_float(half h)
{
float f;
@@ -117,6 +120,23 @@ ccl_device_inline float half_to_float(half h)
return f;
}
+# else
+
+ccl_device_inline float half_to_float(std::uint32_t a) noexcept
+{
+
+ std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
+
+ std::uint32_t v = __float_as_uint(__uint_as_float(u) *
+ __uint_as_float(0x77800000U) /*0x1.0p+112f*/) +
+ 0x38000000U;
+
+ u = (a & 0x7fff) != 0 ? v : u;
+
+ return __uint_as_float(u) * __uint_as_float(0x07800000U) /*0x1.0p-112f*/;
+}
+
+# endif /* __KERNEL_HIP__ */
ccl_device_inline float4 half4_to_float4(half4 h)
{
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 6d728dde679..cb1e94c838c 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -26,6 +26,10 @@
# include <cmath>
#endif
+#ifdef __HIP__
+# include <hip/hip_vector_types.h>
+#endif
+
#include <float.h>
#include <math.h>
#include <stdio.h>
@@ -83,7 +87,8 @@ CCL_NAMESPACE_BEGIN
/* Scalar */
-#ifdef _WIN32
+#ifndef __HIP__
+# ifdef _WIN32
ccl_device_inline float fmaxf(float a, float b)
{
return (a > b) ? a : b;
@@ -93,7 +98,9 @@ ccl_device_inline float fminf(float a, float b)
{
return (a < b) ? a : b;
}
-#endif /* _WIN32 */
+
+# endif /* _WIN32 */
+#endif /* __HIP__ */
#ifndef __KERNEL_GPU__
using std::isfinite;
@@ -199,6 +206,7 @@ ccl_device_inline uint as_uint(float f)
return u.i;
}
+#ifndef __HIP__
ccl_device_inline int __float_as_int(float f)
{
union {
@@ -238,6 +246,7 @@ ccl_device_inline float __uint_as_float(uint i)
u.i = i;
return u.f;
}
+#endif
ccl_device_inline int4 __float4_as_int4(float4 f)
{
@@ -669,7 +678,7 @@ ccl_device float bits_to_01(uint bits)
ccl_device_inline uint count_leading_zeros(uint x)
{
-#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
return __clz(x);
#else
assert(x != 0);
@@ -685,7 +694,7 @@ ccl_device_inline uint count_leading_zeros(uint x)
ccl_device_inline uint count_trailing_zeros(uint x)
{
-#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
return (__ffs(x) - 1);
#else
assert(x != 0);
@@ -701,7 +710,7 @@ ccl_device_inline uint count_trailing_zeros(uint x)
ccl_device_inline uint find_first_set(uint x)
{
-#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
return __ffs(x);
#else
# ifdef _MSC_VER