diff options
author | Brian Savery <bsavery> | 2021-09-28 17:51:14 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-09-28 20:18:55 +0300 |
commit | 044a77352f8a8a0e1f60190369d69ef26587b65f (patch) | |
tree | 22096da4d5214cbd7419d1a5e0dadc70e6cacea3 /intern/cycles/util | |
parent | 262b2118565826177133013c324212c66d882456 (diff) |
Cycles: add HIP device support for AMD GPUs
NOTE: this feature is not ready for user testing, and not yet enabled in daily
builds. It is being merged now for easier collaboration on development.
HIP is a heterogenous compute interface allowing C++ code to be executed on
GPUs similar to CUDA. It is intended to bring back AMD GPU rendering support
on Windows and Linux.
https://github.com/ROCm-Developer-Tools/HIP.
As of the time of writing, it should compile and run on Linux with existing
HIP compilers and driver runtimes. Publicly available compilers and drivers
for Windows will come later.
See task T91571 for more details on the current status and work remaining
to be done.
Credits:
Sayak Biswas (AMD)
Arya Rafii (AMD)
Brian Savery (AMD)
Differential Revision: https://developer.blender.org/D12578
Diffstat (limited to 'intern/cycles/util')
-rw-r--r-- | intern/cycles/util/util_atomic.h | 2 | ||||
-rw-r--r-- | intern/cycles/util/util_debug.cpp | 15 | ||||
-rw-r--r-- | intern/cycles/util/util_debug.h | 14 | ||||
-rw-r--r-- | intern/cycles/util/util_half.h | 24 | ||||
-rw-r--r-- | intern/cycles/util/util_math.h | 19 |
5 files changed, 66 insertions, 8 deletions
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h index de17efafcf2..faba411c769 100644 --- a/intern/cycles/util/util_atomic.h +++ b/intern/cycles/util/util_atomic.h @@ -34,7 +34,7 @@ #else /* __KERNEL_GPU__ */ -# ifdef __KERNEL_CUDA__ +# if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) # define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x)) diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp index 1d598725c84..2245668d02f 100644 --- a/intern/cycles/util/util_debug.cpp +++ b/intern/cycles/util/util_debug.cpp @@ -59,12 +59,23 @@ DebugFlags::CUDA::CUDA() : adaptive_compile(false) reset(); } +DebugFlags::HIP::HIP() : adaptive_compile(false) +{ + reset(); +} + void DebugFlags::CUDA::reset() { if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL) adaptive_compile = true; } +void DebugFlags::HIP::reset() +{ + if (getenv("CYCLES_HIP_ADAPTIVE_COMPILE") != NULL) + adaptive_compile = true; +} + DebugFlags::OptiX::OptiX() { reset(); @@ -103,6 +114,10 @@ std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags) os << "OptiX flags:\n" << " Debug : " << string_from_bool(debug_flags.optix.use_debug) << "\n"; + + os << "HIP flags:\n" + << " HIP streams : " << string_from_bool(debug_flags.hip.adaptive_compile) << "\n"; + return os; } diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h index a2acaea5675..81677201790 100644 --- a/intern/cycles/util/util_debug.h +++ b/intern/cycles/util/util_debug.h @@ -93,6 +93,17 @@ class DebugFlags { bool adaptive_compile; }; + /* Descriptor of HIP feature-set to be used. */ + struct HIP { + HIP(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Whether adaptive feature based runtime compile is enabled or not.*/ + bool adaptive_compile; + }; + /* Descriptor of OptiX feature-set to be used. */ struct OptiX { OptiX(); @@ -124,6 +135,9 @@ class DebugFlags { /* Requested OptiX flags. */ OptiX optix; + /* Requested HIP flags. */ + HIP hip; + private: DebugFlags(); diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h index d9edfec5da3..f36a492a1b0 100644 --- a/intern/cycles/util/util_half.h +++ b/intern/cycles/util/util_half.h @@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN /* Half Floats */ /* CUDA has its own half data type, no need to define then */ -#ifndef __KERNEL_CUDA__ +#if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__) /* Implementing this as a class rather than a typedef so that the compiler can tell it apart from * unsigned shorts. */ class half { @@ -59,7 +59,7 @@ struct half4 { half x, y, z, w; }; -#ifdef __KERNEL_CUDA__ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) ccl_device_inline void float4_store_half(half *h, float4 f) { @@ -73,6 +73,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f) ccl_device_inline void float4_store_half(half *h, float4 f) { + # ifndef __KERNEL_SSE2__ for (int i = 0; i < 4; i++) { /* optimized float to half for pixels: @@ -109,6 +110,8 @@ ccl_device_inline void float4_store_half(half *h, float4 f) # endif } +# ifndef __KERNEL_HIP__ + ccl_device_inline float half_to_float(half h) { float f; @@ -117,6 +120,23 @@ ccl_device_inline float half_to_float(half h) return f; } +# else + +ccl_device_inline float half_to_float(std::uint32_t a) noexcept +{ + + std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U; + + std::uint32_t v = __float_as_uint(__uint_as_float(u) * + __uint_as_float(0x77800000U) /*0x1.0p+112f*/) + + 0x38000000U; + + u = (a & 0x7fff) != 0 ? v : u; + + return __uint_as_float(u) * __uint_as_float(0x07800000U) /*0x1.0p-112f*/; +} + +# endif /* __KERNEL_HIP__ */ ccl_device_inline float4 half4_to_float4(half4 h) { diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 6d728dde679..cb1e94c838c 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -26,6 +26,10 @@ # include <cmath> #endif +#ifdef __HIP__ +# include <hip/hip_vector_types.h> +#endif + #include <float.h> #include <math.h> #include <stdio.h> @@ -83,7 +87,8 @@ CCL_NAMESPACE_BEGIN /* Scalar */ -#ifdef _WIN32 +#ifndef __HIP__ +# ifdef _WIN32 ccl_device_inline float fmaxf(float a, float b) { return (a > b) ? a : b; @@ -93,7 +98,9 @@ ccl_device_inline float fminf(float a, float b) { return (a < b) ? a : b; } -#endif /* _WIN32 */ + +# endif /* _WIN32 */ +#endif /* __HIP__ */ #ifndef __KERNEL_GPU__ using std::isfinite; @@ -199,6 +206,7 @@ ccl_device_inline uint as_uint(float f) return u.i; } +#ifndef __HIP__ ccl_device_inline int __float_as_int(float f) { union { @@ -238,6 +246,7 @@ ccl_device_inline float __uint_as_float(uint i) u.i = i; return u.f; } +#endif ccl_device_inline int4 __float4_as_int4(float4 f) { @@ -669,7 +678,7 @@ ccl_device float bits_to_01(uint bits) ccl_device_inline uint count_leading_zeros(uint x) { -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) return __clz(x); #else assert(x != 0); @@ -685,7 +694,7 @@ ccl_device_inline uint count_leading_zeros(uint x) ccl_device_inline uint count_trailing_zeros(uint x) { -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) return (__ffs(x) - 1); #else assert(x != 0); @@ -701,7 +710,7 @@ ccl_device_inline uint count_trailing_zeros(uint x) ccl_device_inline uint find_first_set(uint x) { -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) return __ffs(x); #else # ifdef _MSC_VER |