diff options
Diffstat (limited to 'intern/cycles/util')
-rw-r--r-- | intern/cycles/util/util_atomic.h | 2 | ||||
-rw-r--r-- | intern/cycles/util/util_debug.cpp | 15 | ||||
-rw-r--r-- | intern/cycles/util/util_debug.h | 16 | ||||
-rw-r--r-- | intern/cycles/util/util_half.h | 24 | ||||
-rw-r--r-- | intern/cycles/util/util_math.h | 19 | ||||
-rw-r--r-- | intern/cycles/util/util_math_intersect.h | 2 | ||||
-rw-r--r-- | intern/cycles/util/util_progress.h | 24 |
7 files changed, 80 insertions, 22 deletions
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h index de17efafcf2..faba411c769 100644 --- a/intern/cycles/util/util_atomic.h +++ b/intern/cycles/util/util_atomic.h @@ -34,7 +34,7 @@ #else /* __KERNEL_GPU__ */ -# ifdef __KERNEL_CUDA__ +# if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) # define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x)) diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp index 1d598725c84..2245668d02f 100644 --- a/intern/cycles/util/util_debug.cpp +++ b/intern/cycles/util/util_debug.cpp @@ -59,12 +59,23 @@ DebugFlags::CUDA::CUDA() : adaptive_compile(false) reset(); } +DebugFlags::HIP::HIP() : adaptive_compile(false) +{ + reset(); +} + void DebugFlags::CUDA::reset() { if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL) adaptive_compile = true; } +void DebugFlags::HIP::reset() +{ + if (getenv("CYCLES_HIP_ADAPTIVE_COMPILE") != NULL) + adaptive_compile = true; +} + DebugFlags::OptiX::OptiX() { reset(); @@ -103,6 +114,10 @@ std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags) os << "OptiX flags:\n" << " Debug : " << string_from_bool(debug_flags.optix.use_debug) << "\n"; + + os << "HIP flags:\n" + << " HIP streams : " << string_from_bool(debug_flags.hip.adaptive_compile) << "\n"; + return os; } diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h index 99e2723180c..81677201790 100644 --- a/intern/cycles/util/util_debug.h +++ b/intern/cycles/util/util_debug.h @@ -89,7 +89,18 @@ class DebugFlags { void reset(); /* Whether adaptive feature based runtime compile is enabled or not. - * Requires the CUDA Toolkit and only works on Linux atm. */ + * Requires the CUDA Toolkit and only works on Linux at the moment. */ + bool adaptive_compile; + }; + + /* Descriptor of HIP feature-set to be used. */ + struct HIP { + HIP(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Whether adaptive feature based runtime compile is enabled or not.*/ bool adaptive_compile; }; @@ -124,6 +135,9 @@ class DebugFlags { /* Requested OptiX flags. */ OptiX optix; + /* Requested HIP flags. */ + HIP hip; + private: DebugFlags(); diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h index d9edfec5da3..f36a492a1b0 100644 --- a/intern/cycles/util/util_half.h +++ b/intern/cycles/util/util_half.h @@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN /* Half Floats */ /* CUDA has its own half data type, no need to define then */ -#ifndef __KERNEL_CUDA__ +#if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__) /* Implementing this as a class rather than a typedef so that the compiler can tell it apart from * unsigned shorts. */ class half { @@ -59,7 +59,7 @@ struct half4 { half x, y, z, w; }; -#ifdef __KERNEL_CUDA__ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) ccl_device_inline void float4_store_half(half *h, float4 f) { @@ -73,6 +73,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f) ccl_device_inline void float4_store_half(half *h, float4 f) { + # ifndef __KERNEL_SSE2__ for (int i = 0; i < 4; i++) { /* optimized float to half for pixels: @@ -109,6 +110,8 @@ ccl_device_inline void float4_store_half(half *h, float4 f) # endif } +# ifndef __KERNEL_HIP__ + ccl_device_inline float half_to_float(half h) { float f; @@ -117,6 +120,23 @@ ccl_device_inline float half_to_float(half h) return f; } +# else + +ccl_device_inline float half_to_float(std::uint32_t a) noexcept +{ + + std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U; + + std::uint32_t v = __float_as_uint(__uint_as_float(u) * + __uint_as_float(0x77800000U) /*0x1.0p+112f*/) + + 0x38000000U; + + u = (a & 0x7fff) != 0 ? v : u; + + return __uint_as_float(u) * __uint_as_float(0x07800000U) /*0x1.0p-112f*/; +} + +# endif /* __KERNEL_HIP__ */ ccl_device_inline float4 half4_to_float4(half4 h) { diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 6d728dde679..cb1e94c838c 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -26,6 +26,10 @@ # include <cmath> #endif +#ifdef __HIP__ +# include <hip/hip_vector_types.h> +#endif + #include <float.h> #include <math.h> #include <stdio.h> @@ -83,7 +87,8 @@ CCL_NAMESPACE_BEGIN /* Scalar */ -#ifdef _WIN32 +#ifndef __HIP__ +# ifdef _WIN32 ccl_device_inline float fmaxf(float a, float b) { return (a > b) ? a : b; @@ -93,7 +98,9 @@ ccl_device_inline float fminf(float a, float b) { return (a < b) ? a : b; } -#endif /* _WIN32 */ + +# endif /* _WIN32 */ +#endif /* __HIP__ */ #ifndef __KERNEL_GPU__ using std::isfinite; @@ -199,6 +206,7 @@ ccl_device_inline uint as_uint(float f) return u.i; } +#ifndef __HIP__ ccl_device_inline int __float_as_int(float f) { union { @@ -238,6 +246,7 @@ ccl_device_inline float __uint_as_float(uint i) u.i = i; return u.f; } +#endif ccl_device_inline int4 __float4_as_int4(float4 f) { @@ -669,7 +678,7 @@ ccl_device float bits_to_01(uint bits) ccl_device_inline uint count_leading_zeros(uint x) { -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) return __clz(x); #else assert(x != 0); @@ -685,7 +694,7 @@ ccl_device_inline uint count_leading_zeros(uint x) ccl_device_inline uint count_trailing_zeros(uint x) { -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) return (__ffs(x) - 1); #else assert(x != 0); @@ -701,7 +710,7 @@ ccl_device_inline uint count_trailing_zeros(uint x) ccl_device_inline uint find_first_set(uint x) { -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) return __ffs(x); #else # ifdef _MSC_VER diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h index fa3a541eea9..fd0c9124345 100644 --- a/intern/cycles/util/util_math_intersect.h +++ b/intern/cycles/util/util_math_intersect.h @@ -40,7 +40,7 @@ ccl_device bool ray_sphere_intersect(float3 ray_P, /* Ray points away from sphere. */ return false; } - const float dsq = tsq - tp * tp; /* pythagoras */ + const float dsq = tsq - tp * tp; /* Pythagoras. */ if (dsq > radiussq) { /* Closest point on ray outside sphere. */ return false; diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h index dca8d3d0ab5..176ee11e1e9 100644 --- a/intern/cycles/util/util_progress.h +++ b/intern/cycles/util/util_progress.h @@ -100,7 +100,7 @@ class Progress { cancel = true; } - bool get_cancel() + bool get_cancel() const { if (!cancel && cancel_cb) cancel_cb(); @@ -108,7 +108,7 @@ class Progress { return cancel; } - string get_cancel_message() + string get_cancel_message() const { thread_scoped_lock lock(progress_mutex); return cancel_message; @@ -130,12 +130,12 @@ class Progress { cancel = true; } - bool get_error() + bool get_error() const { return error; } - string get_error_message() + string get_error_message() const { thread_scoped_lock lock(progress_mutex); return error_message; @@ -168,7 +168,7 @@ class Progress { } } - void get_time(double &total_time_, double &render_time_) + void get_time(double &total_time_, double &render_time_) const { thread_scoped_lock lock(progress_mutex); @@ -200,7 +200,7 @@ class Progress { total_pixel_samples = total_pixel_samples_; } - float get_progress() + float get_progress() const { thread_scoped_lock lock(progress_mutex); @@ -236,7 +236,7 @@ class Progress { } } - int get_current_sample() + int get_current_sample() const { thread_scoped_lock lock(progress_mutex); /* Note that the value here always belongs to the last tile that updated, @@ -244,13 +244,13 @@ class Progress { return current_tile_sample; } - int get_rendered_tiles() + int get_rendered_tiles() const { thread_scoped_lock lock(progress_mutex); return rendered_tiles; } - int get_denoised_tiles() + int get_denoised_tiles() const { thread_scoped_lock lock(progress_mutex); return denoised_tiles; @@ -300,7 +300,7 @@ class Progress { set_update(); } - void get_status(string &status_, string &substatus_) + void get_status(string &status_, string &substatus_) const { thread_scoped_lock lock(progress_mutex); @@ -330,8 +330,8 @@ class Progress { } protected: - thread_mutex progress_mutex; - thread_mutex update_mutex; + mutable thread_mutex progress_mutex; + mutable thread_mutex update_mutex; function<void()> update_cb; function<void()> cancel_cb; |