Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/util_atomic.h2
-rw-r--r--intern/cycles/util/util_debug.cpp15
-rw-r--r--intern/cycles/util/util_debug.h16
-rw-r--r--intern/cycles/util/util_half.h24
-rw-r--r--intern/cycles/util/util_math.h19
-rw-r--r--intern/cycles/util/util_math_intersect.h2
-rw-r--r--intern/cycles/util/util_progress.h24
7 files changed, 80 insertions, 22 deletions
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index de17efafcf2..faba411c769 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -34,7 +34,7 @@
#else /* __KERNEL_GPU__ */
-# ifdef __KERNEL_CUDA__
+# if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
# define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x))
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index 1d598725c84..2245668d02f 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -59,12 +59,23 @@ DebugFlags::CUDA::CUDA() : adaptive_compile(false)
reset();
}
+DebugFlags::HIP::HIP() : adaptive_compile(false)
+{
+ reset();
+}
+
void DebugFlags::CUDA::reset()
{
if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL)
adaptive_compile = true;
}
+void DebugFlags::HIP::reset()
+{
+ if (getenv("CYCLES_HIP_ADAPTIVE_COMPILE") != NULL)
+ adaptive_compile = true;
+}
+
DebugFlags::OptiX::OptiX()
{
reset();
@@ -103,6 +114,10 @@ std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags)
os << "OptiX flags:\n"
<< " Debug : " << string_from_bool(debug_flags.optix.use_debug) << "\n";
+
+ os << "HIP flags:\n"
+ << " HIP streams : " << string_from_bool(debug_flags.hip.adaptive_compile) << "\n";
+
return os;
}
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index 99e2723180c..81677201790 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -89,7 +89,18 @@ class DebugFlags {
void reset();
/* Whether adaptive feature based runtime compile is enabled or not.
- * Requires the CUDA Toolkit and only works on Linux atm. */
+ * Requires the CUDA Toolkit and only works on Linux at the moment. */
+ bool adaptive_compile;
+ };
+
+ /* Descriptor of HIP feature-set to be used. */
+ struct HIP {
+ HIP();
+
+ /* Reset flags to their defaults. */
+ void reset();
+
+ /* Whether adaptive feature based runtime compile is enabled or not.*/
bool adaptive_compile;
};
@@ -124,6 +135,9 @@ class DebugFlags {
/* Requested OptiX flags. */
OptiX optix;
+ /* Requested HIP flags. */
+ HIP hip;
+
private:
DebugFlags();
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index d9edfec5da3..f36a492a1b0 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN
/* Half Floats */
/* CUDA has its own half data type, no need to define then */
-#ifndef __KERNEL_CUDA__
+#if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__)
/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from
* unsigned shorts. */
class half {
@@ -59,7 +59,7 @@ struct half4 {
half x, y, z, w;
};
-#ifdef __KERNEL_CUDA__
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
ccl_device_inline void float4_store_half(half *h, float4 f)
{
@@ -73,6 +73,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f)
ccl_device_inline void float4_store_half(half *h, float4 f)
{
+
# ifndef __KERNEL_SSE2__
for (int i = 0; i < 4; i++) {
/* optimized float to half for pixels:
@@ -109,6 +110,8 @@ ccl_device_inline void float4_store_half(half *h, float4 f)
# endif
}
+# ifndef __KERNEL_HIP__
+
ccl_device_inline float half_to_float(half h)
{
float f;
@@ -117,6 +120,23 @@ ccl_device_inline float half_to_float(half h)
return f;
}
+# else
+
+ccl_device_inline float half_to_float(std::uint32_t a) noexcept
+{
+
+ std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
+
+ std::uint32_t v = __float_as_uint(__uint_as_float(u) *
+ __uint_as_float(0x77800000U) /*0x1.0p+112f*/) +
+ 0x38000000U;
+
+ u = (a & 0x7fff) != 0 ? v : u;
+
+ return __uint_as_float(u) * __uint_as_float(0x07800000U) /*0x1.0p-112f*/;
+}
+
+# endif /* __KERNEL_HIP__ */
ccl_device_inline float4 half4_to_float4(half4 h)
{
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 6d728dde679..cb1e94c838c 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -26,6 +26,10 @@
# include <cmath>
#endif
+#ifdef __HIP__
+# include <hip/hip_vector_types.h>
+#endif
+
#include <float.h>
#include <math.h>
#include <stdio.h>
@@ -83,7 +87,8 @@ CCL_NAMESPACE_BEGIN
/* Scalar */
-#ifdef _WIN32
+#ifndef __HIP__
+# ifdef _WIN32
ccl_device_inline float fmaxf(float a, float b)
{
return (a > b) ? a : b;
@@ -93,7 +98,9 @@ ccl_device_inline float fminf(float a, float b)
{
return (a < b) ? a : b;
}
-#endif /* _WIN32 */
+
+# endif /* _WIN32 */
+#endif /* __HIP__ */
#ifndef __KERNEL_GPU__
using std::isfinite;
@@ -199,6 +206,7 @@ ccl_device_inline uint as_uint(float f)
return u.i;
}
+#ifndef __HIP__
ccl_device_inline int __float_as_int(float f)
{
union {
@@ -238,6 +246,7 @@ ccl_device_inline float __uint_as_float(uint i)
u.i = i;
return u.f;
}
+#endif
ccl_device_inline int4 __float4_as_int4(float4 f)
{
@@ -669,7 +678,7 @@ ccl_device float bits_to_01(uint bits)
ccl_device_inline uint count_leading_zeros(uint x)
{
-#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
return __clz(x);
#else
assert(x != 0);
@@ -685,7 +694,7 @@ ccl_device_inline uint count_leading_zeros(uint x)
ccl_device_inline uint count_trailing_zeros(uint x)
{
-#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
return (__ffs(x) - 1);
#else
assert(x != 0);
@@ -701,7 +710,7 @@ ccl_device_inline uint count_trailing_zeros(uint x)
ccl_device_inline uint find_first_set(uint x)
{
-#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
return __ffs(x);
#else
# ifdef _MSC_VER
diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h
index fa3a541eea9..fd0c9124345 100644
--- a/intern/cycles/util/util_math_intersect.h
+++ b/intern/cycles/util/util_math_intersect.h
@@ -40,7 +40,7 @@ ccl_device bool ray_sphere_intersect(float3 ray_P,
/* Ray points away from sphere. */
return false;
}
- const float dsq = tsq - tp * tp; /* pythagoras */
+ const float dsq = tsq - tp * tp; /* Pythagoras. */
if (dsq > radiussq) {
/* Closest point on ray outside sphere. */
return false;
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index dca8d3d0ab5..176ee11e1e9 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -100,7 +100,7 @@ class Progress {
cancel = true;
}
- bool get_cancel()
+ bool get_cancel() const
{
if (!cancel && cancel_cb)
cancel_cb();
@@ -108,7 +108,7 @@ class Progress {
return cancel;
}
- string get_cancel_message()
+ string get_cancel_message() const
{
thread_scoped_lock lock(progress_mutex);
return cancel_message;
@@ -130,12 +130,12 @@ class Progress {
cancel = true;
}
- bool get_error()
+ bool get_error() const
{
return error;
}
- string get_error_message()
+ string get_error_message() const
{
thread_scoped_lock lock(progress_mutex);
return error_message;
@@ -168,7 +168,7 @@ class Progress {
}
}
- void get_time(double &total_time_, double &render_time_)
+ void get_time(double &total_time_, double &render_time_) const
{
thread_scoped_lock lock(progress_mutex);
@@ -200,7 +200,7 @@ class Progress {
total_pixel_samples = total_pixel_samples_;
}
- float get_progress()
+ float get_progress() const
{
thread_scoped_lock lock(progress_mutex);
@@ -236,7 +236,7 @@ class Progress {
}
}
- int get_current_sample()
+ int get_current_sample() const
{
thread_scoped_lock lock(progress_mutex);
/* Note that the value here always belongs to the last tile that updated,
@@ -244,13 +244,13 @@ class Progress {
return current_tile_sample;
}
- int get_rendered_tiles()
+ int get_rendered_tiles() const
{
thread_scoped_lock lock(progress_mutex);
return rendered_tiles;
}
- int get_denoised_tiles()
+ int get_denoised_tiles() const
{
thread_scoped_lock lock(progress_mutex);
return denoised_tiles;
@@ -300,7 +300,7 @@ class Progress {
set_update();
}
- void get_status(string &status_, string &substatus_)
+ void get_status(string &status_, string &substatus_) const
{
thread_scoped_lock lock(progress_mutex);
@@ -330,8 +330,8 @@ class Progress {
}
protected:
- thread_mutex progress_mutex;
- thread_mutex update_mutex;
+ mutable thread_mutex progress_mutex;
+ mutable thread_mutex update_mutex;
function<void()> update_cb;
function<void()> cancel_cb;