Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brian Savery <bsavery> 2021-09-28 17:51:14 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2021-09-28 20:18:55 +0300
commit: 044a77352f8a8a0e1f60190369d69ef26587b65f (patch)
tree: 22096da4d5214cbd7419d1a5e0dadc70e6cacea3 /intern/cycles/kernel/device/gpu
parent: 262b2118565826177133013c324212c66d882456 (diff)
Cycles: add HIP device support for AMD GPUs
NOTE: this feature is not ready for user testing, and not yet enabled in daily builds. It is being merged now for easier collaboration on development. HIP is a heterogeneous compute interface allowing C++ code to be executed on GPUs similar to CUDA. It is intended to bring back AMD GPU rendering support on Windows and Linux. See https://github.com/ROCm-Developer-Tools/HIP for details. As of the time of writing, it should compile and run on Linux with existing HIP compilers and driver runtimes. Publicly available compilers and drivers for Windows will come later. See task T91571 for more details on the current status and work remaining to be done. Credits: Sayak Biswas (AMD), Arya Rafii (AMD), Brian Savery (AMD). Differential Revision: https://developer.blender.org/D12578
Diffstat (limited to 'intern/cycles/kernel/device/gpu')
-rw-r--r-- intern/cycles/kernel/device/gpu/parallel_active_index.h | 6
-rw-r--r-- intern/cycles/kernel/device/gpu/parallel_prefix_sum.h | 6
-rw-r--r-- intern/cycles/kernel/device/gpu/parallel_reduce.h | 6
-rw-r--r-- intern/cycles/kernel/device/gpu/parallel_sorted_index.h | 6
4 files changed, 20 insertions, 4 deletions
diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h
index a68d1d80c7d..db4a4bf71e0 100644
--- a/intern/cycles/kernel/device/gpu/parallel_active_index.h
+++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h
@@ -25,7 +25,11 @@ CCL_NAMESPACE_BEGIN
#include "util/util_atomic.h"
-#define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512
+#ifdef __HIP__
+# define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 1024
+#else
+# define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512
+#endif
template<uint blocksize, typename IsActiveOp>
__device__ void gpu_parallel_active_index_array(const uint num_states,
diff --git a/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h b/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h
index f609520b8b4..a1349e82efb 100644
--- a/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h
+++ b/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h
@@ -27,7 +27,11 @@ CCL_NAMESPACE_BEGIN
#include "util/util_atomic.h"
-#define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
+#ifdef __HIP__
+# define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 1024
+#else
+# define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
+#endif
template<uint blocksize> __device__ void gpu_parallel_prefix_sum(int *values, const int num_values)
{
diff --git a/intern/cycles/kernel/device/gpu/parallel_reduce.h b/intern/cycles/kernel/device/gpu/parallel_reduce.h
index 65b1990dbb8..b60dceb2ed0 100644
--- a/intern/cycles/kernel/device/gpu/parallel_reduce.h
+++ b/intern/cycles/kernel/device/gpu/parallel_reduce.h
@@ -26,7 +26,11 @@ CCL_NAMESPACE_BEGIN
* the overall cost of the algorithm while keeping the work complexity O(n) and
* the step complexity O(log n). (Brent's Theorem optimization) */
-#define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 512
+#ifdef __HIP__
+# define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 1024
+#else
+# define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 512
+#endif
template<uint blocksize, typename InputT, typename OutputT, typename ConvertOp>
__device__ void gpu_parallel_sum(
diff --git a/intern/cycles/kernel/device/gpu/parallel_sorted_index.h b/intern/cycles/kernel/device/gpu/parallel_sorted_index.h
index 99b35468517..9bca1fad22f 100644
--- a/intern/cycles/kernel/device/gpu/parallel_sorted_index.h
+++ b/intern/cycles/kernel/device/gpu/parallel_sorted_index.h
@@ -26,7 +26,11 @@ CCL_NAMESPACE_BEGIN
#include "util/util_atomic.h"
-#define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
+#ifdef __HIP__
+# define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 1024
+#else
+# define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
+#endif
#define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)
template<uint blocksize, typename GetKeyOp>