diff options
author | Brian Savery <bsavery> | 2021-09-28 17:51:14 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-09-28 20:18:55 +0300 |
commit | 044a77352f8a8a0e1f60190369d69ef26587b65f (patch) | |
tree | 22096da4d5214cbd7419d1a5e0dadc70e6cacea3 /intern/cycles/kernel/device/gpu | |
parent | 262b2118565826177133013c324212c66d882456 (diff) |
Cycles: add HIP device support for AMD GPUs
NOTE: this feature is not ready for user testing, and not yet enabled in daily
builds. It is being merged now for easier collaboration on development.
HIP is a heterogeneous compute interface allowing C++ code to be executed on
GPUs, similar to CUDA. It is intended to bring back AMD GPU rendering support
on Windows and Linux.
https://github.com/ROCm-Developer-Tools/HIP.
As of the time of writing, it should compile and run on Linux with existing
HIP compilers and driver runtimes. Publicly available compilers and drivers
for Windows will come later.
See task T91571 for more details on the current status and work remaining
to be done.
Credits:
Sayak Biswas (AMD)
Arya Rafii (AMD)
Brian Savery (AMD)
Differential Revision: https://developer.blender.org/D12578
Diffstat (limited to 'intern/cycles/kernel/device/gpu')
4 files changed, 20 insertions, 4 deletions
diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h index a68d1d80c7d..db4a4bf71e0 100644 --- a/intern/cycles/kernel/device/gpu/parallel_active_index.h +++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h @@ -25,7 +25,11 @@ CCL_NAMESPACE_BEGIN #include "util/util_atomic.h" -#define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512 +#ifdef __HIP__ +# define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 1024 +#else +# define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512 +#endif template<uint blocksize, typename IsActiveOp> __device__ void gpu_parallel_active_index_array(const uint num_states, diff --git a/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h b/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h index f609520b8b4..a1349e82efb 100644 --- a/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h +++ b/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h @@ -27,7 +27,11 @@ CCL_NAMESPACE_BEGIN #include "util/util_atomic.h" -#define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512 +#ifdef __HIP__ +# define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 1024 +#else +# define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512 +#endif template<uint blocksize> __device__ void gpu_parallel_prefix_sum(int *values, const int num_values) { diff --git a/intern/cycles/kernel/device/gpu/parallel_reduce.h b/intern/cycles/kernel/device/gpu/parallel_reduce.h index 65b1990dbb8..b60dceb2ed0 100644 --- a/intern/cycles/kernel/device/gpu/parallel_reduce.h +++ b/intern/cycles/kernel/device/gpu/parallel_reduce.h @@ -26,7 +26,11 @@ CCL_NAMESPACE_BEGIN * the overall cost of the algorithm while keeping the work complexity O(n) and * the step complexity O(log n). 
(Brent's Theorem optimization) */ -#define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 512 +#ifdef __HIP__ +# define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 1024 +#else +# define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 512 +#endif template<uint blocksize, typename InputT, typename OutputT, typename ConvertOp> __device__ void gpu_parallel_sum( diff --git a/intern/cycles/kernel/device/gpu/parallel_sorted_index.h b/intern/cycles/kernel/device/gpu/parallel_sorted_index.h index 99b35468517..9bca1fad22f 100644 --- a/intern/cycles/kernel/device/gpu/parallel_sorted_index.h +++ b/intern/cycles/kernel/device/gpu/parallel_sorted_index.h @@ -26,7 +26,11 @@ CCL_NAMESPACE_BEGIN #include "util/util_atomic.h" -#define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512 +#ifdef __HIP__ +# define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 1024 +#else +# define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512 +#endif #define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0) template<uint blocksize, typename GetKeyOp> |