Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSayak Biswas <sayakAMD>2021-10-20 14:37:39 +0300
committerWilliam Leeson <william@blender.org>2021-10-20 15:04:28 +0300
commitba4e227def13b4b953775ff4a2fd9c154bb07751 (patch)
tree2ebf929123c0e1cec40fda60c4e8b86f1c38a28c /intern/cycles/device/hip/queue.cpp
parentd28aaf6139c8cfa8555542f4f228f390485dd7ed (diff)
HIP device code cleanup and fix for high VRAM usage
This patch cleans up code for HIP device and makes it more consistent with the CUDA code. It also fixes the issue with high VRAM usage on AMD cards using HIP allowing better performance and usage on cards like 6600XT. Added a check in intern/cycles/kernel/bvh/bvh_util.h to prevent compiler error with hipcc Reviewed By: brecht, leesonw Maniphest Tasks: T92124 Differential Revision: https://developer.blender.org/D12834
Diffstat (limited to 'intern/cycles/device/hip/queue.cpp')
-rw-r--r--intern/cycles/device/hip/queue.cpp82
1 files changed, 55 insertions, 27 deletions
diff --git a/intern/cycles/device/hip/queue.cpp b/intern/cycles/device/hip/queue.cpp
index 78c77e5fdae..0d9f5916d30 100644
--- a/intern/cycles/device/hip/queue.cpp
+++ b/intern/cycles/device/hip/queue.cpp
@@ -39,11 +39,30 @@ HIPDeviceQueue::~HIPDeviceQueue()
hipStreamDestroy(hip_stream_);
}
-int HIPDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
+int HIPDeviceQueue::num_concurrent_states(const size_t state_size) const
{
- /* TODO: compute automatically. */
- /* TODO: must have at least num_threads_per_block. */
- return 14416128;
+ int num_states = 0;
+ const int max_num_threads = hip_device_->get_num_multiprocessors() *
+ hip_device_->get_max_num_threads_per_multiprocessor();
+ if (max_num_threads == 0) {
+ num_states = 1048576; // 65536 * 16
+ }
+ else {
+ num_states = max_num_threads * 16;
+ }
+
+ const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR");
+ if (factor_str) {
+ float factor = atof(factor_str);
+ if (!factor)
+ VLOG(3) << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0";
+ num_states = max((int)(num_states * factor), 1024);
+ }
+
+ VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to "
+ << string_human_readable_size(num_states * state_size);
+
+ return num_states;
}
int HIPDeviceQueue::num_concurrent_busy_states() const
@@ -105,18 +124,19 @@ bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *arg
}
/* Launch kernel. */
- hip_device_assert(hip_device_,
- hipModuleLaunchKernel(hip_kernel.function,
- num_blocks,
- 1,
- 1,
- num_threads_per_block,
- 1,
- 1,
- shared_mem_bytes,
- hip_stream_,
- args,
- 0));
+ assert_success(hipModuleLaunchKernel(hip_kernel.function,
+ num_blocks,
+ 1,
+ 1,
+ num_threads_per_block,
+ 1,
+ 1,
+ shared_mem_bytes,
+ hip_stream_,
+ args,
+ 0),
+ "enqueue");
+
return !(hip_device_->have_error());
}
@@ -127,7 +147,7 @@ bool HIPDeviceQueue::synchronize()
}
const HIPContextScope scope(hip_device_);
- hip_device_assert(hip_device_, hipStreamSynchronize(hip_stream_));
+ assert_success(hipStreamSynchronize(hip_stream_), "synchronize");
debug_synchronize();
return !(hip_device_->have_error());
@@ -150,9 +170,9 @@ void HIPDeviceQueue::zero_to_device(device_memory &mem)
assert(mem.device_pointer != 0);
const HIPContextScope scope(hip_device_);
- hip_device_assert(
- hip_device_,
- hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_));
+ assert_success(
+ hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_),
+ "zero_to_device");
}
void HIPDeviceQueue::copy_to_device(device_memory &mem)
@@ -173,10 +193,10 @@ void HIPDeviceQueue::copy_to_device(device_memory &mem)
/* Copy memory to device. */
const HIPContextScope scope(hip_device_);
- hip_device_assert(
- hip_device_,
+ assert_success(
hipMemcpyHtoDAsync(
- (hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_));
+ (hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_),
+ "copy_to_device");
}
void HIPDeviceQueue::copy_from_device(device_memory &mem)
@@ -192,13 +212,21 @@ void HIPDeviceQueue::copy_from_device(device_memory &mem)
/* Copy memory from device. */
const HIPContextScope scope(hip_device_);
- hip_device_assert(
- hip_device_,
+ assert_success(
hipMemcpyDtoHAsync(
- mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_));
+ mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_),
+ "copy_from_device");
+}
+
+void HIPDeviceQueue::assert_success(hipError_t result, const char *operation)
+{
+ if (result != hipSuccess) {
+ const char *name = hipewErrorString(result);
+ hip_device_->set_error(
+ string_printf("%s in HIP queue %s (%s)", name, operation, debug_active_kernels().c_str()));
+ }
}
-// TODO : (Arya) Enable this after stabilizing dev branch
unique_ptr<DeviceGraphicsInterop> HIPDeviceQueue::graphics_interop_create()
{
return make_unique<HIPDeviceGraphicsInterop>(this);