Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/hip/queue.cpp')
-rw-r--r--intern/cycles/device/hip/queue.cpp85
1 file changed, 57 insertions, 28 deletions
diff --git a/intern/cycles/device/hip/queue.cpp b/intern/cycles/device/hip/queue.cpp
index 78c77e5fdae..81b283e8cf5 100644
--- a/intern/cycles/device/hip/queue.cpp
+++ b/intern/cycles/device/hip/queue.cpp
@@ -39,11 +39,27 @@ HIPDeviceQueue::~HIPDeviceQueue()
hipStreamDestroy(hip_stream_);
}
-int HIPDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
+int HIPDeviceQueue::num_concurrent_states(const size_t state_size) const
{
- /* TODO: compute automatically. */
- /* TODO: must have at least num_threads_per_block. */
- return 14416128;
+ const int max_num_threads = hip_device_->get_num_multiprocessors() *
+ hip_device_->get_max_num_threads_per_multiprocessor();
+ int num_states = ((max_num_threads == 0) ? 65536 : max_num_threads) * 16;
+
+ const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR");
+ if (factor_str) {
+ const float factor = (float)atof(factor_str);
+ if (factor != 0.0f) {
+ num_states = max((int)(num_states * factor), 1024);
+ }
+ else {
+ VLOG(3) << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0";
+ }
+ }
+
+ VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to "
+ << string_human_readable_size(num_states * state_size);
+
+ return num_states;
}
int HIPDeviceQueue::num_concurrent_busy_states() const
@@ -73,7 +89,9 @@ bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const
return hip_device_->kernels.available(kernel);
}
-bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
+bool HIPDeviceQueue::enqueue(DeviceKernel kernel,
+ const int work_size,
+ DeviceKernelArguments const &args)
{
if (hip_device_->have_error()) {
return false;
@@ -97,6 +115,8 @@ bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *arg
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
+ case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY:
+ case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY:
/* See parall_active_index.h for why this amount of shared memory is needed. */
shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int);
break;
@@ -105,18 +125,19 @@ bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *arg
}
/* Launch kernel. */
- hip_device_assert(hip_device_,
- hipModuleLaunchKernel(hip_kernel.function,
- num_blocks,
- 1,
- 1,
- num_threads_per_block,
- 1,
- 1,
- shared_mem_bytes,
- hip_stream_,
- args,
- 0));
+ assert_success(hipModuleLaunchKernel(hip_kernel.function,
+ num_blocks,
+ 1,
+ 1,
+ num_threads_per_block,
+ 1,
+ 1,
+ shared_mem_bytes,
+ hip_stream_,
+ const_cast<void **>(args.values),
+ 0),
+ "enqueue");
+
return !(hip_device_->have_error());
}
@@ -127,7 +148,7 @@ bool HIPDeviceQueue::synchronize()
}
const HIPContextScope scope(hip_device_);
- hip_device_assert(hip_device_, hipStreamSynchronize(hip_stream_));
+ assert_success(hipStreamSynchronize(hip_stream_), "synchronize");
debug_synchronize();
return !(hip_device_->have_error());
@@ -150,9 +171,9 @@ void HIPDeviceQueue::zero_to_device(device_memory &mem)
assert(mem.device_pointer != 0);
const HIPContextScope scope(hip_device_);
- hip_device_assert(
- hip_device_,
- hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_));
+ assert_success(
+ hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_),
+ "zero_to_device");
}
void HIPDeviceQueue::copy_to_device(device_memory &mem)
@@ -173,10 +194,10 @@ void HIPDeviceQueue::copy_to_device(device_memory &mem)
/* Copy memory to device. */
const HIPContextScope scope(hip_device_);
- hip_device_assert(
- hip_device_,
+ assert_success(
hipMemcpyHtoDAsync(
- (hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_));
+ (hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_),
+ "copy_to_device");
}
void HIPDeviceQueue::copy_from_device(device_memory &mem)
@@ -192,13 +213,21 @@ void HIPDeviceQueue::copy_from_device(device_memory &mem)
/* Copy memory from device. */
const HIPContextScope scope(hip_device_);
- hip_device_assert(
- hip_device_,
+ assert_success(
hipMemcpyDtoHAsync(
- mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_));
+ mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_),
+ "copy_from_device");
+}
+
+void HIPDeviceQueue::assert_success(hipError_t result, const char *operation)
+{
+ if (result != hipSuccess) {
+ const char *name = hipewErrorString(result);
+ hip_device_->set_error(
+ string_printf("%s in HIP queue %s (%s)", name, operation, debug_active_kernels().c_str()));
+ }
}
-// TODO : (Arya) Enable this after stabilizing dev branch
unique_ptr<DeviceGraphicsInterop> HIPDeviceQueue::graphics_interop_create()
{
return make_unique<HIPDeviceGraphicsInterop>(this);