diff options
Diffstat (limited to 'intern/cycles/device/hip/queue.cpp')
-rw-r--r-- | intern/cycles/device/hip/queue.cpp | 85 |
1 files changed, 57 insertions, 28 deletions
diff --git a/intern/cycles/device/hip/queue.cpp b/intern/cycles/device/hip/queue.cpp index 78c77e5fdae..81b283e8cf5 100644 --- a/intern/cycles/device/hip/queue.cpp +++ b/intern/cycles/device/hip/queue.cpp @@ -39,11 +39,27 @@ HIPDeviceQueue::~HIPDeviceQueue() hipStreamDestroy(hip_stream_); } -int HIPDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const +int HIPDeviceQueue::num_concurrent_states(const size_t state_size) const { - /* TODO: compute automatically. */ - /* TODO: must have at least num_threads_per_block. */ - return 14416128; + const int max_num_threads = hip_device_->get_num_multiprocessors() * + hip_device_->get_max_num_threads_per_multiprocessor(); + int num_states = ((max_num_threads == 0) ? 65536 : max_num_threads) * 16; + + const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR"); + if (factor_str) { + const float factor = (float)atof(factor_str); + if (factor != 0.0f) { + num_states = max((int)(num_states * factor), 1024); + } + else { + VLOG(3) << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0"; + } + } + + VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to " + << string_human_readable_size(num_states * state_size); + + return num_states; } int HIPDeviceQueue::num_concurrent_busy_states() const @@ -73,7 +89,9 @@ bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const return hip_device_->kernels.available(kernel); } -bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[]) +bool HIPDeviceQueue::enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) { if (hip_device_->have_error()) { return false; @@ -97,6 +115,8 @@ bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *arg case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY: case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY: case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY: + case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY: /* See parall_active_index.h for why this amount of shared memory is needed. */ shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int); break; @@ -105,18 +125,19 @@ bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *arg } /* Launch kernel. */ - hip_device_assert(hip_device_, - hipModuleLaunchKernel(hip_kernel.function, - num_blocks, - 1, - 1, - num_threads_per_block, - 1, - 1, - shared_mem_bytes, - hip_stream_, - args, - 0)); + assert_success(hipModuleLaunchKernel(hip_kernel.function, + num_blocks, + 1, + 1, + num_threads_per_block, + 1, + 1, + shared_mem_bytes, + hip_stream_, + const_cast<void **>(args.values), + 0), + "enqueue"); + return !(hip_device_->have_error()); } @@ -127,7 +148,7 @@ bool HIPDeviceQueue::synchronize() } const HIPContextScope scope(hip_device_); - hip_device_assert(hip_device_, hipStreamSynchronize(hip_stream_)); + assert_success(hipStreamSynchronize(hip_stream_), "synchronize"); debug_synchronize(); return !(hip_device_->have_error()); @@ -150,9 +171,9 @@ void HIPDeviceQueue::zero_to_device(device_memory &mem) assert(mem.device_pointer != 0); const HIPContextScope scope(hip_device_); - hip_device_assert( - hip_device_, - hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_)); + assert_success( + hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_), + "zero_to_device"); } void HIPDeviceQueue::copy_to_device(device_memory &mem) @@ -173,10 +194,10 @@ void HIPDeviceQueue::copy_to_device(device_memory &mem) /* Copy memory to device. */ const HIPContextScope scope(hip_device_); - hip_device_assert( - hip_device_, + assert_success( hipMemcpyHtoDAsync( - (hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_)); + (hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_), + "copy_to_device"); } void HIPDeviceQueue::copy_from_device(device_memory &mem) @@ -192,13 +213,21 @@ void HIPDeviceQueue::copy_from_device(device_memory &mem) /* Copy memory from device. */ const HIPContextScope scope(hip_device_); - hip_device_assert( - hip_device_, + assert_success( hipMemcpyDtoHAsync( - mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_)); + mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_), + "copy_from_device"); +} + +void HIPDeviceQueue::assert_success(hipError_t result, const char *operation) +{ + if (result != hipSuccess) { + const char *name = hipewErrorString(result); + hip_device_->set_error( + string_printf("%s in HIP queue %s (%s)", name, operation, debug_active_kernels().c_str())); + } } -// TODO : (Arya) Enable this after stabilizing dev branch unique_ptr<DeviceGraphicsInterop> HIPDeviceQueue::graphics_interop_create() { return make_unique<HIPDeviceGraphicsInterop>(this); |