From 39810b3f5163cb934c87db4e623b2ae41901cef3 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Wed, 20 Oct 2021 20:40:32 +0200 Subject: Cleanup: make HIP and CUDA code more consistent Ref D12834 --- intern/cycles/device/cuda/queue.cpp | 14 ++++++++++---- intern/cycles/device/hip/queue.cpp | 17 +++++++---------- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'intern') diff --git a/intern/cycles/device/cuda/queue.cpp b/intern/cycles/device/cuda/queue.cpp index 1149a835b14..6b2c9a40082 100644 --- a/intern/cycles/device/cuda/queue.cpp +++ b/intern/cycles/device/cuda/queue.cpp @@ -41,13 +41,19 @@ CUDADeviceQueue::~CUDADeviceQueue() int CUDADeviceQueue::num_concurrent_states(const size_t state_size) const { - int num_states = max(cuda_device_->get_num_multiprocessors() * - cuda_device_->get_max_num_threads_per_multiprocessor() * 16, - 1048576); + const int max_num_threads = cuda_device_->get_num_multiprocessors() * + cuda_device_->get_max_num_threads_per_multiprocessor(); + int num_states = max(max_num_threads, 65536) * 16; const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR"); if (factor_str) { - num_states = max((int)(num_states * atof(factor_str)), 1024); + const float factor = (float)atof(factor_str); + if (factor != 0.0f) { + num_states = max((int)(num_states * factor), 1024); + } + else { + VLOG(3) << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0"; + } } VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to " diff --git a/intern/cycles/device/hip/queue.cpp b/intern/cycles/device/hip/queue.cpp index 6cb29670f94..a612f59fb32 100644 --- a/intern/cycles/device/hip/queue.cpp +++ b/intern/cycles/device/hip/queue.cpp @@ -41,22 +41,19 @@ HIPDeviceQueue::~HIPDeviceQueue() int HIPDeviceQueue::num_concurrent_states(const size_t state_size) const { - int num_states = 0; const int max_num_threads = hip_device_->get_num_multiprocessors() * hip_device_->get_max_num_threads_per_multiprocessor(); - if (max_num_threads == 0) { - num_states = 1048576; // 65536 * 16 - } - else { - num_states = max_num_threads * 16; - } + int num_states = ((max_num_threads == 0) ? 65536 : max_num_threads) * 16; const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR"); if (factor_str) { - float factor = (float)atof(factor_str); - if (!factor) + const float factor = (float)atof(factor_str); + if (factor != 0.0f) { + num_states = max((int)(num_states * factor), 1024); + } + else { VLOG(3) << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0"; - num_states = max((int)(num_states * factor), 1024); + } } VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to " -- cgit v1.2.3