Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/metal/queue.mm')
-rw-r--r--intern/cycles/device/metal/queue.mm51
1 files changed, 32 insertions, 19 deletions
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 5ac63a16c61..c0df2c8553f 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -264,33 +264,46 @@ MetalDeviceQueue::~MetalDeviceQueue()
}
}
-int MetalDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
+int MetalDeviceQueue::num_concurrent_states(const size_t state_size) const
{
- /* METAL_WIP */
- /* TODO: compute automatically. */
- /* TODO: must have at least num_threads_per_block. */
- int result = 1048576;
- if (metal_device_->device_vendor == METAL_GPU_AMD) {
- result *= 2;
+ static int result = 0;
+ if (result) {
+ return result;
}
- else if (metal_device_->device_vendor == METAL_GPU_APPLE) {
+
+ result = 1048576;
+ if (metal_device_->device_vendor == METAL_GPU_APPLE) {
result *= 4;
+
+ if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) == APPLE_M2) {
+ size_t system_ram = system_physical_ram();
+ size_t allocated_so_far = [metal_device_->mtlDevice currentAllocatedSize];
+ size_t max_recommended_working_set = [metal_device_->mtlDevice recommendedMaxWorkingSetSize];
+
+ /* Determine whether we can double the state count, and leave enough GPU-available memory
+ * (1/8 the system RAM or 1GB - whichever is largest). Enlarging the state size allows us to
+ * keep dispatch sizes high and minimize work submission overheads. */
+ size_t min_headroom = std::max(system_ram / 8, size_t(1024 * 1024 * 1024));
+ size_t total_state_size = result * state_size;
+ if (max_recommended_working_set - allocated_so_far - total_state_size * 2 >= min_headroom) {
+ result *= 2;
+ metal_printf("Doubling state count to exploit available RAM (new size = %d)\n", result);
+ }
+ }
+ }
+ else if (metal_device_->device_vendor == METAL_GPU_AMD) {
+ /* METAL_WIP */
+ /* TODO: compute automatically. */
+ /* TODO: must have at least num_threads_per_block. */
+ result *= 2;
}
return result;
}
-int MetalDeviceQueue::num_concurrent_busy_states() const
+int MetalDeviceQueue::num_concurrent_busy_states(const size_t state_size) const
{
- /* METAL_WIP */
- /* TODO: compute automatically. */
- int result = 65536;
- if (metal_device_->device_vendor == METAL_GPU_AMD) {
- result *= 2;
- }
- else if (metal_device_->device_vendor == METAL_GPU_APPLE) {
- result *= 4;
- }
- return result;
+ /* A 1:4 busy:total ratio gives best rendering performance, independent of total state count. */
+ return num_concurrent_states(state_size) / 4;
}
int MetalDeviceQueue::num_sort_partition_elements() const