diff options
Diffstat (limited to 'intern/cycles/device/metal/util.mm')
-rw-r--r-- | intern/cycles/device/metal/util.mm | 74 |
1 file changed, 65 insertions, 9 deletions
diff --git a/intern/cycles/device/metal/util.mm b/intern/cycles/device/metal/util.mm
index a6bd593bcb6..65c67c400fe 100644
--- a/intern/cycles/device/metal/util.mm
+++ b/intern/cycles/device/metal/util.mm
@@ -10,26 +10,83 @@
 #  include "util/string.h"
 #  include "util/time.h"
 
+#  include <IOKit/IOKitLib.h>
 #  include <pwd.h>
 #  include <sys/shm.h>
 #  include <time.h>
 
 CCL_NAMESPACE_BEGIN
 
-MetalGPUVendor MetalInfo::get_vendor_from_device_name(string const &device_name)
+string MetalInfo::get_device_name(id<MTLDevice> device)
 {
-  if (device_name.find("Intel") != string::npos) {
+  string device_name = [device.name UTF8String];
+  if (get_device_vendor(device) == METAL_GPU_APPLE) {
+    /* Append the GPU core count so we can distinguish between GPU variants in benchmarks. */
+    int gpu_core_count = get_apple_gpu_core_count(device);
+    device_name += string_printf(gpu_core_count ? " (GPU - %d cores)" : " (GPU)", gpu_core_count);
+  }
+  return device_name;
+}
+
+int MetalInfo::get_apple_gpu_core_count(id<MTLDevice> device)
+{
+  int core_count = 0;
+  if (@available(macos 12.0, *)) {
+    io_service_t gpu_service = IOServiceGetMatchingService(
+        kIOMainPortDefault, IORegistryEntryIDMatching(device.registryID));
+    if (CFNumberRef numberRef = (CFNumberRef)IORegistryEntryCreateCFProperty(
+            gpu_service, CFSTR("gpu-core-count"), 0, 0)) {
+      if (CFGetTypeID(numberRef) == CFNumberGetTypeID()) {
+        CFNumberGetValue(numberRef, kCFNumberSInt32Type, &core_count);
+      }
+      CFRelease(numberRef);
+    }
+  }
+  return core_count;
+}
+
+AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
+{
+  const char *device_name = [device.name UTF8String];
+  if (strstr(device_name, "M1")) {
+    return APPLE_M1;
+  }
+  else if (strstr(device_name, "M2")) {
+    return APPLE_M2;
+  }
+  return APPLE_UNKNOWN;
+}
+
+MetalGPUVendor MetalInfo::get_device_vendor(id<MTLDevice> device)
+{
+  const char *device_name = [device.name UTF8String];
+  if (strstr(device_name, "Intel")) {
     return METAL_GPU_INTEL;
   }
-  else if (device_name.find("AMD") != string::npos) {
+  else if (strstr(device_name, "AMD")) {
     return METAL_GPU_AMD;
   }
-  else if (device_name.find("Apple") != string::npos) {
+  else if (strstr(device_name, "Apple")) {
     return METAL_GPU_APPLE;
   }
   return METAL_GPU_UNKNOWN;
 }
 
+int MetalInfo::optimal_sort_partition_elements(id<MTLDevice> device)
+{
+  if (auto str = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS")) {
+    return atoi(str);
+  }
+
+  /* On M1 and M2 GPUs, we see better cache utilization if we partition the active indices before
+   * sorting each partition by material. Partitioning into chunks of 65536 elements results in an
+   * overall render time speedup of up to 15%. */
+  if (get_device_vendor(device) == METAL_GPU_APPLE) {
+    return 65536;
+  }
+  return 0;
+}
+
 vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
 {
   static vector<id<MTLDevice>> usable_devices;
@@ -41,9 +98,8 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
 
   metal_printf("Usable Metal devices:\n");
   for (id<MTLDevice> device in MTLCopyAllDevices()) {
-    const char *device_name = [device.name UTF8String];
-
-    MetalGPUVendor vendor = get_vendor_from_device_name(device_name);
+    string device_name = get_device_name(device);
+    MetalGPUVendor vendor = get_device_vendor(device);
     bool usable = false;
 
     if (@available(macos 12.2, *)) {
@@ -55,12 +111,12 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
     }
 
     if (usable) {
-      metal_printf("- %s\n", device_name);
+      metal_printf("- %s\n", device_name.c_str());
       [device retain];
       usable_devices.push_back(device);
     }
     else {
-      metal_printf("   (skipping \"%s\")\n", device_name);
+      metal_printf("   (skipping \"%s\")\n", device_name.c_str());
     }
   }
   if (usable_devices.empty()) {