Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/metal')
-rw-r--r-- intern/cycles/device/metal/device_impl.mm | 10
-rw-r--r-- intern/cycles/device/metal/kernel.mm | 13
-rw-r--r-- intern/cycles/device/metal/queue.h | 2
-rw-r--r-- intern/cycles/device/metal/queue.mm | 51
-rw-r--r-- intern/cycles/device/metal/util.mm | 6
5 files changed, 62 insertions, 20 deletions
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index 6a16d4bb3b4..6f1042b1e55 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -296,9 +296,11 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
}
source = global_defines + source;
+# if 0
metal_printf("================\n%s================\n\%s================\n",
global_defines.c_str(),
baked_constants.c_str());
+# endif
/* Generate an MD5 from the source and include any baked constants. This is used when caching
* PSOs. */
@@ -339,6 +341,14 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
+# if defined(MAC_OS_VERSION_13_0)
+ if (@available(macos 13.0, *)) {
+ if (device_vendor == METAL_GPU_INTEL) {
+ [options setOptimizationLevel:MTLLibraryOptimizationLevelSize];
+ }
+ }
+# endif
+
options.fastMathEnabled = YES;
if (@available(macOS 12.0, *)) {
options.languageVersion = MTLLanguageVersion2_4;
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 5e0cb6d18f4..55938d1a03a 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -162,6 +162,13 @@ bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
}
}
+ if (device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
+ if ((device->kernel_features & KERNEL_FEATURE_MNEE) == 0) {
+ /* Skip shade_surface_mnee kernel if the scene doesn't require it. */
+ return false;
+ }
+ }
+
if (pso_type != PSO_GENERIC) {
/* Only specialize kernels where it can make an impact. */
if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
@@ -317,6 +324,12 @@ bool MetalKernelPipeline::should_use_binary_archive() const
}
}
+ /* Workaround for Intel GPU having issue using Binary Archives */
+ MetalGPUVendor gpu_vendor = MetalInfo::get_device_vendor(mtlDevice);
+ if (gpu_vendor == METAL_GPU_INTEL) {
+ return false;
+ }
+
if (pso_type == PSO_GENERIC) {
/* Archive the generic kernels. */
return true;
diff --git a/intern/cycles/device/metal/queue.h b/intern/cycles/device/metal/queue.h
index fc32740f3e1..2a6c12e2a60 100644
--- a/intern/cycles/device/metal/queue.h
+++ b/intern/cycles/device/metal/queue.h
@@ -23,7 +23,7 @@ class MetalDeviceQueue : public DeviceQueue {
~MetalDeviceQueue();
virtual int num_concurrent_states(const size_t) const override;
- virtual int num_concurrent_busy_states() const override;
+ virtual int num_concurrent_busy_states(const size_t) const override;
virtual int num_sort_partition_elements() const override;
virtual void init_execution() override;
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 5ac63a16c61..c0df2c8553f 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -264,33 +264,46 @@ MetalDeviceQueue::~MetalDeviceQueue()
}
}
-int MetalDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
+int MetalDeviceQueue::num_concurrent_states(const size_t state_size) const
{
- /* METAL_WIP */
- /* TODO: compute automatically. */
- /* TODO: must have at least num_threads_per_block. */
- int result = 1048576;
- if (metal_device_->device_vendor == METAL_GPU_AMD) {
- result *= 2;
+ static int result = 0;
+ if (result) {
+ return result;
}
- else if (metal_device_->device_vendor == METAL_GPU_APPLE) {
+
+ result = 1048576;
+ if (metal_device_->device_vendor == METAL_GPU_APPLE) {
result *= 4;
+
+ if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) == APPLE_M2) {
+ size_t system_ram = system_physical_ram();
+ size_t allocated_so_far = [metal_device_->mtlDevice currentAllocatedSize];
+ size_t max_recommended_working_set = [metal_device_->mtlDevice recommendedMaxWorkingSetSize];
+
+ /* Determine whether we can double the state count, and leave enough GPU-available memory
+ * (1/8 the system RAM or 1GB - whichever is largest). Enlarging the state size allows us to
+ * keep dispatch sizes high and minimize work submission overheads. */
+ size_t min_headroom = std::max(system_ram / 8, size_t(1024 * 1024 * 1024));
+ size_t total_state_size = result * state_size;
+ if (max_recommended_working_set - allocated_so_far - total_state_size * 2 >= min_headroom) {
+ result *= 2;
+ metal_printf("Doubling state count to exploit available RAM (new size = %d)\n", result);
+ }
+ }
+ }
+ else if (metal_device_->device_vendor == METAL_GPU_AMD) {
+ /* METAL_WIP */
+ /* TODO: compute automatically. */
+ /* TODO: must have at least num_threads_per_block. */
+ result *= 2;
}
return result;
}
-int MetalDeviceQueue::num_concurrent_busy_states() const
+int MetalDeviceQueue::num_concurrent_busy_states(const size_t state_size) const
{
- /* METAL_WIP */
- /* TODO: compute automatically. */
- int result = 65536;
- if (metal_device_->device_vendor == METAL_GPU_AMD) {
- result *= 2;
- }
- else if (metal_device_->device_vendor == METAL_GPU_APPLE) {
- result *= 4;
- }
- return result;
+ /* A 1:4 busy:total ratio gives best rendering performance, independent of total state count. */
+ return num_concurrent_states(state_size) / 4;
}
int MetalDeviceQueue::num_sort_partition_elements() const
diff --git a/intern/cycles/device/metal/util.mm b/intern/cycles/device/metal/util.mm
index 65c67c400fe..f47638fac15 100644
--- a/intern/cycles/device/metal/util.mm
+++ b/intern/cycles/device/metal/util.mm
@@ -110,6 +110,12 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
usable |= (vendor == METAL_GPU_AMD);
}
+# if defined(MAC_OS_VERSION_13_0)
+ if (@available(macos 13.0, *)) {
+ usable |= (vendor == METAL_GPU_INTEL);
+ }
+# endif
+
if (usable) {
metal_printf("- %s\n", device_name.c_str());
[device retain];