diff options
author | Michael Jones <michael_p_jones@apple.com> | 2022-05-11 16:52:49 +0300 |
---|---|---|
committer | Michael Jones <michael_p_jones@apple.com> | 2022-05-11 18:20:59 +0300 |
commit | 007184bcf2121296fa244871382670b0f06210c0 (patch) | |
tree | fccd5d4b542e45f3391d0cf63e3a995a7cbf93db /intern/cycles/device/metal/queue.mm | |
parent | 59cd616534b46ab85b4324a0886bd9eb8876a48b (diff) |
Enable inlining on Apple Silicon. Use new process-wide ShaderCache in order to safely re-enable binary archives
This patch is the same as D14763, but with a fix for unit test failures caused by ShaderCache fetch logic not working in the non-MetalRT case:
```
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index ad268ae7057..6aa1a56056e 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -203,9 +203,12 @@ bool kernel_has_intersection(DeviceKernel device_kernel)
/* metalrt options */
request.pipeline->use_metalrt = device->use_metalrt;
- request.pipeline->metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR;
- request.pipeline->metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
- request.pipeline->metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
+ request.pipeline->metalrt_hair = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_HAIR);
+ request.pipeline->metalrt_hair_thick = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+ request.pipeline->metalrt_pointcloud = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
{
thread_scoped_lock lock(cache_mutex);
@@ -225,9 +228,9 @@ bool kernel_has_intersection(DeviceKernel device_kernel)
/* metalrt options */
bool use_metalrt = device->use_metalrt;
- bool metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR;
- bool metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
- bool metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
+ bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR);
+ bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+ bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
MetalKernelPipeline *best_pipeline = nullptr;
for (auto &pipeline : collection) {
```
Reviewed By: brecht
Differential Revision: https://developer.blender.org/D14923
Diffstat (limited to 'intern/cycles/device/metal/queue.mm')
-rw-r--r-- | intern/cycles/device/metal/queue.mm | 31 |
1 files changed, 19 insertions, 12 deletions
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm index 1686ab95ffa..ec10e091b25 100644 --- a/intern/cycles/device/metal/queue.mm +++ b/intern/cycles/device/metal/queue.mm @@ -108,9 +108,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, VLOG(3) << "Metal queue launch " << device_kernel_as_string(kernel) << ", work_size " << work_size; - const MetalDeviceKernel &metal_kernel = metal_device->kernels.get(kernel); - const MetalKernelPipeline &metal_kernel_pso = metal_kernel.get_pso(); - id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel); /* Determine size requirement for argument buffer. */ @@ -212,6 +209,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, } bytes_written = globals_offsets + sizeof(KernelParamsMetal); + const MetalKernelPipeline *metal_kernel_pso = MetalDeviceKernels::get_best_pipeline(metal_device, + kernel); + if (!metal_kernel_pso) { + metal_device->set_error( + string_printf("No MetalKernelPipeline for %s\n", device_kernel_as_string(kernel))); + return false; + } + /* Encode ancillaries */ [metal_device->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets]; [metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_2d @@ -228,14 +233,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, } for (int table = 0; table < METALRT_TABLE_NUM; table++) { - if (metal_kernel_pso.intersection_func_table[table]) { - [metal_kernel_pso.intersection_func_table[table] setBuffer:arg_buffer - offset:globals_offsets - atIndex:1]; + if (metal_kernel_pso->intersection_func_table[table]) { + [metal_kernel_pso->intersection_func_table[table] setBuffer:arg_buffer + offset:globals_offsets + atIndex:1]; [metal_device->mtlAncillaryArgEncoder - setIntersectionFunctionTable:metal_kernel_pso.intersection_func_table[table] + setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table] atIndex:3 + table]; - [mtlComputeCommandEncoder useResource:metal_kernel_pso.intersection_func_table[table] + [mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table] usage:MTLResourceUsageRead]; } else { @@ -281,10 +286,10 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, } } - [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso.pipeline]; + [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso->pipeline]; /* Compute kernel launch parameters. */ - const int num_threads_per_block = metal_kernel.get_num_threads_per_block(); + const int num_threads_per_block = metal_kernel_pso->num_threads_per_block; int shared_mem_bytes = 0; @@ -314,7 +319,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, threadsPerThreadgroup:size_threads_per_threadgroup]; [mtlCommandBuffer addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) { - NSString *kernel_name = metal_kernel_pso.function.label; + NSString *kernel_name = metal_kernel_pso->function.label; /* Enhanced command buffer errors are only available in 11.0+ */ if (@available(macos 11.0, *)) { @@ -547,6 +552,8 @@ id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial]; + [mtlComputeEncoder setLabel:@(device_kernel_as_string(kernel))]; + /* declare usage of MTLBuffers etc */ prepare_resources(kernel); } |