diff options
author | Michael Jones <michael_p_jones@apple.com> | 2022-05-11 16:52:49 +0300 |
---|---|---|
committer | Michael Jones <michael_p_jones@apple.com> | 2022-05-11 18:20:59 +0300 |
commit | 007184bcf2121296fa244871382670b0f06210c0 (patch) | |
tree | fccd5d4b542e45f3391d0cf63e3a995a7cbf93db /intern/cycles/kernel | |
parent | 59cd616534b46ab85b4324a0886bd9eb8876a48b (diff) |
Enable inlining on Apple Silicon. Use new process-wide ShaderCache in order to safely re-enable binary archives
This patch is the same as D14763, but with a fix for unit test failures caused by ShaderCache fetch logic not working in the non-MetalRT case:
```
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index ad268ae7057..6aa1a56056e 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -203,9 +203,12 @@ bool kernel_has_intersection(DeviceKernel device_kernel)
/* metalrt options */
request.pipeline->use_metalrt = device->use_metalrt;
- request.pipeline->metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR;
- request.pipeline->metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
- request.pipeline->metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
+ request.pipeline->metalrt_hair = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_HAIR);
+ request.pipeline->metalrt_hair_thick = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+ request.pipeline->metalrt_pointcloud = device->use_metalrt &&
+ (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
{
thread_scoped_lock lock(cache_mutex);
@@ -225,9 +228,9 @@ bool kernel_has_intersection(DeviceKernel device_kernel)
/* metalrt options */
bool use_metalrt = device->use_metalrt;
- bool metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR;
- bool metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
- bool metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
+ bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR);
+ bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
+ bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
MetalKernelPipeline *best_pipeline = nullptr;
for (auto &pipeline : collection) {
```
Reviewed By: brecht
Differential Revision: https://developer.blender.org/D14923
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/device/metal/compat.h | 24 |
1 file changed, 20 insertions, 4 deletions
```diff
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 4e309f16c08..0ed52074a90 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -29,10 +29,26 @@ using namespace metal::raytracing;
 
 /* Qualifiers */
 
-#define ccl_device
-#define ccl_device_inline ccl_device
-#define ccl_device_forceinline ccl_device
-#define ccl_device_noinline ccl_device __attribute__((noinline))
+#if defined(__KERNEL_METAL_APPLE__)
+
+/* Inline everything for Apple GPUs.
+ * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface
+ * at the cost of longer compile times (~4.5 minutes on M1 Max). */
+
+# define ccl_device __attribute__((always_inline))
+# define ccl_device_inline __attribute__((always_inline))
+# define ccl_device_forceinline __attribute__((always_inline))
+# define ccl_device_noinline __attribute__((always_inline))
+
+#else
+
+# define ccl_device
+# define ccl_device_inline ccl_device
+# define ccl_device_forceinline ccl_device
+# define ccl_device_noinline ccl_device __attribute__((noinline))
+
+#endif
+
 #define ccl_device_noinline_cpu ccl_device
 #define ccl_device_inline_method ccl_device
 #define ccl_global device
```