diff options
Diffstat (limited to 'intern/cycles/kernel/device/metal/compat.h')
-rw-r--r-- | intern/cycles/kernel/device/metal/compat.h | 24 |
1 files changed, 20 insertions, 4 deletions
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 4e309f16c08..0ed52074a90 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -29,10 +29,26 @@ using namespace metal::raytracing; /* Qualifiers */ -#define ccl_device -#define ccl_device_inline ccl_device -#define ccl_device_forceinline ccl_device -#define ccl_device_noinline ccl_device __attribute__((noinline)) +#if defined(__KERNEL_METAL_APPLE__) + +/* Inline everything for Apple GPUs. + * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface + * at the cost of longer compile times (~4.5 minutes on M1 Max). */ + +# define ccl_device __attribute__((always_inline)) +# define ccl_device_inline __attribute__((always_inline)) +# define ccl_device_forceinline __attribute__((always_inline)) +# define ccl_device_noinline __attribute__((always_inline)) + +#else + +# define ccl_device +# define ccl_device_inline ccl_device +# define ccl_device_forceinline ccl_device +# define ccl_device_noinline ccl_device __attribute__((noinline)) + +#endif + #define ccl_device_noinline_cpu ccl_device #define ccl_device_inline_method ccl_device #define ccl_global device |