From 2b88ee50fb7b3ed7e6c0704eee8b39b404219430 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 27 Sep 2022 17:01:17 +0100 Subject: Cycles: Tweak inlining policy on Metal This patch optimises the Metal inlining policy. It gives a small speedup (2-3% on M1 Max) with no notable compilation slowdown vs what is already in master. Previously noted compilation slowdowns (as reported in T100102) were caused by forcing inlining for `ccl_device`, but we get better rendering perf by relying on compiler heuristics in these cases. Reviewed By: brecht Differential Revision: https://developer.blender.org/D16081 --- intern/cycles/kernel/device/metal/compat.h | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 130a9ebafae..f689e93e5a2 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -29,28 +29,13 @@ using namespace metal::raytracing; /* Qualifiers */ -/* Inline everything for Apple GPUs. This gives ~1.1x speedup and 10% spill - * reduction for integator_shade_surface. However it comes at the cost of - * longer compile times (~4.5 minutes on M1 Max) and is disabled for that - * reason, until there is a user option to manually enable it. */ - -#if 0 // defined(__KERNEL_METAL_APPLE__) - -# define ccl_device __attribute__((always_inline)) -# define ccl_device_inline __attribute__((always_inline)) -# define ccl_device_forceinline __attribute__((always_inline)) -# define ccl_device_noinline __attribute__((always_inline)) - +#define ccl_device +#define ccl_device_inline ccl_device __attribute__((always_inline)) +#define ccl_device_forceinline ccl_device __attribute__((always_inline)) +#if defined(__KERNEL_METAL_APPLE__) +# define ccl_device_noinline ccl_device #else - -# define ccl_device -# define ccl_device_inline ccl_device -# define ccl_device_forceinline ccl_device -# if defined(__KERNEL_METAL_APPLE__) -# define ccl_device_noinline ccl_device -# else -# define ccl_device_noinline ccl_device __attribute__((noinline)) -# endif +# define ccl_device_noinline ccl_device __attribute__((noinline)) #endif #define ccl_device_noinline_cpu ccl_device -- cgit v1.2.3