Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Jones <michael_jones> 2022-09-27 19:01:17 +0300
committer: Michael Jones <michael_p_jones@apple.com> 2022-09-27 19:01:28 +0300
commit: 2b88ee50fb7b3ed7e6c0704eee8b39b404219430 (patch)
tree: dbfef57a9d16b469a9857d90bfb8d240efa6fd6a
parent: fc604a0be3a9ad1bf7f646dd70d4f106c7df2a75 (diff)
Cycles: Tweak inlining policy on Metal
This patch optimises the Metal inlining policy. It gives a small speedup (2-3% on M1 Max) with no notable compilation slowdown vs what is already in master.

Previously noted compilation slowdowns (as reported in T100102) were caused by forcing inlining for `ccl_device`, but we get better rendering perf by relying on compiler heuristics in these cases.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D16081
-rw-r--r-- intern/cycles/kernel/device/metal/compat.h 27
1 files changed, 6 insertions, 21 deletions
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 130a9ebafae..f689e93e5a2 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -29,28 +29,13 @@ using namespace metal::raytracing;
/* Qualifiers */
-/* Inline everything for Apple GPUs. This gives ~1.1x speedup and 10% spill
- * reduction for integator_shade_surface. However it comes at the cost of
- * longer compile times (~4.5 minutes on M1 Max) and is disabled for that
- * reason, until there is a user option to manually enable it. */
-
-#if 0 // defined(__KERNEL_METAL_APPLE__)
-
-# define ccl_device __attribute__((always_inline))
-# define ccl_device_inline __attribute__((always_inline))
-# define ccl_device_forceinline __attribute__((always_inline))
-# define ccl_device_noinline __attribute__((always_inline))
-
+#define ccl_device
+#define ccl_device_inline ccl_device __attribute__((always_inline))
+#define ccl_device_forceinline ccl_device __attribute__((always_inline))
+#if defined(__KERNEL_METAL_APPLE__)
+# define ccl_device_noinline ccl_device
#else
-
-# define ccl_device
-# define ccl_device_inline ccl_device
-# define ccl_device_forceinline ccl_device
-# if defined(__KERNEL_METAL_APPLE__)
-# define ccl_device_noinline ccl_device
-# else
-# define ccl_device_noinline ccl_device __attribute__((noinline))
-# endif
+# define ccl_device_noinline ccl_device __attribute__((noinline))
#endif
#define ccl_device_noinline_cpu ccl_device