Cycles: Kernel address space changes for MSL

This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation.

MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness.

The vast majority of deltas in this patch fall into one of two cases:

- Ensuring ccl_private is specified for thread-local pointer types
- Ensuring ccl_global is specified for device-wide pointer types

Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant.

In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture.

The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation.

Ref T92212

Reviewed By: brecht

Maniphest Tasks: T92212

Differential Revision: https://developer.blender.org/D12864
author: Michael Jones <michael_p_jones@apple.com> 2021-10-14 15:53:40 +0300
committer: Michael Jones <michael_p_jones@apple.com> 2021-10-14 18:14:43 +0300
commit: a0f269f682dab848afc80cd322d04a0c4a815cae (patch)
tree: 0978b1888273fbaa2d14550bde484c5247fa89ff /intern/cycles/kernel/bvh
parent: 47caeb8c26686e24ea7e694f94fabee44f3d2dca (diff)
8 files changed, 69 insertions, 66 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 0b44cc5db34..8f6dcd0adb9 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -139,7 +139,7 @@ CCL_NAMESPACE_BEGIN
 
 #endif /* __KERNEL_OPTIX__ */
 
-ccl_device_inline bool scene_intersect_valid(const Ray *ray)
+ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
 {
   /* NOTE: Due to some vectorization code  non-finite origin point might
    * cause lots of false-positive intersections which will overflow traversal
@@ -154,10 +154,10 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray)
   return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
 }
 
-ccl_device_intersect bool scene_intersect(const KernelGlobals *kg,
-                                          const Ray *ray,
+ccl_device_intersect bool scene_intersect(ccl_global const KernelGlobals *kg,
+                                          ccl_private const Ray *ray,
                                           const uint visibility,
-                                          Intersection *isect)
+                                          ccl_private Intersection *isect)
 {
 #ifdef __KERNEL_OPTIX__
   uint p0 = 0;
@@ -248,11 +248,11 @@ ccl_device_intersect bool scene_intersect(const KernelGlobals *kg,
 }
 
 #ifdef __BVH_LOCAL__
-ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
-                                                const Ray *ray,
-                                                LocalIntersection *local_isect,
+ccl_device_intersect bool scene_intersect_local(ccl_global const KernelGlobals *kg,
+                                                ccl_private const Ray *ray,
+                                                ccl_private LocalIntersection *local_isect,
                                                 int local_object,
-                                                uint *lcg_state,
+                                                ccl_private uint *lcg_state,
                                                 int max_hits)
 {
 #  ifdef __KERNEL_OPTIX__
@@ -360,12 +360,12 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
 #endif
 
 #ifdef __SHADOW_RECORD_ALL__
-ccl_device_intersect bool scene_intersect_shadow_all(const KernelGlobals *kg,
-                                                     const Ray *ray,
-                                                     Intersection *isect,
+ccl_device_intersect bool scene_intersect_shadow_all(ccl_global const KernelGlobals *kg,
+                                                     ccl_private const Ray *ray,
+                                                     ccl_private Intersection *isect,
                                                      uint visibility,
                                                      uint max_hits,
-                                                     uint *num_hits)
+                                                     ccl_private uint *num_hits)
 {
 #  ifdef __KERNEL_OPTIX__
   uint p0 = ((uint64_t)isect) & 0xFFFFFFFF;
@@ -445,9 +445,9 @@ ccl_device_intersect bool scene_intersect_shadow_all(const KernelGlobals *kg,
 #endif /* __SHADOW_RECORD_ALL__ */
 
 #ifdef __VOLUME__
-ccl_device_intersect bool scene_intersect_volume(const KernelGlobals *kg,
-                                                 const Ray *ray,
-                                                 Intersection *isect,
+ccl_device_intersect bool scene_intersect_volume(ccl_global const KernelGlobals *kg,
+                                                 ccl_private const Ray *ray,
+                                                 ccl_private Intersection *isect,
                                                  const uint visibility)
 {
 #  ifdef __KERNEL_OPTIX__
@@ -507,9 +507,9 @@ ccl_device_intersect bool scene_intersect_volume(const KernelGlobals *kg,
 #endif /* __VOLUME__ */
 
 #ifdef __VOLUME_RECORD_ALL__
-ccl_device_intersect uint scene_intersect_volume_all(const KernelGlobals *kg,
-                                                     const Ray *ray,
-                                                     Intersection *isect,
+ccl_device_intersect uint scene_intersect_volume_all(ccl_global const KernelGlobals *kg,
+                                                     ccl_private const Ray *ray,
+                                                     ccl_private Intersection *isect,
                                                      const uint max_hits,
                                                      const uint visibility)
 {
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 90b9f410b29..78ad4a34da9 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -36,11 +36,11 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
-                                     LocalIntersection *local_isect,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
+                                     ccl_private LocalIntersection *local_isect,
                                      int local_object,
-                                     uint *lcg_state,
+                                     ccl_private uint *lcg_state,
                                      int max_hits)
 {
   /* todo:
@@ -196,11 +196,11 @@ ccl_device_inline
   return false;
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         LocalIntersection *local_isect,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private LocalIntersection *local_isect,
                                          int local_object,
-                                         uint *lcg_state,
+                                         ccl_private uint *lcg_state,
                                          int max_hits)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index 15cd0f22213..49b37f39671 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -16,7 +16,7 @@
 
 // TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and
 // 3-vector which might be faster.
-ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(const KernelGlobals *kg,
+ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(ccl_global const KernelGlobals *kg,
                                                                 int node_addr,
                                                                 int child)
 {
@@ -28,7 +28,7 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(const KernelGlob
   return space;
 }
 
-ccl_device_forceinline int bvh_aligned_node_intersect(const KernelGlobals *kg,
+ccl_device_forceinline int bvh_aligned_node_intersect(ccl_global const KernelGlobals *kg,
                                                       const float3 P,
                                                       const float3 idir,
                                                       const float t,
@@ -76,7 +76,7 @@ ccl_device_forceinline int bvh_aligned_node_intersect(const KernelGlobals *kg,
 #endif
 }
 
-ccl_device_forceinline bool bvh_unaligned_node_intersect_child(const KernelGlobals *kg,
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child(ccl_global const KernelGlobals *kg,
                                                                const float3 P,
                                                                const float3 dir,
                                                                const float t,
@@ -102,7 +102,7 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(const KernelGloba
   return tnear <= tfar;
 }
 
-ccl_device_forceinline int bvh_unaligned_node_intersect(const KernelGlobals *kg,
+ccl_device_forceinline int bvh_unaligned_node_intersect(ccl_global const KernelGlobals *kg,
                                                         const float3 P,
                                                         const float3 dir,
                                                         const float3 idir,
@@ -134,7 +134,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(const KernelGlobals *kg,
   return mask;
 }
 
-ccl_device_forceinline int bvh_node_intersect(const KernelGlobals *kg,
+ccl_device_forceinline int bvh_node_intersect(ccl_global const KernelGlobals *kg,
                                               const float3 P,
                                               const float3 dir,
                                               const float3 idir,
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index 82c7c1a8a6c..c67c820edbc 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -36,12 +36,12 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
-                                     Intersection *isect_array,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
+                                     ccl_private Intersection *isect_array,
                                      const uint visibility,
                                      const uint max_hits,
-                                     uint *num_hits)
+                                     ccl_private uint *num_hits)
 {
   /* todo:
    * - likely and unlikely for if() statements
@@ -71,7 +71,7 @@ ccl_device_inline
   float t_world_to_instance = 1.0f;
 
   *num_hits = 0;
-  Intersection *isect = isect_array;
+  ccl_private Intersection *isect = isect_array;
 
   /* traversal loop */
   do {
@@ -284,12 +284,12 @@ ccl_device_inline
   return false;
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         Intersection *isect_array,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private Intersection *isect_array,
                                          const uint visibility,
                                          const uint max_hits,
-                                         uint *num_hits)
+                                         ccl_private uint *num_hits)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
 }
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 2feff593c10..a46c45d3529 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -31,9 +31,9 @@
  * BVH_MOTION: motion blur rendering
  */
 
-ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                                     const Ray *ray,
-                                                     Intersection *isect,
+ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                                     ccl_private const Ray *ray,
+                                                     ccl_private Intersection *isect,
                                                      const uint visibility)
 {
   /* todo:
@@ -226,9 +226,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
   return (isect->prim != PRIM_NONE);
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         Intersection *isect,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private Intersection *isect,
                                          const uint visibility)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h
index d143fe4aeab..fb546f568f3 100644
--- a/intern/cycles/kernel/bvh/bvh_util.h
+++ b/intern/cycles/kernel/bvh/bvh_util.h
@@ -88,7 +88,7 @@ ccl_device int intersections_compare(const void *a, const void *b)
 #endif
 
 #if defined(__SHADOW_RECORD_ALL__)
-ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
+ccl_device_inline void sort_intersections(ccl_private Intersection *hits, uint num_hits)
 {
   kernel_assert(num_hits > 0);
 
@@ -115,8 +115,8 @@ ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
 
 /* For subsurface scattering, only sorting a small amount of intersections
  * so bubble sort is fine for CPU and GPU. */
-ccl_device_inline void sort_intersections_and_normals(Intersection *hits,
-                                                      float3 *Ng,
+ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *hits,
+                                                      ccl_private float3 *Ng,
                                                       uint num_hits)
 {
   bool swapped;
@@ -139,8 +139,9 @@ ccl_device_inline void sort_intersections_and_normals(Intersection *hits,
 
 /* Utility to quickly get flags from an intersection. */
 
-ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *ccl_restrict kg,
-                                                         const Intersection *ccl_restrict isect)
+ccl_device_forceinline int intersection_get_shader_flags(
+    ccl_global const KernelGlobals *ccl_restrict kg,
+    ccl_private const Intersection *ccl_restrict isect)
 {
   const int prim = isect->prim;
   int shader = 0;
@@ -161,7 +162,7 @@ ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *cc
 }
 
 ccl_device_forceinline int intersection_get_shader_from_isect_prim(
-    const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type)
+    ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type)
 {
   int shader = 0;
 
@@ -180,14 +181,16 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim(
   return shader & SHADER_MASK;
 }
 
-ccl_device_forceinline int intersection_get_shader(const KernelGlobals *ccl_restrict kg,
-                                                   const Intersection *ccl_restrict isect)
+ccl_device_forceinline int intersection_get_shader(ccl_global const KernelGlobals *ccl_restrict kg,
+                                                   ccl_private const Intersection *ccl_restrict
+                                                       isect)
 {
   return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type);
 }
 
-ccl_device_forceinline int intersection_get_object_flags(const KernelGlobals *ccl_restrict kg,
-                                                         const Intersection *ccl_restrict isect)
+ccl_device_forceinline int intersection_get_object_flags(
+    ccl_global const KernelGlobals *ccl_restrict kg,
+    ccl_private const Intersection *ccl_restrict isect)
 {
   return kernel_tex_fetch(__object_flag, isect->object);
 }
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index 0411d9c522d..d3bfce2d96b 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -35,9 +35,9 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
-                                     Intersection *isect,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
+                                     ccl_private Intersection *isect,
                                      const uint visibility)
 {
   /* todo:
@@ -221,9 +221,9 @@ ccl_device_inline
   return (isect->prim != PRIM_NONE);
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         Intersection *isect,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private Intersection *isect,
                                          const uint visibility)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index 4874270f15d..f0fe95924cf 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -35,8 +35,8 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    uint BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
+    uint BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
                                      Intersection *isect_array,
                                      const uint max_hits,
                                      const uint visibility)
@@ -289,8 +289,8 @@ ccl_device_inline
   return num_hits;
 }
 
-ccl_device_inline uint BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
+ccl_device_inline uint BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
                                          Intersection *isect_array,
                                          const uint max_hits,
                                          const uint visibility)
author	Michael Jones <michael_p_jones@apple.com>	2021-10-14 15:53:40 +0300
committer	Michael Jones <michael_p_jones@apple.com>	2021-10-14 18:14:43 +0300
commit	a0f269f682dab848afc80cd322d04a0c4a815cae (patch)
tree	0978b1888273fbaa2d14550bde484c5247fa89ff /intern/cycles/kernel/bvh
parent	47caeb8c26686e24ea7e694f94fabee44f3d2dca (diff)