Cycles: Kernel address space changes for MSL

This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation.

MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc.). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc.), therefore the first step of MSL-enablement is to apply these consistently. Line for line, this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness.

The vast majority of deltas in this patch fall into one of two cases:

- Ensuring ccl_private is specified for thread-local pointer types
- Ensuring ccl_global is specified for device-wide pointer types

Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant.

In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture.

The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation.

Ref T92212

Reviewed By: brecht

Maniphest Tasks: T92212

Differential Revision: https://developer.blender.org/D12864
author: Michael Jones <michael_p_jones@apple.com> 2021-10-14 15:53:40 +0300
committer: Michael Jones <michael_p_jones@apple.com> 2021-10-14 18:14:43 +0300
commit: a0f269f682dab848afc80cd322d04a0c4a815cae (patch)
tree: 0978b1888273fbaa2d14550bde484c5247fa89ff /intern/cycles
parent: 47caeb8c26686e24ea7e694f94fabee44f3d2dca (diff)
148 files changed, 2146 insertions, 1648 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 0b44cc5db34..8f6dcd0adb9 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -139,7 +139,7 @@ CCL_NAMESPACE_BEGIN
 
 #endif /* __KERNEL_OPTIX__ */
 
-ccl_device_inline bool scene_intersect_valid(const Ray *ray)
+ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
 {
   /* NOTE: Due to some vectorization code  non-finite origin point might
    * cause lots of false-positive intersections which will overflow traversal
@@ -154,10 +154,10 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray)
   return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
 }
 
-ccl_device_intersect bool scene_intersect(const KernelGlobals *kg,
-                                          const Ray *ray,
+ccl_device_intersect bool scene_intersect(ccl_global const KernelGlobals *kg,
+                                          ccl_private const Ray *ray,
                                           const uint visibility,
-                                          Intersection *isect)
+                                          ccl_private Intersection *isect)
 {
 #ifdef __KERNEL_OPTIX__
   uint p0 = 0;
@@ -248,11 +248,11 @@ ccl_device_intersect bool scene_intersect(const KernelGlobals *kg,
 }
 
 #ifdef __BVH_LOCAL__
-ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
-                                                const Ray *ray,
-                                                LocalIntersection *local_isect,
+ccl_device_intersect bool scene_intersect_local(ccl_global const KernelGlobals *kg,
+                                                ccl_private const Ray *ray,
+                                                ccl_private LocalIntersection *local_isect,
                                                 int local_object,
-                                                uint *lcg_state,
+                                                ccl_private uint *lcg_state,
                                                 int max_hits)
 {
 #  ifdef __KERNEL_OPTIX__
@@ -360,12 +360,12 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
 #endif
 
 #ifdef __SHADOW_RECORD_ALL__
-ccl_device_intersect bool scene_intersect_shadow_all(const KernelGlobals *kg,
-                                                     const Ray *ray,
-                                                     Intersection *isect,
+ccl_device_intersect bool scene_intersect_shadow_all(ccl_global const KernelGlobals *kg,
+                                                     ccl_private const Ray *ray,
+                                                     ccl_private Intersection *isect,
                                                      uint visibility,
                                                      uint max_hits,
-                                                     uint *num_hits)
+                                                     ccl_private uint *num_hits)
 {
 #  ifdef __KERNEL_OPTIX__
   uint p0 = ((uint64_t)isect) & 0xFFFFFFFF;
@@ -445,9 +445,9 @@ ccl_device_intersect bool scene_intersect_shadow_all(const KernelGlobals *kg,
 #endif /* __SHADOW_RECORD_ALL__ */
 
 #ifdef __VOLUME__
-ccl_device_intersect bool scene_intersect_volume(const KernelGlobals *kg,
-                                                 const Ray *ray,
-                                                 Intersection *isect,
+ccl_device_intersect bool scene_intersect_volume(ccl_global const KernelGlobals *kg,
+                                                 ccl_private const Ray *ray,
+                                                 ccl_private Intersection *isect,
                                                  const uint visibility)
 {
 #  ifdef __KERNEL_OPTIX__
@@ -507,9 +507,9 @@ ccl_device_intersect bool scene_intersect_volume(const KernelGlobals *kg,
 #endif /* __VOLUME__ */
 
 #ifdef __VOLUME_RECORD_ALL__
-ccl_device_intersect uint scene_intersect_volume_all(const KernelGlobals *kg,
-                                                     const Ray *ray,
-                                                     Intersection *isect,
+ccl_device_intersect uint scene_intersect_volume_all(ccl_global const KernelGlobals *kg,
+                                                     ccl_private const Ray *ray,
+                                                     ccl_private Intersection *isect,
                                                      const uint max_hits,
                                                      const uint visibility)
 {
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 90b9f410b29..78ad4a34da9 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -36,11 +36,11 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
-                                     LocalIntersection *local_isect,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
+                                     ccl_private LocalIntersection *local_isect,
                                      int local_object,
-                                     uint *lcg_state,
+                                     ccl_private uint *lcg_state,
                                      int max_hits)
 {
   /* todo:
@@ -196,11 +196,11 @@ ccl_device_inline
   return false;
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         LocalIntersection *local_isect,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private LocalIntersection *local_isect,
                                          int local_object,
-                                         uint *lcg_state,
+                                         ccl_private uint *lcg_state,
                                          int max_hits)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index 15cd0f22213..49b37f39671 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -16,7 +16,7 @@
 
 // TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and
 // 3-vector which might be faster.
-ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(const KernelGlobals *kg,
+ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(ccl_global const KernelGlobals *kg,
                                                                 int node_addr,
                                                                 int child)
 {
@@ -28,7 +28,7 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(const KernelGlob
   return space;
 }
 
-ccl_device_forceinline int bvh_aligned_node_intersect(const KernelGlobals *kg,
+ccl_device_forceinline int bvh_aligned_node_intersect(ccl_global const KernelGlobals *kg,
                                                       const float3 P,
                                                       const float3 idir,
                                                       const float t,
@@ -76,7 +76,7 @@ ccl_device_forceinline int bvh_aligned_node_intersect(const KernelGlobals *kg,
 #endif
 }
 
-ccl_device_forceinline bool bvh_unaligned_node_intersect_child(const KernelGlobals *kg,
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child(ccl_global const KernelGlobals *kg,
                                                                const float3 P,
                                                                const float3 dir,
                                                                const float t,
@@ -102,7 +102,7 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(const KernelGloba
   return tnear <= tfar;
 }
 
-ccl_device_forceinline int bvh_unaligned_node_intersect(const KernelGlobals *kg,
+ccl_device_forceinline int bvh_unaligned_node_intersect(ccl_global const KernelGlobals *kg,
                                                         const float3 P,
                                                         const float3 dir,
                                                         const float3 idir,
@@ -134,7 +134,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(const KernelGlobals *kg,
   return mask;
 }
 
-ccl_device_forceinline int bvh_node_intersect(const KernelGlobals *kg,
+ccl_device_forceinline int bvh_node_intersect(ccl_global const KernelGlobals *kg,
                                               const float3 P,
                                               const float3 dir,
                                               const float3 idir,
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index 82c7c1a8a6c..c67c820edbc 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -36,12 +36,12 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
-                                     Intersection *isect_array,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
+                                     ccl_private Intersection *isect_array,
                                      const uint visibility,
                                      const uint max_hits,
-                                     uint *num_hits)
+                                     ccl_private uint *num_hits)
 {
   /* todo:
    * - likely and unlikely for if() statements
@@ -71,7 +71,7 @@ ccl_device_inline
   float t_world_to_instance = 1.0f;
 
   *num_hits = 0;
-  Intersection *isect = isect_array;
+  ccl_private Intersection *isect = isect_array;
 
   /* traversal loop */
   do {
@@ -284,12 +284,12 @@ ccl_device_inline
   return false;
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         Intersection *isect_array,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private Intersection *isect_array,
                                          const uint visibility,
                                          const uint max_hits,
-                                         uint *num_hits)
+                                         ccl_private uint *num_hits)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
 }
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 2feff593c10..a46c45d3529 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -31,9 +31,9 @@
  * BVH_MOTION: motion blur rendering
  */
 
-ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                                     const Ray *ray,
-                                                     Intersection *isect,
+ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                                     ccl_private const Ray *ray,
+                                                     ccl_private Intersection *isect,
                                                      const uint visibility)
 {
   /* todo:
@@ -226,9 +226,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
   return (isect->prim != PRIM_NONE);
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         Intersection *isect,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private Intersection *isect,
                                          const uint visibility)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h
index d143fe4aeab..fb546f568f3 100644
--- a/intern/cycles/kernel/bvh/bvh_util.h
+++ b/intern/cycles/kernel/bvh/bvh_util.h
@@ -88,7 +88,7 @@ ccl_device int intersections_compare(const void *a, const void *b)
 #endif
 
 #if defined(__SHADOW_RECORD_ALL__)
-ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
+ccl_device_inline void sort_intersections(ccl_private Intersection *hits, uint num_hits)
 {
   kernel_assert(num_hits > 0);
 
@@ -115,8 +115,8 @@ ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
 
 /* For subsurface scattering, only sorting a small amount of intersections
  * so bubble sort is fine for CPU and GPU. */
-ccl_device_inline void sort_intersections_and_normals(Intersection *hits,
-                                                      float3 *Ng,
+ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *hits,
+                                                      ccl_private float3 *Ng,
                                                       uint num_hits)
 {
   bool swapped;
@@ -139,8 +139,9 @@ ccl_device_inline void sort_intersections_and_normals(Intersection *hits,
 
 /* Utility to quickly get flags from an intersection. */
 
-ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *ccl_restrict kg,
-                                                         const Intersection *ccl_restrict isect)
+ccl_device_forceinline int intersection_get_shader_flags(
+    ccl_global const KernelGlobals *ccl_restrict kg,
+    ccl_private const Intersection *ccl_restrict isect)
 {
   const int prim = isect->prim;
   int shader = 0;
@@ -161,7 +162,7 @@ ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *cc
 }
 
 ccl_device_forceinline int intersection_get_shader_from_isect_prim(
-    const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type)
+    ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type)
 {
   int shader = 0;
 
@@ -180,14 +181,16 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim(
   return shader & SHADER_MASK;
 }
 
-ccl_device_forceinline int intersection_get_shader(const KernelGlobals *ccl_restrict kg,
-                                                   const Intersection *ccl_restrict isect)
+ccl_device_forceinline int intersection_get_shader(ccl_global const KernelGlobals *ccl_restrict kg,
+                                                   ccl_private const Intersection *ccl_restrict
+                                                       isect)
 {
   return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type);
 }
 
-ccl_device_forceinline int intersection_get_object_flags(const KernelGlobals *ccl_restrict kg,
-                                                         const Intersection *ccl_restrict isect)
+ccl_device_forceinline int intersection_get_object_flags(
+    ccl_global const KernelGlobals *ccl_restrict kg,
+    ccl_private const Intersection *ccl_restrict isect)
 {
   return kernel_tex_fetch(__object_flag, isect->object);
 }
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index 0411d9c522d..d3bfce2d96b 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -35,9 +35,9 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
-                                     Intersection *isect,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
+                                     ccl_private Intersection *isect,
                                      const uint visibility)
 {
   /* todo:
@@ -221,9 +221,9 @@ ccl_device_inline
   return (isect->prim != PRIM_NONE);
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         Intersection *isect,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private Intersection *isect,
                                          const uint visibility)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index 4874270f15d..f0fe95924cf 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -35,8 +35,8 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    uint BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
+    uint BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
                                      Intersection *isect_array,
                                      const uint max_hits,
                                      const uint visibility)
@@ -289,8 +289,8 @@ ccl_device_inline
   return num_hits;
 }
 
-ccl_device_inline uint BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
+ccl_device_inline uint BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
                                          Intersection *isect_array,
                                          const uint max_hits,
                                          const uint visibility)
diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
index 72a8c2ba090..211eedbddbd 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -18,14 +18,17 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType type, float3 weight)
+ccl_device ccl_private ShaderClosure *closure_alloc(ccl_private ShaderData *sd,
+                                                    int size,
+                                                    ClosureType type,
+                                                    float3 weight)
 {
   kernel_assert(size <= sizeof(ShaderClosure));
 
   if (sd->num_closure_left == 0)
     return NULL;
 
-  ShaderClosure *sc = &sd->closure[sd->num_closure];
+  ccl_private ShaderClosure *sc = &sd->closure[sd->num_closure];
 
   sc->type = type;
   sc->weight = weight;
@@ -36,7 +39,7 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType ty
   return sc;
 }
 
-ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
+ccl_device ccl_private void *closure_alloc_extra(ccl_private ShaderData *sd, int size)
 {
   /* Allocate extra space for closure that need more parameters. We allocate
    * in chunks of sizeof(ShaderClosure) starting from the end of the closure
@@ -54,10 +57,12 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
   }
 
   sd->num_closure_left -= num_extra;
-  return (ccl_addr_space void *)(sd->closure + sd->num_closure + sd->num_closure_left);
+  return (ccl_private void *)(sd->closure + sd->num_closure + sd->num_closure_left);
 }
 
-ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
+ccl_device_inline ccl_private ShaderClosure *bsdf_alloc(ccl_private ShaderData *sd,
+                                                        int size,
+                                                        float3 weight)
 {
   kernel_assert(isfinite3_safe(weight));
 
@@ -66,7 +71,7 @@ ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 wei
   /* Use comparison this way to help dealing with non-finite weight: if the average is not finite
    * we will not allocate new closure. */
   if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) {
-    ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+    ccl_private ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
     if (sc == NULL) {
       return NULL;
     }
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index bb80b9636bb..e115bef3170 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -41,32 +41,32 @@ CCL_NAMESPACE_BEGIN
 
 /* Returns the square of the roughness of the closure if it has roughness,
  * 0 for singular closures and 1 otherwise. */
-ccl_device_inline float bsdf_get_specular_roughness_squared(const ShaderClosure *sc)
+ccl_device_inline float bsdf_get_specular_roughness_squared(ccl_private const ShaderClosure *sc)
 {
   if (CLOSURE_IS_BSDF_SINGULAR(sc->type)) {
     return 0.0f;
   }
 
   if (CLOSURE_IS_BSDF_MICROFACET(sc->type)) {
-    MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+    ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
     return bsdf->alpha_x * bsdf->alpha_y;
   }
 
   return 1.0f;
 }
 
-ccl_device_inline float bsdf_get_roughness_squared(const ShaderClosure *sc)
+ccl_device_inline float bsdf_get_roughness_squared(ccl_private const ShaderClosure *sc)
 {
   /* This version includes diffuse, mainly for baking Principled BSDF
    * where specular and metallic zero otherwise does not bake the
    * specified roughness parameter. */
   if (sc->type == CLOSURE_BSDF_OREN_NAYAR_ID) {
-    OrenNayarBsdf *bsdf = (OrenNayarBsdf *)sc;
+    ccl_private OrenNayarBsdf *bsdf = (ccl_private OrenNayarBsdf *)sc;
     return sqr(sqr(bsdf->roughness));
   }
 
   if (sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) {
-    PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)sc;
+    ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)sc;
     return sqr(sqr(bsdf->roughness));
   }
 
@@ -111,15 +111,15 @@ ccl_device_inline float shift_cos_in(float cos_in, const float frequency_multipl
   return val;
 }
 
-ccl_device_inline int bsdf_sample(const KernelGlobals *kg,
-                                  ShaderData *sd,
-                                  const ShaderClosure *sc,
+ccl_device_inline int bsdf_sample(ccl_global const KernelGlobals *kg,
+                                  ccl_private ShaderData *sd,
+                                  ccl_private const ShaderClosure *sc,
                                   float randu,
                                   float randv,
-                                  float3 *eval,
-                                  float3 *omega_in,
-                                  differential3 *domega_in,
-                                  float *pdf)
+                                  ccl_private float3 *eval,
+                                  ccl_private float3 *omega_in,
+                                  ccl_private differential3 *domega_in,
+                                  ccl_private float *pdf)
 {
   /* For curves use the smooth normal, particularly for ribbons the geometric
    * normal gives too much darkening otherwise. */
@@ -467,12 +467,12 @@ ccl_device
 ccl_device_inline
 #endif
     float3
-    bsdf_eval(const KernelGlobals *kg,
-              ShaderData *sd,
-              const ShaderClosure *sc,
+    bsdf_eval(ccl_global const KernelGlobals *kg,
+              ccl_private ShaderData *sd,
+              ccl_private const ShaderClosure *sc,
               const float3 omega_in,
               const bool is_transmission,
-              float *pdf)
+              ccl_private float *pdf)
 {
   float3 eval = zero_float3();
 
@@ -652,7 +652,9 @@ ccl_device_inline
   return eval;
 }
 
-ccl_device void bsdf_blur(const KernelGlobals *kg, ShaderClosure *sc, float roughness)
+ccl_device void bsdf_blur(ccl_global const KernelGlobals *kg,
+                          ccl_private ShaderClosure *sc,
+                          float roughness)
 {
   /* TODO: do we want to blur volume closures? */
 #ifdef __SVM__
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index be6383e521a..6cd8739ce39 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -30,7 +30,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device int bsdf_ashikhmin_shirley_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_ashikhmin_shirley_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
   bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
@@ -39,9 +39,9 @@ ccl_device int bsdf_ashikhmin_shirley_setup(MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device void bsdf_ashikhmin_shirley_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_ashikhmin_shirley_blur(ccl_private ShaderClosure *sc, float roughness)
 {
-  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+  ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
 
   bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
   bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
@@ -52,12 +52,13 @@ ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float rough
   return 2.0f / (roughness * roughness) - 2.0f;
 }
 
-ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc,
-                                                                  const float3 I,
-                                                                  const float3 omega_in,
-                                                                  float *pdf)
+ccl_device_forceinline float3
+bsdf_ashikhmin_shirley_eval_reflect(ccl_private const ShaderClosure *sc,
+                                    const float3 I,
+                                    const float3 omega_in,
+                                    ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float3 N = bsdf->N;
 
   float NdotI = dot(N, I);        /* in Cycles/OSL convention I is omega_out */
@@ -119,16 +120,20 @@ ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderCl
   return make_float3(out, out, out);
 }
 
-ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(ccl_private const ShaderClosure *sc,
                                                        const float3 I,
                                                        const float3 omega_in,
-                                                       float *pdf)
+                                                       ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(
-    float n_x, float n_y, float randu, float randv, float *phi, float *cos_theta)
+ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x,
+                                                                    float n_y,
+                                                                    float randu,
+                                                                    float randv,
+                                                                    ccl_private float *phi,
+                                                                    ccl_private float *cos_theta)
 {
   *phi = atanf(sqrtf((n_x + 1.0f) / (n_y + 1.0f)) * tanf(M_PI_2_F * randu));
   float cos_phi = cosf(*phi);
@@ -136,20 +141,20 @@ ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(
   *cos_theta = powf(randv, 1.0f / (n_x * cos_phi * cos_phi + n_y * sin_phi * sin_phi + 1.0f));
 }
 
-ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc,
+ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc,
                                              float3 Ng,
                                              float3 I,
                                              float3 dIdx,
                                              float3 dIdy,
                                              float randu,
                                              float randv,
-                                             float3 *eval,
-                                             float3 *omega_in,
-                                             float3 *domega_in_dx,
-                                             float3 *domega_in_dy,
-                                             float *pdf)
+                                             ccl_private float3 *eval,
+                                             ccl_private float3 *omega_in,
+                                             ccl_private float3 *domega_in_dx,
+                                             ccl_private float3 *domega_in_dy,
+                                             ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float3 N = bsdf->N;
   int label = LABEL_REFLECT | LABEL_GLOSSY;
 
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
index f51027f5701..c00890be54c 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
@@ -36,7 +36,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct VelvetBsdf {
+typedef struct VelvetBsdf {
   SHADER_CLOSURE_BASE;
 
   float sigma;
@@ -45,7 +45,7 @@ typedef ccl_addr_space struct VelvetBsdf {
 
 static_assert(sizeof(ShaderClosure) >= sizeof(VelvetBsdf), "VelvetBsdf is too large!");
 
-ccl_device int bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf)
+ccl_device int bsdf_ashikhmin_velvet_setup(ccl_private VelvetBsdf *bsdf)
 {
   float sigma = fmaxf(bsdf->sigma, 0.01f);
   bsdf->invsigma2 = 1.0f / (sigma * sigma);
@@ -55,12 +55,12 @@ ccl_device int bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(ccl_private const ShaderClosure *sc,
                                                      const float3 I,
                                                      const float3 omega_in,
-                                                     float *pdf)
+                                                     ccl_private float *pdf)
 {
-  const VelvetBsdf *bsdf = (const VelvetBsdf *)sc;
+  ccl_private const VelvetBsdf *bsdf = (ccl_private const VelvetBsdf *)sc;
   float m_invsigma2 = bsdf->invsigma2;
   float3 N = bsdf->N;
 
@@ -97,28 +97,28 @@ ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(ccl_private const ShaderClosure *sc,
                                                       const float3 I,
                                                       const float3 omega_in,
-                                                      float *pdf)
+                                                      ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc,
+ccl_device int bsdf_ashikhmin_velvet_sample(ccl_private const ShaderClosure *sc,
                                             float3 Ng,
                                             float3 I,
                                             float3 dIdx,
                                             float3 dIdy,
                                             float randu,
                                             float randv,
-                                            float3 *eval,
-                                            float3 *omega_in,
-                                            float3 *domega_in_dx,
-                                            float3 *domega_in_dy,
-                                            float *pdf)
+                                            ccl_private float3 *eval,
+                                            ccl_private float3 *omega_in,
+                                            ccl_private float3 *domega_in_dx,
+                                            ccl_private float3 *domega_in_dy,
+                                            ccl_private float *pdf)
 {
-  const VelvetBsdf *bsdf = (const VelvetBsdf *)sc;
+  ccl_private const VelvetBsdf *bsdf = (ccl_private const VelvetBsdf *)sc;
   float m_invsigma2 = bsdf->invsigma2;
   float3 N = bsdf->N;
 
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h
index 1555aa30304..16c9b428004 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse.h
@@ -34,7 +34,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct DiffuseBsdf {
+typedef struct DiffuseBsdf {
   SHADER_CLOSURE_BASE;
 } DiffuseBsdf;
 
@@ -42,18 +42,18 @@ static_assert(sizeof(ShaderClosure) >= sizeof(DiffuseBsdf), "DiffuseBsdf is too
 
 /* DIFFUSE */
 
-ccl_device int bsdf_diffuse_setup(DiffuseBsdf *bsdf)
+ccl_device int bsdf_diffuse_setup(ccl_private DiffuseBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_DIFFUSE_ID;
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_eval_reflect(ccl_private const ShaderClosure *sc,
                                             const float3 I,
                                             const float3 omega_in,
-                                            float *pdf)
+                                            ccl_private float *pdf)
 {
-  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
   float3 N = bsdf->N;
 
   float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
@@ -61,28 +61,28 @@ ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc,
   return make_float3(cos_pi, cos_pi, cos_pi);
 }
 
-ccl_device float3 bsdf_diffuse_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_eval_transmit(ccl_private const ShaderClosure *sc,
                                              const float3 I,
                                              const float3 omega_in,
-                                             float *pdf)
+                                             ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc,
+ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc,
                                    float3 Ng,
                                    float3 I,
                                    float3 dIdx,
                                    float3 dIdy,
                                    float randu,
                                    float randv,
-                                   float3 *eval,
-                                   float3 *omega_in,
-                                   float3 *domega_in_dx,
-                                   float3 *domega_in_dy,
-                                   float *pdf)
+                                   ccl_private float3 *eval,
+                                   ccl_private float3 *omega_in,
+                                   ccl_private float3 *domega_in_dx,
+                                   ccl_private float3 *domega_in_dy,
+                                   ccl_private float *pdf)
 {
-  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
   float3 N = bsdf->N;
 
   // distribution over the hemisphere
@@ -104,26 +104,26 @@ ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc,
 
 /* TRANSLUCENT */
 
-ccl_device int bsdf_translucent_setup(DiffuseBsdf *bsdf)
+ccl_device int bsdf_translucent_setup(ccl_private DiffuseBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID;
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_translucent_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_translucent_eval_reflect(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_translucent_eval_transmit(ccl_private const ShaderClosure *sc,
                                                  const float3 I,
                                                  const float3 omega_in,
-                                                 float *pdf)
+                                                 ccl_private float *pdf)
 {
-  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
   float3 N = bsdf->N;
 
   float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F;
@@ -131,20 +131,20 @@ ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc,
   return make_float3(cos_pi, cos_pi, cos_pi);
 }
 
-ccl_device int bsdf_translucent_sample(const ShaderClosure *sc,
+ccl_device int bsdf_translucent_sample(ccl_private const ShaderClosure *sc,
                                        float3 Ng,
                                        float3 I,
                                        float3 dIdx,
                                        float3 dIdy,
                                        float randu,
                                        float randv,
-                                       float3 *eval,
-                                       float3 *omega_in,
-                                       float3 *domega_in_dx,
-                                       float3 *domega_in_dy,
-                                       float *pdf)
+                                       ccl_private float3 *eval,
+                                       ccl_private float3 *omega_in,
+                                       ccl_private float3 *domega_in_dx,
+                                       ccl_private float3 *domega_in_dy,
+                                       ccl_private float *pdf)
 {
-  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
   float3 N = bsdf->N;
 
   // we are viewing the surface from the right side - send a ray out with cosine
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
index b06dd196b9e..8bff7709a32 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
@@ -36,10 +36,10 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __OSL__
 
-typedef ccl_addr_space struct DiffuseRampBsdf {
+typedef struct DiffuseRampBsdf {
   SHADER_CLOSURE_BASE;
 
-  float3 *colors;
+  ccl_private float3 *colors;
 } DiffuseRampBsdf;
 
 static_assert(sizeof(ShaderClosure) >= sizeof(DiffuseRampBsdf), "DiffuseRampBsdf is too large!");
@@ -64,14 +64,14 @@ ccl_device int bsdf_diffuse_ramp_setup(DiffuseRampBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device void bsdf_diffuse_ramp_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_diffuse_ramp_blur(ccl_private ShaderClosure *sc, float roughness)
 {
 }
 
-ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_ramp_eval_reflect(ccl_private const ShaderClosure *sc,
                                                  const float3 I,
                                                  const float3 omega_in,
-                                                 float *pdf)
+                                                 ccl_private float *pdf)
 {
-  const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
+  ccl_private const DiffuseRampBsdf *bsdf = (ccl_private const DiffuseRampBsdf *)sc;
   float3 N = bsdf->N;
@@ -81,26 +81,26 @@ ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc,
   return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F;
 }
 
-ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_ramp_eval_transmit(ccl_private const ShaderClosure *sc,
                                                   const float3 I,
                                                   const float3 omega_in,
-                                                  float *pdf)
+                                                  ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc,
+ccl_device int bsdf_diffuse_ramp_sample(ccl_private const ShaderClosure *sc,
                                         float3 Ng,
                                         float3 I,
                                         float3 dIdx,
                                         float3 dIdy,
                                         float randu,
                                         float randv,
-                                        float3 *eval,
-                                        float3 *omega_in,
-                                        float3 *domega_in_dx,
-                                        float3 *domega_in_dy,
-                                        float *pdf)
+                                        ccl_private float3 *eval,
+                                        ccl_private float3 *omega_in,
+                                        ccl_private float3 *domega_in_dx,
+                                        ccl_private float3 *domega_in_dy,
+                                        ccl_private float *pdf)
 {
-  const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
+  ccl_private const DiffuseRampBsdf *bsdf = (ccl_private const DiffuseRampBsdf *)sc;
   float3 N = bsdf->N;
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index f56f78aa1f0..449a314a90e 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -34,7 +34,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct HairBsdf {
+typedef struct HairBsdf {
   SHADER_CLOSURE_BASE;
 
   float3 T;
@@ -45,7 +45,7 @@ typedef ccl_addr_space struct HairBsdf {
 
 static_assert(sizeof(ShaderClosure) >= sizeof(HairBsdf), "HairBsdf is too large!");
 
-ccl_device int bsdf_hair_reflection_setup(HairBsdf *bsdf)
+ccl_device int bsdf_hair_reflection_setup(ccl_private HairBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_HAIR_REFLECTION_ID;
   bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
@@ -53,7 +53,7 @@ ccl_device int bsdf_hair_reflection_setup(HairBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_hair_transmission_setup(HairBsdf *bsdf)
+ccl_device int bsdf_hair_transmission_setup(ccl_private HairBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
   bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
@@ -61,12 +61,12 @@ ccl_device int bsdf_hair_transmission_setup(HairBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_hair_reflection_eval_reflect(ccl_private const ShaderClosure *sc,
                                                     const float3 I,
                                                     const float3 omega_in,
-                                                    float *pdf)
+                                                    ccl_private float *pdf)
 {
-  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
   float offset = bsdf->offset;
   float3 Tg = bsdf->T;
   float roughness1 = bsdf->roughness1;
@@ -108,28 +108,28 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc,
   return make_float3(*pdf, *pdf, *pdf);
 }
 
-ccl_device float3 bsdf_hair_transmission_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_hair_transmission_eval_reflect(ccl_private const ShaderClosure *sc,
                                                       const float3 I,
                                                       const float3 omega_in,
-                                                      float *pdf)
+                                                      ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_hair_reflection_eval_transmit(ccl_private const ShaderClosure *sc,
                                                      const float3 I,
                                                      const float3 omega_in,
-                                                     float *pdf)
+                                                     ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_hair_transmission_eval_transmit(ccl_private const ShaderClosure *sc,
                                                        const float3 I,
                                                        const float3 omega_in,
-                                                       float *pdf)
+                                                       ccl_private float *pdf)
 {
-  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
   float offset = bsdf->offset;
   float3 Tg = bsdf->T;
   float roughness1 = bsdf->roughness1;
@@ -170,20 +170,20 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc,
   return make_float3(*pdf, *pdf, *pdf);
 }
 
-ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc,
+ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
                                            float3 Ng,
                                            float3 I,
                                            float3 dIdx,
                                            float3 dIdy,
                                            float randu,
                                            float randv,
-                                           float3 *eval,
-                                           float3 *omega_in,
-                                           float3 *domega_in_dx,
-                                           float3 *domega_in_dy,
-                                           float *pdf)
+                                           ccl_private float3 *eval,
+                                           ccl_private float3 *omega_in,
+                                           ccl_private float3 *domega_in_dx,
+                                           ccl_private float3 *domega_in_dy,
+                                           ccl_private float *pdf)
 {
-  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
   float offset = bsdf->offset;
   float3 Tg = bsdf->T;
   float roughness1 = bsdf->roughness1;
@@ -231,20 +231,20 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc,
   return LABEL_REFLECT | LABEL_GLOSSY;
 }
 
-ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc,
+ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc,
                                              float3 Ng,
                                              float3 I,
                                              float3 dIdx,
                                              float3 dIdy,
                                              float randu,
                                              float randv,
-                                             float3 *eval,
-                                             float3 *omega_in,
-                                             float3 *domega_in_dx,
-                                             float3 *domega_in_dy,
-                                             float *pdf)
+                                             ccl_private float3 *eval,
+                                             ccl_private float3 *omega_in,
+                                             ccl_private float3 *domega_in_dx,
+                                             ccl_private float3 *domega_in_dy,
+                                             ccl_private float *pdf)
 {
-  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
   float offset = bsdf->offset;
   float3 Tg = bsdf->T;
   float roughness1 = bsdf->roughness1;
diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h
index bfe56e5ab0e..17097b0739b 100644
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -24,12 +24,12 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct PrincipledHairExtra {
+typedef struct PrincipledHairExtra {
   /* Geometry data. */
   float4 geom;
 } PrincipledHairExtra;
 
-typedef ccl_addr_space struct PrincipledHairBSDF {
+typedef struct PrincipledHairBSDF {
   SHADER_CLOSURE_BASE;
 
   /* Absorption coefficient. */
@@ -46,7 +46,7 @@ typedef ccl_addr_space struct PrincipledHairBSDF {
   float m0_roughness;
 
   /* Extra closure. */
-  PrincipledHairExtra *extra;
+  ccl_private PrincipledHairExtra *extra;
 } PrincipledHairBSDF;
 
 static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairBSDF),
@@ -180,14 +180,15 @@ ccl_device_inline float longitudinal_scattering(
 }
 
 /* Combine the three values using their luminances. */
-ccl_device_inline float4 combine_with_energy(const KernelGlobals *kg, float3 c)
+ccl_device_inline float4 combine_with_energy(ccl_global const KernelGlobals *kg, float3 c)
 {
   return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c));
 }
 
 #ifdef __HAIR__
 /* Set up the hair closure. */
-ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bsdf)
+ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd,
+                                          ccl_private PrincipledHairBSDF *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_HAIR_PRINCIPLED_ID;
   bsdf->v = clamp(bsdf->v, 0.001f, 1.0f);
@@ -228,7 +229,10 @@ ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bs
 #endif /* __HAIR__ */
 
 /* Given the Fresnel term and transmittance, generate the attenuation terms for each bounce. */
-ccl_device_inline void hair_attenuation(const KernelGlobals *kg, float f, float3 T, float4 *Ap)
+ccl_device_inline void hair_attenuation(ccl_global const KernelGlobals *kg,
+                                        float f,
+                                        float3 T,
+                                        ccl_private float4 *Ap)
 {
   /* Primary specular (R). */
   Ap[0] = make_float4(f, f, f, f);
@@ -259,7 +263,7 @@ ccl_device_inline void hair_attenuation(const KernelGlobals *kg, float f, float3
 ccl_device_inline void hair_alpha_angles(float sin_theta_i,
                                          float cos_theta_i,
                                          float alpha,
-                                         float *angles)
+                                         ccl_private float *angles)
 {
   float sin_1alpha = sinf(alpha);
   float cos_1alpha = cos_from_sin(sin_1alpha);
@@ -277,15 +281,15 @@ ccl_device_inline void hair_alpha_angles(float sin_theta_i,
 }
 
 /* Evaluation function for our shader. */
-ccl_device float3 bsdf_principled_hair_eval(const KernelGlobals *kg,
-                                            const ShaderData *sd,
-                                            const ShaderClosure *sc,
+ccl_device float3 bsdf_principled_hair_eval(ccl_global const KernelGlobals *kg,
+                                            ccl_private const ShaderData *sd,
+                                            ccl_private const ShaderClosure *sc,
                                             const float3 omega_in,
-                                            float *pdf)
+                                            ccl_private float *pdf)
 {
   kernel_assert(isfinite3_safe(sd->P) && isfinite_safe(sd->ray_length));
 
-  const PrincipledHairBSDF *bsdf = (const PrincipledHairBSDF *)sc;
+  ccl_private const PrincipledHairBSDF *bsdf = (ccl_private const PrincipledHairBSDF *)sc;
   float3 Y = float4_to_float3(bsdf->extra->geom);
 
   float3 X = safe_normalize(sd->dPdu);
@@ -355,18 +359,18 @@ ccl_device float3 bsdf_principled_hair_eval(const KernelGlobals *kg,
 }
 
 /* Sampling function for the hair shader. */
-ccl_device int bsdf_principled_hair_sample(const KernelGlobals *kg,
-                                           const ShaderClosure *sc,
-                                           ShaderData *sd,
+ccl_device int bsdf_principled_hair_sample(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderClosure *sc,
+                                           ccl_private ShaderData *sd,
                                            float randu,
                                            float randv,
-                                           float3 *eval,
-                                           float3 *omega_in,
-                                           float3 *domega_in_dx,
-                                           float3 *domega_in_dy,
-                                           float *pdf)
+                                           ccl_private float3 *eval,
+                                           ccl_private float3 *omega_in,
+                                           ccl_private float3 *domega_in_dx,
+                                           ccl_private float3 *domega_in_dy,
+                                           ccl_private float *pdf)
 {
-  PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
+  ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
 
   float3 Y = float4_to_float3(bsdf->extra->geom);
 
@@ -378,8 +382,8 @@ ccl_device int bsdf_principled_hair_sample(const KernelGlobals *kg,
 
   float2 u[2];
   u[0] = make_float2(randu, randv);
-  u[1].x = lcg_step_float_addrspace(&sd->lcg_state);
-  u[1].y = lcg_step_float_addrspace(&sd->lcg_state);
+  u[1].x = lcg_step_float(&sd->lcg_state);
+  u[1].y = lcg_step_float(&sd->lcg_state);
 
   float sin_theta_o = wo.x;
   float cos_theta_o = cos_from_sin(sin_theta_o);
@@ -482,9 +486,9 @@ ccl_device int bsdf_principled_hair_sample(const KernelGlobals *kg,
 }
 
 /* Implements Filter Glossy by capping the effective roughness. */
-ccl_device void bsdf_principled_hair_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_principled_hair_blur(ccl_private ShaderClosure *sc, float roughness)
 {
-  PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
+  ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
 
   bsdf->v = fmaxf(roughness, bsdf->v);
   bsdf->s = fmaxf(roughness, bsdf->s);
@@ -500,9 +504,9 @@ ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale(
   return (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f;
 }
 
-ccl_device float3 bsdf_principled_hair_albedo(const ShaderClosure *sc)
+ccl_device float3 bsdf_principled_hair_albedo(ccl_private const ShaderClosure *sc)
 {
-  PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
+  ccl_private const PrincipledHairBSDF *bsdf = (ccl_private const PrincipledHairBSDF *)sc;
   return exp3(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
 }
 
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 227cb448b47..41c35867a6b 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -37,17 +37,17 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct MicrofacetExtra {
+typedef struct MicrofacetExtra {
   float3 color, cspec0;
   float3 fresnel_color;
   float clearcoat;
 } MicrofacetExtra;
 
-typedef ccl_addr_space struct MicrofacetBsdf {
+typedef struct MicrofacetBsdf {
   SHADER_CLOSURE_BASE;
 
   float alpha_x, alpha_y, ior;
-  MicrofacetExtra *extra;
+  ccl_private MicrofacetExtra *extra;
   float3 T;
 } MicrofacetBsdf;
 
@@ -55,14 +55,14 @@ static_assert(sizeof(ShaderClosure) >= sizeof(MicrofacetBsdf), "MicrofacetBsdf i
 
 /* Beckmann and GGX microfacet importance sampling. */
 
-ccl_device_inline void microfacet_beckmann_sample_slopes(const KernelGlobals *kg,
+ccl_device_inline void microfacet_beckmann_sample_slopes(ccl_global const KernelGlobals *kg,
                                                          const float cos_theta_i,
                                                          const float sin_theta_i,
                                                          float randu,
                                                          float randv,
-                                                         float *slope_x,
-                                                         float *slope_y,
-                                                         float *G1i)
+                                                         ccl_private float *slope_x,
+                                                         ccl_private float *slope_y,
+                                                         ccl_private float *G1i)
 {
   /* special case (normal incidence) */
   if (cos_theta_i >= 0.99999f) {
@@ -146,9 +146,9 @@ ccl_device_inline void microfacet_ggx_sample_slopes(const float cos_theta_i,
                                                     const float sin_theta_i,
                                                     float randu,
                                                     float randv,
-                                                    float *slope_x,
-                                                    float *slope_y,
-                                                    float *G1i)
+                                                    ccl_private float *slope_x,
+                                                    ccl_private float *slope_y,
+                                                    ccl_private float *G1i)
 {
   /* special case (normal incidence) */
   if (cos_theta_i >= 0.99999f) {
@@ -195,14 +195,14 @@ ccl_device_inline void microfacet_ggx_sample_slopes(const float cos_theta_i,
   *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x) * (*slope_x));
 }
 
-ccl_device_forceinline float3 microfacet_sample_stretched(const KernelGlobals *kg,
+ccl_device_forceinline float3 microfacet_sample_stretched(ccl_global const KernelGlobals *kg,
                                                           const float3 omega_i,
                                                           const float alpha_x,
                                                           const float alpha_y,
                                                           const float randu,
                                                           const float randv,
                                                           bool beckmann,
-                                                          float *G1i)
+                                                          ccl_private float *G1i)
 {
   /* 1. stretch omega_i */
   float3 omega_i_ = make_float3(alpha_x * omega_i.x, alpha_y * omega_i.y, omega_i.z);
@@ -254,7 +254,9 @@ ccl_device_forceinline float3 microfacet_sample_stretched(const KernelGlobals *k
  *
  * Else it is simply white
  */
-ccl_device_forceinline float3 reflection_color(const MicrofacetBsdf *bsdf, float3 L, float3 H)
+ccl_device_forceinline float3 reflection_color(ccl_private const MicrofacetBsdf *bsdf,
+                                               float3 L,
+                                               float3 H)
 {
   float3 F = make_float3(1.0f, 1.0f, 1.0f);
   bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID ||
@@ -277,8 +279,8 @@ ccl_device_forceinline float D_GTR1(float NdotH, float alpha)
   return (alpha2 - 1.0f) / (M_PI_F * logf(alpha2) * t);
 }
 
-ccl_device_forceinline void bsdf_microfacet_fresnel_color(const ShaderData *sd,
-                                                          MicrofacetBsdf *bsdf)
+ccl_device_forceinline void bsdf_microfacet_fresnel_color(ccl_private const ShaderData *sd,
+                                                          ccl_private MicrofacetBsdf *bsdf)
 {
   kernel_assert(CLOSURE_IS_BSDF_MICROFACET_FRESNEL(bsdf->type));
 
@@ -306,7 +308,7 @@ ccl_device_forceinline void bsdf_microfacet_fresnel_color(const ShaderData *sd,
  * Anisotropy is only supported for reflection currently, but adding it for
  * transmission is just a matter of copying code from reflection if needed. */
 
-ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->extra = NULL;
 
@@ -319,14 +321,15 @@ ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf)
 }
 
 /* Required to maintain OSL interface. */
-ccl_device int bsdf_microfacet_ggx_isotropic_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_isotropic_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_y = bsdf->alpha_x;
 
   return bsdf_microfacet_ggx_setup(bsdf);
 }
 
-ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+ccl_device int bsdf_microfacet_ggx_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,
+                                                 ccl_private const ShaderData *sd)
 {
   bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0);
 
@@ -340,7 +343,8 @@ ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const Sha
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+ccl_device int bsdf_microfacet_ggx_clearcoat_setup(ccl_private MicrofacetBsdf *bsdf,
+                                                   ccl_private const ShaderData *sd)
 {
   bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0);
 
@@ -354,7 +358,7 @@ ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const S
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_microfacet_ggx_refraction_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->extra = NULL;
 
@@ -366,20 +370,20 @@ ccl_device int bsdf_microfacet_ggx_refraction_setup(MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device void bsdf_microfacet_ggx_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_microfacet_ggx_blur(ccl_private ShaderClosure *sc, float roughness)
 {
-  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+  ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
 
   bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
   bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
 }
 
-ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_ggx_eval_reflect(ccl_private const ShaderClosure *sc,
                                                    const float3 I,
                                                    const float3 omega_in,
-                                                   float *pdf)
+                                                   ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
@@ -487,12 +491,12 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_ggx_eval_transmit(ccl_private const ShaderClosure *sc,
                                                     const float3 I,
                                                     const float3 omega_in,
-                                                    float *pdf)
+                                                    ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   float m_eta = bsdf->ior;
@@ -545,21 +549,21 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc,
   return make_float3(out, out, out);
 }
 
-ccl_device int bsdf_microfacet_ggx_sample(const KernelGlobals *kg,
-                                          const ShaderClosure *sc,
+ccl_device int bsdf_microfacet_ggx_sample(ccl_global const KernelGlobals *kg,
+                                          ccl_private const ShaderClosure *sc,
                                           float3 Ng,
                                           float3 I,
                                           float3 dIdx,
                                           float3 dIdy,
                                           float randu,
                                           float randv,
-                                          float3 *eval,
-                                          float3 *omega_in,
-                                          float3 *domega_in_dx,
-                                          float3 *domega_in_dy,
-                                          float *pdf)
+                                          ccl_private float3 *eval,
+                                          ccl_private float3 *omega_in,
+                                          ccl_private float3 *domega_in_dx,
+                                          ccl_private float3 *domega_in_dy,
+                                          ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
@@ -774,7 +778,7 @@ ccl_device int bsdf_microfacet_ggx_sample(const KernelGlobals *kg,
  * Microfacet Models for Refraction through Rough Surfaces
  * B. Walter, S. R. Marschner, H. Li, K. E. Torrance, EGSR 2007 */
 
-ccl_device int bsdf_microfacet_beckmann_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_beckmann_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_x = saturate(bsdf->alpha_x);
   bsdf->alpha_y = saturate(bsdf->alpha_y);
@@ -784,14 +788,14 @@ ccl_device int bsdf_microfacet_beckmann_setup(MicrofacetBsdf *bsdf)
 }
 
 /* Required to maintain OSL interface. */
-ccl_device int bsdf_microfacet_beckmann_isotropic_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_beckmann_isotropic_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_y = bsdf->alpha_x;
 
   return bsdf_microfacet_beckmann_setup(bsdf);
 }
 
-ccl_device int bsdf_microfacet_beckmann_refraction_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_beckmann_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_x = saturate(bsdf->alpha_x);
   bsdf->alpha_y = bsdf->alpha_x;
@@ -800,9 +804,9 @@ ccl_device int bsdf_microfacet_beckmann_refraction_setup(MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device void bsdf_microfacet_beckmann_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_microfacet_beckmann_blur(ccl_private ShaderClosure *sc, float roughness)
 {
-  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+  ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
 
   bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
   bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
@@ -839,12 +843,12 @@ ccl_device_inline float bsdf_beckmann_aniso_G1(
   return ((2.181f * a + 3.535f) * a) / ((2.577f * a + 2.276f) * a + 1.0f);
 }
 
-ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(ccl_private const ShaderClosure *sc,
                                                         const float3 I,
                                                         const float3 omega_in,
-                                                        float *pdf)
+                                                        ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
@@ -918,12 +922,12 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(ccl_private const ShaderClosure *sc,
                                                          const float3 I,
                                                          const float3 omega_in,
-                                                         float *pdf)
+                                                         ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   float m_eta = bsdf->ior;
@@ -973,21 +977,21 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc
   return make_float3(out, out, out);
 }
 
-ccl_device int bsdf_microfacet_beckmann_sample(const KernelGlobals *kg,
-                                               const ShaderClosure *sc,
+ccl_device int bsdf_microfacet_beckmann_sample(ccl_global const KernelGlobals *kg,
+                                               ccl_private const ShaderClosure *sc,
                                                float3 Ng,
                                                float3 I,
                                                float3 dIdx,
                                                float3 dIdy,
                                                float randu,
                                                float randv,
-                                               float3 *eval,
-                                               float3 *omega_in,
-                                               float3 *domega_in_dx,
-                                               float3 *domega_in_dy,
-                                               float *pdf)
+                                               ccl_private float3 *eval,
+                                               ccl_private float3 *omega_in,
+                                               ccl_private float3 *domega_in_dx,
+                                               ccl_private float3 *domega_in_dy,
+                                               ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
index 68d5071dbce..6ee1139ddbb 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -105,7 +105,7 @@ ccl_device_forceinline float3 mf_sample_vndf(const float3 wi,
 
 /* Phase function for reflective materials. */
 ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi,
-                                                     float3 *weight,
+                                                     ccl_private float3 *weight,
                                                      const float3 wm)
 {
   return -wi + 2.0f * wm * dot(wi, wm);
@@ -140,8 +140,11 @@ ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w,
 
 /* Phase function for dielectric transmissive materials, including both reflection and refraction
  * according to the dielectric fresnel term. */
-ccl_device_forceinline float3 mf_sample_phase_glass(
-    const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
+ccl_device_forceinline float3 mf_sample_phase_glass(const float3 wi,
+                                                    const float eta,
+                                                    const float3 wm,
+                                                    const float randV,
+                                                    ccl_private bool *outside)
 {
   float cosI = dot(wi, wm);
   float f = fresnel_dielectric_cos(cosI, eta);
@@ -234,8 +237,12 @@ ccl_device_forceinline float mf_G1(const float3 w, const float C1, const float l
 
 /* Sampling from the visible height distribution (based on page 17 of the supplemental
  * implementation). */
-ccl_device_forceinline bool mf_sample_height(
-    const float3 w, float *h, float *C1, float *G1, float *lambda, const float U)
+ccl_device_forceinline bool mf_sample_height(const float3 w,
+                                             ccl_private float *h,
+                                             ccl_private float *C1,
+                                             ccl_private float *G1,
+                                             ccl_private float *lambda,
+                                             const float U)
 {
   if (w.z > 0.9999f)
     return false;
@@ -364,9 +371,9 @@ ccl_device_forceinline float mf_glass_pdf(const float3 wi,
 #define MF_MULTI_GLOSSY
 #include "kernel/closure/bsdf_microfacet_multi_impl.h"
 
-ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_microfacet_multi_ggx_blur(ccl_private ShaderClosure *sc, float roughness)
 {
-  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+  ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
 
   bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
   bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
@@ -376,7 +383,7 @@ ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughnes
 
 /* Multiscattering GGX Glossy closure */
 
-ccl_device int bsdf_microfacet_multi_ggx_common_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_common_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
   bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
@@ -386,7 +393,7 @@ ccl_device int bsdf_microfacet_multi_ggx_common_setup(MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   if (is_zero(bsdf->T))
     bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
@@ -396,7 +403,8 @@ ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf)
   return bsdf_microfacet_multi_ggx_common_setup(bsdf);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,
+                                                       ccl_private const ShaderData *sd)
 {
   if (is_zero(bsdf->T))
     bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
@@ -408,7 +416,7 @@ ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, con
   return bsdf_microfacet_multi_ggx_common_setup(bsdf);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_y = bsdf->alpha_x;
 
@@ -417,23 +425,23 @@ ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(MicrofacetBsdf *bsdf)
   return bsdf_microfacet_multi_ggx_common_setup(bsdf);
 }
 
-ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(ccl_private const ShaderClosure *sc,
                                                           const float3 I,
                                                           const float3 omega_in,
-                                                          float *pdf,
-                                                          ccl_addr_space uint *lcg_state)
+                                                          ccl_private float *pdf,
+                                                          ccl_private uint *lcg_state)
 {
   *pdf = 0.0f;
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(ccl_private const ShaderClosure *sc,
                                                          const float3 I,
                                                          const float3 omega_in,
-                                                         float *pdf,
-                                                         ccl_addr_space uint *lcg_state)
+                                                         ccl_private float *pdf,
+                                                         ccl_private uint *lcg_state)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
 
   if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -468,22 +476,22 @@ ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc
                         bsdf->extra->cspec0);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_sample(const KernelGlobals *kg,
-                                                const ShaderClosure *sc,
+ccl_device int bsdf_microfacet_multi_ggx_sample(ccl_global const KernelGlobals *kg,
+                                                ccl_private const ShaderClosure *sc,
                                                 float3 Ng,
                                                 float3 I,
                                                 float3 dIdx,
                                                 float3 dIdy,
                                                 float randu,
                                                 float randv,
-                                                float3 *eval,
-                                                float3 *omega_in,
-                                                float3 *domega_in_dx,
-                                                float3 *domega_in_dy,
-                                                float *pdf,
-                                                ccl_addr_space uint *lcg_state)
+                                                ccl_private float3 *eval,
+                                                ccl_private float3 *omega_in,
+                                                ccl_private float3 *domega_in_dx,
+                                                ccl_private float3 *domega_in_dy,
+                                                ccl_private float *pdf,
+                                                ccl_private uint *lcg_state)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
 
   float3 X, Y, Z;
   Z = bsdf->N;
@@ -536,7 +544,7 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(const KernelGlobals *kg,
 
 /* Multiscattering GGX Glass closure */
 
-ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_glass_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
   bsdf->alpha_y = bsdf->alpha_x;
@@ -548,8 +556,8 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf,
-                                                             const ShaderData *sd)
+ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,
+                                                             ccl_private const ShaderData *sd)
 {
   bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
   bsdf->alpha_y = bsdf->alpha_x;
@@ -564,13 +572,14 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsd
   return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }
 
-ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc,
-                                                                const float3 I,
-                                                                const float3 omega_in,
-                                                                float *pdf,
-                                                                ccl_addr_space uint *lcg_state)
+ccl_device float3
+bsdf_microfacet_multi_ggx_glass_eval_transmit(ccl_private const ShaderClosure *sc,
+                                              const float3 I,
+                                              const float3 omega_in,
+                                              ccl_private float *pdf,
+                                              ccl_private uint *lcg_state)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
 
   if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -596,13 +605,13 @@ ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClos
                        bsdf->extra->color);
 }
 
-ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(ccl_private const ShaderClosure *sc,
                                                                const float3 I,
                                                                const float3 omega_in,
-                                                               float *pdf,
-                                                               ccl_addr_space uint *lcg_state)
+                                                               ccl_private float *pdf,
+                                                               ccl_private uint *lcg_state)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
 
   if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -630,22 +639,22 @@ ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosu
                        bsdf->extra->cspec0);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_glass_sample(const KernelGlobals *kg,
-                                                      const ShaderClosure *sc,
+ccl_device int bsdf_microfacet_multi_ggx_glass_sample(ccl_global const KernelGlobals *kg,
+                                                      ccl_private const ShaderClosure *sc,
                                                       float3 Ng,
                                                       float3 I,
                                                       float3 dIdx,
                                                       float3 dIdy,
                                                       float randu,
                                                       float randv,
-                                                      float3 *eval,
-                                                      float3 *omega_in,
-                                                      float3 *domega_in_dx,
-                                                      float3 *domega_in_dy,
-                                                      float *pdf,
-                                                      ccl_addr_space uint *lcg_state)
+                                                      ccl_private float3 *eval,
+                                                      ccl_private float3 *omega_in,
+                                                      ccl_private float3 *domega_in_dx,
+                                                      ccl_private float3 *domega_in_dy,
+                                                      ccl_private float *pdf,
+                                                      ccl_private uint *lcg_state)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
 
   float3 X, Y, Z;
   Z = bsdf->N;
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
index 04d9b22d7d2..d23cc16cff3 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
@@ -31,7 +31,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
                                                              const float3 color,
                                                              const float alpha_x,
                                                              const float alpha_y,
-                                                             ccl_addr_space uint *lcg_state,
+                                                             ccl_private uint *lcg_state,
                                                              const float eta,
                                                              bool use_fresnel,
                                                              const float3 cspec0)
@@ -101,12 +101,12 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
 
   for (int order = 0; order < 10; order++) {
     /* Sample microfacet height. */
-    float height_rand = lcg_step_float_addrspace(lcg_state);
+    float height_rand = lcg_step_float(lcg_state);
     if (!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand))
       break;
     /* Sample microfacet normal. */
-    float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
-    float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
+    float vndf_rand_y = lcg_step_float(lcg_state);
+    float vndf_rand_x = lcg_step_float(lcg_state);
     float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
 
 #ifdef MF_MULTI_GLASS
@@ -145,7 +145,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
 #ifdef MF_MULTI_GLASS
       bool next_outside;
       float3 wi_prev = -wr;
-      float phase_rand = lcg_step_float_addrspace(lcg_state);
+      float phase_rand = lcg_step_float(lcg_state);
       wr = mf_sample_phase_glass(-wr, outside ? eta : 1.0f / eta, wm, phase_rand, &next_outside);
       if (!next_outside) {
         outside = !outside;
@@ -186,11 +186,11 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
  * reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
  */
 ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi,
-                                                               float3 *wo,
+                                                               ccl_private float3 *wo,
                                                                const float3 color,
                                                                const float alpha_x,
                                                                const float alpha_y,
-                                                               ccl_addr_space uint *lcg_state,
+                                                               ccl_private uint *lcg_state,
                                                                const float eta,
                                                                bool use_fresnel,
                                                                const float3 cspec0)
@@ -213,15 +213,15 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi,
   int order;
   for (order = 0; order < 10; order++) {
     /* Sample microfacet height. */
-    float height_rand = lcg_step_float_addrspace(lcg_state);
+    float height_rand = lcg_step_float(lcg_state);
     if (!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand)) {
       /* The random walk has left the surface. */
       *wo = outside ? wr : -wr;
       return throughput;
     }
     /* Sample microfacet normal. */
-    float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
-    float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
+    float vndf_rand_y = lcg_step_float(lcg_state);
+    float vndf_rand_x = lcg_step_float(lcg_state);
     float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
 
     /* First-bounce color is already accounted for in mix weight. */
@@ -232,7 +232,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi,
 #ifdef MF_MULTI_GLASS
     bool next_outside;
     float3 wi_prev = -wr;
-    float phase_rand = lcg_step_float_addrspace(lcg_state);
+    float phase_rand = lcg_step_float(lcg_state);
     wr = mf_sample_phase_glass(-wr, outside ? eta : 1.0f / eta, wm, phase_rand, &next_outside);
     if (!next_outside) {
       hr = -hr;
diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
index be12d47f0ea..00c2678f0a0 100644
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@@ -18,7 +18,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct OrenNayarBsdf {
+typedef struct OrenNayarBsdf {
   SHADER_CLOSURE_BASE;
 
   float roughness;
@@ -28,12 +28,12 @@ typedef ccl_addr_space struct OrenNayarBsdf {
 
 static_assert(sizeof(ShaderClosure) >= sizeof(OrenNayarBsdf), "OrenNayarBsdf is too large!");
 
-ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc,
+ccl_device float3 bsdf_oren_nayar_get_intensity(ccl_private const ShaderClosure *sc,
                                                 float3 n,
                                                 float3 v,
                                                 float3 l)
 {
-  const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+  ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
   float nl = max(dot(n, l), 0.0f);
   float nv = max(dot(n, v), 0.0f);
   float t = dot(l, v) - nl * nv;
@@ -44,7 +44,7 @@ ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc,
   return make_float3(is, is, is);
 }
 
-ccl_device int bsdf_oren_nayar_setup(OrenNayarBsdf *bsdf)
+ccl_device int bsdf_oren_nayar_setup(ccl_private OrenNayarBsdf *bsdf)
 {
   float sigma = bsdf->roughness;
 
@@ -60,12 +60,12 @@ ccl_device int bsdf_oren_nayar_setup(OrenNayarBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_oren_nayar_eval_reflect(ccl_private const ShaderClosure *sc,
                                                const float3 I,
                                                const float3 omega_in,
-                                               float *pdf)
+                                               ccl_private float *pdf)
 {
-  const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+  ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
   if (dot(bsdf->N, omega_in) > 0.0f) {
     *pdf = 0.5f * M_1_PI_F;
     return bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, omega_in);
@@ -76,28 +76,28 @@ ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc,
   }
 }
 
-ccl_device float3 bsdf_oren_nayar_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_oren_nayar_eval_transmit(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc,
+ccl_device int bsdf_oren_nayar_sample(ccl_private const ShaderClosure *sc,
                                       float3 Ng,
                                       float3 I,
                                       float3 dIdx,
                                       float3 dIdy,
                                       float randu,
                                       float randv,
-                                      float3 *eval,
-                                      float3 *omega_in,
-                                      float3 *domega_in_dx,
-                                      float3 *domega_in_dy,
-                                      float *pdf)
+                                      ccl_private float3 *eval,
+                                      ccl_private float3 *omega_in,
+                                      ccl_private float3 *domega_in_dx,
+                                      ccl_private float3 *domega_in_dy,
+                                      ccl_private float *pdf)
 {
-  const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+  ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
   sample_uniform_hemisphere(bsdf->N, randu, randv, omega_in, pdf);
 
   if (dot(Ng, *omega_in) > 0.0f) {
diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
index 43f8cf71c59..74cc62d917b 100644
--- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
@@ -36,11 +36,11 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __OSL__
 
-typedef ccl_addr_space struct PhongRampBsdf {
+typedef struct PhongRampBsdf {
   SHADER_CLOSURE_BASE;
 
   float exponent;
-  float3 *colors;
+  ccl_private float3 *colors;
 } PhongRampBsdf;
 
 static_assert(sizeof(ShaderClosure) >= sizeof(PhongRampBsdf), "PhongRampBsdf is too large!");
@@ -59,19 +59,19 @@ ccl_device float3 bsdf_phong_ramp_get_color(const float3 colors[8], float pos)
   return colors[ipos] * (1.0f - offset) + colors[ipos + 1] * offset;
 }
 
-ccl_device int bsdf_phong_ramp_setup(PhongRampBsdf *bsdf)
+ccl_device int bsdf_phong_ramp_setup(ccl_private PhongRampBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_PHONG_RAMP_ID;
   bsdf->exponent = max(bsdf->exponent, 0.0f);
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_phong_ramp_eval_reflect(ccl_private const ShaderClosure *sc,
                                                const float3 I,
                                                const float3 omega_in,
-                                               float *pdf)
+                                               ccl_private float *pdf)
 {
-  const PhongRampBsdf *bsdf = (const PhongRampBsdf *)sc;
+  ccl_private const PhongRampBsdf *bsdf = (ccl_private const PhongRampBsdf *)sc;
   float m_exponent = bsdf->exponent;
   float cosNI = dot(bsdf->N, omega_in);
   float cosNO = dot(bsdf->N, I);
@@ -92,28 +92,28 @@ ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_phong_ramp_eval_transmit(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc,
+ccl_device int bsdf_phong_ramp_sample(ccl_private const ShaderClosure *sc,
                                       float3 Ng,
                                       float3 I,
                                       float3 dIdx,
                                       float3 dIdy,
                                       float randu,
                                       float randv,
-                                      float3 *eval,
-                                      float3 *omega_in,
-                                      float3 *domega_in_dx,
-                                      float3 *domega_in_dy,
-                                      float *pdf)
+                                      ccl_private float3 *eval,
+                                      ccl_private float3 *omega_in,
+                                      ccl_private float3 *domega_in_dx,
+                                      ccl_private float3 *domega_in_dy,
+                                      ccl_private float *pdf)
 {
-  const PhongRampBsdf *bsdf = (const PhongRampBsdf *)sc;
+  ccl_private const PhongRampBsdf *bsdf = (ccl_private const PhongRampBsdf *)sc;
   float cosNO = dot(bsdf->N, I);
   float m_exponent = bsdf->exponent;
 
diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
index 52a37eafd9f..6d25daa2356 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
@@ -36,7 +36,7 @@ enum PrincipledDiffuseBsdfComponents {
   PRINCIPLED_DIFFUSE_RETRO_REFLECTION = 8,
 };
 
-typedef ccl_addr_space struct PrincipledDiffuseBsdf {
+typedef struct PrincipledDiffuseBsdf {
   SHADER_CLOSURE_BASE;
 
   float roughness;
@@ -46,14 +46,18 @@ typedef ccl_addr_space struct PrincipledDiffuseBsdf {
 static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledDiffuseBsdf),
               "PrincipledDiffuseBsdf is too large!");
 
-ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf)
+ccl_device int bsdf_principled_diffuse_setup(ccl_private PrincipledDiffuseBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID;
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_principled_diffuse_compute_brdf(
-    const PrincipledDiffuseBsdf *bsdf, float3 N, float3 V, float3 L, float *pdf)
+ccl_device float3
+bsdf_principled_diffuse_compute_brdf(ccl_private const PrincipledDiffuseBsdf *bsdf,
+                                     float3 N,
+                                     float3 V,
+                                     float3 L,
+                                     ccl_private float *pdf)
 {
   const float NdotL = dot(N, L);
 
@@ -102,24 +106,25 @@ ccl_device_inline float bsdf_principled_diffuse_compute_entry_fresnel(const floa
 /* Ad-hoc weight adjustment to avoid retro-reflection taking away half the
  * samples from BSSRDF. */
 ccl_device_inline float bsdf_principled_diffuse_retro_reflection_sample_weight(
-    PrincipledDiffuseBsdf *bsdf, const float3 I)
+    ccl_private PrincipledDiffuseBsdf *bsdf, const float3 I)
 {
   return bsdf->roughness * schlick_fresnel(dot(bsdf->N, I));
 }
 
-ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf, int components)
+ccl_device int bsdf_principled_diffuse_setup(ccl_private PrincipledDiffuseBsdf *bsdf,
+                                             int components)
 {
   bsdf->type = CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID;
   bsdf->components = components;
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_principled_diffuse_eval_reflect(ccl_private const ShaderClosure *sc,
                                                        const float3 I,
                                                        const float3 omega_in,
-                                                       float *pdf)
+                                                       ccl_private float *pdf)
 {
-  const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
+  ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)sc;
 
   float3 N = bsdf->N;
   float3 V = I;         // outgoing
@@ -135,28 +140,28 @@ ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc,
   }
 }
 
-ccl_device float3 bsdf_principled_diffuse_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_principled_diffuse_eval_transmit(ccl_private const ShaderClosure *sc,
                                                         const float3 I,
                                                         const float3 omega_in,
-                                                        float *pdf)
+                                                        ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_principled_diffuse_sample(const ShaderClosure *sc,
+ccl_device int bsdf_principled_diffuse_sample(ccl_private const ShaderClosure *sc,
                                               float3 Ng,
                                               float3 I,
                                               float3 dIdx,
                                               float3 dIdy,
                                               float randu,
                                               float randv,
-                                              float3 *eval,
-                                              float3 *omega_in,
-                                              float3 *domega_in_dx,
-                                              float3 *domega_in_dy,
-                                              float *pdf)
+                                              ccl_private float3 *eval,
+                                              ccl_private float3 *omega_in,
+                                              ccl_private float3 *domega_in_dx,
+                                              ccl_private float3 *domega_in_dy,
+                                              ccl_private float *pdf)
 {
-  const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
+  ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)sc;
 
   float3 N = bsdf->N;
 
diff --git a/intern/cycles/kernel/closure/bsdf_principled_sheen.h b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
index 60ce7e4eb75..cc0a5accb95 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_sheen.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
@@ -25,7 +25,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct PrincipledSheenBsdf {
+typedef struct PrincipledSheenBsdf {
   SHADER_CLOSURE_BASE;
   float avg_value;
 } PrincipledSheenBsdf;
@@ -46,7 +46,7 @@ ccl_device_inline float calculate_avg_principled_sheen_brdf(float3 N, float3 I)
 }
 
 ccl_device float3
-calculate_principled_sheen_brdf(float3 N, float3 V, float3 L, float3 H, float *pdf)
+calculate_principled_sheen_brdf(float3 N, float3 V, float3 L, float3 H, ccl_private float *pdf)
 {
   float NdotL = dot(N, L);
   float NdotV = dot(N, V);
@@ -63,7 +63,8 @@ calculate_principled_sheen_brdf(float3 N, float3 V, float3 L, float3 H, float *p
   return make_float3(value, value, value);
 }
 
-ccl_device int bsdf_principled_sheen_setup(const ShaderData *sd, PrincipledSheenBsdf *bsdf)
+ccl_device int bsdf_principled_sheen_setup(ccl_private const ShaderData *sd,
+                                           ccl_private PrincipledSheenBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_PRINCIPLED_SHEEN_ID;
   bsdf->avg_value = calculate_avg_principled_sheen_brdf(bsdf->N, sd->I);
@@ -71,12 +72,12 @@ ccl_device int bsdf_principled_sheen_setup(const ShaderData *sd, PrincipledSheen
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_principled_sheen_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_principled_sheen_eval_reflect(ccl_private const ShaderClosure *sc,
                                                      const float3 I,
                                                      const float3 omega_in,
-                                                     float *pdf)
+                                                     ccl_private float *pdf)
 {
-  const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
+  ccl_private const PrincipledSheenBsdf *bsdf = (ccl_private const PrincipledSheenBsdf *)sc;
 
   float3 N = bsdf->N;
   float3 V = I;         // outgoing
@@ -93,28 +94,28 @@ ccl_device float3 bsdf_principled_sheen_eval_reflect(const ShaderClosure *sc,
   }
 }
 
-ccl_device float3 bsdf_principled_sheen_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_principled_sheen_eval_transmit(ccl_private const ShaderClosure *sc,
                                                       const float3 I,
                                                       const float3 omega_in,
-                                                      float *pdf)
+                                                      ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_principled_sheen_sample(const ShaderClosure *sc,
+ccl_device int bsdf_principled_sheen_sample(ccl_private const ShaderClosure *sc,
                                             float3 Ng,
                                             float3 I,
                                             float3 dIdx,
                                             float3 dIdy,
                                             float randu,
                                             float randv,
-                                            float3 *eval,
-                                            float3 *omega_in,
-                                            float3 *domega_in_dx,
-                                            float3 *domega_in_dy,
-                                            float *pdf)
+                                            ccl_private float3 *eval,
+                                            ccl_private float3 *omega_in,
+                                            ccl_private float3 *domega_in_dx,
+                                            ccl_private float3 *domega_in_dy,
+                                            ccl_private float *pdf)
 {
-  const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
+  ccl_private const PrincipledSheenBsdf *bsdf = (ccl_private const PrincipledSheenBsdf *)sc;
 
   float3 N = bsdf->N;
 
diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h
index 31283971d5a..758bfd2b2d0 100644
--- a/intern/cycles/kernel/closure/bsdf_reflection.h
+++ b/intern/cycles/kernel/closure/bsdf_reflection.h
@@ -36,42 +36,42 @@ CCL_NAMESPACE_BEGIN
 
 /* REFLECTION */
 
-ccl_device int bsdf_reflection_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_reflection_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_REFLECTION_ID;
   return SD_BSDF;
 }
 
-ccl_device float3 bsdf_reflection_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_reflection_eval_reflect(ccl_private const ShaderClosure *sc,
                                                const float3 I,
                                                const float3 omega_in,
-                                               float *pdf)
+                                               ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_reflection_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_reflection_eval_transmit(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_reflection_sample(const ShaderClosure *sc,
+ccl_device int bsdf_reflection_sample(ccl_private const ShaderClosure *sc,
                                       float3 Ng,
                                       float3 I,
                                       float3 dIdx,
                                       float3 dIdy,
                                       float randu,
                                       float randv,
-                                      float3 *eval,
-                                      float3 *omega_in,
-                                      float3 *domega_in_dx,
-                                      float3 *domega_in_dy,
-                                      float *pdf)
+                                      ccl_private float3 *eval,
+                                      ccl_private float3 *omega_in,
+                                      ccl_private float3 *domega_in_dx,
+                                      ccl_private float3 *domega_in_dy,
+                                      ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float3 N = bsdf->N;
 
   // only one direction is possible
diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h
index cfedb5dfe2c..74e149b059e 100644
--- a/intern/cycles/kernel/closure/bsdf_refraction.h
+++ b/intern/cycles/kernel/closure/bsdf_refraction.h
@@ -36,42 +36,42 @@ CCL_NAMESPACE_BEGIN
 
 /* REFRACTION */
 
-ccl_device int bsdf_refraction_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_REFRACTION_ID;
   return SD_BSDF;
 }
 
-ccl_device float3 bsdf_refraction_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_refraction_eval_reflect(ccl_private const ShaderClosure *sc,
                                                const float3 I,
                                                const float3 omega_in,
-                                               float *pdf)
+                                               ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_refraction_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_refraction_eval_transmit(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_refraction_sample(const ShaderClosure *sc,
+ccl_device int bsdf_refraction_sample(ccl_private const ShaderClosure *sc,
                                       float3 Ng,
                                       float3 I,
                                       float3 dIdx,
                                       float3 dIdy,
                                       float randu,
                                       float randv,
-                                      float3 *eval,
-                                      float3 *omega_in,
-                                      float3 *domega_in_dx,
-                                      float3 *domega_in_dy,
-                                      float *pdf)
+                                      ccl_private float3 *eval,
+                                      ccl_private float3 *omega_in,
+                                      ccl_private float3 *domega_in_dx,
+                                      ccl_private float3 *domega_in_dy,
+                                      ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float m_eta = bsdf->ior;
   float3 N = bsdf->N;
 
diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h
index acdafe0f735..7f20a328b5e 100644
--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@@ -34,7 +34,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct ToonBsdf {
+typedef struct ToonBsdf {
   SHADER_CLOSURE_BASE;
 
   float size;
@@ -45,7 +45,7 @@ static_assert(sizeof(ShaderClosure) >= sizeof(ToonBsdf), "ToonBsdf is too large!
 
 /* DIFFUSE TOON */
 
-ccl_device int bsdf_diffuse_toon_setup(ToonBsdf *bsdf)
+ccl_device int bsdf_diffuse_toon_setup(ccl_private ToonBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
   bsdf->size = saturate(bsdf->size);
@@ -73,12 +73,12 @@ ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth)
   return fminf(max_angle + smooth, M_PI_2_F);
 }
 
-ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_toon_eval_reflect(ccl_private const ShaderClosure *sc,
                                                  const float3 I,
                                                  const float3 omega_in,
-                                                 float *pdf)
+                                                 ccl_private float *pdf)
 {
-  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
   float max_angle = bsdf->size * M_PI_2_F;
   float smooth = bsdf->smooth * M_PI_2_F;
   float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f));
@@ -95,28 +95,28 @@ ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_diffuse_toon_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_toon_eval_transmit(ccl_private const ShaderClosure *sc,
                                                   const float3 I,
                                                   const float3 omega_in,
-                                                  float *pdf)
+                                                  ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc,
+ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc,
                                         float3 Ng,
                                         float3 I,
                                         float3 dIdx,
                                         float3 dIdy,
                                         float randu,
                                         float randv,
-                                        float3 *eval,
-                                        float3 *omega_in,
-                                        float3 *domega_in_dx,
-                                        float3 *domega_in_dy,
-                                        float *pdf)
+                                        ccl_private float3 *eval,
+                                        ccl_private float3 *omega_in,
+                                        ccl_private float3 *domega_in_dx,
+                                        ccl_private float3 *domega_in_dy,
+                                        ccl_private float *pdf)
 {
-  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
   float max_angle = bsdf->size * M_PI_2_F;
   float smooth = bsdf->smooth * M_PI_2_F;
   float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
@@ -143,7 +143,7 @@ ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc,
 
 /* GLOSSY TOON */
 
-ccl_device int bsdf_glossy_toon_setup(ToonBsdf *bsdf)
+ccl_device int bsdf_glossy_toon_setup(ccl_private ToonBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID;
   bsdf->size = saturate(bsdf->size);
@@ -152,12 +152,12 @@ ccl_device int bsdf_glossy_toon_setup(ToonBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_glossy_toon_eval_reflect(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
-  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
   float max_angle = bsdf->size * M_PI_2_F;
   float smooth = bsdf->smooth * M_PI_2_F;
   float cosNI = dot(bsdf->N, omega_in);
@@ -180,28 +180,28 @@ ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_glossy_toon_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_glossy_toon_eval_transmit(ccl_private const ShaderClosure *sc,
                                                  const float3 I,
                                                  const float3 omega_in,
-                                                 float *pdf)
+                                                 ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc,
+ccl_device int bsdf_glossy_toon_sample(ccl_private const ShaderClosure *sc,
                                        float3 Ng,
                                        float3 I,
                                        float3 dIdx,
                                        float3 dIdy,
                                        float randu,
                                        float randv,
-                                       float3 *eval,
-                                       float3 *omega_in,
-                                       float3 *domega_in_dx,
-                                       float3 *domega_in_dy,
-                                       float *pdf)
+                                       ccl_private float3 *eval,
+                                       ccl_private float3 *omega_in,
+                                       ccl_private float3 *domega_in_dx,
+                                       ccl_private float3 *domega_in_dy,
+                                       ccl_private float *pdf)
 {
-  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
   float max_angle = bsdf->size * M_PI_2_F;
   float smooth = bsdf->smooth * M_PI_2_F;
   float cosNO = dot(bsdf->N, I);
diff --git a/intern/cycles/kernel/closure/bsdf_transparent.h b/intern/cycles/kernel/closure/bsdf_transparent.h
index f1dc7efb345..8313ab964d7 100644
--- a/intern/cycles/kernel/closure/bsdf_transparent.h
+++ b/intern/cycles/kernel/closure/bsdf_transparent.h
@@ -34,7 +34,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void bsdf_transparent_setup(ShaderData *sd, const float3 weight, int path_flag)
+ccl_device void bsdf_transparent_setup(ccl_private ShaderData *sd,
+                                       const float3 weight,
+                                       int path_flag)
 {
   /* Check cutoff weight. */
   float sample_weight = fabsf(average(weight));
@@ -47,7 +49,7 @@ ccl_device void bsdf_transparent_setup(ShaderData *sd, const float3 weight, int
 
     /* Add weight to existing transparent BSDF. */
     for (int i = 0; i < sd->num_closure; i++) {
-      ShaderClosure *sc = &sd->closure[i];
+      ccl_private ShaderClosure *sc = &sd->closure[i];
 
       if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
         sc->weight += weight;
@@ -68,7 +70,7 @@ ccl_device void bsdf_transparent_setup(ShaderData *sd, const float3 weight, int
     }
 
     /* Create new transparent BSDF. */
-    ShaderClosure *bsdf = closure_alloc(
+    ccl_private ShaderClosure *bsdf = closure_alloc(
         sd, sizeof(ShaderClosure), CLOSURE_BSDF_TRANSPARENT_ID, weight);
 
     if (bsdf) {
@@ -81,34 +83,34 @@ ccl_device void bsdf_transparent_setup(ShaderData *sd, const float3 weight, int
   }
 }
 
-ccl_device float3 bsdf_transparent_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_transparent_eval_reflect(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_transparent_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_transparent_eval_transmit(ccl_private const ShaderClosure *sc,
                                                  const float3 I,
                                                  const float3 omega_in,
-                                                 float *pdf)
+                                                 ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_transparent_sample(const ShaderClosure *sc,
+ccl_device int bsdf_transparent_sample(ccl_private const ShaderClosure *sc,
                                        float3 Ng,
                                        float3 I,
                                        float3 dIdx,
                                        float3 dIdy,
                                        float randu,
                                        float randv,
-                                       float3 *eval,
-                                       float3 *omega_in,
-                                       float3 *domega_in_dx,
-                                       float3 *domega_in_dy,
-                                       float *pdf)
+                                       ccl_private float3 *eval,
+                                       ccl_private float3 *omega_in,
+                                       ccl_private float3 *domega_in_dx,
+                                       ccl_private float3 *domega_in_dy,
+                                       ccl_private float *pdf)
 {
   // only one direction is possible
   *omega_in = -I;
diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h
index beec5f768a1..873494c1e03 100644
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -37,17 +37,17 @@ CCL_NAMESPACE_BEGIN
 ccl_device float fresnel_dielectric(float eta,
                                     const float3 N,
                                     const float3 I,
-                                    float3 *R,
-                                    float3 *T,
+                                    ccl_private float3 *R,
+                                    ccl_private float3 *T,
 #ifdef __RAY_DIFFERENTIALS__
                                     const float3 dIdx,
                                     const float3 dIdy,
-                                    float3 *dRdx,
-                                    float3 *dRdy,
-                                    float3 *dTdx,
-                                    float3 *dTdy,
+                                    ccl_private float3 *dRdx,
+                                    ccl_private float3 *dRdy,
+                                    ccl_private float3 *dTdx,
+                                    ccl_private float3 *dTdy,
 #endif
-                                    bool *is_inside)
+                                    ccl_private bool *is_inside)
 {
   float cos = dot(N, I), neta;
   float3 Nn;
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index 07415c53ec5..9df69e073c1 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -18,7 +18,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct Bssrdf {
+typedef struct Bssrdf {
   SHADER_CLOSURE_BASE;
 
   float3 radius;
@@ -66,7 +66,9 @@ ccl_device float bssrdf_dipole_compute_alpha_prime(float rd, float fourthirdA)
   return xmid;
 }
 
-ccl_device void bssrdf_setup_radius(Bssrdf *bssrdf, const ClosureType type, const float eta)
+ccl_device void bssrdf_setup_radius(ccl_private Bssrdf *bssrdf,
+                                    const ClosureType type,
+                                    const float eta)
 {
   if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) {
     /* Scale mean free path length so it gives similar looking result to older
@@ -114,7 +116,7 @@ ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r)
   return 0.25f * M_1_PI_F * r;
 }
 
-ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf)
+ccl_device void bssrdf_burley_setup(ccl_private Bssrdf *bssrdf)
 {
   /* Mean free path length. */
   const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
@@ -195,7 +197,10 @@ ccl_device_forceinline float bssrdf_burley_root_find(float xi)
   return r;
 }
 
-ccl_device void bssrdf_burley_sample(const float d, float xi, float *r, float *h)
+ccl_device void bssrdf_burley_sample(const float d,
+                                     float xi,
+                                     ccl_private float *r,
+                                     ccl_private float *h)
 {
   const float Rm = BURLEY_TRUNCATE * d;
   const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
@@ -221,7 +226,10 @@ ccl_device float bssrdf_num_channels(const float3 radius)
   return channels;
 }
 
-ccl_device void bssrdf_sample(const float3 radius, float xi, float *r, float *h)
+ccl_device void bssrdf_sample(const float3 radius,
+                              float xi,
+                              ccl_private float *r,
+                              ccl_private float *h)
 {
   const float num_channels = bssrdf_num_channels(radius);
   float sampled_radius;
@@ -261,9 +269,10 @@ ccl_device_forceinline float bssrdf_pdf(const float3 radius, float r)
 
 /* Setup */
 
-ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
+ccl_device_inline ccl_private Bssrdf *bssrdf_alloc(ccl_private ShaderData *sd, float3 weight)
 {
-  Bssrdf *bssrdf = (Bssrdf *)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
+  ccl_private Bssrdf *bssrdf = (ccl_private Bssrdf *)closure_alloc(
+      sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
 
   if (bssrdf == NULL) {
     return NULL;
@@ -274,13 +283,16 @@ ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
   return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL;
 }
 
-ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type, const float ior)
+ccl_device int bssrdf_setup(ccl_private ShaderData *sd,
+                            ccl_private Bssrdf *bssrdf,
+                            ClosureType type,
+                            const float ior)
 {
   int flag = 0;
 
   /* Add retro-reflection component as separate diffuse BSDF. */
   if (bssrdf->roughness != FLT_MAX) {
-    PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+    ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
         sd, sizeof(PrincipledDiffuseBsdf), bssrdf->weight);
 
     if (bsdf) {
@@ -321,7 +333,7 @@ ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type, co
     /* Add diffuse BSDF if any radius too small. */
 #ifdef __PRINCIPLED__
     if (bssrdf->roughness != FLT_MAX) {
-      PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+      ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
           sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
 
       if (bsdf) {
@@ -333,7 +345,8 @@ ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type, co
     else
 #endif /* __PRINCIPLED__ */
     {
-      DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight);
+      ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
+          sd, sizeof(DiffuseBsdf), diffuse_weight);
 
       if (bsdf) {
         bsdf->N = bssrdf->N;
diff --git a/intern/cycles/kernel/closure/emissive.h b/intern/cycles/kernel/closure/emissive.h
index a2519d97618..3d56e989522 100644
--- a/intern/cycles/kernel/closure/emissive.h
+++ b/intern/cycles/kernel/closure/emissive.h
@@ -36,7 +36,7 @@ CCL_NAMESPACE_BEGIN
 
 /* BACKGROUND CLOSURE */
 
-ccl_device void background_setup(ShaderData *sd, const float3 weight)
+ccl_device void background_setup(ccl_private ShaderData *sd, const float3 weight)
 {
   if (sd->flag & SD_EMISSION) {
     sd->closure_emission_background += weight;
@@ -49,7 +49,7 @@ ccl_device void background_setup(ShaderData *sd, const float3 weight)
 
 /* EMISSION CLOSURE */
 
-ccl_device void emission_setup(ShaderData *sd, const float3 weight)
+ccl_device void emission_setup(ccl_private ShaderData *sd, const float3 weight)
 {
   if (sd->flag & SD_EMISSION) {
     sd->closure_emission_background += weight;
@@ -69,8 +69,11 @@ ccl_device float emissive_pdf(const float3 Ng, const float3 I)
   return (cosNO > 0.0f) ? 1.0f : 0.0f;
 }
 
-ccl_device void emissive_sample(
-    const float3 Ng, float randu, float randv, float3 *omega_out, float *pdf)
+ccl_device void emissive_sample(const float3 Ng,
+                                float randu,
+                                float randv,
+                                ccl_private float3 *omega_out,
+                                ccl_private float *pdf)
 {
   /* todo: not implemented and used yet */
 }
diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h
index 69959a3f21b..023fb3ac4ea 100644
--- a/intern/cycles/kernel/closure/volume.h
+++ b/intern/cycles/kernel/closure/volume.h
@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
 
 /* VOLUME EXTINCTION */
 
-ccl_device void volume_extinction_setup(ShaderData *sd, float3 weight)
+ccl_device void volume_extinction_setup(ccl_private ShaderData *sd, float3 weight)
 {
   if (sd->flag & SD_EXTINCTION) {
     sd->closure_transparent_extinction += weight;
@@ -33,7 +33,7 @@ ccl_device void volume_extinction_setup(ShaderData *sd, float3 weight)
 
 /* HENYEY-GREENSTEIN CLOSURE */
 
-typedef ccl_addr_space struct HenyeyGreensteinVolume {
+typedef struct HenyeyGreensteinVolume {
   SHADER_CLOSURE_BASE;
 
   float g;
@@ -51,7 +51,7 @@ ccl_device float single_peaked_henyey_greenstein(float cos_theta, float g)
          (M_1_PI_F * 0.25f);
 };
 
-ccl_device int volume_henyey_greenstein_setup(HenyeyGreensteinVolume *volume)
+ccl_device int volume_henyey_greenstein_setup(ccl_private HenyeyGreensteinVolume *volume)
 {
   volume->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID;
 
@@ -61,10 +61,10 @@ ccl_device int volume_henyey_greenstein_setup(HenyeyGreensteinVolume *volume)
   return SD_SCATTER;
 }
 
-ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderVolumeClosure *svc,
+ccl_device float3 volume_henyey_greenstein_eval_phase(ccl_private const ShaderVolumeClosure *svc,
                                                       const float3 I,
                                                       float3 omega_in,
-                                                      float *pdf)
+                                                      ccl_private float *pdf)
 {
   float g = svc->g;
 
@@ -81,7 +81,7 @@ ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderVolumeClosure
 }
 
 ccl_device float3
-henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pdf)
+henyey_greenstrein_sample(float3 D, float g, float randu, float randv, ccl_private float *pdf)
 {
   /* match pdf for small g */
   float cos_theta;
@@ -112,17 +112,17 @@ henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pd
   return dir;
 }
 
-ccl_device int volume_henyey_greenstein_sample(const ShaderVolumeClosure *svc,
+ccl_device int volume_henyey_greenstein_sample(ccl_private const ShaderVolumeClosure *svc,
                                                float3 I,
                                                float3 dIdx,
                                                float3 dIdy,
                                                float randu,
                                                float randv,
-                                               float3 *eval,
-                                               float3 *omega_in,
-                                               float3 *domega_in_dx,
-                                               float3 *domega_in_dy,
-                                               float *pdf)
+                                               ccl_private float3 *eval,
+                                               ccl_private float3 *omega_in,
+                                               ccl_private float3 *domega_in_dx,
+                                               ccl_private float3 *domega_in_dy,
+                                               ccl_private float *pdf)
 {
   float g = svc->g;
 
@@ -141,22 +141,22 @@ ccl_device int volume_henyey_greenstein_sample(const ShaderVolumeClosure *svc,
 
 /* VOLUME CLOSURE */
 
-ccl_device float3 volume_phase_eval(const ShaderData *sd,
-                                    const ShaderVolumeClosure *svc,
+ccl_device float3 volume_phase_eval(ccl_private const ShaderData *sd,
+                                    ccl_private const ShaderVolumeClosure *svc,
                                     float3 omega_in,
-                                    float *pdf)
+                                    ccl_private float *pdf)
 {
   return volume_henyey_greenstein_eval_phase(svc, sd->I, omega_in, pdf);
 }
 
-ccl_device int volume_phase_sample(const ShaderData *sd,
-                                   const ShaderVolumeClosure *svc,
+ccl_device int volume_phase_sample(ccl_private const ShaderData *sd,
+                                   ccl_private const ShaderVolumeClosure *svc,
                                    float randu,
                                    float randv,
-                                   float3 *eval,
-                                   float3 *omega_in,
-                                   differential3 *domega_in,
-                                   float *pdf)
+                                   ccl_private float3 *eval,
+                                   ccl_private float3 *omega_in,
+                                   ccl_private differential3 *domega_in,
+                                   ccl_private float *pdf)
 {
   return volume_henyey_greenstein_sample(svc,
                                          sd->I,
@@ -187,7 +187,10 @@ ccl_device float volume_channel_get(float3 value, int channel)
   return (channel == 0) ? value.x : ((channel == 1) ? value.y : value.z);
 }
 
-ccl_device int volume_sample_channel(float3 albedo, float3 throughput, float rand, float3 *pdf)
+ccl_device int volume_sample_channel(float3 albedo,
+                                     float3 throughput,
+                                     float rand,
+                                     ccl_private float3 *pdf)
 {
   /* Sample color channel proportional to throughput and single scattering
    * albedo, to significantly reduce noise with many bounce, following:
diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h
index bfd936c7bbd..888c0d5d872 100644
--- a/intern/cycles/kernel/device/cpu/compat.h
+++ b/intern/cycles/kernel/device/cpu/compat.h
@@ -32,8 +32,6 @@
 #include "util/util_texture.h"
 #include "util/util_types.h"
 
-#define ccl_addr_space
-
 /* On x86_64, versions of glibc < 2.16 have an issue where expf is
  * much slower than the double version.  This was fixed in glibc 2.16.
  */
diff --git a/intern/cycles/kernel/device/cuda/compat.h b/intern/cycles/kernel/device/cuda/compat.h
index 3c85a8e7bd2..685c7a5b753 100644
--- a/intern/cycles/kernel/device/cuda/compat.h
+++ b/intern/cycles/kernel/device/cuda/compat.h
@@ -59,7 +59,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_shared __shared__
 #define ccl_private
 #define ccl_may_alias
-#define ccl_addr_space
 #define ccl_restrict __restrict__
 #define ccl_loop_no_unroll
 #define ccl_align(n) __align__(n)
diff --git a/intern/cycles/kernel/device/hip/compat.h b/intern/cycles/kernel/device/hip/compat.h
index 95338fe7d6e..089976d84e4 100644
--- a/intern/cycles/kernel/device/hip/compat.h
+++ b/intern/cycles/kernel/device/hip/compat.h
@@ -52,7 +52,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_shared __shared__
 #define ccl_private
 #define ccl_may_alias
-#define ccl_addr_space
 #define ccl_restrict __restrict__
 #define ccl_loop_no_unroll
 #define ccl_align(n) __align__(n)
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
new file mode 100644
index 00000000000..77cea30914c
--- /dev/null
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#define __KERNEL_GPU__
+#define __KERNEL_METAL__
+#define CCL_NAMESPACE_BEGIN
+#define CCL_NAMESPACE_END
+
+#ifndef ATTR_FALLTHROUGH
+#  define ATTR_FALLTHROUGH
+#endif
+
+#include <metal_atomic>
+#include <metal_pack>
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wsign-compare"
+
+/* Qualifiers: Cycles' ccl_* macros expressed as MSL keywords and address spaces (device, constant, threadgroup, thread). */
+
+#define ccl_device
+#define ccl_device_inline ccl_device
+#define ccl_device_forceinline ccl_device
+#define ccl_device_noinline ccl_device __attribute__((noinline))
+#define ccl_device_noinline_cpu ccl_device
+#define ccl_global device
+#define ccl_static_constant static constant constexpr
+#define ccl_device_constant constant
+#define ccl_constant const device
+#define ccl_gpu_shared threadgroup
+#define ccl_private thread
+#define ccl_may_alias
+#define ccl_restrict __restrict
+#define ccl_loop_no_unroll
+#define ccl_align(n) alignas(n)
+#define ccl_optional_struct_init
+
+/* No assert supported for Metal: kernel_assert() expands to nothing, so `cond` is never evaluated. */
+
+#define kernel_assert(cond)
+
+/* make_<type>() definitions using Metal-style constructors; any earlier macro of the same name is undefined first. */
+#ifdef make_float2
+#  undef make_float2
+#endif
+#ifdef make_float3
+#  undef make_float3
+#endif
+#ifdef make_float4
+#  undef make_float4
+#endif
+#ifdef make_int2
+#  undef make_int2
+#endif
+#ifdef make_int3
+#  undef make_int3
+#endif
+#ifdef make_int4
+#  undef make_int4
+#endif
+#ifdef make_uchar4
+#  undef make_uchar4
+#endif
+
+#define make_float2(x, y) float2(x, y)
+#define make_float3(x, y, z) float3(x, y, z)
+#define make_float4(x, y, z, w) float4(x, y, z, w)
+#define make_int2(x, y) int2(x, y)
+#define make_int3(x, y, z) int3(x, y, z)
+#define make_int4(x, y, z, w) int4(x, y, z, w)
+#define make_uchar4(x, y, z, w) uchar4(x, y, z, w)
+
+/* Math functions: as_type<> reinterpretation helpers, half conversion, and C float-suffixed names mapped to Metal's unsuffixed functions with float(...) arguments. */
+
+#define __uint_as_float(x) as_type<float>(x)
+#define __float_as_uint(x) as_type<uint>(x)
+#define __int_as_float(x) as_type<float>(x)
+#define __float_as_int(x) as_type<int>(x)
+#define __float2half(x) half(x)
+#define powf(x, y) pow(float(x), float(y))
+#define fabsf(x) fabs(float(x))
+#define copysignf(x, y) copysign(float(x), float(y))
+#define asinf(x) asin(float(x))
+#define acosf(x) acos(float(x))
+#define atanf(x) atan(float(x))
+#define floorf(x) floor(float(x))
+#define ceilf(x) ceil(float(x))
+#define hypotf(x, y) hypot(float(x), float(y))
+#define atan2f(x, y) atan2(float(x), float(y))
+#define fmaxf(x, y) fmax(float(x), float(y))
+#define fminf(x, y) fmin(float(x), float(y))
+#define fmodf(x, y) fmod(float(x), float(y))
+#define sinhf(x) sinh(float(x))
+#define coshf(x) cosh(float(x))
+#define tanhf(x) tanh(float(x))
+
+/* Use the function variants selected by `trigmode` (currently `fast`), with possibly
+ * lower precision, for performance; no issues found so far. */
+#define trigmode fast
+#define sinf(x) trigmode::sin(float(x))
+#define cosf(x) trigmode::cos(float(x))
+#define tanf(x) trigmode::tan(float(x))
+#define expf(x) trigmode::exp(float(x))
+#define sqrtf(x) trigmode::sqrt(float(x))
+#define logf(x) trigmode::log(float(x))
+
+#define NULL 0
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h
index fb9e094b535..c9ec9be05df 100644
--- a/intern/cycles/kernel/device/optix/compat.h
+++ b/intern/cycles/kernel/device/optix/compat.h
@@ -58,7 +58,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_shared __shared__
 #define ccl_private
 #define ccl_may_alias
-#define ccl_addr_space
 #define ccl_restrict __restrict__
 #define ccl_loop_no_unroll
 #define ccl_align(n) __align__(n)
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
index 9532a21fec7..850ac44e6e0 100644
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -27,9 +27,11 @@ CCL_NAMESPACE_BEGIN
  * Lookup of attributes is different between OSL and SVM, as OSL is ustring
  * based while for SVM we use integer ids. */
 
-ccl_device_inline uint subd_triangle_patch(const KernelGlobals *kg, const ShaderData *sd);
+ccl_device_inline uint subd_triangle_patch(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderData *sd);
 
-ccl_device_inline uint attribute_primitive_type(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline uint attribute_primitive_type(ccl_global const KernelGlobals *kg,
+                                                ccl_private const ShaderData *sd)
 {
   if ((sd->type & PRIMITIVE_ALL_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) {
     return ATTR_PRIM_SUBD;
@@ -48,13 +50,13 @@ ccl_device_inline AttributeDescriptor attribute_not_found()
 
 /* Find attribute based on ID */
 
-ccl_device_inline uint object_attribute_map_offset(const KernelGlobals *kg, int object)
+ccl_device_inline uint object_attribute_map_offset(ccl_global const KernelGlobals *kg, int object)
 {
   return kernel_tex_fetch(__objects, object).attribute_map_offset;
 }
 
-ccl_device_inline AttributeDescriptor find_attribute(const KernelGlobals *kg,
-                                                     const ShaderData *sd,
+ccl_device_inline AttributeDescriptor find_attribute(ccl_global const KernelGlobals *kg,
+                                                     ccl_private const ShaderData *sd,
                                                      uint id)
 {
   if (sd->object == OBJECT_NONE) {
@@ -100,8 +102,8 @@ ccl_device_inline AttributeDescriptor find_attribute(const KernelGlobals *kg,
 
 /* Transform matrix attribute on meshes */
 
-ccl_device Transform primitive_attribute_matrix(const KernelGlobals *kg,
-                                                const ShaderData *sd,
+ccl_device Transform primitive_attribute_matrix(ccl_global const KernelGlobals *kg,
+                                                ccl_private const ShaderData *sd,
                                                 const AttributeDescriptor desc)
 {
   Transform tfm;
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 811558edae9..07f218d781b 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -27,11 +27,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Reading attributes on various curve elements */
 
-ccl_device float curve_attribute_float(const KernelGlobals *kg,
-                                       const ShaderData *sd,
+ccl_device float curve_attribute_float(ccl_global const KernelGlobals *kg,
+                                       ccl_private const ShaderData *sd,
                                        const AttributeDescriptor desc,
-                                       float *dx,
-                                       float *dy)
+                                       ccl_private float *dx,
+                                       ccl_private float *dy)
 {
   if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
     KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
@@ -69,11 +69,11 @@ ccl_device float curve_attribute_float(const KernelGlobals *kg,
   }
 }
 
-ccl_device float2 curve_attribute_float2(const KernelGlobals *kg,
-                                         const ShaderData *sd,
+ccl_device float2 curve_attribute_float2(ccl_global const KernelGlobals *kg,
+                                         ccl_private const ShaderData *sd,
                                          const AttributeDescriptor desc,
-                                         float2 *dx,
-                                         float2 *dy)
+                                         ccl_private float2 *dx,
+                                         ccl_private float2 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
     KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
@@ -115,11 +115,11 @@ ccl_device float2 curve_attribute_float2(const KernelGlobals *kg,
   }
 }
 
-ccl_device float3 curve_attribute_float3(const KernelGlobals *kg,
-                                         const ShaderData *sd,
+ccl_device float3 curve_attribute_float3(ccl_global const KernelGlobals *kg,
+                                         ccl_private const ShaderData *sd,
                                          const AttributeDescriptor desc,
-                                         float3 *dx,
-                                         float3 *dy)
+                                         ccl_private float3 *dx,
+                                         ccl_private float3 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
     KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
@@ -157,11 +157,11 @@ ccl_device float3 curve_attribute_float3(const KernelGlobals *kg,
   }
 }
 
-ccl_device float4 curve_attribute_float4(const KernelGlobals *kg,
-                                         const ShaderData *sd,
+ccl_device float4 curve_attribute_float4(ccl_global const KernelGlobals *kg,
+                                         ccl_private const ShaderData *sd,
                                          const AttributeDescriptor desc,
-                                         float4 *dx,
-                                         float4 *dy)
+                                         ccl_private float4 *dx,
+                                         ccl_private float4 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
     KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
@@ -201,7 +201,8 @@ ccl_device float4 curve_attribute_float4(const KernelGlobals *kg,
 
 /* Curve thickness */
 
-ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float curve_thickness(ccl_global const KernelGlobals *kg,
+                                 ccl_private const ShaderData *sd)
 {
   float r = 0.0f;
 
@@ -229,7 +230,8 @@ ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd)
 /* Curve location for motion pass, linear interpolation between keys and
  * ignoring radius because we do the same for the motion keys */
 
-ccl_device float3 curve_motion_center_location(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 curve_motion_center_location(ccl_global const KernelGlobals *kg,
+                                               ccl_private const ShaderData *sd)
 {
   KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
   int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
@@ -245,7 +247,8 @@ ccl_device float3 curve_motion_center_location(const KernelGlobals *kg, const Sh
 
 /* Curve tangent normal */
 
-ccl_device float3 curve_tangent_normal(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 curve_tangent_normal(ccl_global const KernelGlobals *kg,
+                                       ccl_private const ShaderData *sd)
 {
   float3 tgN = make_float3(0.0f, 0.0f, 0.0f);
 
@@ -265,12 +268,12 @@ ccl_device float3 curve_tangent_normal(const KernelGlobals *kg, const ShaderData
 
 /* Curve bounds utility function */
 
-ccl_device_inline void curvebounds(float *lower,
-                                   float *upper,
-                                   float *extremta,
-                                   float *extrema,
-                                   float *extremtb,
-                                   float *extremb,
+ccl_device_inline void curvebounds(ccl_private float *lower,
+                                   ccl_private float *upper,
+                                   ccl_private float *extremta,
+                                   ccl_private float *extrema,
+                                   ccl_private float *extremtb,
+                                   ccl_private float *extremb,
                                    float p0,
                                    float p1,
                                    float p2,
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index 30addb9616d..04af8ea1421 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -86,11 +86,11 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
                                           const float3 cylinder_end,
                                           const float cylinder_radius,
                                           const float3 ray_dir,
-                                          float2 *t_o,
-                                          float *u0_o,
-                                          float3 *Ng0_o,
-                                          float *u1_o,
-                                          float3 *Ng1_o)
+                                          ccl_private float2 *t_o,
+                                          ccl_private float *u0_o,
+                                          ccl_private float3 *Ng0_o,
+                                          ccl_private float *u1_o,
+                                          ccl_private float3 *Ng1_o)
 {
   /* Calculate quadratic equation to solve. */
   const float rl = 1.0f / len(cylinder_end - cylinder_start);
@@ -169,13 +169,13 @@ ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, co
 }
 
 ccl_device bool curve_intersect_iterative(const float3 ray_dir,
-                                          float *ray_tfar,
+                                          ccl_private float *ray_tfar,
                                           const float dt,
                                           const float4 curve[4],
                                           float u,
                                           float t,
                                           const bool use_backfacing,
-                                          Intersection *isect)
+                                          ccl_private Intersection *isect)
 {
   const float length_ray_dir = len(ray_dir);
 
@@ -265,7 +265,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
                                           const float3 ray_dir,
                                           float ray_tfar,
                                           float4 curve[4],
-                                          Intersection *isect)
+                                          ccl_private Intersection *isect)
 {
   /* Move ray closer to make intersection stable. */
   const float3 center = float4_to_float3(0.25f * (curve[0] + curve[1] + curve[2] + curve[3]));
@@ -474,9 +474,9 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar,
                                              const float3 quad_v1,
                                              const float3 quad_v2,
                                              const float3 quad_v3,
-                                             float *u_o,
-                                             float *v_o,
-                                             float *t_o)
+                                             ccl_private float *u_o,
+                                             ccl_private float *v_o,
+                                             ccl_private float *t_o)
 {
   /* Calculate vertices relative to ray origin? */
   const float3 O = make_float3(0.0f, 0.0f, 0.0f);
@@ -550,7 +550,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
                                         float ray_tfar,
                                         const int N,
                                         float4 curve[4],
-                                        Intersection *isect)
+                                        ccl_private Intersection *isect)
 {
   /* Transform control points into ray space. */
   float3 ray_space[3];
@@ -625,8 +625,8 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
   return false;
 }
 
-ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg,
-                                            Intersection *isect,
+ccl_device_forceinline bool curve_intersect(ccl_global const KernelGlobals *kg,
+                                            ccl_private Intersection *isect,
                                             const float3 P,
                                             const float3 dir,
                                             const float tmax,
@@ -679,8 +679,8 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg,
   }
 }
 
-ccl_device_inline void curve_shader_setup(const KernelGlobals *kg,
-                                          ShaderData *sd,
+ccl_device_inline void curve_shader_setup(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
                                           float3 P,
                                           float3 D,
                                           float t,
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 5294da03145..8e32df439cd 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -27,10 +27,10 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __HAIR__
 
-ccl_device_inline int find_attribute_curve_motion(const KernelGlobals *kg,
+ccl_device_inline int find_attribute_curve_motion(ccl_global const KernelGlobals *kg,
                                                   int object,
                                                   uint id,
-                                                  AttributeElement *elem)
+                                                  ccl_private AttributeElement *elem)
 {
   /* todo: find a better (faster) solution for this, maybe store offset per object.
    *
@@ -52,7 +52,7 @@ ccl_device_inline int find_attribute_curve_motion(const KernelGlobals *kg,
   return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
 }
 
-ccl_device_inline void motion_curve_keys_for_step_linear(const KernelGlobals *kg,
+ccl_device_inline void motion_curve_keys_for_step_linear(ccl_global const KernelGlobals *kg,
                                                          int offset,
                                                          int numkeys,
                                                          int numsteps,
@@ -79,8 +79,13 @@ ccl_device_inline void motion_curve_keys_for_step_linear(const KernelGlobals *kg
 }
 
 /* return 2 curve key locations */
-ccl_device_inline void motion_curve_keys_linear(
-    const KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2])
+ccl_device_inline void motion_curve_keys_linear(ccl_global const KernelGlobals *kg,
+                                                int object,
+                                                int prim,
+                                                float time,
+                                                int k0,
+                                                int k1,
+                                                float4 keys[2])
 {
   /* get motion info */
   int numsteps, numkeys;
@@ -107,7 +112,7 @@ ccl_device_inline void motion_curve_keys_linear(
   keys[1] = (1.0f - t) * keys[1] + t * next_keys[1];
 }
 
-ccl_device_inline void motion_curve_keys_for_step(const KernelGlobals *kg,
+ccl_device_inline void motion_curve_keys_for_step(ccl_global const KernelGlobals *kg,
                                                   int offset,
                                                   int numkeys,
                                                   int numsteps,
@@ -140,7 +145,7 @@ ccl_device_inline void motion_curve_keys_for_step(const KernelGlobals *kg,
 }
 
 /* return 2 curve key locations */
-ccl_device_inline void motion_curve_keys(const KernelGlobals *kg,
+ccl_device_inline void motion_curve_keys(ccl_global const KernelGlobals *kg,
                                          int object,
                                          int prim,
                                          float time,
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index b7f182090aa..161b358110d 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -31,10 +31,10 @@ CCL_NAMESPACE_BEGIN
 
 /* Time interpolation of vertex positions and normals */
 
-ccl_device_inline int find_attribute_motion(const KernelGlobals *kg,
+ccl_device_inline int find_attribute_motion(ccl_global const KernelGlobals *kg,
                                             int object,
                                             uint id,
-                                            AttributeElement *elem)
+                                            ccl_private AttributeElement *elem)
 {
   /* todo: find a better (faster) solution for this, maybe store offset per object */
   uint attr_offset = object_attribute_map_offset(kg, object);
@@ -62,7 +62,7 @@ ccl_device_inline int find_attribute_motion(const KernelGlobals *kg,
   return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
 }
 
-ccl_device_inline void motion_triangle_verts_for_step(const KernelGlobals *kg,
+ccl_device_inline void motion_triangle_verts_for_step(ccl_global const KernelGlobals *kg,
                                                       uint4 tri_vindex,
                                                       int offset,
                                                       int numverts,
@@ -89,7 +89,7 @@ ccl_device_inline void motion_triangle_verts_for_step(const KernelGlobals *kg,
   }
 }
 
-ccl_device_inline void motion_triangle_normals_for_step(const KernelGlobals *kg,
+ccl_device_inline void motion_triangle_normals_for_step(ccl_global const KernelGlobals *kg,
                                                         uint4 tri_vindex,
                                                         int offset,
                                                         int numverts,
@@ -117,7 +117,7 @@ ccl_device_inline void motion_triangle_normals_for_step(const KernelGlobals *kg,
 }
 
 ccl_device_inline void motion_triangle_vertices(
-    const KernelGlobals *kg, int object, int prim, float time, float3 verts[3])
+    ccl_global const KernelGlobals *kg, int object, int prim, float time, float3 verts[3])
 {
   /* get motion info */
   int numsteps, numverts;
@@ -146,8 +146,13 @@ ccl_device_inline void motion_triangle_vertices(
   verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
 }
 
-ccl_device_inline float3 motion_triangle_smooth_normal(
-    const KernelGlobals *kg, float3 Ng, int object, int prim, float u, float v, float time)
+ccl_device_inline float3 motion_triangle_smooth_normal(ccl_global const KernelGlobals *kg,
+                                                       float3 Ng,
+                                                       int object,
+                                                       int prim,
+                                                       float u,
+                                                       float v,
+                                                       float time)
 {
   /* get motion info */
   int numsteps, numverts;
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
index 6fb9756ff92..94d00875f0a 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -34,8 +34,8 @@ CCL_NAMESPACE_BEGIN
  * a closer distance.
  */
 
-ccl_device_inline float3 motion_triangle_refine(const KernelGlobals *kg,
-                                                ShaderData *sd,
+ccl_device_inline float3 motion_triangle_refine(ccl_global const KernelGlobals *kg,
+                                                ccl_private ShaderData *sd,
                                                 float3 P,
                                                 float3 D,
                                                 float t,
@@ -92,8 +92,8 @@ ccl_device_noinline
 ccl_device_inline
 #  endif
     float3
-    motion_triangle_refine_local(const KernelGlobals *kg,
-                                 ShaderData *sd,
+    motion_triangle_refine_local(ccl_global const KernelGlobals *kg,
+                                 ccl_private ShaderData *sd,
                                  float3 P,
                                  float3 D,
                                  float t,
@@ -145,8 +145,8 @@ ccl_device_inline
  * time and do a ray intersection with the resulting triangle.
  */
 
-ccl_device_inline bool motion_triangle_intersect(const KernelGlobals *kg,
-                                                 Intersection *isect,
+ccl_device_inline bool motion_triangle_intersect(ccl_global const KernelGlobals *kg,
+                                                 ccl_private Intersection *isect,
                                                  float3 P,
                                                  float3 dir,
                                                  float tmax,
@@ -202,8 +202,8 @@ ccl_device_inline bool motion_triangle_intersect(const KernelGlobals *kg,
  * Returns whether traversal should be stopped.
  */
 #ifdef __BVH_LOCAL__
-ccl_device_inline bool motion_triangle_intersect_local(const KernelGlobals *kg,
-                                                       LocalIntersection *local_isect,
+ccl_device_inline bool motion_triangle_intersect_local(ccl_global const KernelGlobals *kg,
+                                                       ccl_private LocalIntersection *local_isect,
                                                        float3 P,
                                                        float3 dir,
                                                        float time,
@@ -211,7 +211,7 @@ ccl_device_inline bool motion_triangle_intersect_local(const KernelGlobals *kg,
                                                        int local_object,
                                                        int prim_addr,
                                                        float tmax,
-                                                       uint *lcg_state,
+                                                       ccl_private uint *lcg_state,
                                                        int max_hits)
 {
   /* Only intersect with matching object, for instanced objects we
@@ -285,7 +285,7 @@ ccl_device_inline bool motion_triangle_intersect_local(const KernelGlobals *kg,
   }
 
   /* Record intersection. */
-  Intersection *isect = &local_isect->hits[hit];
+  ccl_private Intersection *isect = &local_isect->hits[hit];
   isect->t = t;
   isect->u = u;
   isect->v = v;
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
index 85c4f0ca522..03bb1fba2a2 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
@@ -34,8 +34,8 @@ CCL_NAMESPACE_BEGIN
  * normals */
 
 /* return 3 triangle vertex normals */
-ccl_device_noinline void motion_triangle_shader_setup(const KernelGlobals *kg,
-                                                      ShaderData *sd,
+ccl_device_noinline void motion_triangle_shader_setup(ccl_global const KernelGlobals *kg,
+                                                      ccl_private ShaderData *sd,
                                                       const float3 P,
                                                       const float3 D,
                                                       const float ray_t,
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 7d6ad7b4fe3..730c01d4709 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -37,7 +37,7 @@ enum ObjectVectorTransform { OBJECT_PASS_MOTION_PRE = 0, OBJECT_PASS_MOTION_POST
 
 /* Object to world space transformation */
 
-ccl_device_inline Transform object_fetch_transform(const KernelGlobals *kg,
+ccl_device_inline Transform object_fetch_transform(ccl_global const KernelGlobals *kg,
                                                    int object,
                                                    enum ObjectTransform type)
 {
@@ -51,7 +51,9 @@ ccl_device_inline Transform object_fetch_transform(const KernelGlobals *kg,
 
 /* Lamp to world space transformation */
 
-ccl_device_inline Transform lamp_fetch_transform(const KernelGlobals *kg, int lamp, bool inverse)
+ccl_device_inline Transform lamp_fetch_transform(ccl_global const KernelGlobals *kg,
+                                                 int lamp,
+                                                 bool inverse)
 {
   if (inverse) {
     return kernel_tex_fetch(__lights, lamp).itfm;
@@ -63,7 +65,7 @@ ccl_device_inline Transform lamp_fetch_transform(const KernelGlobals *kg, int la
 
 /* Object to world space transformation for motion vectors */
 
-ccl_device_inline Transform object_fetch_motion_pass_transform(const KernelGlobals *kg,
+ccl_device_inline Transform object_fetch_motion_pass_transform(ccl_global const KernelGlobals *kg,
                                                                int object,
                                                                enum ObjectVectorTransform type)
 {
@@ -74,12 +76,12 @@ ccl_device_inline Transform object_fetch_motion_pass_transform(const KernelGloba
 /* Motion blurred object transformations */
 
 #ifdef __OBJECT_MOTION__
-ccl_device_inline Transform object_fetch_transform_motion(const KernelGlobals *kg,
+ccl_device_inline Transform object_fetch_transform_motion(ccl_global const KernelGlobals *kg,
                                                           int object,
                                                           float time)
 {
   const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset;
-  const ccl_global DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset);
+  ccl_global const DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset);
   const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1;
 
   Transform tfm;
@@ -88,10 +90,10 @@ ccl_device_inline Transform object_fetch_transform_motion(const KernelGlobals *k
   return tfm;
 }
 
-ccl_device_inline Transform object_fetch_transform_motion_test(const KernelGlobals *kg,
+ccl_device_inline Transform object_fetch_transform_motion_test(ccl_global const KernelGlobals *kg,
                                                                int object,
                                                                float time,
-                                                               Transform *itfm)
+                                                               ccl_private Transform *itfm)
 {
   int object_flag = kernel_tex_fetch(__object_flag, object);
   if (object_flag & SD_OBJECT_MOTION) {
@@ -115,7 +117,8 @@ ccl_device_inline Transform object_fetch_transform_motion_test(const KernelGloba
 
 /* Get transform matrix for shading point. */
 
-ccl_device_inline Transform object_get_transform(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline Transform object_get_transform(ccl_global const KernelGlobals *kg,
+                                                 ccl_private const ShaderData *sd)
 {
 #ifdef __OBJECT_MOTION__
   return (sd->object_flag & SD_OBJECT_MOTION) ?
@@ -126,8 +129,8 @@ ccl_device_inline Transform object_get_transform(const KernelGlobals *kg, const
 #endif
 }
 
-ccl_device_inline Transform object_get_inverse_transform(const KernelGlobals *kg,
-                                                         const ShaderData *sd)
+ccl_device_inline Transform object_get_inverse_transform(ccl_global const KernelGlobals *kg,
+                                                         ccl_private const ShaderData *sd)
 {
 #ifdef __OBJECT_MOTION__
   return (sd->object_flag & SD_OBJECT_MOTION) ?
@@ -139,9 +142,9 @@ ccl_device_inline Transform object_get_inverse_transform(const KernelGlobals *kg
 }
 /* Transform position from object to world space */
 
-ccl_device_inline void object_position_transform(const KernelGlobals *kg,
-                                                 const ShaderData *sd,
-                                                 float3 *P)
+ccl_device_inline void object_position_transform(ccl_global const KernelGlobals *kg,
+                                                 ccl_private const ShaderData *sd,
+                                                 ccl_private float3 *P)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -156,9 +159,9 @@ ccl_device_inline void object_position_transform(const KernelGlobals *kg,
 
 /* Transform position from world to object space */
 
-ccl_device_inline void object_inverse_position_transform(const KernelGlobals *kg,
-                                                         const ShaderData *sd,
-                                                         float3 *P)
+ccl_device_inline void object_inverse_position_transform(ccl_global const KernelGlobals *kg,
+                                                         ccl_private const ShaderData *sd,
+                                                         ccl_private float3 *P)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -173,9 +176,9 @@ ccl_device_inline void object_inverse_position_transform(const KernelGlobals *kg
 
 /* Transform normal from world to object space */
 
-ccl_device_inline void object_inverse_normal_transform(const KernelGlobals *kg,
-                                                       const ShaderData *sd,
-                                                       float3 *N)
+ccl_device_inline void object_inverse_normal_transform(ccl_global const KernelGlobals *kg,
+                                                       ccl_private const ShaderData *sd,
+                                                       ccl_private float3 *N)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -198,9 +201,9 @@ ccl_device_inline void object_inverse_normal_transform(const KernelGlobals *kg,
 
 /* Transform normal from object to world space */
 
-ccl_device_inline void object_normal_transform(const KernelGlobals *kg,
-                                               const ShaderData *sd,
-                                               float3 *N)
+ccl_device_inline void object_normal_transform(ccl_global const KernelGlobals *kg,
+                                               ccl_private const ShaderData *sd,
+                                               ccl_private float3 *N)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -215,9 +218,9 @@ ccl_device_inline void object_normal_transform(const KernelGlobals *kg,
 
 /* Transform direction vector from object to world space */
 
-ccl_device_inline void object_dir_transform(const KernelGlobals *kg,
-                                            const ShaderData *sd,
-                                            float3 *D)
+ccl_device_inline void object_dir_transform(ccl_global const KernelGlobals *kg,
+                                            ccl_private const ShaderData *sd,
+                                            ccl_private float3 *D)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -232,9 +235,9 @@ ccl_device_inline void object_dir_transform(const KernelGlobals *kg,
 
 /* Transform direction vector from world to object space */
 
-ccl_device_inline void object_inverse_dir_transform(const KernelGlobals *kg,
-                                                    const ShaderData *sd,
-                                                    float3 *D)
+ccl_device_inline void object_inverse_dir_transform(ccl_global const KernelGlobals *kg,
+                                                    ccl_private const ShaderData *sd,
+                                                    ccl_private float3 *D)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -249,7 +252,8 @@ ccl_device_inline void object_inverse_dir_transform(const KernelGlobals *kg,
 
 /* Object center position */
 
-ccl_device_inline float3 object_location(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline float3 object_location(ccl_global const KernelGlobals *kg,
+                                         ccl_private const ShaderData *sd)
 {
   if (sd->object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -266,18 +270,18 @@ ccl_device_inline float3 object_location(const KernelGlobals *kg, const ShaderDa
 
 /* Color of the object */
 
-ccl_device_inline float3 object_color(const KernelGlobals *kg, int object)
+ccl_device_inline float3 object_color(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
 
-  const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+  ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object);
   return make_float3(kobject->color[0], kobject->color[1], kobject->color[2]);
 }
 
 /* Pass ID number of object */
 
-ccl_device_inline float object_pass_id(const KernelGlobals *kg, int object)
+ccl_device_inline float object_pass_id(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0.0f;
@@ -287,7 +291,7 @@ ccl_device_inline float object_pass_id(const KernelGlobals *kg, int object)
 
 /* Per lamp random number for shader variation */
 
-ccl_device_inline float lamp_random_number(const KernelGlobals *kg, int lamp)
+ccl_device_inline float lamp_random_number(ccl_global const KernelGlobals *kg, int lamp)
 {
   if (lamp == LAMP_NONE)
     return 0.0f;
@@ -297,7 +301,7 @@ ccl_device_inline float lamp_random_number(const KernelGlobals *kg, int lamp)
 
 /* Per object random number for shader variation */
 
-ccl_device_inline float object_random_number(const KernelGlobals *kg, int object)
+ccl_device_inline float object_random_number(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0.0f;
@@ -307,7 +311,7 @@ ccl_device_inline float object_random_number(const KernelGlobals *kg, int object
 
 /* Particle ID from which this object was generated */
 
-ccl_device_inline int object_particle_id(const KernelGlobals *kg, int object)
+ccl_device_inline int object_particle_id(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0;
@@ -317,31 +321,34 @@ ccl_device_inline int object_particle_id(const KernelGlobals *kg, int object)
 
 /* Generated texture coordinate on surface from where object was instanced */
 
-ccl_device_inline float3 object_dupli_generated(const KernelGlobals *kg, int object)
+ccl_device_inline float3 object_dupli_generated(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
 
-  const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+  ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object);
   return make_float3(
       kobject->dupli_generated[0], kobject->dupli_generated[1], kobject->dupli_generated[2]);
 }
 
 /* UV texture coordinate on surface from where object was instanced */
 
-ccl_device_inline float3 object_dupli_uv(const KernelGlobals *kg, int object)
+ccl_device_inline float3 object_dupli_uv(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
 
-  const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+  ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object);
   return make_float3(kobject->dupli_uv[0], kobject->dupli_uv[1], 0.0f);
 }
 
 /* Information about mesh for motion blurred triangles and curves */
 
-ccl_device_inline void object_motion_info(
-    const KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys)
+ccl_device_inline void object_motion_info(ccl_global const KernelGlobals *kg,
+                                          int object,
+                                          ccl_private int *numsteps,
+                                          ccl_private int *numverts,
+                                          ccl_private int *numkeys)
 {
   if (numkeys) {
     *numkeys = kernel_tex_fetch(__objects, object).numkeys;
@@ -355,7 +362,7 @@ ccl_device_inline void object_motion_info(
 
 /* Offset to an objects patch map */
 
-ccl_device_inline uint object_patch_map_offset(const KernelGlobals *kg, int object)
+ccl_device_inline uint object_patch_map_offset(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0;
@@ -365,7 +372,7 @@ ccl_device_inline uint object_patch_map_offset(const KernelGlobals *kg, int obje
 
 /* Volume step size */
 
-ccl_device_inline float object_volume_density(const KernelGlobals *kg, int object)
+ccl_device_inline float object_volume_density(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE) {
     return 1.0f;
@@ -374,7 +381,7 @@ ccl_device_inline float object_volume_density(const KernelGlobals *kg, int objec
   return kernel_tex_fetch(__objects, object).volume_density;
 }
 
-ccl_device_inline float object_volume_step_size(const KernelGlobals *kg, int object)
+ccl_device_inline float object_volume_step_size(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE) {
     return kernel_data.background.volume_step_size;
@@ -385,14 +392,14 @@ ccl_device_inline float object_volume_step_size(const KernelGlobals *kg, int obj
 
 /* Pass ID for shader */
 
-ccl_device int shader_pass_id(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device int shader_pass_id(ccl_global const KernelGlobals *kg, ccl_private const ShaderData *sd)
 {
   return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id;
 }
 
 /* Cryptomatte ID */
 
-ccl_device_inline float object_cryptomatte_id(const KernelGlobals *kg, int object)
+ccl_device_inline float object_cryptomatte_id(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0.0f;
@@ -400,7 +407,7 @@ ccl_device_inline float object_cryptomatte_id(const KernelGlobals *kg, int objec
   return kernel_tex_fetch(__objects, object).cryptomatte_object;
 }
 
-ccl_device_inline float object_cryptomatte_asset_id(const KernelGlobals *kg, int object)
+ccl_device_inline float object_cryptomatte_asset_id(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0;
@@ -410,42 +417,42 @@ ccl_device_inline float object_cryptomatte_asset_id(const KernelGlobals *kg, int
 
 /* Particle data from which object was instanced */
 
-ccl_device_inline uint particle_index(const KernelGlobals *kg, int particle)
+ccl_device_inline uint particle_index(ccl_global const KernelGlobals *kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).index;
 }
 
-ccl_device float particle_age(const KernelGlobals *kg, int particle)
+ccl_device float particle_age(ccl_global const KernelGlobals *kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).age;
 }
 
-ccl_device float particle_lifetime(const KernelGlobals *kg, int particle)
+ccl_device float particle_lifetime(ccl_global const KernelGlobals *kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).lifetime;
 }
 
-ccl_device float particle_size(const KernelGlobals *kg, int particle)
+ccl_device float particle_size(ccl_global const KernelGlobals *kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).size;
 }
 
-ccl_device float4 particle_rotation(const KernelGlobals *kg, int particle)
+ccl_device float4 particle_rotation(ccl_global const KernelGlobals *kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).rotation;
 }
 
-ccl_device float3 particle_location(const KernelGlobals *kg, int particle)
+ccl_device float3 particle_location(ccl_global const KernelGlobals *kg, int particle)
 {
   return float4_to_float3(kernel_tex_fetch(__particles, particle).location);
 }
 
-ccl_device float3 particle_velocity(const KernelGlobals *kg, int particle)
+ccl_device float3 particle_velocity(ccl_global const KernelGlobals *kg, int particle)
 {
   return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity);
 }
 
-ccl_device float3 particle_angular_velocity(const KernelGlobals *kg, int particle)
+ccl_device float3 particle_angular_velocity(ccl_global const KernelGlobals *kg, int particle)
 {
   return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity);
 }
@@ -467,8 +474,12 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir)
 
 /* Transform ray into object space to enter static object in BVH */
 
-ccl_device_inline float bvh_instance_push(
-    const KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir)
+ccl_device_inline float bvh_instance_push(ccl_global const KernelGlobals *kg,
+                                          int object,
+                                          ccl_private const Ray *ray,
+                                          ccl_private float3 *P,
+                                          ccl_private float3 *dir,
+                                          ccl_private float3 *idir)
 {
   Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 
@@ -483,12 +494,12 @@ ccl_device_inline float bvh_instance_push(
 
 /* Transform ray to exit static object in BVH. */
 
-ccl_device_inline float bvh_instance_pop(const KernelGlobals *kg,
+ccl_device_inline float bvh_instance_pop(ccl_global const KernelGlobals *kg,
                                          int object,
-                                         const Ray *ray,
-                                         float3 *P,
-                                         float3 *dir,
-                                         float3 *idir,
+                                         ccl_private const Ray *ray,
+                                         ccl_private float3 *P,
+                                         ccl_private float3 *dir,
+                                         ccl_private float3 *idir,
                                          float t)
 {
   if (t != FLT_MAX) {
@@ -505,13 +516,13 @@ ccl_device_inline float bvh_instance_pop(const KernelGlobals *kg,
 
 /* Same as above, but returns scale factor to apply to multiple intersection distances */
 
-ccl_device_inline void bvh_instance_pop_factor(const KernelGlobals *kg,
+ccl_device_inline void bvh_instance_pop_factor(ccl_global const KernelGlobals *kg,
                                                int object,
-                                               const Ray *ray,
-                                               float3 *P,
-                                               float3 *dir,
-                                               float3 *idir,
-                                               float *t_fac)
+                                               ccl_private const Ray *ray,
+                                               ccl_private float3 *P,
+                                               ccl_private float3 *dir,
+                                               ccl_private float3 *idir,
+                                               ccl_private float *t_fac)
 {
   Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
   *t_fac = 1.0f / len(transform_direction(&tfm, ray->D));
@@ -524,13 +535,13 @@ ccl_device_inline void bvh_instance_pop_factor(const KernelGlobals *kg,
 #ifdef __OBJECT_MOTION__
 /* Transform ray into object space to enter motion blurred object in BVH */
 
-ccl_device_inline float bvh_instance_motion_push(const KernelGlobals *kg,
+ccl_device_inline float bvh_instance_motion_push(ccl_global const KernelGlobals *kg,
                                                  int object,
-                                                 const Ray *ray,
-                                                 float3 *P,
-                                                 float3 *dir,
-                                                 float3 *idir,
-                                                 Transform *itfm)
+                                                 ccl_private const Ray *ray,
+                                                 ccl_private float3 *P,
+                                                 ccl_private float3 *dir,
+                                                 ccl_private float3 *idir,
+                                                 ccl_private Transform *itfm)
 {
   object_fetch_transform_motion_test(kg, object, ray->time, itfm);
 
@@ -545,14 +556,14 @@ ccl_device_inline float bvh_instance_motion_push(const KernelGlobals *kg,
 
 /* Transform ray to exit motion blurred object in BVH. */
 
-ccl_device_inline float bvh_instance_motion_pop(const KernelGlobals *kg,
+ccl_device_inline float bvh_instance_motion_pop(ccl_global const KernelGlobals *kg,
                                                 int object,
-                                                const Ray *ray,
-                                                float3 *P,
-                                                float3 *dir,
-                                                float3 *idir,
+                                                ccl_private const Ray *ray,
+                                                ccl_private float3 *P,
+                                                ccl_private float3 *dir,
+                                                ccl_private float3 *idir,
                                                 float t,
-                                                Transform *itfm)
+                                                ccl_private Transform *itfm)
 {
   if (t != FLT_MAX) {
     t /= len(transform_direction(itfm, ray->D));
@@ -567,14 +578,14 @@ ccl_device_inline float bvh_instance_motion_pop(const KernelGlobals *kg,
 
 /* Same as above, but returns scale factor to apply to multiple intersection distances */
 
-ccl_device_inline void bvh_instance_motion_pop_factor(const KernelGlobals *kg,
+ccl_device_inline void bvh_instance_motion_pop_factor(ccl_global const KernelGlobals *kg,
                                                       int object,
-                                                      const Ray *ray,
-                                                      float3 *P,
-                                                      float3 *dir,
-                                                      float3 *idir,
-                                                      float *t_fac,
-                                                      Transform *itfm)
+                                                      ccl_private const Ray *ray,
+                                                      ccl_private float3 *P,
+                                                      ccl_private float3 *dir,
+                                                      ccl_private float3 *idir,
+                                                      ccl_private float *t_fac,
+                                                      ccl_private Transform *itfm)
 {
   *t_fac = 1.0f / len(transform_direction(itfm, ray->D));
   *P = ray->P;
diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h
index ce0fc15f196..b54eafd6220 100644
--- a/intern/cycles/kernel/geom/geom_patch.h
+++ b/intern/cycles/kernel/geom/geom_patch.h
@@ -32,7 +32,9 @@ typedef struct PatchHandle {
   int array_index, patch_index, vert_index;
 } PatchHandle;
 
-ccl_device_inline int patch_map_resolve_quadrant(float median, float *u, float *v)
+ccl_device_inline int patch_map_resolve_quadrant(float median,
+                                                 ccl_private float *u,
+                                                 ccl_private float *v)
 {
   int quadrant = -1;
 
@@ -62,7 +64,7 @@ ccl_device_inline int patch_map_resolve_quadrant(float median, float *u, float *
 /* retrieve PatchHandle from patch coords */
 
 ccl_device_inline PatchHandle
-patch_map_find_patch(const KernelGlobals *kg, int object, int patch, float u, float v)
+patch_map_find_patch(ccl_global const KernelGlobals *kg, int object, int patch, float u, float v)
 {
   PatchHandle handle;
 
@@ -108,7 +110,9 @@ patch_map_find_patch(const KernelGlobals *kg, int object, int patch, float u, fl
   return handle;
 }
 
-ccl_device_inline void patch_eval_bspline_weights(float t, float *point, float *deriv)
+ccl_device_inline void patch_eval_bspline_weights(float t,
+                                                  ccl_private float *point,
+                                                  ccl_private float *deriv)
 {
   /* The four uniform cubic B-Spline basis functions evaluated at t */
   float inv_6 = 1.0f / 6.0f;
@@ -128,7 +132,9 @@ ccl_device_inline void patch_eval_bspline_weights(float t, float *point, float *
   deriv[3] = 0.5f * t2;
 }
 
-ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits, float *s, float *t)
+ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits,
+                                                          ccl_private float *s,
+                                                          ccl_private float *t)
 {
   int boundary = ((bits >> 8) & 0xf);
 
@@ -175,7 +181,9 @@ ccl_device_inline float patch_eval_param_fraction(uint patch_bits)
   }
 }
 
-ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, float *u, float *v)
+ccl_device_inline void patch_eval_normalize_coords(uint patch_bits,
+                                                   ccl_private float *u,
+                                                   ccl_private float *v)
 {
   float frac = patch_eval_param_fraction(patch_bits);
 
@@ -193,8 +201,8 @@ ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, float *u, fl
 
 /* retrieve patch control indices */
 
-ccl_device_inline int patch_eval_indices(const KernelGlobals *kg,
-                                         const PatchHandle *handle,
+ccl_device_inline int patch_eval_indices(ccl_global const KernelGlobals *kg,
+                                         ccl_private const PatchHandle *handle,
                                          int channel,
                                          int indices[PATCH_MAX_CONTROL_VERTS])
 {
@@ -210,8 +218,8 @@ ccl_device_inline int patch_eval_indices(const KernelGlobals *kg,
 
 /* evaluate patch basis functions */
 
-ccl_device_inline void patch_eval_basis(const KernelGlobals *kg,
-                                        const PatchHandle *handle,
+ccl_device_inline void patch_eval_basis(ccl_global const KernelGlobals *kg,
+                                        ccl_private const PatchHandle *handle,
                                         float u,
                                         float v,
                                         float weights[PATCH_MAX_CONTROL_VERTS],
@@ -249,7 +257,7 @@ ccl_device_inline void patch_eval_basis(const KernelGlobals *kg,
 
 /* generic function for evaluating indices and weights from patch coords */
 
-ccl_device_inline int patch_eval_control_verts(const KernelGlobals *kg,
+ccl_device_inline int patch_eval_control_verts(ccl_global const KernelGlobals *kg,
                                                int object,
                                                int patch,
                                                float u,
@@ -271,15 +279,15 @@ ccl_device_inline int patch_eval_control_verts(const KernelGlobals *kg,
 
 /* functions for evaluating attributes on patches */
 
-ccl_device float patch_eval_float(const KernelGlobals *kg,
-                                  const ShaderData *sd,
+ccl_device float patch_eval_float(ccl_global const KernelGlobals *kg,
+                                  ccl_private const ShaderData *sd,
                                   int offset,
                                   int patch,
                                   float u,
                                   float v,
                                   int channel,
-                                  float *du,
-                                  float *dv)
+                                  ccl_private float *du,
+                                  ccl_private float *dv)
 {
   int indices[PATCH_MAX_CONTROL_VERTS];
   float weights[PATCH_MAX_CONTROL_VERTS];
@@ -308,15 +316,15 @@ ccl_device float patch_eval_float(const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float2 patch_eval_float2(const KernelGlobals *kg,
-                                    const ShaderData *sd,
+ccl_device float2 patch_eval_float2(ccl_global const KernelGlobals *kg,
+                                    ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
                                     float u,
                                     float v,
                                     int channel,
-                                    float2 *du,
-                                    float2 *dv)
+                                    ccl_private float2 *du,
+                                    ccl_private float2 *dv)
 {
   int indices[PATCH_MAX_CONTROL_VERTS];
   float weights[PATCH_MAX_CONTROL_VERTS];
@@ -345,15 +353,15 @@ ccl_device float2 patch_eval_float2(const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float3 patch_eval_float3(const KernelGlobals *kg,
-                                    const ShaderData *sd,
+ccl_device float3 patch_eval_float3(ccl_global const KernelGlobals *kg,
+                                    ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
                                     float u,
                                     float v,
                                     int channel,
-                                    float3 *du,
-                                    float3 *dv)
+                                    ccl_private float3 *du,
+                                    ccl_private float3 *dv)
 {
   int indices[PATCH_MAX_CONTROL_VERTS];
   float weights[PATCH_MAX_CONTROL_VERTS];
@@ -382,15 +390,15 @@ ccl_device float3 patch_eval_float3(const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float4 patch_eval_float4(const KernelGlobals *kg,
-                                    const ShaderData *sd,
+ccl_device float4 patch_eval_float4(ccl_global const KernelGlobals *kg,
+                                    ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
                                     float u,
                                     float v,
                                     int channel,
-                                    float4 *du,
-                                    float4 *dv)
+                                    ccl_private float4 *du,
+                                    ccl_private float4 *dv)
 {
   int indices[PATCH_MAX_CONTROL_VERTS];
   float weights[PATCH_MAX_CONTROL_VERTS];
@@ -419,15 +427,15 @@ ccl_device float4 patch_eval_float4(const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float4 patch_eval_uchar4(const KernelGlobals *kg,
-                                    const ShaderData *sd,
+ccl_device float4 patch_eval_uchar4(ccl_global const KernelGlobals *kg,
+                                    ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
                                     float u,
                                     float v,
                                     int channel,
-                                    float4 *du,
-                                    float4 *dv)
+                                    ccl_private float4 *du,
+                                    ccl_private float4 *dv)
 {
   int indices[PATCH_MAX_CONTROL_VERTS];
   float weights[PATCH_MAX_CONTROL_VERTS];
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index ba31b12e817..869b911f76f 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -31,11 +31,11 @@ CCL_NAMESPACE_BEGIN
  * attributes for performance, mainly for GPU performance to avoid bringing in
  * heavy volume interpolation code. */
 
-ccl_device_inline float primitive_surface_attribute_float(const KernelGlobals *kg,
-                                                          const ShaderData *sd,
+ccl_device_inline float primitive_surface_attribute_float(ccl_global const KernelGlobals *kg,
+                                                          ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
-                                                          float *dx,
-                                                          float *dy)
+                                                          ccl_private float *dx,
+                                                          ccl_private float *dy)
 {
   if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
     if (subd_triangle_patch(kg, sd) == ~0)
@@ -57,11 +57,11 @@ ccl_device_inline float primitive_surface_attribute_float(const KernelGlobals *k
   }
 }
 
-ccl_device_inline float2 primitive_surface_attribute_float2(const KernelGlobals *kg,
-                                                            const ShaderData *sd,
+ccl_device_inline float2 primitive_surface_attribute_float2(ccl_global const KernelGlobals *kg,
+                                                            ccl_private const ShaderData *sd,
                                                             const AttributeDescriptor desc,
-                                                            float2 *dx,
-                                                            float2 *dy)
+                                                            ccl_private float2 *dx,
+                                                            ccl_private float2 *dy)
 {
   if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
     if (subd_triangle_patch(kg, sd) == ~0)
@@ -83,11 +83,11 @@ ccl_device_inline float2 primitive_surface_attribute_float2(const KernelGlobals
   }
 }
 
-ccl_device_inline float3 primitive_surface_attribute_float3(const KernelGlobals *kg,
-                                                            const ShaderData *sd,
+ccl_device_inline float3 primitive_surface_attribute_float3(ccl_global const KernelGlobals *kg,
+                                                            ccl_private const ShaderData *sd,
                                                             const AttributeDescriptor desc,
-                                                            float3 *dx,
-                                                            float3 *dy)
+                                                            ccl_private float3 *dx,
+                                                            ccl_private float3 *dy)
 {
   if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
     if (subd_triangle_patch(kg, sd) == ~0)
@@ -109,11 +109,12 @@ ccl_device_inline float3 primitive_surface_attribute_float3(const KernelGlobals
   }
 }
 
-ccl_device_forceinline float4 primitive_surface_attribute_float4(const KernelGlobals *kg,
-                                                                 const ShaderData *sd,
-                                                                 const AttributeDescriptor desc,
-                                                                 float4 *dx,
-                                                                 float4 *dy)
+ccl_device_forceinline float4
+primitive_surface_attribute_float4(ccl_global const KernelGlobals *kg,
+                                   ccl_private const ShaderData *sd,
+                                   const AttributeDescriptor desc,
+                                   ccl_private float4 *dx,
+                                   ccl_private float4 *dy)
 {
   if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
     if (subd_triangle_patch(kg, sd) == ~0)
@@ -142,14 +143,14 @@ ccl_device_forceinline float4 primitive_surface_attribute_float4(const KernelGlo
  * attributes for performance, mainly for GPU performance to avoid bringing in
  * heavy volume interpolation code. */
 
-ccl_device_inline bool primitive_is_volume_attribute(const ShaderData *sd,
+ccl_device_inline bool primitive_is_volume_attribute(ccl_private const ShaderData *sd,
                                                      const AttributeDescriptor desc)
 {
   return sd->type == PRIMITIVE_VOLUME;
 }
 
-ccl_device_inline float primitive_volume_attribute_float(const KernelGlobals *kg,
-                                                         const ShaderData *sd,
+ccl_device_inline float primitive_volume_attribute_float(ccl_global const KernelGlobals *kg,
+                                                         ccl_private const ShaderData *sd,
                                                          const AttributeDescriptor desc)
 {
   if (primitive_is_volume_attribute(sd, desc)) {
@@ -160,8 +161,8 @@ ccl_device_inline float primitive_volume_attribute_float(const KernelGlobals *kg
   }
 }
 
-ccl_device_inline float3 primitive_volume_attribute_float3(const KernelGlobals *kg,
-                                                           const ShaderData *sd,
+ccl_device_inline float3 primitive_volume_attribute_float3(ccl_global const KernelGlobals *kg,
+                                                           ccl_private const ShaderData *sd,
                                                            const AttributeDescriptor desc)
 {
   if (primitive_is_volume_attribute(sd, desc)) {
@@ -172,8 +173,8 @@ ccl_device_inline float3 primitive_volume_attribute_float3(const KernelGlobals *
   }
 }
 
-ccl_device_inline float4 primitive_volume_attribute_float4(const KernelGlobals *kg,
-                                                           const ShaderData *sd,
+ccl_device_inline float4 primitive_volume_attribute_float4(ccl_global const KernelGlobals *kg,
+                                                           ccl_private const ShaderData *sd,
                                                            const AttributeDescriptor desc)
 {
   if (primitive_is_volume_attribute(sd, desc)) {
@@ -187,7 +188,8 @@ ccl_device_inline float4 primitive_volume_attribute_float4(const KernelGlobals *
 
 /* Default UV coordinate */
 
-ccl_device_inline float3 primitive_uv(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline float3 primitive_uv(ccl_global const KernelGlobals *kg,
+                                      ccl_private const ShaderData *sd)
 {
   const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV);
 
@@ -200,7 +202,10 @@ ccl_device_inline float3 primitive_uv(const KernelGlobals *kg, const ShaderData
 
 /* Ptex coordinates */
 
-ccl_device bool primitive_ptex(const KernelGlobals *kg, ShaderData *sd, float2 *uv, int *face_id)
+ccl_device bool primitive_ptex(ccl_global const KernelGlobals *kg,
+                               ccl_private ShaderData *sd,
+                               ccl_private float2 *uv,
+                               ccl_private int *face_id)
 {
   /* storing ptex data as attributes is not memory efficient but simple for tests */
   const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID);
@@ -220,7 +225,7 @@ ccl_device bool primitive_ptex(const KernelGlobals *kg, ShaderData *sd, float2 *
 
 /* Surface tangent */
 
-ccl_device float3 primitive_tangent(const KernelGlobals *kg, ShaderData *sd)
+ccl_device float3 primitive_tangent(ccl_global const KernelGlobals *kg, ccl_private ShaderData *sd)
 {
 #ifdef __HAIR__
   if (sd->type & PRIMITIVE_ALL_CURVE)
@@ -252,7 +257,8 @@ ccl_device float3 primitive_tangent(const KernelGlobals *kg, ShaderData *sd)
 
 /* Motion vector for motion pass */
 
-ccl_device_inline float4 primitive_motion_vector(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline float4 primitive_motion_vector(ccl_global const KernelGlobals *kg,
+                                                 ccl_private const ShaderData *sd)
 {
   /* center position */
   float3 center;
diff --git a/intern/cycles/kernel/geom/geom_shader_data.h b/intern/cycles/kernel/geom/geom_shader_data.h
index f78d194359d..2cf60e263c3 100644
--- a/intern/cycles/kernel/geom/geom_shader_data.h
+++ b/intern/cycles/kernel/geom/geom_shader_data.h
@@ -25,8 +25,8 @@ CCL_NAMESPACE_BEGIN
 /* ShaderData setup from incoming ray */
 
 #ifdef __OBJECT_MOTION__
-ccl_device void shader_setup_object_transforms(const KernelGlobals *ccl_restrict kg,
-                                               ShaderData *ccl_restrict sd,
+ccl_device void shader_setup_object_transforms(ccl_global const KernelGlobals *ccl_restrict kg,
+                                               ccl_private ShaderData *ccl_restrict sd,
                                                float time)
 {
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -38,10 +38,10 @@ ccl_device void shader_setup_object_transforms(const KernelGlobals *ccl_restrict
 
 /* TODO: break this up if it helps reduce register pressure to load data from
  * global memory as we write it to shader-data. */
-ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict kg,
-                                             ShaderData *ccl_restrict sd,
-                                             const Ray *ccl_restrict ray,
-                                             const Intersection *ccl_restrict isect)
+ccl_device_inline void shader_setup_from_ray(ccl_global const KernelGlobals *ccl_restrict kg,
+                                             ccl_private ShaderData *ccl_restrict sd,
+                                             ccl_private const Ray *ccl_restrict ray,
+                                             ccl_private const Intersection *ccl_restrict isect)
 {
   /* Read intersection data into shader globals.
    *
@@ -135,8 +135,8 @@ ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict k
 
 /* ShaderData setup from position sampled on mesh */
 
-ccl_device_inline void shader_setup_from_sample(const KernelGlobals *ccl_restrict kg,
-                                                ShaderData *ccl_restrict sd,
+ccl_device_inline void shader_setup_from_sample(ccl_global const KernelGlobals *ccl_restrict kg,
+                                                ccl_private ShaderData *ccl_restrict sd,
                                                 const float3 P,
                                                 const float3 Ng,
                                                 const float3 I,
@@ -247,8 +247,8 @@ ccl_device_inline void shader_setup_from_sample(const KernelGlobals *ccl_restric
 
 /* ShaderData setup for displacement */
 
-ccl_device void shader_setup_from_displace(const KernelGlobals *ccl_restrict kg,
-                                           ShaderData *ccl_restrict sd,
+ccl_device void shader_setup_from_displace(ccl_global const KernelGlobals *ccl_restrict kg,
+                                           ccl_private ShaderData *ccl_restrict sd,
                                            int object,
                                            int prim,
                                            float u,
@@ -281,8 +281,9 @@ ccl_device void shader_setup_from_displace(const KernelGlobals *ccl_restrict kg,
 
 /* ShaderData setup from ray into background */
 
-ccl_device_inline void shader_setup_from_background(const KernelGlobals *ccl_restrict kg,
-                                                    ShaderData *ccl_restrict sd,
+ccl_device_inline void shader_setup_from_background(ccl_global const KernelGlobals *ccl_restrict
+                                                        kg,
+                                                    ccl_private ShaderData *ccl_restrict sd,
                                                     const float3 ray_P,
                                                     const float3 ray_D,
                                                     const float ray_time)
@@ -325,9 +326,9 @@ ccl_device_inline void shader_setup_from_background(const KernelGlobals *ccl_res
 /* ShaderData setup from point inside volume */
 
 #ifdef __VOLUME__
-ccl_device_inline void shader_setup_from_volume(const KernelGlobals *ccl_restrict kg,
-                                                ShaderData *ccl_restrict sd,
-                                                const Ray *ccl_restrict ray)
+ccl_device_inline void shader_setup_from_volume(ccl_global const KernelGlobals *ccl_restrict kg,
+                                                ccl_private ShaderData *ccl_restrict sd,
+                                                ccl_private const Ray *ccl_restrict ray)
 {
 
   /* vectors */
diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h
index 877b2ece15b..927d630fe91 100644
--- a/intern/cycles/kernel/geom/geom_subd_triangle.h
+++ b/intern/cycles/kernel/geom/geom_subd_triangle.h
@@ -22,15 +22,16 @@ CCL_NAMESPACE_BEGIN
 
 /* Patch index for triangle, -1 if not subdivision triangle */
 
-ccl_device_inline uint subd_triangle_patch(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline uint subd_triangle_patch(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderData *sd)
 {
   return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0;
 }
 
 /* UV coords of triangle within patch */
 
-ccl_device_inline void subd_triangle_patch_uv(const KernelGlobals *kg,
-                                              const ShaderData *sd,
+ccl_device_inline void subd_triangle_patch_uv(ccl_global const KernelGlobals *kg,
+                                              ccl_private const ShaderData *sd,
                                               float2 uv[3])
 {
   uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
@@ -42,7 +43,7 @@ ccl_device_inline void subd_triangle_patch_uv(const KernelGlobals *kg,
 
 /* Vertex indices of patch */
 
-ccl_device_inline uint4 subd_triangle_patch_indices(const KernelGlobals *kg, int patch)
+ccl_device_inline uint4 subd_triangle_patch_indices(ccl_global const KernelGlobals *kg, int patch)
 {
   uint4 indices;
 
@@ -56,21 +57,22 @@ ccl_device_inline uint4 subd_triangle_patch_indices(const KernelGlobals *kg, int
 
 /* Originating face for patch */
 
-ccl_device_inline uint subd_triangle_patch_face(const KernelGlobals *kg, int patch)
+ccl_device_inline uint subd_triangle_patch_face(ccl_global const KernelGlobals *kg, int patch)
 {
   return kernel_tex_fetch(__patches, patch + 4);
 }
 
 /* Number of corners on originating face */
 
-ccl_device_inline uint subd_triangle_patch_num_corners(const KernelGlobals *kg, int patch)
+ccl_device_inline uint subd_triangle_patch_num_corners(ccl_global const KernelGlobals *kg,
+                                                       int patch)
 {
   return kernel_tex_fetch(__patches, patch + 5) & 0xffff;
 }
 
 /* Indices of the four corners that are used by the patch */
 
-ccl_device_inline void subd_triangle_patch_corners(const KernelGlobals *kg,
+ccl_device_inline void subd_triangle_patch_corners(ccl_global const KernelGlobals *kg,
                                                    int patch,
                                                    int corners[4])
 {
@@ -103,11 +105,11 @@ ccl_device_inline void subd_triangle_patch_corners(const KernelGlobals *kg,
 
 /* Reading attributes on various subdivision triangle elements */
 
-ccl_device_noinline float subd_triangle_attribute_float(const KernelGlobals *kg,
-                                                        const ShaderData *sd,
+ccl_device_noinline float subd_triangle_attribute_float(ccl_global const KernelGlobals *kg,
+                                                        ccl_private const ShaderData *sd,
                                                         const AttributeDescriptor desc,
-                                                        float *dx,
-                                                        float *dy)
+                                                        ccl_private float *dx,
+                                                        ccl_private float *dy)
 {
   int patch = subd_triangle_patch(kg, sd);
 
@@ -242,11 +244,11 @@ ccl_device_noinline float subd_triangle_attribute_float(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline float2 subd_triangle_attribute_float2(const KernelGlobals *kg,
-                                                          const ShaderData *sd,
+ccl_device_noinline float2 subd_triangle_attribute_float2(ccl_global const KernelGlobals *kg,
+                                                          ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
-                                                          float2 *dx,
-                                                          float2 *dy)
+                                                          ccl_private float2 *dx,
+                                                          ccl_private float2 *dy)
 {
   int patch = subd_triangle_patch(kg, sd);
 
@@ -385,11 +387,11 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(const KernelGlobals *k
   }
 }
 
-ccl_device_noinline float3 subd_triangle_attribute_float3(const KernelGlobals *kg,
-                                                          const ShaderData *sd,
+ccl_device_noinline float3 subd_triangle_attribute_float3(ccl_global const KernelGlobals *kg,
+                                                          ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
-                                                          float3 *dx,
-                                                          float3 *dy)
+                                                          ccl_private float3 *dx,
+                                                          ccl_private float3 *dy)
 {
   int patch = subd_triangle_patch(kg, sd);
 
@@ -527,11 +529,11 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(const KernelGlobals *k
   }
 }
 
-ccl_device_noinline float4 subd_triangle_attribute_float4(const KernelGlobals *kg,
-                                                          const ShaderData *sd,
+ccl_device_noinline float4 subd_triangle_attribute_float4(ccl_global const KernelGlobals *kg,
+                                                          ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
-                                                          float4 *dx,
-                                                          float4 *dy)
+                                                          ccl_private float4 *dx,
+                                                          ccl_private float4 *dy)
 {
   int patch = subd_triangle_patch(kg, sd);
 
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 8edba46fd39..17f87b7c570 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -25,7 +25,8 @@
 CCL_NAMESPACE_BEGIN
 
 /* Normal on triangle. */
-ccl_device_inline float3 triangle_normal(const KernelGlobals *kg, ShaderData *sd)
+ccl_device_inline float3 triangle_normal(ccl_global const KernelGlobals *kg,
+                                         ccl_private ShaderData *sd)
 {
   /* load triangle vertices */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
@@ -43,14 +44,14 @@ ccl_device_inline float3 triangle_normal(const KernelGlobals *kg, ShaderData *sd
 }
 
 /* Point and normal on triangle. */
-ccl_device_inline void triangle_point_normal(const KernelGlobals *kg,
+ccl_device_inline void triangle_point_normal(ccl_global const KernelGlobals *kg,
                                              int object,
                                              int prim,
                                              float u,
                                              float v,
-                                             float3 *P,
-                                             float3 *Ng,
-                                             int *shader)
+                                             ccl_private float3 *P,
+                                             ccl_private float3 *Ng,
+                                             ccl_private int *shader)
 {
   /* load triangle vertices */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -75,7 +76,7 @@ ccl_device_inline void triangle_point_normal(const KernelGlobals *kg,
 
 /* Triangle vertex locations */
 
-ccl_device_inline void triangle_vertices(const KernelGlobals *kg, int prim, float3 P[3])
+ccl_device_inline void triangle_vertices(ccl_global const KernelGlobals *kg, int prim, float3 P[3])
 {
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
   P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
@@ -85,7 +86,7 @@ ccl_device_inline void triangle_vertices(const KernelGlobals *kg, int prim, floa
 
 /* Triangle vertex locations and vertex normals */
 
-ccl_device_inline void triangle_vertices_and_normals(const KernelGlobals *kg,
+ccl_device_inline void triangle_vertices_and_normals(ccl_global const KernelGlobals *kg,
                                                      int prim,
                                                      float3 P[3],
                                                      float3 N[3])
@@ -102,7 +103,7 @@ ccl_device_inline void triangle_vertices_and_normals(const KernelGlobals *kg,
 /* Interpolate smooth vertex normal from vertices */
 
 ccl_device_inline float3
-triangle_smooth_normal(const KernelGlobals *kg, float3 Ng, int prim, float u, float v)
+triangle_smooth_normal(ccl_global const KernelGlobals *kg, float3 Ng, int prim, float u, float v)
 {
   /* load triangle vertices */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -115,8 +116,12 @@ triangle_smooth_normal(const KernelGlobals *kg, float3 Ng, int prim, float u, fl
   return is_zero(N) ? Ng : N;
 }
 
-ccl_device_inline float3 triangle_smooth_normal_unnormalized(
-    const KernelGlobals *kg, const ShaderData *sd, float3 Ng, int prim, float u, float v)
+ccl_device_inline float3 triangle_smooth_normal_unnormalized(ccl_global const KernelGlobals *kg,
+                                                             ccl_private const ShaderData *sd,
+                                                             float3 Ng,
+                                                             int prim,
+                                                             float u,
+                                                             float v)
 {
   /* load triangle vertices */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -138,10 +143,10 @@ ccl_device_inline float3 triangle_smooth_normal_unnormalized(
 
 /* Ray differentials on triangle */
 
-ccl_device_inline void triangle_dPdudv(const KernelGlobals *kg,
+ccl_device_inline void triangle_dPdudv(ccl_global const KernelGlobals *kg,
                                        int prim,
-                                       ccl_addr_space float3 *dPdu,
-                                       ccl_addr_space float3 *dPdv)
+                                       ccl_private float3 *dPdu,
+                                       ccl_private float3 *dPdv)
 {
   /* fetch triangle vertex coordinates */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -156,11 +161,11 @@ ccl_device_inline void triangle_dPdudv(const KernelGlobals *kg,
 
 /* Reading attributes on various triangle elements */
 
-ccl_device float triangle_attribute_float(const KernelGlobals *kg,
-                                          const ShaderData *sd,
+ccl_device float triangle_attribute_float(ccl_global const KernelGlobals *kg,
+                                          ccl_private const ShaderData *sd,
                                           const AttributeDescriptor desc,
-                                          float *dx,
-                                          float *dy)
+                                          ccl_private float *dx,
+                                          ccl_private float *dy)
 {
   if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) {
     float f0, f1, f2;
@@ -206,11 +211,11 @@ ccl_device float triangle_attribute_float(const KernelGlobals *kg,
   }
 }
 
-ccl_device float2 triangle_attribute_float2(const KernelGlobals *kg,
-                                            const ShaderData *sd,
+ccl_device float2 triangle_attribute_float2(ccl_global const KernelGlobals *kg,
+                                            ccl_private const ShaderData *sd,
                                             const AttributeDescriptor desc,
-                                            float2 *dx,
-                                            float2 *dy)
+                                            ccl_private float2 *dx,
+                                            ccl_private float2 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) {
     float2 f0, f1, f2;
@@ -256,11 +261,11 @@ ccl_device float2 triangle_attribute_float2(const KernelGlobals *kg,
   }
 }
 
-ccl_device float3 triangle_attribute_float3(const KernelGlobals *kg,
-                                            const ShaderData *sd,
+ccl_device float3 triangle_attribute_float3(ccl_global const KernelGlobals *kg,
+                                            ccl_private const ShaderData *sd,
                                             const AttributeDescriptor desc,
-                                            float3 *dx,
-                                            float3 *dy)
+                                            ccl_private float3 *dx,
+                                            ccl_private float3 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) {
     float3 f0, f1, f2;
@@ -306,11 +311,11 @@ ccl_device float3 triangle_attribute_float3(const KernelGlobals *kg,
   }
 }
 
-ccl_device float4 triangle_attribute_float4(const KernelGlobals *kg,
-                                            const ShaderData *sd,
+ccl_device float4 triangle_attribute_float4(ccl_global const KernelGlobals *kg,
+                                            ccl_private const ShaderData *sd,
                                             const AttributeDescriptor desc,
-                                            float4 *dx,
-                                            float4 *dy)
+                                            ccl_private float4 *dx,
+                                            ccl_private float4 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER |
                       ATTR_ELEMENT_CORNER_BYTE)) {
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index b784cc75d08..f637206da19 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -26,8 +26,8 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline bool triangle_intersect(const KernelGlobals *kg,
-                                          Intersection *isect,
+ccl_device_inline bool triangle_intersect(ccl_global const KernelGlobals *kg,
+                                          ccl_private Intersection *isect,
                                           float3 P,
                                           float3 dir,
                                           float tmax,
@@ -85,15 +85,15 @@ ccl_device_inline bool triangle_intersect(const KernelGlobals *kg,
  */
 
 #ifdef __BVH_LOCAL__
-ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg,
-                                                LocalIntersection *local_isect,
+ccl_device_inline bool triangle_intersect_local(ccl_global const KernelGlobals *kg,
+                                                ccl_private LocalIntersection *local_isect,
                                                 float3 P,
                                                 float3 dir,
                                                 int object,
                                                 int local_object,
                                                 int prim_addr,
                                                 float tmax,
-                                                uint *lcg_state,
+                                                ccl_private uint *lcg_state,
                                                 int max_hits)
 {
   /* Only intersect with matching object, for instanced objects we
@@ -169,7 +169,7 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg,
   }
 
   /* Record intersection. */
-  Intersection *isect = &local_isect->hits[hit];
+  ccl_private Intersection *isect = &local_isect->hits[hit];
   isect->prim = prim;
   isect->object = local_object;
   isect->type = PRIMITIVE_TRIANGLE;
@@ -200,8 +200,8 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg,
  * http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf
  */
 
-ccl_device_inline float3 triangle_refine(const KernelGlobals *kg,
-                                         ShaderData *sd,
+ccl_device_inline float3 triangle_refine(ccl_global const KernelGlobals *kg,
+                                         ccl_private ShaderData *sd,
                                          float3 P,
                                          float3 D,
                                          float t,
@@ -256,8 +256,8 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg,
 /* Same as above, except that t is assumed to be in object space for
  * instancing.
  */
-ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg,
-                                               ShaderData *sd,
+ccl_device_inline float3 triangle_refine_local(ccl_global const KernelGlobals *kg,
+                                               ccl_private ShaderData *sd,
                                                float3 P,
                                                float3 D,
                                                float t,
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 2bcd7e56b5f..c466c3fb07a 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -31,8 +31,8 @@ CCL_NAMESPACE_BEGIN
 
 /* Return position normalized to 0..1 in mesh bounds */
 
-ccl_device_inline float3 volume_normalized_position(const KernelGlobals *kg,
-                                                    const ShaderData *sd,
+ccl_device_inline float3 volume_normalized_position(ccl_global const KernelGlobals *kg,
+                                                    ccl_private const ShaderData *sd,
                                                     float3 P)
 {
   /* todo: optimize this so it's just a single matrix multiplication when
@@ -70,8 +70,8 @@ ccl_device float3 volume_attribute_value_to_float3(const float4 value)
   }
 }
 
-ccl_device float4 volume_attribute_float4(const KernelGlobals *kg,
-                                          const ShaderData *sd,
+ccl_device float4 volume_attribute_float4(ccl_global const KernelGlobals *kg,
+                                          ccl_private const ShaderData *sd,
                                           const AttributeDescriptor desc)
 {
   if (desc.element & (ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) {
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
index 6e4e1be55fa..c822823de9c 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
@@ -44,7 +44,7 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u, float max)
  * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known
  * that the pixel did converge. */
 ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
-                                          const ccl_global KernelWorkTile *ccl_restrict tile,
+                                          ccl_global const KernelWorkTile *ccl_restrict tile,
                                           ccl_global float *render_buffer,
                                           const int x,
                                           const int y,
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_camera.h b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
index 58e7bde4c94..291f0f106f0 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_camera.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
@@ -25,12 +25,12 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline void integrate_camera_sample(const KernelGlobals *ccl_restrict kg,
+ccl_device_inline void integrate_camera_sample(ccl_global const KernelGlobals *ccl_restrict kg,
                                                const int sample,
                                                const int x,
                                                const int y,
                                                const uint rng_hash,
-                                               Ray *ray)
+                                               ccl_private Ray *ray)
 {
   /* Filter sampling. */
   float filter_u, filter_v;
@@ -64,7 +64,7 @@ ccl_device_inline void integrate_camera_sample(const KernelGlobals *ccl_restrict
  * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known
  * that the pixel did converge. */
 ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
-                                            const ccl_global KernelWorkTile *ccl_restrict tile,
+                                            ccl_global const KernelWorkTile *ccl_restrict tile,
                                             ccl_global float *render_buffer,
                                             const int x,
                                             const int y,
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
index cd9af1c62fc..760c08159e3 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
@@ -86,7 +86,7 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS
 template<uint32_t current_kernel>
 ccl_device_forceinline void integrator_intersect_shader_next_kernel(
     INTEGRATOR_STATE_ARGS,
-    const Intersection *ccl_restrict isect,
+    ccl_private const Intersection *ccl_restrict isect,
     const int shader,
     const int shader_flags)
 {
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
index 5bd9cfda4a4..00d44f0e5ed 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
@@ -32,7 +32,7 @@ ccl_device_forceinline uint integrate_intersect_shadow_visibility(INTEGRATOR_STA
 }
 
 ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS,
-                                                  const Ray *ray,
+                                                  ccl_private const Ray *ray,
                                                   const uint visibility)
 {
   /* Mask which will pick only opaque visibility bits from the `visibility`.
@@ -62,7 +62,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(INTEGRATOR_STAT
 
 #ifdef __TRANSPARENT_SHADOWS__
 ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS,
-                                                       const Ray *ray,
+                                                       ccl_private const Ray *ray,
                                                        const uint visibility)
 {
   Intersection isect[INTEGRATOR_SHADOW_ISECT_SIZE];
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
index 33a77d0fe29..192e9c6ab43 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
@@ -30,7 +30,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A
   PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
 
   ShaderDataTinyStorage stack_sd_storage;
-  ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
+  ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
 
   kernel_assert(kernel_data.integrator.use_volumes);
 
@@ -78,7 +78,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
   PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
 
   ShaderDataTinyStorage stack_sd_storage;
-  ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
+  ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
 
   Ray volume_ray ccl_optional_struct_init;
   integrator_state_read_ray(INTEGRATOR_STATE_PASS, &volume_ray);
diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h
index 234aa7cae63..a898f3fb2fc 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_background.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_background.h
@@ -49,7 +49,7 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
     /* TODO: does aliasing like this break automatic SoA in CUDA?
      * Should we instead store closures separate from ShaderData? */
     ShaderDataTinyStorage emission_sd_storage;
-    ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+    ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
 
     PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP);
     shader_setup_from_background(kg,
@@ -155,7 +155,7 @@ ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS,
       /* Evaluate light shader. */
       /* TODO: does aliasing like this break automatic SoA in CUDA? */
       ShaderDataTinyStorage emission_sd_storage;
-      ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+      ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
       float3 light_eval = light_sample_shader_eval(
           INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
       if (is_zero(light_eval)) {
diff --git a/intern/cycles/kernel/integrator/integrator_shade_light.h b/intern/cycles/kernel/integrator/integrator_shade_light.h
index 05b530f9665..d8f8da63023 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_light.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_light.h
@@ -72,7 +72,7 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
   /* Evaluate light shader. */
   /* TODO: does aliasing like this break automatic SoA in CUDA? */
   ShaderDataTinyStorage emission_sd_storage;
-  ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+  ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
   float3 light_eval = light_sample_shader_eval(INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
   if (is_zero(light_eval)) {
     return;
diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
index fd3c3ae1653..3857b522b25 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
@@ -39,7 +39,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A
    * TODO: is it better to declare this outside the loop or keep it local
    * so the compiler can see there is no dependency between iterations? */
   ShaderDataTinyStorage shadow_sd_storage;
-  ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
+  ccl_private ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
 
   /* Setup shader data at surface. */
   Intersection isect ccl_optional_struct_init;
@@ -69,13 +69,14 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A
 ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS,
                                                            const int hit,
                                                            const int num_recorded_hits,
-                                                           float3 *ccl_restrict throughput)
+                                                           ccl_private float3 *ccl_restrict
+                                                               throughput)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME);
 
   /* TODO: deduplicate with surface, or does it not matter for memory usage? */
   ShaderDataTinyStorage shadow_sd_storage;
-  ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
+  ccl_private ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
 
   /* Setup shader data. */
   Ray ray ccl_optional_struct_init;
diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h
index 27338f824c0..0d739517592 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_surface.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h
@@ -29,7 +29,7 @@
 CCL_NAMESPACE_BEGIN
 
 ccl_device_forceinline void integrate_surface_shader_setup(INTEGRATOR_STATE_CONST_ARGS,
-                                                           ShaderData *sd)
+                                                           ccl_private ShaderData *sd)
 {
   Intersection isect ccl_optional_struct_init;
   integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
@@ -42,7 +42,7 @@ ccl_device_forceinline void integrate_surface_shader_setup(INTEGRATOR_STATE_CONS
 
 #ifdef __HOLDOUT__
 ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARGS,
-                                                      ShaderData *sd,
+                                                      ccl_private ShaderData *sd,
                                                       ccl_global float *ccl_restrict render_buffer)
 {
   /* Write holdout transparency to render buffer and stop if fully holdout. */
@@ -67,7 +67,7 @@ ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARG
 
 #ifdef __EMISSION__
 ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_ARGS,
-                                                       const ShaderData *sd,
+                                                       ccl_private const ShaderData *sd,
                                                        ccl_global float *ccl_restrict
                                                            render_buffer)
 {
@@ -103,8 +103,8 @@ ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_AR
 /* Path tracing: sample point on light and evaluate light shader, then
  * queue shadow ray to be traced. */
 ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS,
-                                                           ShaderData *sd,
-                                                           const RNGState *rng_state)
+                                                           ccl_private ShaderData *sd,
+                                                           ccl_private const RNGState *rng_state)
 {
   /* Test if there is a light or BSDF that needs direct light. */
   if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) {
@@ -134,7 +134,7 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
    * the light shader. This could also move to its own kernel, for
    * non-constant light sources. */
   ShaderDataTinyStorage emission_sd_storage;
-  ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+  ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
   const float3 light_eval = light_sample_shader_eval(
       INTEGRATOR_STATE_PASS, emission_sd, &ls, sd->time);
   if (is_zero(light_eval)) {
@@ -206,9 +206,8 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
 #endif
 
 /* Path tracing: bounce off or through surface with new direction. */
-ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(INTEGRATOR_STATE_ARGS,
-                                                                ShaderData *sd,
-                                                                const RNGState *rng_state)
+ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
+    INTEGRATOR_STATE_ARGS, ccl_private ShaderData *sd, ccl_private const RNGState *rng_state)
 {
   /* Sample BSDF or BSSRDF. */
   if (!(sd->flag & (SD_BSDF | SD_BSSRDF))) {
@@ -217,7 +216,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(INTEGRATOR_STATE
 
   float bsdf_u, bsdf_v;
   path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-  const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u);
+  ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u);
 
 #ifdef __SUBSURFACE__
   /* BSSRDF closure, we schedule subsurface intersection kernel. */
@@ -281,7 +280,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(INTEGRATOR_STATE
 
 #ifdef __VOLUME__
 ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STATE_ARGS,
-                                                                 ShaderData *sd)
+                                                                 ccl_private ShaderData *sd)
 {
   if (!path_state_volume_next(INTEGRATOR_STATE_PASS)) {
     return LABEL_NONE;
@@ -304,19 +303,21 @@ ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STAT
 #endif
 
 #if defined(__AO__) && defined(__SHADER_RAYTRACE__)
-ccl_device_forceinline void integrate_surface_ao_pass(INTEGRATOR_STATE_CONST_ARGS,
-                                                      const ShaderData *ccl_restrict sd,
-                                                      const RNGState *ccl_restrict rng_state,
-                                                      ccl_global float *ccl_restrict render_buffer)
+ccl_device_forceinline void integrate_surface_ao_pass(
+    INTEGRATOR_STATE_CONST_ARGS,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const RNGState *ccl_restrict rng_state,
+    ccl_global float *ccl_restrict render_buffer)
 {
 #  ifdef __KERNEL_OPTIX__
   optixDirectCall<void>(2, INTEGRATOR_STATE_PASS, sd, rng_state, render_buffer);
 }
 
-extern "C" __device__ void __direct_callable__ao_pass(INTEGRATOR_STATE_CONST_ARGS,
-                                                      const ShaderData *ccl_restrict sd,
-                                                      const RNGState *ccl_restrict rng_state,
-                                                      ccl_global float *ccl_restrict render_buffer)
+extern "C" __device__ void __direct_callable__ao_pass(
+    INTEGRATOR_STATE_CONST_ARGS,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const RNGState *ccl_restrict rng_state,
+    ccl_global float *ccl_restrict render_buffer)
 {
 #  endif /* __KERNEL_OPTIX__ */
   float bsdf_u, bsdf_v;
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index aa4c652c037..72c609751f7 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ -71,8 +71,8 @@ typedef struct VolumeShaderCoefficients {
 
 /* Evaluate shader to get extinction coefficient at P. */
 ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
-                                                   ShaderData *ccl_restrict sd,
-                                                   float3 *ccl_restrict extinction)
+                                                   ccl_private ShaderData *ccl_restrict sd,
+                                                   ccl_private float3 *ccl_restrict extinction)
 {
   shader_eval_volume<true>(INTEGRATOR_STATE_PASS, sd, PATH_RAY_SHADOW, [=](const int i) {
     return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
@@ -89,8 +89,8 @@ ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
 
 /* Evaluate shader to get absorption, scattering and emission at P. */
 ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
-                                            ShaderData *ccl_restrict sd,
-                                            VolumeShaderCoefficients *coeff)
+                                            ccl_private ShaderData *ccl_restrict sd,
+                                            ccl_private VolumeShaderCoefficients *coeff)
 {
   const int path_flag = INTEGRATOR_STATE(path, flag);
   shader_eval_volume<false>(INTEGRATOR_STATE_PASS, sd, path_flag, [=](const int i) {
@@ -107,7 +107,7 @@ ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
 
   if (sd->flag & SD_SCATTER) {
     for (int i = 0; i < sd->num_closure; i++) {
-      const ShaderClosure *sc = &sd->closure[i];
+      ccl_private const ShaderClosure *sc = &sd->closure[i];
 
       if (CLOSURE_IS_VOLUME(sc->type)) {
         coeff->sigma_s += sc->weight;
@@ -123,14 +123,14 @@ ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
   return true;
 }
 
-ccl_device_forceinline void volume_step_init(const KernelGlobals *kg,
-                                             const RNGState *rng_state,
+ccl_device_forceinline void volume_step_init(ccl_global const KernelGlobals *kg,
+                                             ccl_private const RNGState *rng_state,
                                              const float object_step_size,
                                              float t,
-                                             float *step_size,
-                                             float *step_shade_offset,
-                                             float *steps_offset,
-                                             int *max_steps)
+                                             ccl_private float *step_size,
+                                             ccl_private float *step_shade_offset,
+                                             ccl_private float *steps_offset,
+                                             ccl_private int *max_steps)
 {
   if (object_step_size == FLT_MAX) {
     /* Homogeneous volume. */
@@ -170,9 +170,9 @@ ccl_device_forceinline void volume_step_init(const KernelGlobals *kg,
 /* homogeneous volume: assume shader evaluation at the starts gives
  * the extinction coefficient for the entire line segment */
 ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS,
-                                          Ray *ccl_restrict ray,
-                                          ShaderData *ccl_restrict sd,
-                                          float3 *ccl_restrict throughput)
+                                          ccl_private Ray *ccl_restrict ray,
+                                          ccl_private ShaderData *ccl_restrict sd,
+                                          ccl_private float3 *ccl_restrict throughput)
 {
   float3 sigma_t = zero_float3();
 
@@ -185,9 +185,9 @@ ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS,
 /* heterogeneous volume: integrate stepping through the volume until we
  * reach the end, get absorbed entirely, or run out of iterations */
 ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
-                                            Ray *ccl_restrict ray,
-                                            ShaderData *ccl_restrict sd,
-                                            float3 *ccl_restrict throughput,
+                                            ccl_private Ray *ccl_restrict ray,
+                                            ccl_private ShaderData *ccl_restrict sd,
+                                            ccl_private float3 *ccl_restrict throughput,
                                             const float object_step_size)
 {
   /* Load random number state. */
@@ -257,10 +257,10 @@ ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
 /* Equi-angular sampling as in:
  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
 
-ccl_device float volume_equiangular_sample(const Ray *ccl_restrict ray,
+ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict ray,
                                            const float3 light_P,
                                            const float xi,
-                                           float *pdf)
+                                           ccl_private float *pdf)
 {
   const float t = ray->t;
   const float delta = dot((light_P - ray->P), ray->D);
@@ -281,7 +281,7 @@ ccl_device float volume_equiangular_sample(const Ray *ccl_restrict ray,
   return min(t, delta + t_); /* min is only for float precision errors */
 }
 
-ccl_device float volume_equiangular_pdf(const Ray *ccl_restrict ray,
+ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
                                         const float3 light_P,
                                         const float sample_t)
 {
@@ -305,7 +305,7 @@ ccl_device float volume_equiangular_pdf(const Ray *ccl_restrict ray,
   return pdf;
 }
 
-ccl_device float volume_equiangular_cdf(const Ray *ccl_restrict ray,
+ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray,
                                         const float3 light_P,
                                         const float sample_t)
 {
@@ -332,8 +332,12 @@ ccl_device float volume_equiangular_cdf(const Ray *ccl_restrict ray,
 
 /* Distance sampling */
 
-ccl_device float volume_distance_sample(
-    float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
+ccl_device float volume_distance_sample(float max_t,
+                                        float3 sigma_t,
+                                        int channel,
+                                        float xi,
+                                        ccl_private float3 *transmittance,
+                                        ccl_private float3 *pdf)
 {
   /* xi is [0, 1[ so log(0) should never happen, division by zero is
    * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
@@ -363,7 +367,7 @@ ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_
 
 /* Emission */
 
-ccl_device float3 volume_emission_integrate(VolumeShaderCoefficients *coeff,
+ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff,
                                             int closure_flag,
                                             float3 transmittance,
                                             float t)
@@ -410,13 +414,13 @@ typedef struct VolumeIntegrateState {
 } VolumeIntegrateState;
 
 ccl_device_forceinline void volume_integrate_step_scattering(
-    const ShaderData *sd,
-    const Ray *ray,
+    ccl_private const ShaderData *sd,
+    ccl_private const Ray *ray,
     const float3 equiangular_light_P,
-    const VolumeShaderCoefficients &ccl_restrict coeff,
+    ccl_private const VolumeShaderCoefficients &ccl_restrict coeff,
     const float3 transmittance,
-    VolumeIntegrateState &ccl_restrict vstate,
-    VolumeIntegrateResult &ccl_restrict result)
+    ccl_private VolumeIntegrateState &ccl_restrict vstate,
+    ccl_private VolumeIntegrateResult &ccl_restrict result)
 {
   /* Pick random color channel, we use the Veach one-sample
    * model with balance heuristic for the channels. */
@@ -507,14 +511,14 @@ ccl_device_forceinline void volume_integrate_step_scattering(
  * for path tracing where we don't want to branch. */
 ccl_device_forceinline void volume_integrate_heterogeneous(
     INTEGRATOR_STATE_ARGS,
-    Ray *ccl_restrict ray,
-    ShaderData *ccl_restrict sd,
-    const RNGState *rng_state,
+    ccl_private Ray *ccl_restrict ray,
+    ccl_private ShaderData *ccl_restrict sd,
+    ccl_private const RNGState *rng_state,
     ccl_global float *ccl_restrict render_buffer,
     const float object_step_size,
     const VolumeSampleMethod direct_sample_method,
     const float3 equiangular_light_P,
-    VolumeIntegrateResult &result)
+    ccl_private VolumeIntegrateResult &result)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE);
 
@@ -666,10 +670,11 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
 #  ifdef __EMISSION__
 /* Path tracing: sample point on light and evaluate light shader, then
  * queue shadow ray to be traced. */
-ccl_device_forceinline bool integrate_volume_sample_light(INTEGRATOR_STATE_ARGS,
-                                                          const ShaderData *ccl_restrict sd,
-                                                          const RNGState *ccl_restrict rng_state,
-                                                          LightSample *ccl_restrict ls)
+ccl_device_forceinline bool integrate_volume_sample_light(
+    INTEGRATOR_STATE_ARGS,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const RNGState *ccl_restrict rng_state,
+    ccl_private LightSample *ccl_restrict ls)
 {
   /* Test if there is a light or BSDF that needs direct light. */
   if (!kernel_data.integrator.use_direct_light) {
@@ -694,14 +699,14 @@ ccl_device_forceinline bool integrate_volume_sample_light(INTEGRATOR_STATE_ARGS,
 
 /* Path tracing: sample point on light and evaluate light shader, then
  * queue shadow ray to be traced. */
-ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
-                                                          const ShaderData *ccl_restrict sd,
-                                                          const RNGState *ccl_restrict rng_state,
-                                                          const float3 P,
-                                                          const ShaderVolumePhases *ccl_restrict
-                                                              phases,
-                                                          const float3 throughput,
-                                                          LightSample *ccl_restrict ls)
+ccl_device_forceinline void integrate_volume_direct_light(
+    INTEGRATOR_STATE_ARGS,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const RNGState *ccl_restrict rng_state,
+    const float3 P,
+    ccl_private const ShaderVolumePhases *ccl_restrict phases,
+    const float3 throughput,
+    ccl_private LightSample *ccl_restrict ls)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT);
 
@@ -737,7 +742,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
    * the light shader. This could also move to its own kernel, for
    * non-constant light sources. */
   ShaderDataTinyStorage emission_sd_storage;
-  ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+  ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
   const float3 light_eval = light_sample_shader_eval(
       INTEGRATOR_STATE_PASS, emission_sd, ls, sd->time);
   if (is_zero(light_eval)) {
@@ -801,10 +806,11 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
 #  endif
 
 /* Path tracing: scatter in new direction using phase function */
-ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS,
-                                                           ShaderData *sd,
-                                                           const RNGState *rng_state,
-                                                           const ShaderVolumePhases *phases)
+ccl_device_forceinline bool integrate_volume_phase_scatter(
+    INTEGRATOR_STATE_ARGS,
+    ccl_private ShaderData *sd,
+    ccl_private const RNGState *rng_state,
+    ccl_private const ShaderVolumePhases *phases)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT);
 
@@ -865,7 +871,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS
  * between the endpoints. distance sampling is used to decide if we will
  * scatter or not. */
 ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
-                                                 Ray *ccl_restrict ray,
+                                                 ccl_private Ray *ccl_restrict ray,
                                                  ccl_global float *ccl_restrict render_buffer)
 {
   ShaderData sd;
diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h
index efc7576d95b..517e2891769 100644
--- a/intern/cycles/kernel/integrator/integrator_state.h
+++ b/intern/cycles/kernel/integrator/integrator_state.h
@@ -106,7 +106,7 @@ typedef struct IntegratorQueueCounter {
  * GPU rendering path state with SoA layout. */
 typedef struct IntegratorStateGPU {
 #define KERNEL_STRUCT_BEGIN(name) struct {
-#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type *name;
+#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) ccl_global type *name;
 #define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
 #define KERNEL_STRUCT_END(name) \
   } \
@@ -124,13 +124,13 @@ typedef struct IntegratorStateGPU {
 #undef KERNEL_STRUCT_VOLUME_STACK_SIZE
 
   /* Count number of queued kernels. */
-  IntegratorQueueCounter *queue_counter;
+  ccl_global IntegratorQueueCounter *queue_counter;
 
   /* Count number of kernels queued for specific shaders. */
-  int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM];
+  ccl_global int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM];
 
   /* Index of path which will be used by a next shadow catcher split.  */
-  int *next_shadow_catcher_path_index;
+  ccl_global int *next_shadow_catcher_path_index;
 } IntegratorStateGPU;
 
 /* Abstraction
@@ -173,9 +173,10 @@ typedef IntegratorStateCPU *ccl_restrict IntegratorState;
 
 typedef int IntegratorState;
 
-#  define INTEGRATOR_STATE_ARGS const KernelGlobals *ccl_restrict kg, const IntegratorState state
+#  define INTEGRATOR_STATE_ARGS \
+    ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state
 #  define INTEGRATOR_STATE_CONST_ARGS \
-    const KernelGlobals *ccl_restrict kg, const IntegratorState state
+    ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state
 #  define INTEGRATOR_STATE_PASS kg, state
 
 #  define INTEGRATOR_STATE_PASS_NULL kg, -1
diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h
index 037c7533943..fddd9eb5ac8 100644
--- a/intern/cycles/kernel/integrator/integrator_state_util.h
+++ b/intern/cycles/kernel/integrator/integrator_state_util.h
@@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN
 /* Ray */
 
 ccl_device_forceinline void integrator_state_write_ray(INTEGRATOR_STATE_ARGS,
-                                                       const Ray *ccl_restrict ray)
+                                                       ccl_private const Ray *ccl_restrict ray)
 {
   INTEGRATOR_STATE_WRITE(ray, P) = ray->P;
   INTEGRATOR_STATE_WRITE(ray, D) = ray->D;
@@ -35,7 +35,7 @@ ccl_device_forceinline void integrator_state_write_ray(INTEGRATOR_STATE_ARGS,
 }
 
 ccl_device_forceinline void integrator_state_read_ray(INTEGRATOR_STATE_CONST_ARGS,
-                                                      Ray *ccl_restrict ray)
+                                                      ccl_private Ray *ccl_restrict ray)
 {
   ray->P = INTEGRATOR_STATE(ray, P);
   ray->D = INTEGRATOR_STATE(ray, D);
@@ -47,8 +47,8 @@ ccl_device_forceinline void integrator_state_read_ray(INTEGRATOR_STATE_CONST_ARG
 
 /* Shadow Ray */
 
-ccl_device_forceinline void integrator_state_write_shadow_ray(INTEGRATOR_STATE_ARGS,
-                                                              const Ray *ccl_restrict ray)
+ccl_device_forceinline void integrator_state_write_shadow_ray(
+    INTEGRATOR_STATE_ARGS, ccl_private const Ray *ccl_restrict ray)
 {
   INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray->P;
   INTEGRATOR_STATE_WRITE(shadow_ray, D) = ray->D;
@@ -58,7 +58,7 @@ ccl_device_forceinline void integrator_state_write_shadow_ray(INTEGRATOR_STATE_A
 }
 
 ccl_device_forceinline void integrator_state_read_shadow_ray(INTEGRATOR_STATE_CONST_ARGS,
-                                                             Ray *ccl_restrict ray)
+                                                             ccl_private Ray *ccl_restrict ray)
 {
   ray->P = INTEGRATOR_STATE(shadow_ray, P);
   ray->D = INTEGRATOR_STATE(shadow_ray, D);
@@ -70,8 +70,8 @@ ccl_device_forceinline void integrator_state_read_shadow_ray(INTEGRATOR_STATE_CO
 
 /* Intersection */
 
-ccl_device_forceinline void integrator_state_write_isect(INTEGRATOR_STATE_ARGS,
-                                                         const Intersection *ccl_restrict isect)
+ccl_device_forceinline void integrator_state_write_isect(
+    INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect)
 {
   INTEGRATOR_STATE_WRITE(isect, t) = isect->t;
   INTEGRATOR_STATE_WRITE(isect, u) = isect->u;
@@ -84,8 +84,8 @@ ccl_device_forceinline void integrator_state_write_isect(INTEGRATOR_STATE_ARGS,
 #endif
 }
 
-ccl_device_forceinline void integrator_state_read_isect(INTEGRATOR_STATE_CONST_ARGS,
-                                                        Intersection *ccl_restrict isect)
+ccl_device_forceinline void integrator_state_read_isect(
+    INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect)
 {
   isect->prim = INTEGRATOR_STATE(isect, prim);
   isect->object = INTEGRATOR_STATE(isect, object);
@@ -124,7 +124,7 @@ ccl_device_forceinline bool integrator_state_volume_stack_is_empty(INTEGRATOR_ST
 /* Shadow Intersection */
 
 ccl_device_forceinline void integrator_state_write_shadow_isect(
-    INTEGRATOR_STATE_ARGS, const Intersection *ccl_restrict isect, const int index)
+    INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect, const int index)
 {
   INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, t) = isect->t;
   INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, u) = isect->u;
@@ -137,9 +137,8 @@ ccl_device_forceinline void integrator_state_write_shadow_isect(
 #endif
 }
 
-ccl_device_forceinline void integrator_state_read_shadow_isect(INTEGRATOR_STATE_CONST_ARGS,
-                                                               Intersection *ccl_restrict isect,
-                                                               const int index)
+ccl_device_forceinline void integrator_state_read_shadow_isect(
+    INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect, const int index)
 {
   isect->prim = INTEGRATOR_STATE_ARRAY(shadow_isect, index, prim);
   isect->object = INTEGRATOR_STATE_ARRAY(shadow_isect, index, object);
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h
index 2d15c82322a..153f9b79743 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface.h
@@ -36,14 +36,16 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __SUBSURFACE__
 
-ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS, ShaderData *sd, const ShaderClosure *sc)
+ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS,
+                                 ccl_private ShaderData *sd,
+                                 ccl_private const ShaderClosure *sc)
 {
   /* We should never have two consecutive BSSRDF bounces, the second one should
    * be converted to a diffuse BSDF to avoid this. */
   kernel_assert(!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DIFFUSE_ANCESTOR));
 
   /* Setup path state for intersect_subsurface kernel. */
-  const Bssrdf *bssrdf = (const Bssrdf *)sc;
+  ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc;
 
   /* Setup ray into surface. */
   INTEGRATOR_STATE_WRITE(ray, P) = sd->P;
@@ -89,7 +91,7 @@ ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS, ShaderData *sd, const Sh
 }
 
 ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
-                                             ShaderData *sd,
+                                             ccl_private ShaderData *sd,
                                              const uint32_t path_flag)
 {
   /* Get bump mapped normal from shader evaluation at exit point. */
@@ -107,7 +109,7 @@ ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
 
 #  ifdef __PRINCIPLED__
   if (path_flag & PATH_RAY_SUBSURFACE_USE_FRESNEL) {
-    PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+    ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
         sd, sizeof(PrincipledDiffuseBsdf), weight);
 
     if (bsdf) {
@@ -119,7 +121,8 @@ ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
   else
 #  endif /* __PRINCIPLED__ */
   {
-    DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+    ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
+        sd, sizeof(DiffuseBsdf), weight);
 
     if (bsdf) {
       bsdf->N = N;
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
index 3f685e3a2e9..788a5e9b929 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
@@ -33,8 +33,8 @@ ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r,
  * nearby points on the same object. */
 ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS,
                                        RNGState rng_state,
-                                       Ray &ray,
-                                       LocalIntersection &ss_isect)
+                                       ccl_private Ray &ray,
+                                       ccl_private LocalIntersection &ss_isect)
 
 {
   float disk_u, disk_v;
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
index d4935b0ce4a..45a43ea67a9 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
@@ -31,8 +31,11 @@ CCL_NAMESPACE_BEGIN
  * Magnus Wrenninge, Ryusuke Villemin, Christophe Hery.
  * https://graphics.pixar.com/library/PathTracedSubsurface/ */
 
-ccl_device void subsurface_random_walk_remap(
-    const float albedo, const float d, float g, float *sigma_t, float *alpha)
+ccl_device void subsurface_random_walk_remap(const float albedo,
+                                             const float d,
+                                             float g,
+                                             ccl_private float *sigma_t,
+                                             ccl_private float *alpha)
 {
   /* Compute attenuation and scattering coefficients from albedo. */
   const float g2 = g * g;
@@ -78,9 +81,9 @@ ccl_device void subsurface_random_walk_remap(
 ccl_device void subsurface_random_walk_coefficients(const float3 albedo,
                                                     const float3 radius,
                                                     const float anisotropy,
-                                                    float3 *sigma_t,
-                                                    float3 *alpha,
-                                                    float3 *throughput)
+                                                    ccl_private float3 *sigma_t,
+                                                    ccl_private float3 *alpha,
+                                                    ccl_private float3 *throughput)
 {
   float sigma_t_x, sigma_t_y, sigma_t_z;
   float alpha_x, alpha_y, alpha_z;
@@ -164,7 +167,7 @@ ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, f
 ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t,
                                                          float t,
                                                          bool hit,
-                                                         float3 *transmittance)
+                                                         ccl_private float3 *transmittance)
 {
   float3 T = volume_color_transmittance(sigma_t, t);
   if (transmittance) {
@@ -179,8 +182,8 @@ ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t,
 
 ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
                                               RNGState rng_state,
-                                              Ray &ray,
-                                              LocalIntersection &ss_isect)
+                                              ccl_private Ray &ray,
+                                              ccl_private LocalIntersection &ss_isect)
 {
   float bssrdf_u, bssrdf_v;
   path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
diff --git a/intern/cycles/kernel/integrator/integrator_volume_stack.h b/intern/cycles/kernel/integrator/integrator_volume_stack.h
index 01ebf8376b1..0c4a723de6f 100644
--- a/intern/cycles/kernel/integrator/integrator_volume_stack.h
+++ b/intern/cycles/kernel/integrator/integrator_volume_stack.h
@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN
 
 template<typename StackReadOp, typename StackWriteOp>
 ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
-                                        const ShaderData *sd,
+                                        ccl_private const ShaderData *sd,
                                         StackReadOp stack_read,
                                         StackWriteOp stack_write)
 {
@@ -84,7 +84,7 @@ ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
   }
 }
 
-ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData *sd)
+ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, ccl_private const ShaderData *sd)
 {
   volume_stack_enter_exit(
       INTEGRATOR_STATE_PASS,
@@ -95,7 +95,8 @@ ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData
       });
 }
 
-ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData *sd)
+ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
+                                               ccl_private const ShaderData *sd)
 {
   volume_stack_enter_exit(
       INTEGRATOR_STATE_PASS,
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index f4d00e4c20c..dc0aa9356f7 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -32,7 +32,9 @@ CCL_NAMESPACE_BEGIN
  * that only one of those can happen at a bounce, and so do not need to accumulate
  * them separately. */
 
-ccl_device_inline void bsdf_eval_init(BsdfEval *eval, const bool is_diffuse, float3 value)
+ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval,
+                                      const bool is_diffuse,
+                                      float3 value)
 {
   eval->diffuse = zero_float3();
   eval->glossy = zero_float3();
@@ -45,7 +47,7 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, const bool is_diffuse, flo
   }
 }
 
-ccl_device_inline void bsdf_eval_accum(BsdfEval *eval,
+ccl_device_inline void bsdf_eval_accum(ccl_private BsdfEval *eval,
                                        const bool is_diffuse,
                                        float3 value,
                                        float mis_weight)
@@ -60,29 +62,29 @@ ccl_device_inline void bsdf_eval_accum(BsdfEval *eval,
   }
 }
 
-ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
+ccl_device_inline bool bsdf_eval_is_zero(ccl_private const BsdfEval *eval)
 {
   return is_zero(eval->diffuse) && is_zero(eval->glossy);
 }
 
-ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float value)
+ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float value)
 {
   eval->diffuse *= value;
   eval->glossy *= value;
 }
 
-ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value)
+ccl_device_inline void bsdf_eval_mul3(ccl_private BsdfEval *eval, float3 value)
 {
   eval->diffuse *= value;
   eval->glossy *= value;
 }
 
-ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval)
+ccl_device_inline float3 bsdf_eval_sum(ccl_private const BsdfEval *eval)
 {
   return eval->diffuse + eval->glossy;
 }
 
-ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(const BsdfEval *eval)
+ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEval *eval)
 {
   /* Ratio of diffuse and glossy to recover proportions for writing to render pass.
    * We assume reflection, transmission and volume scatter to be exclusive. */
@@ -96,7 +98,9 @@ ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(const BsdfEval *eval)
  * to render buffers instead of using per-thread memory, and to avoid the
  * impact of clamping on other contributions. */
 
-ccl_device_forceinline void kernel_accum_clamp(const KernelGlobals *kg, float3 *L, int bounce)
+ccl_device_forceinline void kernel_accum_clamp(ccl_global const KernelGlobals *kg,
+                                               ccl_private float3 *L,
+                                               int bounce)
 {
 #ifdef __KERNEL_DEBUG_NAN__
   if (!isfinite3_safe(*L)) {
diff --git a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h
index 7d71907effe..cdf2601f6c3 100644
--- a/intern/cycles/kernel/kernel_adaptive_sampling.h
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h
@@ -40,7 +40,7 @@ ccl_device_forceinline bool kernel_need_sample_pixel(INTEGRATOR_STATE_CONST_ARGS
 
 /* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */
 
-ccl_device bool kernel_adaptive_sampling_convergence_check(const KernelGlobals *kg,
+ccl_device bool kernel_adaptive_sampling_convergence_check(ccl_global const KernelGlobals *kg,
                                                            ccl_global float *render_buffer,
                                                            int x,
                                                            int y,
@@ -90,7 +90,7 @@ ccl_device bool kernel_adaptive_sampling_convergence_check(const KernelGlobals *
 /* This is a simple box filter in two passes.
  * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
 
-ccl_device void kernel_adaptive_sampling_filter_x(const KernelGlobals *kg,
+ccl_device void kernel_adaptive_sampling_filter_x(ccl_global const KernelGlobals *kg,
                                                   ccl_global float *render_buffer,
                                                   int y,
                                                   int start_x,
@@ -123,7 +123,7 @@ ccl_device void kernel_adaptive_sampling_filter_x(const KernelGlobals *kg,
   }
 }
 
-ccl_device void kernel_adaptive_sampling_filter_y(const KernelGlobals *kg,
+ccl_device void kernel_adaptive_sampling_filter_y(ccl_global const KernelGlobals *kg,
                                                   ccl_global float *render_buffer,
                                                   int x,
                                                   int start_y,
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index abb1ba455e6..cfff727d007 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -24,7 +24,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void kernel_displace_evaluate(const KernelGlobals *kg,
+ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg,
                                          ccl_global const KernelShaderEvalInput *input,
                                          ccl_global float4 *output,
                                          const int offset)
@@ -56,7 +56,7 @@ ccl_device void kernel_displace_evaluate(const KernelGlobals *kg,
   output[offset] += make_float4(D.x, D.y, D.z, 0.0f);
 }
 
-ccl_device void kernel_background_evaluate(const KernelGlobals *kg,
+ccl_device void kernel_background_evaluate(ccl_global const KernelGlobals *kg,
                                            ccl_global const KernelShaderEvalInput *input,
                                            ccl_global float4 *output,
                                            const int offset)
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 7be5da8fe6d..73683a15c5d 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -46,12 +46,12 @@ ccl_device float2 camera_sample_aperture(ccl_constant KernelCamera *cam, float u
   return bokeh;
 }
 
-ccl_device void camera_sample_perspective(const KernelGlobals *ccl_restrict kg,
+ccl_device void camera_sample_perspective(ccl_global const KernelGlobals *ccl_restrict kg,
                                           float raster_x,
                                           float raster_y,
                                           float lens_u,
                                           float lens_v,
-                                          ccl_addr_space Ray *ray)
+                                          ccl_private Ray *ray)
 {
   /* create ray form raster position */
   ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
@@ -185,12 +185,12 @@ ccl_device void camera_sample_perspective(const KernelGlobals *ccl_restrict kg,
 }
 
 /* Orthographic Camera */
-ccl_device void camera_sample_orthographic(const KernelGlobals *ccl_restrict kg,
+ccl_device void camera_sample_orthographic(ccl_global const KernelGlobals *ccl_restrict kg,
                                            float raster_x,
                                            float raster_y,
                                            float lens_u,
                                            float lens_v,
-                                           ccl_addr_space Ray *ray)
+                                           ccl_private Ray *ray)
 {
   /* create ray form raster position */
   ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
@@ -254,13 +254,13 @@ ccl_device void camera_sample_orthographic(const KernelGlobals *ccl_restrict kg,
 
 ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
 #ifdef __CAMERA_MOTION__
-                                              const ccl_global DecomposedTransform *cam_motion,
+                                              ccl_global const DecomposedTransform *cam_motion,
 #endif
                                               float raster_x,
                                               float raster_y,
                                               float lens_u,
                                               float lens_v,
-                                              ccl_addr_space Ray *ray)
+                                              ccl_private Ray *ray)
 {
   ProjectionTransform rastertocamera = cam->rastertocamera;
   float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
@@ -370,7 +370,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
 
 /* Common */
 
-ccl_device_inline void camera_sample(const KernelGlobals *ccl_restrict kg,
+ccl_device_inline void camera_sample(ccl_global const KernelGlobals *ccl_restrict kg,
                                      int x,
                                      int y,
                                      float filter_u,
@@ -378,7 +378,7 @@ ccl_device_inline void camera_sample(const KernelGlobals *ccl_restrict kg,
                                      float lens_u,
                                      float lens_v,
                                      float time,
-                                     ccl_addr_space Ray *ray)
+                                     ccl_private Ray *ray)
 {
   /* pixel filter */
   int filter_table_offset = kernel_data.film.filter_table_offset;
@@ -434,7 +434,7 @@ ccl_device_inline void camera_sample(const KernelGlobals *ccl_restrict kg,
   }
   else {
 #ifdef __CAMERA_MOTION__
-    const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion);
+    ccl_global const DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion);
     camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray);
 #else
     camera_sample_panorama(&kernel_data.cam, raster_x, raster_y, lens_u, lens_v, ray);
@@ -444,13 +444,13 @@ ccl_device_inline void camera_sample(const KernelGlobals *ccl_restrict kg,
 
 /* Utilities */
 
-ccl_device_inline float3 camera_position(const KernelGlobals *kg)
+ccl_device_inline float3 camera_position(ccl_global const KernelGlobals *kg)
 {
   Transform cameratoworld = kernel_data.cam.cameratoworld;
   return make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
 }
 
-ccl_device_inline float camera_distance(const KernelGlobals *kg, float3 P)
+ccl_device_inline float camera_distance(ccl_global const KernelGlobals *kg, float3 P)
 {
   Transform cameratoworld = kernel_data.cam.cameratoworld;
   float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
@@ -464,7 +464,7 @@ ccl_device_inline float camera_distance(const KernelGlobals *kg, float3 P)
   }
 }
 
-ccl_device_inline float camera_z_depth(const KernelGlobals *kg, float3 P)
+ccl_device_inline float camera_z_depth(ccl_global const KernelGlobals *kg, float3 P)
 {
   if (kernel_data.cam.type != CAMERA_PANORAMA) {
     Transform worldtocamera = kernel_data.cam.worldtocamera;
@@ -477,7 +477,7 @@ ccl_device_inline float camera_z_depth(const KernelGlobals *kg, float3 P)
   }
 }
 
-ccl_device_inline float3 camera_direction_from_point(const KernelGlobals *kg, float3 P)
+ccl_device_inline float3 camera_direction_from_point(ccl_global const KernelGlobals *kg, float3 P)
 {
   Transform cameratoworld = kernel_data.cam.cameratoworld;
 
@@ -491,7 +491,9 @@ ccl_device_inline float3 camera_direction_from_point(const KernelGlobals *kg, fl
   }
 }
 
-ccl_device_inline float3 camera_world_to_ndc(const KernelGlobals *kg, ShaderData *sd, float3 P)
+ccl_device_inline float3 camera_world_to_ndc(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             float3 P)
 {
   if (kernel_data.cam.type != CAMERA_PANORAMA) {
     /* perspective / ortho */
diff --git a/intern/cycles/kernel/kernel_color.h b/intern/cycles/kernel/kernel_color.h
index 960774e0741..9e8e0e68b8f 100644
--- a/intern/cycles/kernel/kernel_color.h
+++ b/intern/cycles/kernel/kernel_color.h
@@ -20,14 +20,14 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float3 xyz_to_rgb(const KernelGlobals *kg, float3 xyz)
+ccl_device float3 xyz_to_rgb(ccl_global const KernelGlobals *kg, float3 xyz)
 {
   return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz),
                      dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz),
                      dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz));
 }
 
-ccl_device float linear_rgb_to_gray(const KernelGlobals *kg, float3 c)
+ccl_device float linear_rgb_to_gray(ccl_global const KernelGlobals *kg, float3 c)
 {
   return dot(c, float4_to_float3(kernel_data.film.rgb_to_y));
 }
diff --git a/intern/cycles/kernel/kernel_differential.h b/intern/cycles/kernel/kernel_differential.h
index db4e110bd10..17187083019 100644
--- a/intern/cycles/kernel/kernel_differential.h
+++ b/intern/cycles/kernel/kernel_differential.h
@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
 
 /* See "Tracing Ray Differentials", Homan Igehy, 1999. */
 
-ccl_device void differential_transfer(ccl_addr_space differential3 *surface_dP,
+ccl_device void differential_transfer(ccl_private differential3 *surface_dP,
                                       const differential3 ray_dP,
                                       float3 ray_D,
                                       const differential3 ray_dD,
@@ -38,7 +38,7 @@ ccl_device void differential_transfer(ccl_addr_space differential3 *surface_dP,
   surface_dP->dy = tmpy - dot(tmpy, surface_Ng) * tmp;
 }
 
-ccl_device void differential_incoming(ccl_addr_space differential3 *dI, const differential3 dD)
+ccl_device void differential_incoming(ccl_private differential3 *dI, const differential3 dD)
 {
   /* compute dIdx/dy at a shading point, we just need to negate the
    * differential of the ray direction */
@@ -47,8 +47,8 @@ ccl_device void differential_incoming(ccl_addr_space differential3 *dI, const di
   dI->dy = -dD.dy;
 }
 
-ccl_device void differential_dudv(ccl_addr_space differential *du,
-                                  ccl_addr_space differential *dv,
+ccl_device void differential_dudv(ccl_private differential *du,
+                                  ccl_private differential *dv,
                                   float3 dPdu,
                                   float3 dPdv,
                                   differential3 dP,
@@ -132,7 +132,7 @@ ccl_device_forceinline float differential_make_compact(const differential3 D)
   return 0.5f * (len(D.dx) + len(D.dy));
 }
 
-ccl_device_forceinline void differential_transfer_compact(ccl_addr_space differential3 *surface_dP,
+ccl_device_forceinline void differential_transfer_compact(ccl_private differential3 *surface_dP,
                                                           const float ray_dP,
                                                           const float3 /* ray_D */,
                                                           const float ray_dD,
@@ -149,7 +149,7 @@ ccl_device_forceinline void differential_transfer_compact(ccl_addr_space differe
   surface_dP->dy = dy * scale;
 }
 
-ccl_device_forceinline void differential_incoming_compact(ccl_addr_space differential3 *dI,
+ccl_device_forceinline void differential_incoming_compact(ccl_private differential3 *dI,
                                                           const float3 D,
                                                           const float dD)
 {
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index d62285d173d..015587ccbbd 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -24,10 +24,11 @@
 CCL_NAMESPACE_BEGIN
 
 /* Evaluate shader on light. */
-ccl_device_noinline_cpu float3 light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
-                                                        ShaderData *ccl_restrict emission_sd,
-                                                        LightSample *ccl_restrict ls,
-                                                        float time)
+ccl_device_noinline_cpu float3
+light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
+                         ccl_private ShaderData *ccl_restrict emission_sd,
+                         ccl_private LightSample *ccl_restrict ls,
+                         float time)
 {
   /* setup shading at emitter */
   float3 eval = zero_float3();
@@ -89,7 +90,7 @@ ccl_device_noinline_cpu float3 light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
   eval *= ls->eval_fac;
 
   if (ls->lamp != LAMP_NONE) {
-    const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp);
+    ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp);
     eval *= make_float3(klight->strength[0], klight->strength[1], klight->strength[2]);
   }
 
@@ -97,16 +98,16 @@ ccl_device_noinline_cpu float3 light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
 }
 
 /* Test if light sample is from a light or emission from geometry. */
-ccl_device_inline bool light_sample_is_light(const LightSample *ccl_restrict ls)
+ccl_device_inline bool light_sample_is_light(ccl_private const LightSample *ccl_restrict ls)
 {
   /* return if it's a lamp for shadow pass */
   return (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND);
 }
 
 /* Early path termination of shadow rays. */
-ccl_device_inline bool light_sample_terminate(const KernelGlobals *ccl_restrict kg,
-                                              const LightSample *ccl_restrict ls,
-                                              BsdfEval *ccl_restrict eval,
+ccl_device_inline bool light_sample_terminate(ccl_global const KernelGlobals *ccl_restrict kg,
+                                              ccl_private const LightSample *ccl_restrict ls,
+                                              ccl_private BsdfEval *ccl_restrict eval,
                                               const float rand_terminate)
 {
   if (bsdf_eval_is_zero(eval)) {
@@ -132,9 +133,10 @@ ccl_device_inline bool light_sample_terminate(const KernelGlobals *ccl_restrict
  * of a triangle. Surface is lifted by amount h along normal n in the incident
  * point. */
 
-ccl_device_inline float3 shadow_ray_smooth_surface_offset(const KernelGlobals *ccl_restrict kg,
-                                                          const ShaderData *ccl_restrict sd,
-                                                          float3 Ng)
+ccl_device_inline float3
+shadow_ray_smooth_surface_offset(ccl_global const KernelGlobals *ccl_restrict kg,
+                                 ccl_private const ShaderData *ccl_restrict sd,
+                                 float3 Ng)
 {
   float3 V[3], N[3];
   triangle_vertices_and_normals(kg, sd->prim, V, N);
@@ -178,8 +180,8 @@ ccl_device_inline float3 shadow_ray_smooth_surface_offset(const KernelGlobals *c
 
 /* Ray offset to avoid shadow terminator artifact. */
 
-ccl_device_inline float3 shadow_ray_offset(const KernelGlobals *ccl_restrict kg,
-                                           const ShaderData *ccl_restrict sd,
+ccl_device_inline float3 shadow_ray_offset(ccl_global const KernelGlobals *ccl_restrict kg,
+                                           ccl_private const ShaderData *ccl_restrict sd,
                                            float3 L)
 {
   float NL = dot(sd->N, L);
@@ -211,10 +213,10 @@ ccl_device_inline float3 shadow_ray_offset(const KernelGlobals *ccl_restrict kg,
   return P;
 }
 
-ccl_device_inline void shadow_ray_setup(const ShaderData *ccl_restrict sd,
-                                        const LightSample *ccl_restrict ls,
+ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restrict sd,
+                                        ccl_private const LightSample *ccl_restrict ls,
                                         const float3 P,
-                                        Ray *ray)
+                                        ccl_private Ray *ray)
 {
   if (ls->shader & SHADER_CAST_SHADOW) {
     /* setup ray */
@@ -244,21 +246,23 @@ ccl_device_inline void shadow_ray_setup(const ShaderData *ccl_restrict sd,
 }
 
 /* Create shadow ray towards light sample. */
-ccl_device_inline void light_sample_to_surface_shadow_ray(const KernelGlobals *ccl_restrict kg,
-                                                          const ShaderData *ccl_restrict sd,
-                                                          const LightSample *ccl_restrict ls,
-                                                          Ray *ray)
+ccl_device_inline void light_sample_to_surface_shadow_ray(
+    ccl_global const KernelGlobals *ccl_restrict kg,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const LightSample *ccl_restrict ls,
+    ccl_private Ray *ray)
 {
   const float3 P = shadow_ray_offset(kg, sd, ls->D);
   shadow_ray_setup(sd, ls, P, ray);
 }
 
 /* Create shadow ray towards light sample. */
-ccl_device_inline void light_sample_to_volume_shadow_ray(const KernelGlobals *ccl_restrict kg,
-                                                         const ShaderData *ccl_restrict sd,
-                                                         const LightSample *ccl_restrict ls,
-                                                         const float3 P,
-                                                         Ray *ray)
+ccl_device_inline void light_sample_to_volume_shadow_ray(
+    ccl_global const KernelGlobals *ccl_restrict kg,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const LightSample *ccl_restrict ls,
+    const float3 P,
+    ccl_private Ray *ray)
 {
   shadow_ray_setup(sd, ls, P, ray);
 }
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index e8f4a21878e..a87eff3832e 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -30,7 +30,8 @@ ccl_device_forceinline float film_transparency_to_alpha(float transparency)
   return saturate(1.0f - transparency);
 }
 
-ccl_device_inline float film_get_scale(const KernelFilmConvert *ccl_restrict kfilm_convert,
+ccl_device_inline float film_get_scale(ccl_global const KernelFilmConvert *ccl_restrict
+                                           kfilm_convert,
                                        ccl_global const float *ccl_restrict buffer)
 {
   if (kfilm_convert->pass_sample_count == PASS_UNUSED) {
@@ -38,14 +39,15 @@ ccl_device_inline float film_get_scale(const KernelFilmConvert *ccl_restrict kfi
   }
 
   if (kfilm_convert->pass_use_filter) {
-    const uint sample_count = *((const uint *)(buffer + kfilm_convert->pass_sample_count));
+    const uint sample_count = *(
+        (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count));
     return 1.0f / sample_count;
   }
 
   return 1.0f;
 }
 
-ccl_device_inline float film_get_scale_exposure(const KernelFilmConvert *ccl_restrict
+ccl_device_inline float film_get_scale_exposure(ccl_global const KernelFilmConvert *ccl_restrict
                                                     kfilm_convert,
                                                 ccl_global const float *ccl_restrict buffer)
 {
@@ -63,10 +65,10 @@ ccl_device_inline float film_get_scale_exposure(const KernelFilmConvert *ccl_res
 }
 
 ccl_device_inline bool film_get_scale_and_scale_exposure(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer,
-    float *ccl_restrict scale,
-    float *ccl_restrict scale_exposure)
+    ccl_private float *ccl_restrict scale,
+    ccl_private float *ccl_restrict scale_exposure)
 {
   if (kfilm_convert->pass_sample_count == PASS_UNUSED) {
     *scale = kfilm_convert->scale;
@@ -74,7 +76,8 @@ ccl_device_inline bool film_get_scale_and_scale_exposure(
     return true;
   }
 
-  const uint sample_count = *((const uint *)(buffer + kfilm_convert->pass_sample_count));
+  const uint sample_count = *(
+      (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count));
   if (!sample_count) {
     *scale = 0.0f;
     *scale_exposure = 0.0f;
@@ -102,33 +105,33 @@ ccl_device_inline bool film_get_scale_and_scale_exposure(
  * Float (scalar) passes.
  */
 
-ccl_device_inline void film_get_pass_pixel_depth(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_depth(ccl_global const KernelFilmConvert *ccl_restrict
                                                      kfilm_convert,
                                                  ccl_global const float *ccl_restrict buffer,
-                                                 float *ccl_restrict pixel)
+                                                 ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
   const float f = *in;
 
   pixel[0] = (f == 0.0f) ? 1e10f : f * scale_exposure;
 }
 
-ccl_device_inline void film_get_pass_pixel_mist(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_mist(ccl_global const KernelFilmConvert *ccl_restrict
                                                     kfilm_convert,
                                                 ccl_global const float *ccl_restrict buffer,
-                                                float *ccl_restrict pixel)
+                                                ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
   const float f = *in;
 
   /* Note that we accumulate 1 - mist in the kernel to avoid having to
@@ -137,9 +140,9 @@ ccl_device_inline void film_get_pass_pixel_mist(const KernelFilmConvert *ccl_res
 }
 
 ccl_device_inline void film_get_pass_pixel_sample_count(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer,
-    float *ccl_restrict pixel)
+    ccl_private float *ccl_restrict pixel)
 {
   /* TODO(sergey): Consider normalizing into the [0..1] range, so that it is possible to see
    * meaningful value when adaptive sampler stopped rendering image way before the maximum
@@ -149,23 +152,23 @@ ccl_device_inline void film_get_pass_pixel_sample_count(
   kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
   const float f = *in;
 
   pixel[0] = __float_as_uint(f) * kfilm_convert->scale;
 }
 
-ccl_device_inline void film_get_pass_pixel_float(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_float(ccl_global const KernelFilmConvert *ccl_restrict
                                                      kfilm_convert,
                                                  ccl_global const float *ccl_restrict buffer,
-                                                 float *ccl_restrict pixel)
+                                                 ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
   const float f = *in;
 
   pixel[0] = f * scale_exposure;
@@ -175,28 +178,28 @@ ccl_device_inline void film_get_pass_pixel_float(const KernelFilmConvert *ccl_re
  * Float 3 passes.
  */
 
-ccl_device_inline void film_get_pass_pixel_light_path(const KernelFilmConvert *ccl_restrict
-                                                          kfilm_convert,
-                                                      ccl_global const float *ccl_restrict buffer,
-                                                      float *ccl_restrict pixel)
+ccl_device_inline void film_get_pass_pixel_light_path(
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const float *ccl_restrict buffer,
+    ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 3);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   /* Read light pass. */
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
   float3 f = make_float3(in[0], in[1], in[2]);
 
   /* Optionally add indirect light pass. */
   if (kfilm_convert->pass_indirect != PASS_UNUSED) {
-    const float *in_indirect = buffer + kfilm_convert->pass_indirect;
+    ccl_global const float *in_indirect = buffer + kfilm_convert->pass_indirect;
     const float3 f_indirect = make_float3(in_indirect[0], in_indirect[1], in_indirect[2]);
     f += f_indirect;
   }
 
   /* Optionally divide out color. */
   if (kfilm_convert->pass_divide != PASS_UNUSED) {
-    const float *in_divide = buffer + kfilm_convert->pass_divide;
+    ccl_global const float *in_divide = buffer + kfilm_convert->pass_divide;
     const float3 f_divide = make_float3(in_divide[0], in_divide[1], in_divide[2]);
     f = safe_divide_even_color(f, f_divide);
 
@@ -213,17 +216,17 @@ ccl_device_inline void film_get_pass_pixel_light_path(const KernelFilmConvert *c
   pixel[2] = f.z;
 }
 
-ccl_device_inline void film_get_pass_pixel_float3(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_float3(ccl_global const KernelFilmConvert *ccl_restrict
                                                       kfilm_convert,
                                                   ccl_global const float *ccl_restrict buffer,
-                                                  float *ccl_restrict pixel)
+                                                  ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 3);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
 
   const float3 f = make_float3(in[0], in[1], in[2]) * scale_exposure;
 
@@ -236,17 +239,17 @@ ccl_device_inline void film_get_pass_pixel_float3(const KernelFilmConvert *ccl_r
  * Float4 passes.
  */
 
-ccl_device_inline void film_get_pass_pixel_motion(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_motion(ccl_global const KernelFilmConvert *ccl_restrict
                                                       kfilm_convert,
                                                   ccl_global const float *ccl_restrict buffer,
-                                                  float *ccl_restrict pixel)
+                                                  ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components == 4);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
   kernel_assert(kfilm_convert->pass_motion_weight != PASS_UNUSED);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
-  const float *in_weight = buffer + kfilm_convert->pass_motion_weight;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in_weight = buffer + kfilm_convert->pass_motion_weight;
 
   const float weight = in_weight[0];
   const float weight_inv = (weight > 0.0f) ? 1.0f / weight : 0.0f;
@@ -259,17 +262,17 @@ ccl_device_inline void film_get_pass_pixel_motion(const KernelFilmConvert *ccl_r
   pixel[3] = motion.w;
 }
 
-ccl_device_inline void film_get_pass_pixel_cryptomatte(const KernelFilmConvert *ccl_restrict
-                                                           kfilm_convert,
-                                                       ccl_global const float *ccl_restrict buffer,
-                                                       float *ccl_restrict pixel)
+ccl_device_inline void film_get_pass_pixel_cryptomatte(
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const float *ccl_restrict buffer,
+    ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components == 4);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale = film_get_scale(kfilm_convert, buffer);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
 
   const float4 f = make_float4(in[0], in[1], in[2], in[3]);
 
@@ -281,10 +284,10 @@ ccl_device_inline void film_get_pass_pixel_cryptomatte(const KernelFilmConvert *
   pixel[3] = f.w * scale;
 }
 
-ccl_device_inline void film_get_pass_pixel_float4(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_float4(ccl_global const KernelFilmConvert *ccl_restrict
                                                       kfilm_convert,
                                                   ccl_global const float *ccl_restrict buffer,
-                                                  float *ccl_restrict pixel)
+                                                  ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components == 4);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
@@ -292,7 +295,7 @@ ccl_device_inline void film_get_pass_pixel_float4(const KernelFilmConvert *ccl_r
   float scale, scale_exposure;
   film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
 
   const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure;
   const float alpha = in[3] * scale;
@@ -303,10 +306,10 @@ ccl_device_inline void film_get_pass_pixel_float4(const KernelFilmConvert *ccl_r
   pixel[3] = alpha;
 }
 
-ccl_device_inline void film_get_pass_pixel_combined(const KernelFilmConvert *ccl_restrict
-                                                        kfilm_convert,
-                                                    ccl_global const float *ccl_restrict buffer,
-                                                    float *ccl_restrict pixel)
+ccl_device_inline void film_get_pass_pixel_combined(
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const float *ccl_restrict buffer,
+    ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components == 4);
 
@@ -324,7 +327,7 @@ ccl_device_inline void film_get_pass_pixel_combined(const KernelFilmConvert *ccl
     return;
   }
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
 
   const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure;
   const float alpha = in[3] * scale;
@@ -339,9 +342,9 @@ ccl_device_inline void film_get_pass_pixel_combined(const KernelFilmConvert *ccl
  * Shadow catcher.
  */
 
-ccl_device_inline float3
-film_calculate_shadow_catcher_denoised(const KernelFilmConvert *ccl_restrict kfilm_convert,
-                                       ccl_global const float *ccl_restrict buffer)
+ccl_device_inline float3 film_calculate_shadow_catcher_denoised(
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const float *ccl_restrict buffer)
 {
   kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED);
 
@@ -367,7 +370,7 @@ ccl_device_inline float3 safe_divide_shadow_catcher(float3 a, float3 b)
 }
 
 ccl_device_inline float3
-film_calculate_shadow_catcher(const KernelFilmConvert *ccl_restrict kfilm_convert,
+film_calculate_shadow_catcher(ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
                               ccl_global const float *ccl_restrict buffer)
 {
   /* For the shadow catcher pass we divide combined pass by the shadow catcher.
@@ -431,7 +434,7 @@ film_calculate_shadow_catcher(const KernelFilmConvert *ccl_restrict kfilm_conver
 }
 
 ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer)
 {
   /* The approximation of the shadow is 1 - average(shadow_catcher_pass). A better approximation
@@ -474,9 +477,9 @@ ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow(
 }
 
 ccl_device_inline void film_get_pass_pixel_shadow_catcher(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer,
-    float *ccl_restrict pixel)
+    ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 3);
 
@@ -488,9 +491,9 @@ ccl_device_inline void film_get_pass_pixel_shadow_catcher(
 }
 
 ccl_device_inline void film_get_pass_pixel_shadow_catcher_matte_with_shadow(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer,
-    float *ccl_restrict pixel)
+    ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components == 3 || kfilm_convert->num_components == 4);
 
@@ -510,9 +513,9 @@ ccl_device_inline void film_get_pass_pixel_shadow_catcher_matte_with_shadow(
  */
 
 ccl_device_inline void film_apply_pass_pixel_overlays_rgba(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer,
-    float *ccl_restrict pixel)
+    ccl_private float *ccl_restrict pixel)
 {
   if (kfilm_convert->show_active_pixels &&
       kfilm_convert->pass_adaptive_aux_buffer != PASS_UNUSED) {
diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h
index ed01f494f98..07b96d0e1a8 100644
--- a/intern/cycles/kernel/kernel_id_passes.h
+++ b/intern/cycles/kernel/kernel_id_passes.h
@@ -92,7 +92,7 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl
 }
 
 /* post-sorting for Cryptomatte */
-ccl_device_inline void kernel_cryptomatte_post(const KernelGlobals *kg,
+ccl_device_inline void kernel_cryptomatte_post(ccl_global const KernelGlobals *kg,
                                                ccl_global float *render_buffer,
                                                int pixel_index)
 {
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index 1beaf3cc2b2..1f745ab1da9 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -72,7 +72,10 @@ ccl_device_inline float cmj_randfloat_simple(uint i, uint p)
   return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF);
 }
 
-ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension)
+ccl_device float pmj_sample_1D(ccl_global const KernelGlobals *kg,
+                               uint sample,
+                               uint rng_hash,
+                               uint dimension)
 {
   /* Perform Owen shuffle of the sample number to reorder the samples. */
 #ifdef _SIMPLE_HASH_
@@ -115,8 +118,12 @@ ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_ha
   return fx;
 }
 
-ccl_device void pmj_sample_2D(
-    const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension, float *x, float *y)
+ccl_device void pmj_sample_2D(ccl_global const KernelGlobals *kg,
+                              uint sample,
+                              uint rng_hash,
+                              uint dimension,
+                              ccl_private float *x,
+                              ccl_private float *y)
 {
   /* Perform a shuffle on the sample number to reorder the samples. */
 #ifdef _SIMPLE_HASH_
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 52f641634b9..33d0c09a32a 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -45,13 +45,13 @@ typedef struct LightSample {
 /* Regular Light */
 
 template<bool in_volume_segment>
-ccl_device_inline bool light_sample(const KernelGlobals *kg,
+ccl_device_inline bool light_sample(ccl_global const KernelGlobals *kg,
                                     const int lamp,
                                     const float randu,
                                     const float randv,
                                     const float3 P,
                                     const int path_flag,
-                                    LightSample *ls)
+                                    ccl_private LightSample *ls)
 {
   const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
   if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) {
@@ -209,9 +209,9 @@ ccl_device_inline bool light_sample(const KernelGlobals *kg,
   return (ls->pdf > 0.0f);
 }
 
-ccl_device bool lights_intersect(const KernelGlobals *ccl_restrict kg,
-                                 const Ray *ccl_restrict ray,
-                                 Intersection *ccl_restrict isect,
+ccl_device bool lights_intersect(ccl_global const KernelGlobals *ccl_restrict kg,
+                                 ccl_private const Ray *ccl_restrict ray,
+                                 ccl_private Intersection *ccl_restrict isect,
                                  const int last_prim,
                                  const int last_object,
                                  const int last_type,
@@ -298,12 +298,12 @@ ccl_device bool lights_intersect(const KernelGlobals *ccl_restrict kg,
   return isect->prim != PRIM_NONE;
 }
 
-ccl_device bool light_sample_from_distant_ray(const KernelGlobals *ccl_restrict kg,
+ccl_device bool light_sample_from_distant_ray(ccl_global const KernelGlobals *ccl_restrict kg,
                                               const float3 ray_D,
                                               const int lamp,
-                                              LightSample *ccl_restrict ls)
+                                              ccl_private LightSample *ccl_restrict ls)
 {
-  const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+  ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
   const int shader = klight->shader_id;
   const float radius = klight->distant.radius;
   const LightType type = (LightType)klight->type;
@@ -362,14 +362,14 @@ ccl_device bool light_sample_from_distant_ray(const KernelGlobals *ccl_restrict
   return true;
 }
 
-ccl_device bool light_sample_from_intersection(const KernelGlobals *ccl_restrict kg,
-                                               const Intersection *ccl_restrict isect,
+ccl_device bool light_sample_from_intersection(ccl_global const KernelGlobals *ccl_restrict kg,
+                                               ccl_private const Intersection *ccl_restrict isect,
                                                const float3 ray_P,
                                                const float3 ray_D,
-                                               LightSample *ccl_restrict ls)
+                                               ccl_private LightSample *ccl_restrict ls)
 {
   const int lamp = isect->prim;
-  const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+  ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
   LightType type = (LightType)klight->type;
   ls->type = type;
   ls->shader = klight->shader_id;
@@ -464,7 +464,7 @@ ccl_device bool light_sample_from_intersection(const KernelGlobals *ccl_restrict
 
-/* returns true if the triangle is has motion blur or an instancing transform applied */
+/* returns true if the triangle has motion blur or an instancing transform applied */
 ccl_device_inline bool triangle_world_space_vertices(
-    const KernelGlobals *kg, int object, int prim, float time, float3 V[3])
+    ccl_global const KernelGlobals *kg, int object, int prim, float time, float3 V[3])
 {
   bool has_motion = false;
   const int object_flag = kernel_tex_fetch(__object_flag, object);
@@ -492,7 +492,7 @@ ccl_device_inline bool triangle_world_space_vertices(
   return has_motion;
 }
 
-ccl_device_inline float triangle_light_pdf_area(const KernelGlobals *kg,
+ccl_device_inline float triangle_light_pdf_area(ccl_global const KernelGlobals *kg,
                                                 const float3 Ng,
                                                 const float3 I,
                                                 float t)
@@ -506,8 +506,8 @@ ccl_device_inline float triangle_light_pdf_area(const KernelGlobals *kg,
   return t * t * pdf / cos_pi;
 }
 
-ccl_device_forceinline float triangle_light_pdf(const KernelGlobals *kg,
-                                                const ShaderData *sd,
+ccl_device_forceinline float triangle_light_pdf(ccl_global const KernelGlobals *kg,
+                                                ccl_private const ShaderData *sd,
                                                 float t)
 {
   /* A naive heuristic to decide between costly solid angle sampling
@@ -578,13 +578,13 @@ ccl_device_forceinline float triangle_light_pdf(const KernelGlobals *kg,
 }
 
 template<bool in_volume_segment>
-ccl_device_forceinline void triangle_light_sample(const KernelGlobals *kg,
+ccl_device_forceinline void triangle_light_sample(ccl_global const KernelGlobals *kg,
                                                   int prim,
                                                   int object,
                                                   float randu,
                                                   float randv,
                                                   float time,
-                                                  LightSample *ls,
+                                                  ccl_private LightSample *ls,
                                                   const float3 P)
 {
   /* A naive heuristic to decide between costly solid angle sampling
@@ -747,7 +747,8 @@ ccl_device_forceinline void triangle_light_sample(const KernelGlobals *kg,
 
 /* Light Distribution */
 
-ccl_device int light_distribution_sample(const KernelGlobals *kg, float *randu)
+ccl_device int light_distribution_sample(ccl_global const KernelGlobals *kg,
+                                         ccl_private float *randu)
 {
   /* This is basically std::upper_bound as used by PBRT, to find a point light or
    * triangle to emit from, proportional to area. a good improvement would be to
@@ -785,7 +786,7 @@ ccl_device int light_distribution_sample(const KernelGlobals *kg, float *randu)
 
 /* Generic Light */
 
-ccl_device_inline bool light_select_reached_max_bounces(const KernelGlobals *kg,
+ccl_device_inline bool light_select_reached_max_bounces(ccl_global const KernelGlobals *kg,
                                                         int index,
                                                         int bounce)
 {
@@ -793,18 +794,18 @@ ccl_device_inline bool light_select_reached_max_bounces(const KernelGlobals *kg,
 }
 
 template<bool in_volume_segment>
-ccl_device_noinline bool light_distribution_sample(const KernelGlobals *kg,
+ccl_device_noinline bool light_distribution_sample(ccl_global const KernelGlobals *kg,
                                                    float randu,
                                                    const float randv,
                                                    const float time,
                                                    const float3 P,
                                                    const int bounce,
                                                    const int path_flag,
-                                                   LightSample *ls)
+                                                   ccl_private LightSample *ls)
 {
   /* Sample light index from distribution. */
   const int index = light_distribution_sample(kg, &randu);
-  const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution,
+  ccl_global const KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution,
                                                                               index);
   const int prim = kdistribution->prim;
 
@@ -833,36 +834,37 @@ ccl_device_noinline bool light_distribution_sample(const KernelGlobals *kg,
   return light_sample<in_volume_segment>(kg, lamp, randu, randv, P, path_flag, ls);
 }
 
-ccl_device_inline bool light_distribution_sample_from_volume_segment(const KernelGlobals *kg,
-                                                                     float randu,
-                                                                     const float randv,
-                                                                     const float time,
-                                                                     const float3 P,
-                                                                     const int bounce,
-                                                                     const int path_flag,
-                                                                     LightSample *ls)
+ccl_device_inline bool light_distribution_sample_from_volume_segment(
+    ccl_global const KernelGlobals *kg,
+    float randu,
+    const float randv,
+    const float time,
+    const float3 P,
+    const int bounce,
+    const int path_flag,
+    ccl_private LightSample *ls)
 {
   return light_distribution_sample<true>(kg, randu, randv, time, P, bounce, path_flag, ls);
 }
 
-ccl_device_inline bool light_distribution_sample_from_position(const KernelGlobals *kg,
+ccl_device_inline bool light_distribution_sample_from_position(ccl_global const KernelGlobals *kg,
                                                                float randu,
                                                                const float randv,
                                                                const float time,
                                                                const float3 P,
                                                                const int bounce,
                                                                const int path_flag,
-                                                               LightSample *ls)
+                                                               ccl_private LightSample *ls)
 {
   return light_distribution_sample<false>(kg, randu, randv, time, P, bounce, path_flag, ls);
 }
 
-ccl_device_inline bool light_distribution_sample_new_position(const KernelGlobals *kg,
+ccl_device_inline bool light_distribution_sample_new_position(ccl_global const KernelGlobals *kg,
                                                               const float randu,
                                                               const float randv,
                                                               const float time,
                                                               const float3 P,
-                                                              LightSample *ls)
+                                                              ccl_private LightSample *ls)
 {
   /* Sample a new position on the same light, for volume sampling. */
   if (ls->type == LIGHT_TRIANGLE) {
diff --git a/intern/cycles/kernel/kernel_light_background.h b/intern/cycles/kernel/kernel_light_background.h
index 493ed560bc6..3669ff50455 100644
--- a/intern/cycles/kernel/kernel_light_background.h
+++ b/intern/cycles/kernel/kernel_light_background.h
@@ -24,10 +24,10 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __BACKGROUND_MIS__
 
-ccl_device float3 background_map_sample(const KernelGlobals *kg,
+ccl_device float3 background_map_sample(ccl_global const KernelGlobals *kg,
                                         float randu,
                                         float randv,
-                                        float *pdf)
+                                        ccl_private float *pdf)
 {
   /* for the following, the CDF values are actually a pair of floats, with the
    * function value as X and the actual CDF as Y.  The last entry's function
@@ -109,7 +109,7 @@ ccl_device float3 background_map_sample(const KernelGlobals *kg,
 /* TODO(sergey): Same as above, after the release we should consider using
  * 'noinline' for all devices.
  */
-ccl_device float background_map_pdf(const KernelGlobals *kg, float3 direction)
+ccl_device float background_map_pdf(ccl_global const KernelGlobals *kg, float3 direction)
 {
   float2 uv = direction_to_equirectangular(direction);
   int res_x = kernel_data.background.map_res_x;
@@ -143,7 +143,11 @@ ccl_device float background_map_pdf(const KernelGlobals *kg, float3 direction)
 }
 
 ccl_device_inline bool background_portal_data_fetch_and_check_side(
-    const KernelGlobals *kg, float3 P, int index, float3 *lightpos, float3 *dir)
+    ccl_global const KernelGlobals *kg,
+    float3 P,
+    int index,
+    ccl_private float3 *lightpos,
+    ccl_private float3 *dir)
 {
   int portal = kernel_data.background.portal_offset + index;
-  const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+  ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, portal);
@@ -158,8 +162,11 @@ ccl_device_inline bool background_portal_data_fetch_and_check_side(
   return false;
 }
 
-ccl_device_inline float background_portal_pdf(
-    const KernelGlobals *kg, float3 P, float3 direction, int ignore_portal, bool *is_possible)
+ccl_device_inline float background_portal_pdf(ccl_global const KernelGlobals *kg,
+                                              float3 P,
+                                              float3 direction,
+                                              int ignore_portal,
+                                              ccl_private bool *is_possible)
 {
   float portal_pdf = 0.0f;
 
@@ -219,7 +226,7 @@ ccl_device_inline float background_portal_pdf(
   return (num_possible > 0) ? portal_pdf / num_possible : 0.0f;
 }
 
-ccl_device int background_num_possible_portals(const KernelGlobals *kg, float3 P)
+ccl_device int background_num_possible_portals(ccl_global const KernelGlobals *kg, float3 P)
 {
   int num_possible_portals = 0;
   for (int p = 0; p < kernel_data.background.num_portals; p++) {
@@ -230,13 +237,13 @@ ccl_device int background_num_possible_portals(const KernelGlobals *kg, float3 P
   return num_possible_portals;
 }
 
-ccl_device float3 background_portal_sample(const KernelGlobals *kg,
+ccl_device float3 background_portal_sample(ccl_global const KernelGlobals *kg,
                                            float3 P,
                                            float randu,
                                            float randv,
                                            int num_possible,
-                                           int *sampled_portal,
-                                           float *pdf)
+                                           ccl_private int *sampled_portal,
+                                           ccl_private float *pdf)
 {
   /* Pick a portal, then re-normalize randv. */
   randv *= num_possible;
@@ -285,10 +292,10 @@ ccl_device float3 background_portal_sample(const KernelGlobals *kg,
   return zero_float3();
 }
 
-ccl_device_inline float3 background_sun_sample(const KernelGlobals *kg,
+ccl_device_inline float3 background_sun_sample(ccl_global const KernelGlobals *kg,
                                                float randu,
                                                float randv,
-                                               float *pdf)
+                                               ccl_private float *pdf)
 {
   float3 D;
   const float3 N = float4_to_float3(kernel_data.background.sun);
@@ -297,15 +304,15 @@ ccl_device_inline float3 background_sun_sample(const KernelGlobals *kg,
   return D;
 }
 
-ccl_device_inline float background_sun_pdf(const KernelGlobals *kg, float3 D)
+ccl_device_inline float background_sun_pdf(ccl_global const KernelGlobals *kg, float3 D)
 {
   const float3 N = float4_to_float3(kernel_data.background.sun);
   const float angle = kernel_data.background.sun.w;
   return pdf_uniform_cone(N, D, angle);
 }
 
-ccl_device_inline float3
-background_light_sample(const KernelGlobals *kg, float3 P, float randu, float randv, float *pdf)
+ccl_device_inline float3 background_light_sample(
+    ccl_global const KernelGlobals *kg, float3 P, float randu, float randv, ccl_private float *pdf)
 {
   float portal_method_pdf = kernel_data.background.portal_weight;
   float sun_method_pdf = kernel_data.background.sun_weight;
@@ -405,7 +412,9 @@ background_light_sample(const KernelGlobals *kg, float3 P, float randu, float ra
   return D;
 }
 
-ccl_device float background_light_pdf(const KernelGlobals *kg, float3 P, float3 direction)
+ccl_device float background_light_pdf(ccl_global const KernelGlobals *kg,
+                                      float3 P,
+                                      float3 direction)
 {
   float portal_method_pdf = kernel_data.background.portal_weight;
   float sun_method_pdf = kernel_data.background.sun_weight;
diff --git a/intern/cycles/kernel/kernel_light_common.h b/intern/cycles/kernel/kernel_light_common.h
index 765d8f5338e..9421ac462e2 100644
--- a/intern/cycles/kernel/kernel_light_common.h
+++ b/intern/cycles/kernel/kernel_light_common.h
@@ -32,7 +32,7 @@ CCL_NAMESPACE_BEGIN
  * Note: light_p is modified when sample_coord is true.
  */
 ccl_device_inline float rect_light_sample(float3 P,
-                                          float3 *light_p,
+                                          ccl_private float3 *light_p,
                                           float3 axisu,
                                           float3 axisv,
                                           float randu,
@@ -167,9 +167,9 @@ ccl_device float light_spread_attenuation(const float3 D,
  * reduce noise with low spread. */
 ccl_device bool light_spread_clamp_area_light(const float3 P,
                                               const float3 lightNg,
-                                              float3 *lightP,
-                                              float3 *axisu,
-                                              float3 *axisv,
+                                              ccl_private float3 *lightP,
+                                              ccl_private float3 *axisu,
+                                              ccl_private float3 *axisv,
                                               const float tan_spread)
 {
   /* Closest point in area light plane and distance to that plane. */
@@ -214,7 +214,10 @@ ccl_device bool light_spread_clamp_area_light(const float3 P,
   return true;
 }
 
-ccl_device float lamp_light_pdf(const KernelGlobals *kg, const float3 Ng, const float3 I, float t)
+ccl_device float lamp_light_pdf(ccl_global const KernelGlobals *kg,
+                                const float3 Ng,
+                                const float3 I,
+                                float t)
 {
   float cos_pi = dot(Ng, I);
 
diff --git a/intern/cycles/kernel/kernel_lookup_table.h b/intern/cycles/kernel/kernel_lookup_table.h
index 33d9d5ae1f0..3c8577af417 100644
--- a/intern/cycles/kernel/kernel_lookup_table.h
+++ b/intern/cycles/kernel/kernel_lookup_table.h
@@ -20,7 +20,10 @@ CCL_NAMESPACE_BEGIN
 
 /* Interpolated lookup table access */
 
-ccl_device float lookup_table_read(const KernelGlobals *kg, float x, int offset, int size)
+ccl_device float lookup_table_read(ccl_global const KernelGlobals *kg,
+                                   float x,
+                                   int offset,
+                                   int size)
 {
   x = saturate(x) * (size - 1);
 
@@ -37,7 +40,7 @@ ccl_device float lookup_table_read(const KernelGlobals *kg, float x, int offset,
 }
 
 ccl_device float lookup_table_read_2D(
-    const KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize)
+    ccl_global const KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize)
 {
   y = saturate(y) * (ysize - 1);
 
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index b158f4c4fd3..c931aa45276 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -35,7 +35,7 @@
 CCL_NAMESPACE_BEGIN
 
 /* distribute uniform xy on [0,1] over unit disk [-1,1] */
-ccl_device void to_unit_disk(float *x, float *y)
+ccl_device void to_unit_disk(ccl_private float *x, ccl_private float *y)
 {
   float phi = M_2PI_F * (*x);
   float r = sqrtf(*y);
@@ -46,7 +46,10 @@ ccl_device void to_unit_disk(float *x, float *y)
 
 /* return an orthogonal tangent and bitangent given a normal and tangent that
  * may not be exactly orthogonal */
-ccl_device void make_orthonormals_tangent(const float3 N, const float3 T, float3 *a, float3 *b)
+ccl_device void make_orthonormals_tangent(const float3 N,
+                                          const float3 T,
+                                          ccl_private float3 *a,
+                                          ccl_private float3 *b)
 {
   *b = normalize(cross(N, T));
   *a = cross(*b, N);
@@ -54,7 +57,7 @@ ccl_device void make_orthonormals_tangent(const float3 N, const float3 T, float3
 
 /* sample direction with cosine weighted distributed in hemisphere */
 ccl_device_inline void sample_cos_hemisphere(
-    const float3 N, float randu, float randv, float3 *omega_in, float *pdf)
+    const float3 N, float randu, float randv, ccl_private float3 *omega_in, ccl_private float *pdf)
 {
   to_unit_disk(&randu, &randv);
   float costheta = sqrtf(max(1.0f - randu * randu - randv * randv, 0.0f));
@@ -66,7 +69,7 @@ ccl_device_inline void sample_cos_hemisphere(
 
 /* sample direction uniformly distributed in hemisphere */
 ccl_device_inline void sample_uniform_hemisphere(
-    const float3 N, float randu, float randv, float3 *omega_in, float *pdf)
+    const float3 N, float randu, float randv, ccl_private float3 *omega_in, ccl_private float *pdf)
 {
   float z = randu;
   float r = sqrtf(max(0.0f, 1.0f - z * z));
@@ -81,8 +84,12 @@ ccl_device_inline void sample_uniform_hemisphere(
 }
 
 /* sample direction uniformly distributed in cone */
-ccl_device_inline void sample_uniform_cone(
-    const float3 N, float angle, float randu, float randv, float3 *omega_in, float *pdf)
+ccl_device_inline void sample_uniform_cone(const float3 N,
+                                           float angle,
+                                           float randu,
+                                           float randv,
+                                           ccl_private float3 *omega_in,
+                                           ccl_private float *pdf)
 {
   float zMin = cosf(angle);
   float z = zMin - zMin * randu + randu;
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 67466b28170..b981e750dda 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -36,7 +36,9 @@ ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer(
 #ifdef __DENOISING_FEATURES__
 
 ccl_device_forceinline void kernel_write_denoising_features_surface(
-    INTEGRATOR_STATE_ARGS, const ShaderData *sd, ccl_global float *ccl_restrict render_buffer)
+    INTEGRATOR_STATE_ARGS,
+    ccl_private const ShaderData *sd,
+    ccl_global float *ccl_restrict render_buffer)
 {
   if (!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DENOISING_FEATURES)) {
     return;
@@ -55,7 +57,7 @@ ccl_device_forceinline void kernel_write_denoising_features_surface(
   float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
       continue;
@@ -71,11 +73,11 @@ ccl_device_forceinline void kernel_write_denoising_features_surface(
      * To account for this, we scale their weight by the average fresnel factor (the same is also
      * done for the sample weight in the BSDF setup, so we don't need to scale that here). */
     if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) {
-      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+      ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
       closure_albedo *= bsdf->extra->fresnel_color;
     }
     else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) {
-      PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)sc;
+      ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)sc;
       closure_albedo *= bsdf->avg_value;
     }
     else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) {
@@ -151,7 +153,9 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(INTEGRATOR_ST
 
 /* Write shadow catcher passes on a bounce from the shadow catcher object. */
 ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
-    INTEGRATOR_STATE_ARGS, const ShaderData *sd, ccl_global float *ccl_restrict render_buffer)
+    INTEGRATOR_STATE_ARGS,
+    ccl_private const ShaderData *sd,
+    ccl_global float *ccl_restrict render_buffer)
 {
   if (!kernel_data.integrator.has_shadow_catcher) {
     return;
@@ -178,7 +182,7 @@ ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
 
 #endif /* __SHADOW_CATCHER__ */
 
-ccl_device_inline size_t kernel_write_id_pass(float *ccl_restrict buffer,
+ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer,
                                               size_t depth,
                                               float id,
                                               float matte_weight)
@@ -188,7 +192,7 @@ ccl_device_inline size_t kernel_write_id_pass(float *ccl_restrict buffer,
 }
 
 ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS,
-                                                const ShaderData *sd,
+                                                ccl_private const ShaderData *sd,
                                                 ccl_global float *ccl_restrict render_buffer)
 {
 #ifdef __PASSES__
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index ebb2c0df4f1..e04ed5b1cc1 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -32,7 +32,7 @@ ccl_device_inline void path_state_init_queues(INTEGRATOR_STATE_ARGS)
 /* Minimalistic initialization of the path state, which is needed for early outputs in the
  * integrator initialization to work. */
 ccl_device_inline void path_state_init(INTEGRATOR_STATE_ARGS,
-                                       const ccl_global KernelWorkTile *ccl_restrict tile,
+                                       ccl_global const KernelWorkTile *ccl_restrict tile,
                                        const int x,
                                        const int y)
 {
@@ -281,14 +281,16 @@ typedef struct RNGState {
   int sample;
 } RNGState;
 
-ccl_device_inline void path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS, RNGState *rng_state)
+ccl_device_inline void path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS,
+                                           ccl_private RNGState *rng_state)
 {
   rng_state->rng_hash = INTEGRATOR_STATE(path, rng_hash);
   rng_state->rng_offset = INTEGRATOR_STATE(path, rng_offset);
   rng_state->sample = INTEGRATOR_STATE(path, sample);
 }
 
-ccl_device_inline void shadow_path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS, RNGState *rng_state)
+ccl_device_inline void shadow_path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS,
+                                                  ccl_private RNGState *rng_state)
 {
   const uint shadow_bounces = INTEGRATOR_STATE(shadow_path, transparent_bounce) -
                               INTEGRATOR_STATE(path, transparent_bounce);
@@ -298,23 +300,26 @@ ccl_device_inline void shadow_path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS, R
   rng_state->sample = INTEGRATOR_STATE(path, sample);
 }
 
-ccl_device_inline float path_state_rng_1D(const KernelGlobals *kg,
-                                          const RNGState *rng_state,
+ccl_device_inline float path_state_rng_1D(ccl_global const KernelGlobals *kg,
+                                          ccl_private const RNGState *rng_state,
                                           int dimension)
 {
   return path_rng_1D(
       kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
 }
 
-ccl_device_inline void path_state_rng_2D(
-    const KernelGlobals *kg, const RNGState *rng_state, int dimension, float *fx, float *fy)
+ccl_device_inline void path_state_rng_2D(ccl_global const KernelGlobals *kg,
+                                         ccl_private const RNGState *rng_state,
+                                         int dimension,
+                                         ccl_private float *fx,
+                                         ccl_private float *fy)
 {
   path_rng_2D(
       kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy);
 }
 
-ccl_device_inline float path_state_rng_1D_hash(const KernelGlobals *kg,
-                                               const RNGState *rng_state,
+ccl_device_inline float path_state_rng_1D_hash(ccl_global const KernelGlobals *kg,
+                                               ccl_private const RNGState *rng_state,
                                                uint hash)
 {
   /* Use a hash instead of dimension, this is not great but avoids adding
@@ -324,8 +329,8 @@ ccl_device_inline float path_state_rng_1D_hash(const KernelGlobals *kg,
       kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset);
 }
 
-ccl_device_inline float path_branched_rng_1D(const KernelGlobals *kg,
-                                             const RNGState *rng_state,
+ccl_device_inline float path_branched_rng_1D(ccl_global const KernelGlobals *kg,
+                                             ccl_private const RNGState *rng_state,
                                              int branch,
                                              int num_branches,
                                              int dimension)
@@ -336,13 +341,13 @@ ccl_device_inline float path_branched_rng_1D(const KernelGlobals *kg,
                      rng_state->rng_offset + dimension);
 }
 
-ccl_device_inline void path_branched_rng_2D(const KernelGlobals *kg,
-                                            const RNGState *rng_state,
+ccl_device_inline void path_branched_rng_2D(ccl_global const KernelGlobals *kg,
+                                            ccl_private const RNGState *rng_state,
                                             int branch,
                                             int num_branches,
                                             int dimension,
-                                            float *fx,
-                                            float *fy)
+                                            ccl_private float *fx,
+                                            ccl_private float *fy)
 {
   path_rng_2D(kg,
               rng_state->rng_hash,
@@ -355,8 +360,8 @@ ccl_device_inline void path_branched_rng_2D(const KernelGlobals *kg,
 /* Utility functions to get light termination value,
  * since it might not be needed in many cases.
  */
-ccl_device_inline float path_state_rng_light_termination(const KernelGlobals *kg,
-                                                         const RNGState *state)
+ccl_device_inline float path_state_rng_light_termination(ccl_global const KernelGlobals *kg,
+                                                         ccl_private const RNGState *state)
 {
   if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
     return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE);
diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h
index 192bf7ca5aa..0aea82fa812 100644
--- a/intern/cycles/kernel/kernel_projection.h
+++ b/intern/cycles/kernel/kernel_projection.h
@@ -215,8 +215,8 @@ ccl_device_inline float2 direction_to_panorama(ccl_constant KernelCamera *cam, f
 }
 
 ccl_device_inline void spherical_stereo_transform(ccl_constant KernelCamera *cam,
-                                                  float3 *P,
-                                                  float3 *D)
+                                                  ccl_private float3 *P,
+                                                  ccl_private float3 *D)
 {
   float interocular_offset = cam->interocular_offset;
 
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 240c92bf9d0..7db4289acec 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -38,7 +38,7 @@ CCL_NAMESPACE_BEGIN
  */
 #  define SOBOL_SKIP 64
 
-ccl_device uint sobol_dimension(const KernelGlobals *kg, int index, int dimension)
+ccl_device uint sobol_dimension(ccl_global const KernelGlobals *kg, int index, int dimension)
 {
   uint result = 0;
   uint i = index + SOBOL_SKIP;
@@ -51,7 +51,7 @@ ccl_device uint sobol_dimension(const KernelGlobals *kg, int index, int dimensio
 
 #endif /* __SOBOL__ */
 
-ccl_device_forceinline float path_rng_1D(const KernelGlobals *kg,
+ccl_device_forceinline float path_rng_1D(ccl_global const KernelGlobals *kg,
                                          uint rng_hash,
                                          int sample,
                                          int dimension)
@@ -85,8 +85,12 @@ ccl_device_forceinline float path_rng_1D(const KernelGlobals *kg,
 #endif
 }
 
-ccl_device_forceinline void path_rng_2D(
-    const KernelGlobals *kg, uint rng_hash, int sample, int dimension, float *fx, float *fy)
+ccl_device_forceinline void path_rng_2D(ccl_global const KernelGlobals *kg,
+                                        uint rng_hash,
+                                        int sample,
+                                        int dimension,
+                                        ccl_private float *fx,
+                                        ccl_private float *fy)
 {
 #ifdef __DEBUG_CORRELATION__
   *fx = (float)drand48();
@@ -137,7 +141,7 @@ ccl_device_inline uint hash_iqnt2d(const uint x, const uint y)
   return n;
 }
 
-ccl_device_inline uint path_rng_hash_init(const KernelGlobals *ccl_restrict kg,
+ccl_device_inline uint path_rng_hash_init(ccl_global const KernelGlobals *ccl_restrict kg,
                                           const int sample,
                                           const int x,
                                           const int y)
@@ -184,13 +188,6 @@ ccl_device_inline uint lcg_state_init(const uint rng_hash,
   return lcg_init(rng_hash + rng_offset + sample * scramble);
 }
 
-ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
-{
-  /* Implicit mod 2^32 */
-  *rng = (1103515245 * (*rng) + 12345);
-  return (float)*rng * (1.0f / (float)0xFFFFFFFF);
-}
-
 ccl_device_inline bool sample_is_even(int pattern, int sample)
 {
   if (pattern == SAMPLING_PATTERN_PMJ) {
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index d1b53832793..4174a27406b 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -37,24 +37,26 @@ CCL_NAMESPACE_BEGIN
 /* Merging */
 
 #if defined(__VOLUME__)
-ccl_device_inline void shader_merge_volume_closures(ShaderData *sd)
+ccl_device_inline void shader_merge_volume_closures(ccl_private ShaderData *sd)
 {
   /* Merge identical closures to save closure space with stacked volumes. */
   for (int i = 0; i < sd->num_closure; i++) {
-    ShaderClosure *sci = &sd->closure[i];
+    ccl_private ShaderClosure *sci = &sd->closure[i];
 
     if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
       continue;
     }
 
     for (int j = i + 1; j < sd->num_closure; j++) {
-      ShaderClosure *scj = &sd->closure[j];
+      ccl_private ShaderClosure *scj = &sd->closure[j];
       if (sci->type != scj->type) {
         continue;
       }
 
-      const HenyeyGreensteinVolume *hgi = (const HenyeyGreensteinVolume *)sci;
-      const HenyeyGreensteinVolume *hgj = (const HenyeyGreensteinVolume *)scj;
+      ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *)
+          sci;
+      ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *)
+          scj;
       if (!(hgi->g == hgj->g)) {
         continue;
       }
@@ -76,17 +78,19 @@ ccl_device_inline void shader_merge_volume_closures(ShaderData *sd)
   }
 }
 
-ccl_device_inline void shader_copy_volume_phases(ShaderVolumePhases *ccl_restrict phases,
-                                                 const ShaderData *ccl_restrict sd)
+ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases *ccl_restrict
+                                                     phases,
+                                                 ccl_private const ShaderData *ccl_restrict sd)
 {
   phases->num_closure = 0;
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *from_sc = &sd->closure[i];
-    const HenyeyGreensteinVolume *from_hg = (const HenyeyGreensteinVolume *)from_sc;
+    ccl_private const ShaderClosure *from_sc = &sd->closure[i];
+    ccl_private const HenyeyGreensteinVolume *from_hg =
+        (ccl_private const HenyeyGreensteinVolume *)from_sc;
 
     if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
-      ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure];
+      ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure];
 
       to_sc->weight = from_sc->weight;
       to_sc->sample_weight = from_sc->sample_weight;
@@ -100,7 +104,8 @@ ccl_device_inline void shader_copy_volume_phases(ShaderVolumePhases *ccl_restric
 }
 #endif /* __VOLUME__ */
 
-ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd)
+ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_ARGS,
+                                                       ccl_private ShaderData *sd)
 {
   /* Defensive sampling.
    *
@@ -112,14 +117,14 @@ ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_AR
     float sum = 0.0f;
 
     for (int i = 0; i < sd->num_closure; i++) {
-      ShaderClosure *sc = &sd->closure[i];
+      ccl_private ShaderClosure *sc = &sd->closure[i];
       if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
         sum += sc->sample_weight;
       }
     }
 
     for (int i = 0; i < sd->num_closure; i++) {
-      ShaderClosure *sc = &sd->closure[i];
+      ccl_private ShaderClosure *sc = &sd->closure[i];
       if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
         sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
       }
@@ -137,7 +142,7 @@ ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_AR
       float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f;
 
       for (int i = 0; i < sd->num_closure; i++) {
-        ShaderClosure *sc = &sd->closure[i];
+        ccl_private ShaderClosure *sc = &sd->closure[i];
         if (CLOSURE_IS_BSDF(sc->type)) {
           bsdf_blur(kg, sc, blur_roughness);
         }
@@ -148,7 +153,8 @@ ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_AR
 
 /* BSDF */
 
-ccl_device_inline bool shader_bsdf_is_transmission(const ShaderData *sd, const float3 omega_in)
+ccl_device_inline bool shader_bsdf_is_transmission(ccl_private const ShaderData *sd,
+                                                   const float3 omega_in)
 {
   return dot(sd->N, omega_in) < 0.0f;
 }
@@ -176,12 +182,12 @@ ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_sh
   return false;
 }
 
-ccl_device_inline float _shader_bsdf_multi_eval(const KernelGlobals *kg,
-                                                ShaderData *sd,
+ccl_device_inline float _shader_bsdf_multi_eval(ccl_global const KernelGlobals *kg,
+                                                ccl_private ShaderData *sd,
                                                 const float3 omega_in,
                                                 const bool is_transmission,
-                                                const ShaderClosure *skip_sc,
-                                                BsdfEval *result_eval,
+                                                ccl_private const ShaderClosure *skip_sc,
+                                                ccl_private BsdfEval *result_eval,
                                                 float sum_pdf,
                                                 float sum_sample_weight,
                                                 const uint light_shader_flags)
@@ -189,7 +195,7 @@ ccl_device_inline float _shader_bsdf_multi_eval(const KernelGlobals *kg,
   /* This is the veach one-sample model with balance heuristic,
    * some PDF factors drop out when using balance heuristic weighting. */
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (sc == skip_sc) {
       continue;
@@ -220,11 +226,11 @@ ccl_device
 ccl_device_inline
 #endif
     float
-    shader_bsdf_eval(const KernelGlobals *kg,
-                     ShaderData *sd,
+    shader_bsdf_eval(ccl_global const KernelGlobals *kg,
+                     ccl_private ShaderData *sd,
                      const float3 omega_in,
                      const bool is_transmission,
-                     BsdfEval *bsdf_eval,
+                     ccl_private BsdfEval *bsdf_eval,
                      const uint light_shader_flags)
 {
   bsdf_eval_init(bsdf_eval, false, zero_float3());
@@ -234,8 +240,8 @@ ccl_device_inline
 }
 
 /* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */
-ccl_device_inline const ShaderClosure *shader_bsdf_bssrdf_pick(const ShaderData *ccl_restrict sd,
-                                                               float *randu)
+ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick(
+    ccl_private const ShaderData *ccl_restrict sd, ccl_private float *randu)
 {
   int sampled = 0;
 
@@ -244,7 +250,7 @@ ccl_device_inline const ShaderClosure *shader_bsdf_bssrdf_pick(const ShaderData
     float sum = 0.0f;
 
     for (int i = 0; i < sd->num_closure; i++) {
-      const ShaderClosure *sc = &sd->closure[i];
+      ccl_private const ShaderClosure *sc = &sd->closure[i];
 
       if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
         sum += sc->sample_weight;
@@ -255,7 +261,7 @@ ccl_device_inline const ShaderClosure *shader_bsdf_bssrdf_pick(const ShaderData
     float partial_sum = 0.0f;
 
     for (int i = 0; i < sd->num_closure; i++) {
-      const ShaderClosure *sc = &sd->closure[i];
+      ccl_private const ShaderClosure *sc = &sd->closure[i];
 
       if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
         float next_sum = partial_sum + sc->sample_weight;
@@ -277,15 +283,16 @@ ccl_device_inline const ShaderClosure *shader_bsdf_bssrdf_pick(const ShaderData
 }
 
 /* Return weight for picked BSSRDF. */
-ccl_device_inline float3 shader_bssrdf_sample_weight(const ShaderData *ccl_restrict sd,
-                                                     const ShaderClosure *ccl_restrict bssrdf_sc)
+ccl_device_inline float3
+shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd,
+                            ccl_private const ShaderClosure *ccl_restrict bssrdf_sc)
 {
   float3 weight = bssrdf_sc->weight;
 
   if (sd->num_closure > 1) {
     float sum = 0.0f;
     for (int i = 0; i < sd->num_closure; i++) {
-      const ShaderClosure *sc = &sd->closure[i];
+      ccl_private const ShaderClosure *sc = &sd->closure[i];
 
       if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
         sum += sc->sample_weight;
@@ -299,15 +306,15 @@ ccl_device_inline float3 shader_bssrdf_sample_weight(const ShaderData *ccl_restr
 
 /* Sample direction for picked BSDF, and return evaluation and pdf for all
  * BSDFs combined using MIS. */
-ccl_device int shader_bsdf_sample_closure(const KernelGlobals *kg,
-                                          ShaderData *sd,
-                                          const ShaderClosure *sc,
+ccl_device int shader_bsdf_sample_closure(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private const ShaderClosure *sc,
                                           float randu,
                                           float randv,
-                                          BsdfEval *bsdf_eval,
-                                          float3 *omega_in,
-                                          differential3 *domega_in,
-                                          float *pdf)
+                                          ccl_private BsdfEval *bsdf_eval,
+                                          ccl_private float3 *omega_in,
+                                          ccl_private differential3 *domega_in,
+                                          ccl_private float *pdf)
 {
   /* BSSRDF should already have been handled elsewhere. */
   kernel_assert(CLOSURE_IS_BSDF(sc->type));
@@ -333,13 +340,13 @@ ccl_device int shader_bsdf_sample_closure(const KernelGlobals *kg,
   return label;
 }
 
-ccl_device float shader_bsdf_average_roughness(const ShaderData *sd)
+ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd)
 {
   float roughness = 0.0f;
   float sum_weight = 0.0f;
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (CLOSURE_IS_BSDF(sc->type)) {
       /* sqrt once to undo the squaring from multiplying roughness on the
@@ -353,7 +360,8 @@ ccl_device float shader_bsdf_average_roughness(const ShaderData *sd)
   return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
 }
 
-ccl_device float3 shader_bsdf_transparency(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_transparency(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderData *sd)
 {
   if (sd->flag & SD_HAS_ONLY_VOLUME) {
     return one_float3();
@@ -366,11 +374,12 @@ ccl_device float3 shader_bsdf_transparency(const KernelGlobals *kg, const Shader
   }
 }
 
-ccl_device void shader_bsdf_disable_transparency(const KernelGlobals *kg, ShaderData *sd)
+ccl_device void shader_bsdf_disable_transparency(ccl_global const KernelGlobals *kg,
+                                                 ccl_private ShaderData *sd)
 {
   if (sd->flag & SD_TRANSPARENT) {
     for (int i = 0; i < sd->num_closure; i++) {
-      ShaderClosure *sc = &sd->closure[i];
+      ccl_private ShaderClosure *sc = &sd->closure[i];
 
       if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
         sc->sample_weight = 0.0f;
@@ -382,7 +391,8 @@ ccl_device void shader_bsdf_disable_transparency(const KernelGlobals *kg, Shader
   }
 }
 
-ccl_device float3 shader_bsdf_alpha(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_alpha(ccl_global const KernelGlobals *kg,
+                                    ccl_private const ShaderData *sd)
 {
   float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd);
 
@@ -392,12 +402,13 @@ ccl_device float3 shader_bsdf_alpha(const KernelGlobals *kg, const ShaderData *s
   return alpha;
 }
 
-ccl_device float3 shader_bsdf_diffuse(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_diffuse(ccl_global const KernelGlobals *kg,
+                                      ccl_private const ShaderData *sd)
 {
   float3 eval = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type))
       eval += sc->weight;
@@ -406,12 +417,13 @@ ccl_device float3 shader_bsdf_diffuse(const KernelGlobals *kg, const ShaderData
   return eval;
 }
 
-ccl_device float3 shader_bsdf_glossy(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_glossy(ccl_global const KernelGlobals *kg,
+                                     ccl_private const ShaderData *sd)
 {
   float3 eval = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
       eval += sc->weight;
@@ -420,12 +432,13 @@ ccl_device float3 shader_bsdf_glossy(const KernelGlobals *kg, const ShaderData *
   return eval;
 }
 
-ccl_device float3 shader_bsdf_transmission(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_transmission(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderData *sd)
 {
   float3 eval = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
       eval += sc->weight;
@@ -434,12 +447,13 @@ ccl_device float3 shader_bsdf_transmission(const KernelGlobals *kg, const Shader
   return eval;
 }
 
-ccl_device float3 shader_bsdf_average_normal(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_average_normal(ccl_global const KernelGlobals *kg,
+                                             ccl_private const ShaderData *sd)
 {
   float3 N = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
     if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
       N += sc->N * fabsf(average(sc->weight));
   }
@@ -447,14 +461,15 @@ ccl_device float3 shader_bsdf_average_normal(const KernelGlobals *kg, const Shad
   return (is_zero(N)) ? sd->N : normalize(N);
 }
 
-ccl_device float3 shader_bsdf_ao_normal(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_ao_normal(ccl_global const KernelGlobals *kg,
+                                        ccl_private const ShaderData *sd)
 {
   float3 N = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
     if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
-      const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+      ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
       N += bsdf->N * fabsf(average(sc->weight));
     }
   }
@@ -463,15 +478,15 @@ ccl_device float3 shader_bsdf_ao_normal(const KernelGlobals *kg, const ShaderDat
 }
 
 #ifdef __SUBSURFACE__
-ccl_device float3 shader_bssrdf_normal(const ShaderData *sd)
+ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd)
 {
   float3 N = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (CLOSURE_IS_BSSRDF(sc->type)) {
-      const Bssrdf *bssrdf = (const Bssrdf *)sc;
+      ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc;
       float avg_weight = fabsf(average(sc->weight));
 
       N += bssrdf->N * avg_weight;
@@ -484,7 +499,9 @@ ccl_device float3 shader_bssrdf_normal(const ShaderData *sd)
 
 /* Constant emission optimization */
 
-ccl_device bool shader_constant_emission_eval(const KernelGlobals *kg, int shader, float3 *eval)
+ccl_device bool shader_constant_emission_eval(ccl_global const KernelGlobals *kg,
+                                              int shader,
+                                              ccl_private float3 *eval)
 {
   int shader_index = shader & SHADER_MASK;
   int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags;
@@ -502,7 +519,7 @@ ccl_device bool shader_constant_emission_eval(const KernelGlobals *kg, int shade
 
 /* Background */
 
-ccl_device float3 shader_background_eval(const ShaderData *sd)
+ccl_device float3 shader_background_eval(ccl_private const ShaderData *sd)
 {
   if (sd->flag & SD_EMISSION) {
     return sd->closure_emission_background;
@@ -514,7 +531,7 @@ ccl_device float3 shader_background_eval(const ShaderData *sd)
 
 /* Emission */
 
-ccl_device float3 shader_emissive_eval(const ShaderData *sd)
+ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd)
 {
   if (sd->flag & SD_EMISSION) {
     return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
@@ -526,7 +543,8 @@ ccl_device float3 shader_emissive_eval(const ShaderData *sd)
 
 /* Holdout */
 
-ccl_device float3 shader_holdout_apply(const KernelGlobals *kg, ShaderData *sd)
+ccl_device float3 shader_holdout_apply(ccl_global const KernelGlobals *kg,
+                                       ccl_private ShaderData *sd)
 {
   float3 weight = zero_float3();
 
@@ -537,7 +555,7 @@ ccl_device float3 shader_holdout_apply(const KernelGlobals *kg, ShaderData *sd)
       weight = one_float3() - sd->closure_transparent_extinction;
 
       for (int i = 0; i < sd->num_closure; i++) {
-        ShaderClosure *sc = &sd->closure[i];
+        ccl_private ShaderClosure *sc = &sd->closure[i];
         if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
           sc->type = NBUILTIN_CLOSURES;
         }
@@ -551,7 +569,7 @@ ccl_device float3 shader_holdout_apply(const KernelGlobals *kg, ShaderData *sd)
   }
   else {
     for (int i = 0; i < sd->num_closure; i++) {
-      const ShaderClosure *sc = &sd->closure[i];
+      ccl_private const ShaderClosure *sc = &sd->closure[i];
       if (CLOSURE_IS_HOLDOUT(sc->type)) {
         weight += sc->weight;
       }
@@ -565,7 +583,7 @@ ccl_device float3 shader_holdout_apply(const KernelGlobals *kg, ShaderData *sd)
 
 template<uint node_feature_mask>
 ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS,
-                                    ShaderData *ccl_restrict sd,
+                                    ccl_private ShaderData *ccl_restrict sd,
                                     ccl_global float *ccl_restrict buffer,
                                     int path_flag)
 {
@@ -604,7 +622,7 @@ ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS,
       sd->flag |= SD_EMISSION;
     }
     else {
-      DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(
+      ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
           sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f));
       if (bsdf != NULL) {
         bsdf->N = sd->N;
@@ -626,19 +644,20 @@ ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS,
 
 #ifdef __VOLUME__
 
-ccl_device_inline float _shader_volume_phase_multi_eval(const ShaderData *sd,
-                                                        const ShaderVolumePhases *phases,
-                                                        const float3 omega_in,
-                                                        int skip_phase,
-                                                        BsdfEval *result_eval,
-                                                        float sum_pdf,
-                                                        float sum_sample_weight)
+ccl_device_inline float _shader_volume_phase_multi_eval(
+    ccl_private const ShaderData *sd,
+    ccl_private const ShaderVolumePhases *phases,
+    const float3 omega_in,
+    int skip_phase,
+    ccl_private BsdfEval *result_eval,
+    float sum_pdf,
+    float sum_sample_weight)
 {
   for (int i = 0; i < phases->num_closure; i++) {
     if (i == skip_phase)
       continue;
 
-    const ShaderVolumeClosure *svc = &phases->closure[i];
+    ccl_private const ShaderVolumeClosure *svc = &phases->closure[i];
     float phase_pdf = 0.0f;
     float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
 
@@ -653,26 +672,26 @@ ccl_device_inline float _shader_volume_phase_multi_eval(const ShaderData *sd,
   return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
 }
 
-ccl_device float shader_volume_phase_eval(const KernelGlobals *kg,
-                                          const ShaderData *sd,
-                                          const ShaderVolumePhases *phases,
+ccl_device float shader_volume_phase_eval(ccl_global const KernelGlobals *kg,
+                                          ccl_private const ShaderData *sd,
+                                          ccl_private const ShaderVolumePhases *phases,
                                           const float3 omega_in,
-                                          BsdfEval *phase_eval)
+                                          ccl_private BsdfEval *phase_eval)
 {
   bsdf_eval_init(phase_eval, false, zero_float3());
 
   return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f);
 }
 
-ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
-                                          const ShaderData *sd,
-                                          const ShaderVolumePhases *phases,
+ccl_device int shader_volume_phase_sample(ccl_global const KernelGlobals *kg,
+                                          ccl_private const ShaderData *sd,
+                                          ccl_private const ShaderVolumePhases *phases,
                                           float randu,
                                           float randv,
-                                          BsdfEval *phase_eval,
-                                          float3 *omega_in,
-                                          differential3 *domega_in,
-                                          float *pdf)
+                                          ccl_private BsdfEval *phase_eval,
+                                          ccl_private float3 *omega_in,
+                                          ccl_private differential3 *domega_in,
+                                          ccl_private float *pdf)
 {
   int sampled = 0;
 
@@ -681,7 +700,7 @@ ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
     float sum = 0.0f;
 
     for (sampled = 0; sampled < phases->num_closure; sampled++) {
-      const ShaderVolumeClosure *svc = &phases->closure[sampled];
+      ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
       sum += svc->sample_weight;
     }
 
@@ -689,7 +708,7 @@ ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
     float partial_sum = 0.0f;
 
     for (sampled = 0; sampled < phases->num_closure; sampled++) {
-      const ShaderVolumeClosure *svc = &phases->closure[sampled];
+      ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
       float next_sum = partial_sum + svc->sample_weight;
 
       if (r <= next_sum) {
@@ -709,7 +728,7 @@ ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
 
   /* todo: this isn't quite correct, we don't weight anisotropy properly
    * depending on color channels, even if this is perhaps not a common case */
-  const ShaderVolumeClosure *svc = &phases->closure[sampled];
+  ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
   int label;
   float3 eval = zero_float3();
 
@@ -723,15 +742,15 @@ ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
   return label;
 }
 
-ccl_device int shader_phase_sample_closure(const KernelGlobals *kg,
-                                           const ShaderData *sd,
-                                           const ShaderVolumeClosure *sc,
+ccl_device int shader_phase_sample_closure(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderData *sd,
+                                           ccl_private const ShaderVolumeClosure *sc,
                                            float randu,
                                            float randv,
-                                           BsdfEval *phase_eval,
-                                           float3 *omega_in,
-                                           differential3 *domega_in,
-                                           float *pdf)
+                                           ccl_private BsdfEval *phase_eval,
+                                           ccl_private float3 *omega_in,
+                                           ccl_private differential3 *domega_in,
+                                           ccl_private float *pdf)
 {
   int label;
   float3 eval = zero_float3();
@@ -749,7 +768,7 @@ ccl_device int shader_phase_sample_closure(const KernelGlobals *kg,
 
 template<const bool shadow, typename StackReadOp>
 ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS,
-                                          ShaderData *ccl_restrict sd,
+                                          ccl_private ShaderData *ccl_restrict sd,
                                           const int path_flag,
                                           StackReadOp stack_read)
 {
@@ -824,7 +843,7 @@ ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS,
 
 /* Displacement Evaluation */
 
-ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd)
+ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ccl_private ShaderData *sd)
 {
   sd->num_closure = 0;
   sd->num_closure_left = 0;
@@ -846,13 +865,14 @@ ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ShaderData
 /* Transparent Shadows */
 
 #ifdef __TRANSPARENT_SHADOWS__
-ccl_device bool shader_transparent_shadow(const KernelGlobals *kg, Intersection *isect)
+ccl_device bool shader_transparent_shadow(ccl_global const KernelGlobals *kg,
+                                          ccl_private Intersection *isect)
 {
   return (intersection_get_shader_flags(kg, isect) & SD_HAS_TRANSPARENT_SHADOW) != 0;
 }
 #endif /* __TRANSPARENT_SHADOWS__ */
 
-ccl_device float shader_cryptomatte_id(const KernelGlobals *kg, int shader)
+ccl_device float shader_cryptomatte_id(ccl_global const KernelGlobals *kg, int shader)
 {
   return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id;
 }
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 00457695e53..3a5a11d2c10 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -636,7 +636,7 @@ typedef struct AttributeDescriptor {
   float sample_weight; \
   float3 N
 
-typedef ccl_addr_space struct ccl_align(16) ShaderClosure
+typedef struct ccl_align(16) ShaderClosure
 {
   SHADER_CLOSURE_BASE;
 
@@ -747,7 +747,7 @@ enum ShaderDataObjectFlag {
                      SD_OBJECT_HAS_VOLUME_ATTRIBUTES)
 };
 
-typedef ccl_addr_space struct ccl_align(16) ShaderData
+typedef struct ccl_align(16) ShaderData
 {
   /* position */
   float3 P;
@@ -837,27 +837,28 @@ ShaderData;
 
 /* ShaderDataTinyStorage needs the same alignment as ShaderData, or else
  * the pointer cast in AS_SHADER_DATA invokes undefined behavior. */
-typedef ccl_addr_space struct ccl_align(16) ShaderDataTinyStorage
+typedef struct ccl_align(16) ShaderDataTinyStorage
 {
   char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE];
 }
 ShaderDataTinyStorage;
-#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData *)shader_data_tiny_storage)
+#define AS_SHADER_DATA(shader_data_tiny_storage) \
+  ((ccl_private ShaderData *)shader_data_tiny_storage)
 
 /* Compact volume closures storage.
  *
  * Used for decoupled direct/indirect light closure storage. */
 
-ccl_addr_space struct ShaderVolumeClosure {
+typedef struct ShaderVolumeClosure {
   float3 weight;
   float sample_weight;
   float g;
-};
+} ShaderVolumeClosure;
 
-ccl_addr_space struct ShaderVolumePhases {
+typedef struct ShaderVolumePhases {
   ShaderVolumeClosure closure[MAX_VOLUME_CLOSURE];
   int num_closure;
-};
+} ShaderVolumePhases;
 
 /* Volume Stack */
 
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index ad609b15f86..871e370123e 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -44,56 +44,56 @@ CCL_NAMESPACE_BEGIN
 
 /* Stack */
 
-ccl_device_inline float3 stack_load_float3(float *stack, uint a)
+ccl_device_inline float3 stack_load_float3(ccl_private float *stack, uint a)
 {
   kernel_assert(a + 2 < SVM_STACK_SIZE);
 
-  float *stack_a = stack + a;
+  ccl_private float *stack_a = stack + a;
   return make_float3(stack_a[0], stack_a[1], stack_a[2]);
 }
 
-ccl_device_inline void stack_store_float3(float *stack, uint a, float3 f)
+ccl_device_inline void stack_store_float3(ccl_private float *stack, uint a, float3 f)
 {
   kernel_assert(a + 2 < SVM_STACK_SIZE);
 
-  float *stack_a = stack + a;
+  ccl_private float *stack_a = stack + a;
   stack_a[0] = f.x;
   stack_a[1] = f.y;
   stack_a[2] = f.z;
 }
 
-ccl_device_inline float stack_load_float(float *stack, uint a)
+ccl_device_inline float stack_load_float(ccl_private float *stack, uint a)
 {
   kernel_assert(a < SVM_STACK_SIZE);
 
   return stack[a];
 }
 
-ccl_device_inline float stack_load_float_default(float *stack, uint a, uint value)
+ccl_device_inline float stack_load_float_default(ccl_private float *stack, uint a, uint value)
 {
   return (a == (uint)SVM_STACK_INVALID) ? __uint_as_float(value) : stack_load_float(stack, a);
 }
 
-ccl_device_inline void stack_store_float(float *stack, uint a, float f)
+ccl_device_inline void stack_store_float(ccl_private float *stack, uint a, float f)
 {
   kernel_assert(a < SVM_STACK_SIZE);
 
   stack[a] = f;
 }
 
-ccl_device_inline int stack_load_int(float *stack, uint a)
+ccl_device_inline int stack_load_int(ccl_private float *stack, uint a)
 {
   kernel_assert(a < SVM_STACK_SIZE);
 
   return __float_as_int(stack[a]);
 }
 
-ccl_device_inline int stack_load_int_default(float *stack, uint a, uint value)
+ccl_device_inline int stack_load_int_default(ccl_private float *stack, uint a, uint value)
 {
   return (a == (uint)SVM_STACK_INVALID) ? (int)value : stack_load_int(stack, a);
 }
 
-ccl_device_inline void stack_store_int(float *stack, uint a, int i)
+ccl_device_inline void stack_store_int(ccl_private float *stack, uint a, int i)
 {
   kernel_assert(a < SVM_STACK_SIZE);
 
@@ -107,14 +107,15 @@ ccl_device_inline bool stack_valid(uint a)
 
 /* Reading Nodes */
 
-ccl_device_inline uint4 read_node(const KernelGlobals *kg, int *offset)
+ccl_device_inline uint4 read_node(ccl_global const KernelGlobals *kg, ccl_private int *offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
   (*offset)++;
   return node;
 }
 
-ccl_device_inline float4 read_node_float(const KernelGlobals *kg, int *offset)
+ccl_device_inline float4 read_node_float(ccl_global const KernelGlobals *kg,
+                                         ccl_private int *offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
   float4 f = make_float4(__uint_as_float(node.x),
@@ -125,7 +126,7 @@ ccl_device_inline float4 read_node_float(const KernelGlobals *kg, int *offset)
   return f;
 }
 
-ccl_device_inline float4 fetch_node_float(const KernelGlobals *kg, int offset)
+ccl_device_inline float4 fetch_node_float(ccl_global const KernelGlobals *kg, int offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, offset);
   return make_float4(__uint_as_float(node.x),
@@ -134,20 +135,26 @@ ccl_device_inline float4 fetch_node_float(const KernelGlobals *kg, int offset)
                      __uint_as_float(node.w));
 }
 
-ccl_device_forceinline void svm_unpack_node_uchar2(uint i, uint *x, uint *y)
+ccl_device_forceinline void svm_unpack_node_uchar2(uint i,
+                                                   ccl_private uint *x,
+                                                   ccl_private uint *y)
 {
   *x = (i & 0xFF);
   *y = ((i >> 8) & 0xFF);
 }
 
-ccl_device_forceinline void svm_unpack_node_uchar3(uint i, uint *x, uint *y, uint *z)
+ccl_device_forceinline void svm_unpack_node_uchar3(uint i,
+                                                   ccl_private uint *x,
+                                                   ccl_private uint *y,
+                                                   ccl_private uint *z)
 {
   *x = (i & 0xFF);
   *y = ((i >> 8) & 0xFF);
   *z = ((i >> 16) & 0xFF);
 }
 
-ccl_device_forceinline void svm_unpack_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
+ccl_device_forceinline void svm_unpack_node_uchar4(
+    uint i, ccl_private uint *x, ccl_private uint *y, ccl_private uint *z, ccl_private uint *w)
 {
   *x = (i & 0xFF);
   *y = ((i >> 8) & 0xFF);
diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h
index 34ac2cb8fbf..092f3817fd8 100644
--- a/intern/cycles/kernel/svm/svm_ao.h
+++ b/intern/cycles/kernel/svm/svm_ao.h
@@ -25,7 +25,7 @@ extern "C" __device__ float __direct_callable__svm_node_ao(INTEGRATOR_STATE_CONS
 #  else
 ccl_device float svm_ao(INTEGRATOR_STATE_CONST_ARGS,
 #  endif
-                                                           ShaderData *sd,
+                                                           ccl_private ShaderData *sd,
                                                            float3 N,
                                                            float max_dist,
                                                            int num_samples,
@@ -96,7 +96,10 @@ ccl_device_inline
 ccl_device_noinline
 #  endif
     void
-    svm_node_ao(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd, float *stack, uint4 node)
+    svm_node_ao(INTEGRATOR_STATE_CONST_ARGS,
+                ccl_private ShaderData *sd,
+                ccl_private float *stack,
+                uint4 node)
 {
   uint flags, dist_offset, normal_offset, out_ao_offset;
   svm_unpack_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
diff --git a/intern/cycles/kernel/svm/svm_aov.h b/intern/cycles/kernel/svm/svm_aov.h
index 26dec9717b3..640bec87ac9 100644
--- a/intern/cycles/kernel/svm/svm_aov.h
+++ b/intern/cycles/kernel/svm/svm_aov.h
@@ -26,8 +26,8 @@ ccl_device_inline bool svm_node_aov_check(const int path_flag, ccl_global float
 }
 
 ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS,
-                                   ShaderData *sd,
-                                   float *stack,
+                                   ccl_private ShaderData *sd,
+                                   ccl_private float *stack,
                                    uint4 node,
                                    ccl_global float *render_buffer)
 {
@@ -44,8 +44,8 @@ ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS,
 }
 
 ccl_device void svm_node_aov_value(INTEGRATOR_STATE_CONST_ARGS,
-                                   ShaderData *sd,
-                                   float *stack,
+                                   ccl_private ShaderData *sd,
+                                   ccl_private float *stack,
                                    uint4 node,
                                    ccl_global float *render_buffer)
 {
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index 5f94b20af73..9fd401ba1c3 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -18,11 +18,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Attribute Node */
 
-ccl_device AttributeDescriptor svm_node_attr_init(const KernelGlobals *kg,
-                                                  ShaderData *sd,
+ccl_device AttributeDescriptor svm_node_attr_init(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
                                                   uint4 node,
-                                                  NodeAttributeOutputType *type,
-                                                  uint *out_offset)
+                                                  ccl_private NodeAttributeOutputType *type,
+                                                  ccl_private uint *out_offset)
 {
   *out_offset = node.z;
   *type = (NodeAttributeOutputType)node.w;
@@ -48,9 +48,9 @@ ccl_device AttributeDescriptor svm_node_attr_init(const KernelGlobals *kg,
 }
 
 template<uint node_feature_mask>
-ccl_device_noinline void svm_node_attr(const KernelGlobals *kg,
-                                       ShaderData *sd,
-                                       float *stack,
+ccl_device_noinline void svm_node_attr(ccl_global const KernelGlobals *kg,
+                                       ccl_private ShaderData *sd,
+                                       ccl_private float *stack,
                                        uint4 node)
 {
   NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
@@ -148,9 +148,9 @@ ccl_device_noinline void svm_node_attr(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline void svm_node_attr_bump_dx(const KernelGlobals *kg,
-                                               ShaderData *sd,
-                                               float *stack,
+ccl_device_noinline void svm_node_attr_bump_dx(ccl_global const KernelGlobals *kg,
+                                               ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
                                                uint4 node)
 {
   NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
@@ -244,9 +244,9 @@ ccl_device_noinline void svm_node_attr_bump_dx(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline void svm_node_attr_bump_dy(const KernelGlobals *kg,
-                                               ShaderData *sd,
-                                               float *stack,
+ccl_device_noinline void svm_node_attr_bump_dy(ccl_global const KernelGlobals *kg,
+                                               ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
                                                uint4 node)
 {
   NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h
index 60302b8e3d7..a76584e6bc8 100644
--- a/intern/cycles/kernel/svm/svm_bevel.h
+++ b/intern/cycles/kernel/svm/svm_bevel.h
@@ -77,7 +77,10 @@ ccl_device_forceinline float svm_bevel_cubic_quintic_root_find(float xi)
   return x;
 }
 
-ccl_device void svm_bevel_cubic_sample(const float radius, float xi, float *r, float *h)
+ccl_device void svm_bevel_cubic_sample(const float radius,
+                                       float xi,
+                                       ccl_private float *r,
+                                       ccl_private float *h)
 {
   float Rm = radius;
   float r_ = svm_bevel_cubic_quintic_root_find(xi);
@@ -100,7 +103,7 @@ extern "C" __device__ float3 __direct_callable__svm_node_bevel(INTEGRATOR_STATE_
 #  else
 ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS,
 #  endif
-                                                               ShaderData *sd,
+                                                               ccl_private ShaderData *sd,
                                                                float radius,
                                                                int num_samples)
 {
@@ -284,7 +287,10 @@ ccl_device_inline
 ccl_device_noinline
 #  endif
     void
-    svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd, float *stack, uint4 node)
+    svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS,
+                   ccl_private ShaderData *sd,
+                   ccl_private float *stack,
+                   uint4 node)
 {
   uint num_samples, radius_offset, normal_offset, out_offset;
   svm_unpack_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h
index 96b3703b954..521afb42adc 100644
--- a/intern/cycles/kernel/svm/svm_blackbody.h
+++ b/intern/cycles/kernel/svm/svm_blackbody.h
@@ -34,9 +34,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Blackbody Node */
 
-ccl_device_noinline void svm_node_blackbody(const KernelGlobals *kg,
-                                            ShaderData *sd,
-                                            float *stack,
+ccl_device_noinline void svm_node_blackbody(ccl_global const KernelGlobals *kg,
+                                            ccl_private ShaderData *sd,
+                                            ccl_private float *stack,
                                             uint temperature_offset,
                                             uint col_offset)
 {
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index dca1b220dd5..29a8350f1c1 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -72,8 +72,11 @@ ccl_device_noinline_cpu float2 svm_brick(float3 p,
   return make_float2(tint, mortar);
 }
 
-ccl_device_noinline int svm_node_tex_brick(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_brick(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
+                                           uint4 node,
+                                           int offset)
 {
   uint4 node2 = read_node(kg, &offset);
   uint4 node3 = read_node(kg, &offset);
diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h
index 2ed812acd71..0a44ffe6359 100644
--- a/intern/cycles/kernel/svm/svm_brightness.h
+++ b/intern/cycles/kernel/svm/svm_brightness.h
@@ -17,7 +17,7 @@
 CCL_NAMESPACE_BEGIN
 
 ccl_device_noinline void svm_node_brightness(
-    ShaderData *sd, float *stack, uint in_color, uint out_color, uint node)
+    ccl_private ShaderData *sd, ccl_private float *stack, uint in_color, uint out_color, uint node)
 {
   uint bright_offset, contrast_offset;
   float3 color = stack_load_float3(stack, in_color);
diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h
index 8672839dbab..70935c730f4 100644
--- a/intern/cycles/kernel/svm/svm_bump.h
+++ b/intern/cycles/kernel/svm/svm_bump.h
@@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Bump Eval Nodes */
 
-ccl_device_noinline void svm_node_enter_bump_eval(const KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  float *stack,
+ccl_device_noinline void svm_node_enter_bump_eval(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
                                                   uint offset)
 {
   /* save state */
@@ -45,9 +45,9 @@ ccl_device_noinline void svm_node_enter_bump_eval(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline void svm_node_leave_bump_eval(const KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  float *stack,
+ccl_device_noinline void svm_node_leave_bump_eval(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
                                                   uint offset)
 {
   /* restore state */
diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h
index 40c0edcdad0..2b786757af8 100644
--- a/intern/cycles/kernel/svm/svm_camera.h
+++ b/intern/cycles/kernel/svm/svm_camera.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_camera(const KernelGlobals *kg,
-                                         ShaderData *sd,
-                                         float *stack,
+ccl_device_noinline void svm_node_camera(ccl_global const KernelGlobals *kg,
+                                         ccl_private ShaderData *sd,
+                                         ccl_private float *stack,
                                          uint out_vector,
                                          uint out_zdepth,
                                          uint out_distance)
diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h
index a9919c9ddc9..e22367f4f59 100644
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -32,9 +32,9 @@ ccl_device float svm_checker(float3 p)
   return ((xi % 2 == yi % 2) == (zi % 2)) ? 1.0f : 0.0f;
 }
 
-ccl_device_noinline void svm_node_tex_checker(const KernelGlobals *kg,
-                                              ShaderData *sd,
-                                              float *stack,
+ccl_device_noinline void svm_node_tex_checker(ccl_global const KernelGlobals *kg,
+                                              ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
                                               uint4 node)
 {
   uint co_offset, color1_offset, color2_offset, scale_offset;
diff --git a/intern/cycles/kernel/svm/svm_clamp.h b/intern/cycles/kernel/svm/svm_clamp.h
index 656bd31c085..cb5224aebb2 100644
--- a/intern/cycles/kernel/svm/svm_clamp.h
+++ b/intern/cycles/kernel/svm/svm_clamp.h
@@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Clamp Node */
 
-ccl_device_noinline int svm_node_clamp(const KernelGlobals *kg,
-                                       ShaderData *sd,
-                                       float *stack,
+ccl_device_noinline int svm_node_clamp(ccl_global const KernelGlobals *kg,
+                                       ccl_private ShaderData *sd,
+                                       ccl_private float *stack,
                                        uint value_stack_offset,
                                        uint parameters_stack_offsets,
                                        uint result_stack_offset,
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index e55f76a4400..87be73bb2cc 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -18,8 +18,12 @@ CCL_NAMESPACE_BEGIN
 
 /* Closure Nodes */
 
-ccl_device void svm_node_glass_setup(
-    ShaderData *sd, MicrofacetBsdf *bsdf, int type, float eta, float roughness, bool refract)
+ccl_device void svm_node_glass_setup(ccl_private ShaderData *sd,
+                                     ccl_private MicrofacetBsdf *bsdf,
+                                     int type,
+                                     float eta,
+                                     float roughness,
+                                     bool refract)
 {
   if (type == CLOSURE_BSDF_SHARP_GLASS_ID) {
     if (refract) {
@@ -58,8 +62,12 @@ ccl_device void svm_node_glass_setup(
 }
 
 template<uint node_feature_mask, ShaderType shader_type>
-ccl_device_noinline int svm_node_closure_bsdf(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int offset)
+ccl_device_noinline int svm_node_closure_bsdf(ccl_global const KernelGlobals *kg,
+                                              ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
+                                              uint4 node,
+                                              int path_flag,
+                                              int offset)
 {
   uint type, param1_offset, param2_offset;
 
@@ -213,8 +221,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
         if (subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
           float3 diff_weight = weight * base_color * diffuse_weight;
 
-          PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
-              sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
+          ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)
+              bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
 
           if (bsdf) {
             bsdf->N = N;
@@ -225,7 +233,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
           }
         }
         else if (subsurface > CLOSURE_WEIGHT_CUTOFF) {
-          Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight);
+          ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight);
 
           if (bssrdf) {
             bssrdf->radius = subsurface_radius * subsurface;
@@ -247,7 +255,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
       if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
         float3 diff_weight = weight * base_color * diffuse_weight;
 
-        PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+        ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
             sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
 
         if (bsdf) {
@@ -273,7 +281,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
 
         float3 sheen_weight = weight * sheen * sheen_color * diffuse_weight;
 
-        PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)bsdf_alloc(
+        ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)bsdf_alloc(
             sd, sizeof(PrincipledSheenBsdf), sheen_weight);
 
         if (bsdf) {
@@ -292,11 +300,12 @@ ccl_device_noinline int svm_node_closure_bsdf(
             (specular > CLOSURE_WEIGHT_CUTOFF || metallic > CLOSURE_WEIGHT_CUTOFF)) {
           float3 spec_weight = weight * specular_weight;
 
-          MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+          ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
               sd, sizeof(MicrofacetBsdf), spec_weight);
-          MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
-                                                        sd, sizeof(MicrofacetExtra)) :
-                                                    NULL;
+          ccl_private MicrofacetExtra *extra =
+              (bsdf != NULL) ?
+                  (ccl_private MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) :
+                  NULL;
 
           if (bsdf && extra) {
             bsdf->N = N;
@@ -355,11 +364,12 @@ ccl_device_noinline int svm_node_closure_bsdf(
             if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
 #  endif
             {
-              MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+              ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
                   sd, sizeof(MicrofacetBsdf), glass_weight * fresnel);
-              MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
-                                                            sd, sizeof(MicrofacetExtra)) :
-                                                        NULL;
+              ccl_private MicrofacetExtra *extra =
+                  (bsdf != NULL) ? (ccl_private MicrofacetExtra *)closure_alloc_extra(
+                                       sd, sizeof(MicrofacetExtra)) :
+                                   NULL;
 
               if (bsdf && extra) {
                 bsdf->N = N;
@@ -384,7 +394,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
             if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
 #  endif
             {
-              MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+              ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
                   sd, sizeof(MicrofacetBsdf), base_color * glass_weight * (1.0f - fresnel));
               if (bsdf) {
                 bsdf->N = N;
@@ -407,11 +417,12 @@ ccl_device_noinline int svm_node_closure_bsdf(
             }
           }
           else { /* use multi-scatter GGX */
-            MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+            ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
                 sd, sizeof(MicrofacetBsdf), glass_weight);
-            MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
-                                                          sd, sizeof(MicrofacetExtra)) :
-                                                      NULL;
+            ccl_private MicrofacetExtra *extra =
+                (bsdf != NULL) ? (ccl_private MicrofacetExtra *)closure_alloc_extra(
+                                     sd, sizeof(MicrofacetExtra)) :
+                                 NULL;
 
             if (bsdf && extra) {
               bsdf->N = N;
@@ -440,10 +451,12 @@ ccl_device_noinline int svm_node_closure_bsdf(
       if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) {
 #  endif
         if (clearcoat > CLOSURE_WEIGHT_CUTOFF) {
-          MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
-          MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
-                                                        sd, sizeof(MicrofacetExtra)) :
-                                                    NULL;
+          ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+              sd, sizeof(MicrofacetBsdf), weight);
+          ccl_private MicrofacetExtra *extra =
+              (bsdf != NULL) ?
+                  (ccl_private MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) :
+                  NULL;
 
           if (bsdf && extra) {
             bsdf->N = clearcoat_normal;
@@ -471,7 +484,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
 #endif /* __PRINCIPLED__ */
     case CLOSURE_BSDF_DIFFUSE_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
-      OrenNayarBsdf *bsdf = (OrenNayarBsdf *)bsdf_alloc(sd, sizeof(OrenNayarBsdf), weight);
+      ccl_private OrenNayarBsdf *bsdf = (ccl_private OrenNayarBsdf *)bsdf_alloc(
+          sd, sizeof(OrenNayarBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -479,7 +493,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
         float roughness = param1;
 
         if (roughness == 0.0f) {
-          sd->flag |= bsdf_diffuse_setup((DiffuseBsdf *)bsdf);
+          sd->flag |= bsdf_diffuse_setup((ccl_private DiffuseBsdf *)bsdf);
         }
         else {
           bsdf->roughness = roughness;
@@ -490,7 +504,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
     }
     case CLOSURE_BSDF_TRANSLUCENT_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
-      DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+      ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
+          sd, sizeof(DiffuseBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -513,7 +528,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
         break;
 #endif
       float3 weight = sd->svm_closure_weight * mix_weight;
-      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+      ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+          sd, sizeof(MicrofacetBsdf), weight);
 
       if (!bsdf) {
         break;
@@ -559,7 +575,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
         sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
       else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) {
         kernel_assert(stack_valid(data_node.w));
-        bsdf->extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+        bsdf->extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(sd,
+                                                                         sizeof(MicrofacetExtra));
         if (bsdf->extra) {
           bsdf->extra->color = stack_load_float3(stack, data_node.w);
           bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
@@ -581,7 +598,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
         break;
 #endif
       float3 weight = sd->svm_closure_weight * mix_weight;
-      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+      ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+          sd, sizeof(MicrofacetBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -639,7 +657,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
       if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
 #endif
       {
-        MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+        ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
             sd, sizeof(MicrofacetBsdf), weight * fresnel);
 
         if (bsdf) {
@@ -655,7 +673,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
       if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
 #endif
       {
-        MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+        ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
             sd, sizeof(MicrofacetBsdf), weight * (1.0f - fresnel));
 
         if (bsdf) {
@@ -675,12 +693,14 @@ ccl_device_noinline int svm_node_closure_bsdf(
         break;
 #endif
       float3 weight = sd->svm_closure_weight * mix_weight;
-      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+      ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+          sd, sizeof(MicrofacetBsdf), weight);
       if (!bsdf) {
         break;
       }
 
-      MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+      ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(
+          sd, sizeof(MicrofacetExtra));
       if (!extra) {
         break;
       }
@@ -706,7 +726,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
     }
     case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
-      VelvetBsdf *bsdf = (VelvetBsdf *)bsdf_alloc(sd, sizeof(VelvetBsdf), weight);
+      ccl_private VelvetBsdf *bsdf = (ccl_private VelvetBsdf *)bsdf_alloc(
+          sd, sizeof(VelvetBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -724,7 +745,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
 #endif
     case CLOSURE_BSDF_DIFFUSE_TOON_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
-      ToonBsdf *bsdf = (ToonBsdf *)bsdf_alloc(sd, sizeof(ToonBsdf), weight);
+      ccl_private ToonBsdf *bsdf = (ccl_private ToonBsdf *)bsdf_alloc(
+          sd, sizeof(ToonBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -771,11 +793,11 @@ ccl_device_noinline int svm_node_closure_bsdf(
         random = stack_load_float_default(stack, random_ofs, data_node3.y);
       }
 
-      PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc(
+      ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)bsdf_alloc(
           sd, sizeof(PrincipledHairBSDF), weight);
       if (bsdf) {
-        PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra(
-            sd, sizeof(PrincipledHairExtra));
+        ccl_private PrincipledHairExtra *extra = (ccl_private PrincipledHairExtra *)
+            closure_alloc_extra(sd, sizeof(PrincipledHairExtra));
 
         if (!extra)
           break;
@@ -854,7 +876,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
     case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
 
-      HairBsdf *bsdf = (HairBsdf *)bsdf_alloc(sd, sizeof(HairBsdf), weight);
+      ccl_private HairBsdf *bsdf = (ccl_private HairBsdf *)bsdf_alloc(
+          sd, sizeof(HairBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -889,7 +912,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
     case CLOSURE_BSSRDF_RANDOM_WALK_ID:
     case CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
-      Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
+      ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
 
       if (bssrdf) {
         /* disable in case of diffuse ancestor, can't see it well then and
@@ -921,9 +944,9 @@ ccl_device_noinline int svm_node_closure_bsdf(
 }
 
 template<ShaderType shader_type>
-ccl_device_noinline void svm_node_closure_volume(const KernelGlobals *kg,
-                                                 ShaderData *sd,
-                                                 float *stack,
+ccl_device_noinline void svm_node_closure_volume(ccl_global const KernelGlobals *kg,
+                                                 ccl_private ShaderData *sd,
+                                                 ccl_private float *stack,
                                                  uint4 node)
 {
 #ifdef __VOLUME__
@@ -958,7 +981,7 @@ ccl_device_noinline void svm_node_closure_volume(const KernelGlobals *kg,
 
   /* Add closure for volume scattering. */
   if (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
-    HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc(
+    ccl_private HenyeyGreensteinVolume *volume = (ccl_private HenyeyGreensteinVolume *)bsdf_alloc(
         sd, sizeof(HenyeyGreensteinVolume), weight);
 
     if (volume) {
@@ -976,8 +999,12 @@ ccl_device_noinline void svm_node_closure_volume(const KernelGlobals *kg,
 }
 
 template<ShaderType shader_type>
-ccl_device_noinline int svm_node_principled_volume(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int offset)
+ccl_device_noinline int svm_node_principled_volume(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   ccl_private float *stack,
+                                                   uint4 node,
+                                                   int path_flag,
+                                                   int offset)
 {
 #ifdef __VOLUME__
   uint4 value_node = read_node(kg, &offset);
@@ -1023,7 +1050,7 @@ ccl_device_noinline int svm_node_principled_volume(
     }
 
     /* Add closure for volume scattering. */
-    HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc(
+    ccl_private HenyeyGreensteinVolume *volume = (ccl_private HenyeyGreensteinVolume *)bsdf_alloc(
         sd, sizeof(HenyeyGreensteinVolume), color * density);
     if (volume) {
       float anisotropy = (stack_valid(anisotropy_offset)) ?
@@ -1087,7 +1114,9 @@ ccl_device_noinline int svm_node_principled_volume(
   return offset;
 }
 
-ccl_device_noinline void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_closure_emission(ccl_private ShaderData *sd,
+                                                   ccl_private float *stack,
+                                                   uint4 node)
 {
   uint mix_weight_offset = node.y;
   float3 weight = sd->svm_closure_weight;
@@ -1104,7 +1133,9 @@ ccl_device_noinline void svm_node_closure_emission(ShaderData *sd, float *stack,
   emission_setup(sd, weight);
 }
 
-ccl_device_noinline void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_closure_background(ccl_private ShaderData *sd,
+                                                     ccl_private float *stack,
+                                                     uint4 node)
 {
   uint mix_weight_offset = node.y;
   float3 weight = sd->svm_closure_weight;
@@ -1121,7 +1152,9 @@ ccl_device_noinline void svm_node_closure_background(ShaderData *sd, float *stac
   background_setup(sd, weight);
 }
 
-ccl_device_noinline void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_closure_holdout(ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
+                                                  uint4 node)
 {
   uint mix_weight_offset = node.y;
 
@@ -1142,26 +1175,28 @@ ccl_device_noinline void svm_node_closure_holdout(ShaderData *sd, float *stack,
 
 /* Closure Nodes */
 
-ccl_device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight)
+ccl_device_inline void svm_node_closure_store_weight(ccl_private ShaderData *sd, float3 weight)
 {
   sd->svm_closure_weight = weight;
 }
 
-ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b)
+ccl_device void svm_node_closure_set_weight(ccl_private ShaderData *sd, uint r, uint g, uint b)
 {
   float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
   svm_node_closure_store_weight(sd, weight);
 }
 
-ccl_device void svm_node_closure_weight(ShaderData *sd, float *stack, uint weight_offset)
+ccl_device void svm_node_closure_weight(ccl_private ShaderData *sd,
+                                        ccl_private float *stack,
+                                        uint weight_offset)
 {
   float3 weight = stack_load_float3(stack, weight_offset);
   svm_node_closure_store_weight(sd, weight);
 }
 
-ccl_device_noinline void svm_node_emission_weight(const KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  float *stack,
+ccl_device_noinline void svm_node_emission_weight(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
                                                   uint4 node)
 {
   uint color_offset = node.y;
@@ -1173,7 +1208,9 @@ ccl_device_noinline void svm_node_emission_weight(const KernelGlobals *kg,
   svm_node_closure_store_weight(sd, weight);
 }
 
-ccl_device_noinline void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_mix_closure(ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
+                                              uint4 node)
 {
   /* fetch weight from blend input, previous mix closures,
    * and write to stack to be used by closure nodes later */
@@ -1195,8 +1232,11 @@ ccl_device_noinline void svm_node_mix_closure(ShaderData *sd, float *stack, uint
 
 /* (Bump) normal */
 
-ccl_device void svm_node_set_normal(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal)
+ccl_device void svm_node_set_normal(ccl_global const KernelGlobals *kg,
+                                    ccl_private ShaderData *sd,
+                                    ccl_private float *stack,
+                                    uint in_direction,
+                                    uint out_normal)
 {
   float3 normal = stack_load_float3(stack, in_direction);
   sd->N = normal;
diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h
index 37d40167ccc..0d53779a5c8 100644
--- a/intern/cycles/kernel/svm/svm_convert.h
+++ b/intern/cycles/kernel/svm/svm_convert.h
@@ -18,8 +18,12 @@ CCL_NAMESPACE_BEGIN
 
 /* Conversion Nodes */
 
-ccl_device_noinline void svm_node_convert(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to)
+ccl_device_noinline void svm_node_convert(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
+                                          uint type,
+                                          uint from,
+                                          uint to)
 {
   switch (type) {
     case NODE_CONVERT_FI: {
diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h
index a1d952173d8..7a3c8a6d36d 100644
--- a/intern/cycles/kernel/svm/svm_displace.h
+++ b/intern/cycles/kernel/svm/svm_displace.h
@@ -20,9 +20,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Bump Node */
 
-ccl_device_noinline void svm_node_set_bump(const KernelGlobals *kg,
-                                           ShaderData *sd,
-                                           float *stack,
+ccl_device_noinline void svm_node_set_bump(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
                                            uint4 node)
 {
 #ifdef __RAY_DIFFERENTIALS__
@@ -88,18 +88,18 @@ ccl_device_noinline void svm_node_set_bump(const KernelGlobals *kg,
 
 /* Displacement Node */
 
-ccl_device void svm_node_set_displacement(const KernelGlobals *kg,
-                                          ShaderData *sd,
-                                          float *stack,
+ccl_device void svm_node_set_displacement(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
                                           uint fac_offset)
 {
   float3 dP = stack_load_float3(stack, fac_offset);
   sd->P += dP;
 }
 
-ccl_device_noinline void svm_node_displacement(const KernelGlobals *kg,
-                                               ShaderData *sd,
-                                               float *stack,
+ccl_device_noinline void svm_node_displacement(ccl_global const KernelGlobals *kg,
+                                               ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
                                                uint4 node)
 {
   uint height_offset, midlevel_offset, scale_offset, normal_offset;
@@ -127,8 +127,11 @@ ccl_device_noinline void svm_node_displacement(const KernelGlobals *kg,
   stack_store_float3(stack, node.z, dP);
 }
 
-ccl_device_noinline int svm_node_vector_displacement(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_vector_displacement(ccl_global const KernelGlobals *kg,
+                                                     ccl_private ShaderData *sd,
+                                                     ccl_private float *stack,
+                                                     uint4 node,
+                                                     int offset)
 {
   uint4 data_node = read_node(kg, &offset);
   uint space = data_node.x;
diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h
index b5ecdbe2abf..449ec84370f 100644
--- a/intern/cycles/kernel/svm/svm_fresnel.h
+++ b/intern/cycles/kernel/svm/svm_fresnel.h
@@ -18,8 +18,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Fresnel Node */
 
-ccl_device_noinline void svm_node_fresnel(
-    ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node)
+ccl_device_noinline void svm_node_fresnel(ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
+                                          uint ior_offset,
+                                          uint ior_value,
+                                          uint node)
 {
   uint normal_offset, out_offset;
   svm_unpack_node_uchar2(node, &normal_offset, &out_offset);
@@ -37,7 +40,9 @@ ccl_device_noinline void svm_node_fresnel(
 
 /* Layer Weight Node */
 
-ccl_device_noinline void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_layer_weight(ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
+                                               uint4 node)
 {
   uint blend_offset = node.y;
   uint blend_value = node.z;
diff --git a/intern/cycles/kernel/svm/svm_gamma.h b/intern/cycles/kernel/svm/svm_gamma.h
index f6fafdee941..7ec6c31065d 100644
--- a/intern/cycles/kernel/svm/svm_gamma.h
+++ b/intern/cycles/kernel/svm/svm_gamma.h
@@ -16,8 +16,11 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_gamma(
-    ShaderData *sd, float *stack, uint in_gamma, uint in_color, uint out_color)
+ccl_device_noinline void svm_node_gamma(ccl_private ShaderData *sd,
+                                        ccl_private float *stack,
+                                        uint in_gamma,
+                                        uint in_color,
+                                        uint out_color)
 {
   float3 color = stack_load_float3(stack, in_color);
   float gamma = stack_load_float(stack, in_gamma);
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index 432529eb061..a94464d3a52 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -18,8 +18,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Geometry Node */
 
-ccl_device_noinline void svm_node_geometry(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_geometry(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
+                                           uint type,
+                                           uint out_offset)
 {
   float3 data;
 
@@ -51,8 +54,11 @@ ccl_device_noinline void svm_node_geometry(
   stack_store_float3(stack, out_offset, data);
 }
 
-ccl_device_noinline void svm_node_geometry_bump_dx(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_geometry_bump_dx(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   ccl_private float *stack,
+                                                   uint type,
+                                                   uint out_offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
   float3 data;
@@ -75,8 +81,11 @@ ccl_device_noinline void svm_node_geometry_bump_dx(
 #endif
 }
 
-ccl_device_noinline void svm_node_geometry_bump_dy(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_geometry_bump_dy(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   ccl_private float *stack,
+                                                   uint type,
+                                                   uint out_offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
   float3 data;
@@ -101,8 +110,11 @@ ccl_device_noinline void svm_node_geometry_bump_dy(
 
 /* Object Info */
 
-ccl_device_noinline void svm_node_object_info(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_object_info(ccl_global const KernelGlobals *kg,
+                                              ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
+                                              uint type,
+                                              uint out_offset)
 {
   float data;
 
@@ -140,8 +152,11 @@ ccl_device_noinline void svm_node_object_info(
 
 /* Particle Info */
 
-ccl_device_noinline void svm_node_particle_info(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_particle_info(ccl_global const KernelGlobals *kg,
+                                                ccl_private ShaderData *sd,
+                                                ccl_private float *stack,
+                                                uint type,
+                                                uint out_offset)
 {
   switch (type) {
     case NODE_INFO_PAR_INDEX: {
@@ -199,8 +214,11 @@ ccl_device_noinline void svm_node_particle_info(
 
 /* Hair Info */
 
-ccl_device_noinline void svm_node_hair_info(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_hair_info(ccl_global const KernelGlobals *kg,
+                                            ccl_private ShaderData *sd,
+                                            ccl_private float *stack,
+                                            uint type,
+                                            uint out_offset)
 {
   float data;
   float3 data3;
diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h
index cd15f7097e7..8cc37be606f 100644
--- a/intern/cycles/kernel/svm/svm_gradient.h
+++ b/intern/cycles/kernel/svm/svm_gradient.h
@@ -60,7 +60,9 @@ ccl_device float svm_gradient(float3 p, NodeGradientType type)
   return 0.0f;
 }
 
-ccl_device_noinline void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_tex_gradient(ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
+                                               uint4 node)
 {
   uint type, co_offset, color_offset, fac_offset;
 
diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h
index 6f49a8385aa..feb85eda122 100644
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -19,9 +19,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_hsv(const KernelGlobals *kg,
-                                      ShaderData *sd,
-                                      float *stack,
+ccl_device_noinline void svm_node_hsv(ccl_global const KernelGlobals *kg,
+                                      ccl_private ShaderData *sd,
+                                      ccl_private float *stack,
                                       uint4 node)
 {
   uint in_color_offset, fac_offset, out_color_offset;
diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h
index 9c13734ecf0..7d41205c9ef 100644
--- a/intern/cycles/kernel/svm/svm_ies.h
+++ b/intern/cycles/kernel/svm/svm_ies.h
@@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN
 /* IES Light */
 
 ccl_device_inline float interpolate_ies_vertical(
-    const KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
+    ccl_global const KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
 {
   /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end
    * of v (corresponding to the north pole) would result in artifacts. The proper way of dealing
@@ -39,7 +39,7 @@ ccl_device_inline float interpolate_ies_vertical(
   return cubic_interp(a, b, c, d, v_frac);
 }
 
-ccl_device_inline float kernel_ies_interp(const KernelGlobals *kg,
+ccl_device_inline float kernel_ies_interp(ccl_global const KernelGlobals *kg,
                                           int slot,
                                           float h_angle,
                                           float v_angle)
@@ -98,9 +98,9 @@ ccl_device_inline float kernel_ies_interp(const KernelGlobals *kg,
   return max(cubic_interp(a, b, c, d, h_frac), 0.0f);
 }
 
-ccl_device_noinline void svm_node_ies(const KernelGlobals *kg,
-                                      ShaderData *sd,
-                                      float *stack,
+ccl_device_noinline void svm_node_ies(ccl_global const KernelGlobals *kg,
+                                      ccl_private ShaderData *sd,
+                                      ccl_private float *stack,
                                       uint4 node)
 {
   uint vector_offset, strength_offset, fac_offset, slot = node.z;
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index ce70109392b..2de80d5fc29 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -16,7 +16,8 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float4 svm_image_texture(const KernelGlobals *kg, int id, float x, float y, uint flags)
+ccl_device float4
+svm_image_texture(ccl_global const KernelGlobals *kg, int id, float x, float y, uint flags)
 {
   if (id == -1) {
     return make_float4(
@@ -44,8 +45,11 @@ ccl_device_inline float3 texco_remap_square(float3 co)
   return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f;
 }
 
-ccl_device_noinline int svm_node_tex_image(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_image(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
+                                           uint4 node,
+                                           int offset)
 {
   uint co_offset, out_offset, alpha_offset, flags;
 
@@ -117,9 +121,9 @@ ccl_device_noinline int svm_node_tex_image(
   return offset;
 }
 
-ccl_device_noinline void svm_node_tex_image_box(const KernelGlobals *kg,
-                                                ShaderData *sd,
-                                                float *stack,
+ccl_device_noinline void svm_node_tex_image_box(ccl_global const KernelGlobals *kg,
+                                                ccl_private ShaderData *sd,
+                                                ccl_private float *stack,
                                                 uint4 node)
 {
   /* get object space normal */
@@ -219,9 +223,9 @@ ccl_device_noinline void svm_node_tex_image_box(const KernelGlobals *kg,
     stack_store_float(stack, alpha_offset, f.w);
 }
 
-ccl_device_noinline void svm_node_tex_environment(const KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  float *stack,
+ccl_device_noinline void svm_node_tex_environment(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
                                                   uint4 node)
 {
   uint id = node.y;
diff --git a/intern/cycles/kernel/svm/svm_invert.h b/intern/cycles/kernel/svm/svm_invert.h
index 27cdaaff473..60668ec00f1 100644
--- a/intern/cycles/kernel/svm/svm_invert.h
+++ b/intern/cycles/kernel/svm/svm_invert.h
@@ -21,8 +21,11 @@ ccl_device float invert(float color, float factor)
   return factor * (1.0f - color) + (1.0f - factor) * color;
 }
 
-ccl_device_noinline void svm_node_invert(
-    ShaderData *sd, float *stack, uint in_fac, uint in_color, uint out_color)
+ccl_device_noinline void svm_node_invert(ccl_private ShaderData *sd,
+                                         ccl_private float *stack,
+                                         uint in_fac,
+                                         uint in_color,
+                                         uint out_color)
 {
   float factor = stack_load_float(stack, in_fac);
   float3 color = stack_load_float3(stack, in_color);
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index 49fabad1cc5..aaff8376c7c 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -19,8 +19,8 @@ CCL_NAMESPACE_BEGIN
 /* Light Path Node */
 
 ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS,
-                                             const ShaderData *sd,
-                                             float *stack,
+                                             ccl_private const ShaderData *sd,
+                                             ccl_private float *stack,
                                              uint type,
                                              uint out_offset,
                                              int path_flag)
@@ -106,7 +106,9 @@ ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS,
 
 /* Light Falloff Node */
 
-ccl_device_noinline void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_light_falloff(ccl_private ShaderData *sd,
+                                                ccl_private float *stack,
+                                                uint4 node)
 {
   uint strength_offset, out_offset, smooth_offset;
 
diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h
index 8784c760860..4c4f3bcf523 100644
--- a/intern/cycles/kernel/svm/svm_magic.h
+++ b/intern/cycles/kernel/svm/svm_magic.h
@@ -87,8 +87,11 @@ ccl_device_noinline_cpu float3 svm_magic(float3 p, int n, float distortion)
   return make_float3(0.5f - x, 0.5f - y, 0.5f - z);
 }
 
-ccl_device_noinline int svm_node_tex_magic(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_magic(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
+                                           uint4 node,
+                                           int offset)
 {
   uint depth;
   uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset;
diff --git a/intern/cycles/kernel/svm/svm_map_range.h b/intern/cycles/kernel/svm/svm_map_range.h
index c8684981e31..f4f7d3ca76f 100644
--- a/intern/cycles/kernel/svm/svm_map_range.h
+++ b/intern/cycles/kernel/svm/svm_map_range.h
@@ -24,9 +24,9 @@ ccl_device_inline float smootherstep(float edge0, float edge1, float x)
   return x * x * x * (x * (x * 6.0f - 15.0f) + 10.0f);
 }
 
-ccl_device_noinline int svm_node_map_range(const KernelGlobals *kg,
-                                           ShaderData *sd,
-                                           float *stack,
+ccl_device_noinline int svm_node_map_range(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
                                            uint value_stack_offset,
                                            uint parameters_stack_offsets,
                                            uint results_stack_offsets,
diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h
index fcc724405f5..8102afc637e 100644
--- a/intern/cycles/kernel/svm/svm_mapping.h
+++ b/intern/cycles/kernel/svm/svm_mapping.h
@@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Mapping Node */
 
-ccl_device_noinline void svm_node_mapping(const KernelGlobals *kg,
-                                          ShaderData *sd,
-                                          float *stack,
+ccl_device_noinline void svm_node_mapping(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
                                           uint type,
                                           uint inputs_stack_offsets,
                                           uint result_stack_offset)
@@ -43,9 +43,9 @@ ccl_device_noinline void svm_node_mapping(const KernelGlobals *kg,
 
 /* Texture Mapping */
 
-ccl_device_noinline int svm_node_texture_mapping(const KernelGlobals *kg,
-                                                 ShaderData *sd,
-                                                 float *stack,
+ccl_device_noinline int svm_node_texture_mapping(ccl_global const KernelGlobals *kg,
+                                                 ccl_private ShaderData *sd,
+                                                 ccl_private float *stack,
                                                  uint vec_offset,
                                                  uint out_offset,
                                                  int offset)
@@ -62,9 +62,9 @@ ccl_device_noinline int svm_node_texture_mapping(const KernelGlobals *kg,
   return offset;
 }
 
-ccl_device_noinline int svm_node_min_max(const KernelGlobals *kg,
-                                         ShaderData *sd,
-                                         float *stack,
+ccl_device_noinline int svm_node_min_max(ccl_global const KernelGlobals *kg,
+                                         ccl_private ShaderData *sd,
+                                         ccl_private float *stack,
                                          uint vec_offset,
                                          uint out_offset,
                                          int offset)
diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h
index 99e7a8f2bda..3897a453873 100644
--- a/intern/cycles/kernel/svm/svm_math.h
+++ b/intern/cycles/kernel/svm/svm_math.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_math(const KernelGlobals *kg,
-                                       ShaderData *sd,
-                                       float *stack,
+ccl_device_noinline void svm_node_math(ccl_global const KernelGlobals *kg,
+                                       ccl_private ShaderData *sd,
+                                       ccl_private float *stack,
                                        uint type,
                                        uint inputs_stack_offsets,
                                        uint result_stack_offset)
@@ -34,9 +34,9 @@ ccl_device_noinline void svm_node_math(const KernelGlobals *kg,
   stack_store_float(stack, result_stack_offset, result);
 }
 
-ccl_device_noinline int svm_node_vector_math(const KernelGlobals *kg,
-                                             ShaderData *sd,
-                                             float *stack,
+ccl_device_noinline int svm_node_vector_math(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             ccl_private float *stack,
                                              uint type,
                                              uint inputs_stack_offsets,
                                              uint outputs_stack_offsets,
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index 11b1e8f57f8..d3225b55ef0 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -16,8 +16,8 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void svm_vector_math(float *value,
-                                float3 *vector,
+ccl_device void svm_vector_math(ccl_private float *value,
+                                ccl_private float3 *vector,
                                 NodeVectorMathType type,
                                 float3 a,
                                 float3 b,
diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h
index 3e38080977f..0064c5e643c 100644
--- a/intern/cycles/kernel/svm/svm_mix.h
+++ b/intern/cycles/kernel/svm/svm_mix.h
@@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Node */
 
-ccl_device_noinline int svm_node_mix(const KernelGlobals *kg,
-                                     ShaderData *sd,
-                                     float *stack,
+ccl_device_noinline int svm_node_mix(ccl_global const KernelGlobals *kg,
+                                     ccl_private ShaderData *sd,
+                                     ccl_private float *stack,
                                      uint fac_offset,
                                      uint c1_offset,
                                      uint c2_offset,
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 03a8b68b3ef..8523f45b95f 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -700,9 +700,9 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_4d(
   return value;
 }
 
-ccl_device_noinline int svm_node_tex_musgrave(const KernelGlobals *kg,
-                                              ShaderData *sd,
-                                              float *stack,
+ccl_device_noinline int svm_node_tex_musgrave(ccl_global const KernelGlobals *kg,
+                                              ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
                                               uint offsets1,
                                               uint offsets2,
                                               uint offsets3,
diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h
index 29b262ac06e..61da8227efa 100644
--- a/intern/cycles/kernel/svm/svm_noisetex.h
+++ b/intern/cycles/kernel/svm/svm_noisetex.h
@@ -55,8 +55,8 @@ ccl_device void noise_texture_1d(float co,
                                  float roughness,
                                  float distortion,
                                  bool color_is_needed,
-                                 float *value,
-                                 float3 *color)
+                                 ccl_private float *value,
+                                 ccl_private float3 *color)
 {
   float p = co;
   if (distortion != 0.0f) {
@@ -76,8 +76,8 @@ ccl_device void noise_texture_2d(float2 co,
                                  float roughness,
                                  float distortion,
                                  bool color_is_needed,
-                                 float *value,
-                                 float3 *color)
+                                 ccl_private float *value,
+                                 ccl_private float3 *color)
 {
   float2 p = co;
   if (distortion != 0.0f) {
@@ -98,8 +98,8 @@ ccl_device void noise_texture_3d(float3 co,
                                  float roughness,
                                  float distortion,
                                  bool color_is_needed,
-                                 float *value,
-                                 float3 *color)
+                                 ccl_private float *value,
+                                 ccl_private float3 *color)
 {
   float3 p = co;
   if (distortion != 0.0f) {
@@ -121,8 +121,8 @@ ccl_device void noise_texture_4d(float4 co,
                                  float roughness,
                                  float distortion,
                                  bool color_is_needed,
-                                 float *value,
-                                 float3 *color)
+                                 ccl_private float *value,
+                                 ccl_private float3 *color)
 {
   float4 p = co;
   if (distortion != 0.0f) {
@@ -140,9 +140,9 @@ ccl_device void noise_texture_4d(float4 co,
   }
 }
 
-ccl_device_noinline int svm_node_tex_noise(const KernelGlobals *kg,
-                                           ShaderData *sd,
-                                           float *stack,
+ccl_device_noinline int svm_node_tex_noise(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
                                            uint dimensions,
                                            uint offsets1,
                                            uint offsets2,
diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h
index 724b5f281f9..0d1b4200d54 100644
--- a/intern/cycles/kernel/svm/svm_normal.h
+++ b/intern/cycles/kernel/svm/svm_normal.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline int svm_node_normal(const KernelGlobals *kg,
-                                        ShaderData *sd,
-                                        float *stack,
+ccl_device_noinline int svm_node_normal(ccl_global const KernelGlobals *kg,
+                                        ccl_private ShaderData *sd,
+                                        ccl_private float *stack,
                                         uint in_normal_offset,
                                         uint out_normal_offset,
                                         uint out_dot_offset,
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index 563e5bcb5e4..ef8b0d103c1 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -21,13 +21,13 @@ CCL_NAMESPACE_BEGIN
 
 /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
 
-ccl_device_inline float fetch_float(const KernelGlobals *kg, int offset)
+ccl_device_inline float fetch_float(ccl_global const KernelGlobals *kg, int offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, offset);
   return __uint_as_float(node.x);
 }
 
-ccl_device_inline float float_ramp_lookup(const KernelGlobals *kg,
+ccl_device_inline float float_ramp_lookup(ccl_global const KernelGlobals *kg,
                                           int offset,
                                           float f,
                                           bool interpolate,
@@ -63,7 +63,7 @@ ccl_device_inline float float_ramp_lookup(const KernelGlobals *kg,
   return a;
 }
 
-ccl_device_inline float4 rgb_ramp_lookup(const KernelGlobals *kg,
+ccl_device_inline float4 rgb_ramp_lookup(ccl_global const KernelGlobals *kg,
                                          int offset,
                                          float f,
                                          bool interpolate,
@@ -99,8 +99,11 @@ ccl_device_inline float4 rgb_ramp_lookup(const KernelGlobals *kg,
   return a;
 }
 
-ccl_device_noinline int svm_node_rgb_ramp(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_rgb_ramp(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
+                                          uint4 node,
+                                          int offset)
 {
   uint fac_offset, color_offset, alpha_offset;
   uint interpolate = node.z;
@@ -121,8 +124,11 @@ ccl_device_noinline int svm_node_rgb_ramp(
   return offset;
 }
 
-ccl_device_noinline int svm_node_curves(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_curves(ccl_global const KernelGlobals *kg,
+                                        ccl_private ShaderData *sd,
+                                        ccl_private float *stack,
+                                        uint4 node,
+                                        int offset)
 {
   uint fac_offset, color_offset, out_offset;
   svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &out_offset);
@@ -147,8 +153,11 @@ ccl_device_noinline int svm_node_curves(
   return offset;
 }
 
-ccl_device_noinline int svm_node_curve(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_curve(ccl_global const KernelGlobals *kg,
+                                       ccl_private ShaderData *sd,
+                                       ccl_private float *stack,
+                                       uint4 node,
+                                       int offset)
 {
   uint fac_offset, value_in_offset, out_offset;
   svm_unpack_node_uchar3(node.y, &fac_offset, &value_in_offset, &out_offset);
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
index 8d52845ea3d..3cd4ba87a55 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline int svm_node_combine_hsv(const KernelGlobals *kg,
-                                             ShaderData *sd,
-                                             float *stack,
+ccl_device_noinline int svm_node_combine_hsv(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             ccl_private float *stack,
                                              uint hue_in,
                                              uint saturation_in,
                                              uint value_in,
@@ -39,9 +39,9 @@ ccl_device_noinline int svm_node_combine_hsv(const KernelGlobals *kg,
   return offset;
 }
 
-ccl_device_noinline int svm_node_separate_hsv(const KernelGlobals *kg,
-                                              ShaderData *sd,
-                                              float *stack,
+ccl_device_noinline int svm_node_separate_hsv(ccl_global const KernelGlobals *kg,
+                                              ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
                                               uint color_in,
                                               uint hue_out,
                                               uint saturation_out,
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_vector.h b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
index cbf77f1e640..11e440f2cbf 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_vector.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
@@ -18,8 +18,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Vector combine / separate, used for the RGB and XYZ nodes */
 
-ccl_device void svm_node_combine_vector(
-    ShaderData *sd, float *stack, uint in_offset, uint vector_index, uint out_offset)
+ccl_device void svm_node_combine_vector(ccl_private ShaderData *sd,
+                                        ccl_private float *stack,
+                                        uint in_offset,
+                                        uint vector_index,
+                                        uint out_offset)
 {
   float vector = stack_load_float(stack, in_offset);
 
@@ -27,8 +30,11 @@ ccl_device void svm_node_combine_vector(
     stack_store_float(stack, out_offset + vector_index, vector);
 }
 
-ccl_device void svm_node_separate_vector(
-    ShaderData *sd, float *stack, uint ivector_offset, uint vector_index, uint out_offset)
+ccl_device void svm_node_separate_vector(ccl_private ShaderData *sd,
+                                         ccl_private float *stack,
+                                         uint ivector_offset,
+                                         uint vector_index,
+                                         uint out_offset)
 {
   float3 vector = stack_load_float3(stack, ivector_offset);
 
diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h
index b77c4311e72..04db8109170 100644
--- a/intern/cycles/kernel/svm/svm_sky.h
+++ b/intern/cycles/kernel/svm/svm_sky.h
@@ -28,7 +28,7 @@ ccl_device float sky_angle_between(float thetav, float phiv, float theta, float
  * "A Practical Analytic Model for Daylight"
  * A. J. Preetham, Peter Shirley, Brian Smits
  */
-ccl_device float sky_perez_function(float *lam, float theta, float gamma)
+ccl_device float sky_perez_function(ccl_private float *lam, float theta, float gamma)
 {
   float ctheta = cosf(theta);
   float cgamma = cosf(gamma);
@@ -37,16 +37,16 @@ ccl_device float sky_perez_function(float *lam, float theta, float gamma)
          (1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma);
 }
 
-ccl_device float3 sky_radiance_preetham(const KernelGlobals *kg,
+ccl_device float3 sky_radiance_preetham(ccl_global const KernelGlobals *kg,
                                         float3 dir,
                                         float sunphi,
                                         float suntheta,
                                         float radiance_x,
                                         float radiance_y,
                                         float radiance_z,
-                                        float *config_x,
-                                        float *config_y,
-                                        float *config_z)
+                                        ccl_private float *config_x,
+                                        ccl_private float *config_y,
+                                        ccl_private float *config_z)
 {
   /* convert vector to spherical coordinates */
   float2 spherical = direction_to_spherical(dir);
@@ -73,7 +73,7 @@ ccl_device float3 sky_radiance_preetham(const KernelGlobals *kg,
  * "An Analytic Model for Full Spectral Sky-Dome Radiance"
  * Lukas Hosek, Alexander Wilkie
  */
-ccl_device float sky_radiance_internal(float *configuration, float theta, float gamma)
+ccl_device float sky_radiance_internal(ccl_private float *configuration, float theta, float gamma)
 {
   float ctheta = cosf(theta);
   float cgamma = cosf(gamma);
@@ -90,16 +90,16 @@ ccl_device float sky_radiance_internal(float *configuration, float theta, float
           configuration[6] * mieM + configuration[7] * zenith);
 }
 
-ccl_device float3 sky_radiance_hosek(const KernelGlobals *kg,
+ccl_device float3 sky_radiance_hosek(ccl_global const KernelGlobals *kg,
                                      float3 dir,
                                      float sunphi,
                                      float suntheta,
                                      float radiance_x,
                                      float radiance_y,
                                      float radiance_z,
-                                     float *config_x,
-                                     float *config_y,
-                                     float *config_z)
+                                     ccl_private float *config_x,
+                                     ccl_private float *config_y,
+                                     ccl_private float *config_z)
 {
   /* convert vector to spherical coordinates */
   float2 spherical = direction_to_spherical(dir);
@@ -127,9 +127,9 @@ ccl_device float3 geographical_to_direction(float lat, float lon)
   return make_float3(cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat));
 }
 
-ccl_device float3 sky_radiance_nishita(const KernelGlobals *kg,
+ccl_device float3 sky_radiance_nishita(ccl_global const KernelGlobals *kg,
                                        float3 dir,
-                                       float *nishita_data,
+                                       ccl_private float *nishita_data,
                                        uint texture_id)
 {
   /* definitions */
@@ -209,8 +209,11 @@ ccl_device float3 sky_radiance_nishita(const KernelGlobals *kg,
   return xyz_to_rgb(kg, xyz);
 }
 
-ccl_device_noinline int svm_node_tex_sky(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_sky(ccl_global const KernelGlobals *kg,
+                                         ccl_private ShaderData *sd,
+                                         ccl_private float *stack,
+                                         uint4 node,
+                                         int offset)
 {
   /* Load data */
   uint dir_offset = node.y;
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index 8869001015b..295d5e9f65b 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -22,8 +22,12 @@ CCL_NAMESPACE_BEGIN
 
 /* Texture Coordinate Node */
 
-ccl_device_noinline int svm_node_tex_coord(
-    const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_coord(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           int path_flag,
+                                           ccl_private float *stack,
+                                           uint4 node,
+                                           int offset)
 {
   float3 data;
   uint type = node.y;
@@ -99,8 +103,12 @@ ccl_device_noinline int svm_node_tex_coord(
   return offset;
 }
 
-ccl_device_noinline int svm_node_tex_coord_bump_dx(
-    const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_coord_bump_dx(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   int path_flag,
+                                                   ccl_private float *stack,
+                                                   uint4 node,
+                                                   int offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
   float3 data;
@@ -180,8 +188,12 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(
 #endif
 }
 
-ccl_device_noinline int svm_node_tex_coord_bump_dy(
-    const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_coord_bump_dy(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   int path_flag,
+                                                   ccl_private float *stack,
+                                                   uint4 node,
+                                                   int offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
   float3 data;
@@ -261,9 +273,9 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(
 #endif
 }
 
-ccl_device_noinline void svm_node_normal_map(const KernelGlobals *kg,
-                                             ShaderData *sd,
-                                             float *stack,
+ccl_device_noinline void svm_node_normal_map(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             ccl_private float *stack,
                                              uint4 node)
 {
   uint color_offset, strength_offset, normal_offset, space;
@@ -354,9 +366,9 @@ ccl_device_noinline void svm_node_normal_map(const KernelGlobals *kg,
   stack_store_float3(stack, normal_offset, N);
 }
 
-ccl_device_noinline void svm_node_tangent(const KernelGlobals *kg,
-                                          ShaderData *sd,
-                                          float *stack,
+ccl_device_noinline void svm_node_tangent(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
                                           uint4 node)
 {
   uint tangent_offset, direction_type, axis;
diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h
index d0478660094..d1038bc072d 100644
--- a/intern/cycles/kernel/svm/svm_value.h
+++ b/intern/cycles/kernel/svm/svm_value.h
@@ -18,14 +18,20 @@ CCL_NAMESPACE_BEGIN
 
 /* Value Nodes */
 
-ccl_device void svm_node_value_f(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset)
+ccl_device void svm_node_value_f(ccl_global const KernelGlobals *kg,
+                                 ccl_private ShaderData *sd,
+                                 ccl_private float *stack,
+                                 uint ivalue,
+                                 uint out_offset)
 {
   stack_store_float(stack, out_offset, __uint_as_float(ivalue));
 }
 
-ccl_device int svm_node_value_v(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int offset)
+ccl_device int svm_node_value_v(ccl_global const KernelGlobals *kg,
+                                ccl_private ShaderData *sd,
+                                ccl_private float *stack,
+                                uint out_offset,
+                                int offset)
 {
   /* read extra data */
   uint4 node1 = read_node(kg, &offset);
diff --git a/intern/cycles/kernel/svm/svm_vector_rotate.h b/intern/cycles/kernel/svm/svm_vector_rotate.h
index 55e1bce0158..c20f9b2556f 100644
--- a/intern/cycles/kernel/svm/svm_vector_rotate.h
+++ b/intern/cycles/kernel/svm/svm_vector_rotate.h
@@ -18,8 +18,8 @@ CCL_NAMESPACE_BEGIN
 
 /* Vector Rotate */
 
-ccl_device_noinline void svm_node_vector_rotate(ShaderData *sd,
-                                                float *stack,
+ccl_device_noinline void svm_node_vector_rotate(ccl_private ShaderData *sd,
+                                                ccl_private float *stack,
                                                 uint input_stack_offsets,
                                                 uint axis_stack_offsets,
                                                 uint result_stack_offset)
diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h
index 8aedb7e0f54..b6c898c3952 100644
--- a/intern/cycles/kernel/svm/svm_vector_transform.h
+++ b/intern/cycles/kernel/svm/svm_vector_transform.h
@@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Vector Transform */
 
-ccl_device_noinline void svm_node_vector_transform(const KernelGlobals *kg,
-                                                   ShaderData *sd,
-                                                   float *stack,
+ccl_device_noinline void svm_node_vector_transform(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   ccl_private float *stack,
                                                    uint4 node)
 {
   uint itype, ifrom, ito;
diff --git a/intern/cycles/kernel/svm/svm_vertex_color.h b/intern/cycles/kernel/svm/svm_vertex_color.h
index 986ea244f3a..3641f05ca43 100644
--- a/intern/cycles/kernel/svm/svm_vertex_color.h
+++ b/intern/cycles/kernel/svm/svm_vertex_color.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_vertex_color(const KernelGlobals *kg,
-                                               ShaderData *sd,
-                                               float *stack,
+ccl_device_noinline void svm_node_vertex_color(ccl_global const KernelGlobals *kg,
+                                               ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
                                                uint layer_id,
                                                uint color_offset,
                                                uint alpha_offset)
@@ -35,9 +35,9 @@ ccl_device_noinline void svm_node_vertex_color(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline void svm_node_vertex_color_bump_dx(const KernelGlobals *kg,
-                                                       ShaderData *sd,
-                                                       float *stack,
+ccl_device_noinline void svm_node_vertex_color_bump_dx(ccl_global const KernelGlobals *kg,
+                                                       ccl_private ShaderData *sd,
+                                                       ccl_private float *stack,
                                                        uint layer_id,
                                                        uint color_offset,
                                                        uint alpha_offset)
@@ -56,9 +56,9 @@ ccl_device_noinline void svm_node_vertex_color_bump_dx(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline void svm_node_vertex_color_bump_dy(const KernelGlobals *kg,
-                                                       ShaderData *sd,
-                                                       float *stack,
+ccl_device_noinline void svm_node_vertex_color_bump_dy(ccl_global const KernelGlobals *kg,
+                                                       ccl_private ShaderData *sd,
+                                                       ccl_private float *stack,
                                                        uint layer_id,
                                                        uint color_offset,
                                                        uint alpha_offset)
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index b1d2eff7f37..e7112087e17 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -46,9 +46,9 @@ ccl_device void voronoi_f1_1d(float w,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float *outW)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float *outW)
 {
   float cellPosition = floorf(w);
   float localPosition = w - cellPosition;
@@ -76,9 +76,9 @@ ccl_device void voronoi_smooth_f1_1d(float w,
                                      float exponent,
                                      float randomness,
                                      NodeVoronoiDistanceMetric metric,
-                                     float *outDistance,
-                                     float3 *outColor,
-                                     float *outW)
+                                     ccl_private float *outDistance,
+                                     ccl_private float3 *outColor,
+                                     ccl_private float *outW)
 {
   float cellPosition = floorf(w);
   float localPosition = w - cellPosition;
@@ -108,9 +108,9 @@ ccl_device void voronoi_f2_1d(float w,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float *outW)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float *outW)
 {
   float cellPosition = floorf(w);
   float localPosition = w - cellPosition;
@@ -144,7 +144,9 @@ ccl_device void voronoi_f2_1d(float w,
   *outW = positionF2 + cellPosition;
 }
 
-ccl_device void voronoi_distance_to_edge_1d(float w, float randomness, float *outDistance)
+ccl_device void voronoi_distance_to_edge_1d(float w,
+                                            float randomness,
+                                            ccl_private float *outDistance)
 {
   float cellPosition = floorf(w);
   float localPosition = w - cellPosition;
@@ -158,7 +160,7 @@ ccl_device void voronoi_distance_to_edge_1d(float w, float randomness, float *ou
   *outDistance = min(distanceToMidLeft, distanceToMidRight);
 }
 
-ccl_device void voronoi_n_sphere_radius_1d(float w, float randomness, float *outRadius)
+ccl_device void voronoi_n_sphere_radius_1d(float w, float randomness, ccl_private float *outRadius)
 {
   float cellPosition = floorf(w);
   float localPosition = w - cellPosition;
@@ -223,9 +225,9 @@ ccl_device void voronoi_f1_2d(float2 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float2 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float2 *outPosition)
 {
   float2 cellPosition = floor(coord);
   float2 localPosition = coord - cellPosition;
@@ -256,9 +258,9 @@ ccl_device void voronoi_smooth_f1_2d(float2 coord,
                                      float exponent,
                                      float randomness,
                                      NodeVoronoiDistanceMetric metric,
-                                     float *outDistance,
-                                     float3 *outColor,
-                                     float2 *outPosition)
+                                     ccl_private float *outDistance,
+                                     ccl_private float3 *outColor,
+                                     ccl_private float2 *outPosition)
 {
   float2 cellPosition = floor(coord);
   float2 localPosition = coord - cellPosition;
@@ -291,9 +293,9 @@ ccl_device void voronoi_f2_2d(float2 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float2 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float2 *outPosition)
 {
   float2 cellPosition = floor(coord);
   float2 localPosition = coord - cellPosition;
@@ -330,7 +332,9 @@ ccl_device void voronoi_f2_2d(float2 coord,
   *outPosition = positionF2 + cellPosition;
 }
 
-ccl_device void voronoi_distance_to_edge_2d(float2 coord, float randomness, float *outDistance)
+ccl_device void voronoi_distance_to_edge_2d(float2 coord,
+                                            float randomness,
+                                            ccl_private float *outDistance)
 {
   float2 cellPosition = floor(coord);
   float2 localPosition = coord - cellPosition;
@@ -369,7 +373,9 @@ ccl_device void voronoi_distance_to_edge_2d(float2 coord, float randomness, floa
   *outDistance = minDistance;
 }
 
-ccl_device void voronoi_n_sphere_radius_2d(float2 coord, float randomness, float *outRadius)
+ccl_device void voronoi_n_sphere_radius_2d(float2 coord,
+                                           float randomness,
+                                           ccl_private float *outRadius)
 {
   float2 cellPosition = floor(coord);
   float2 localPosition = coord - cellPosition;
@@ -441,9 +447,9 @@ ccl_device void voronoi_f1_3d(float3 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float3 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float3 *outPosition)
 {
   float3 cellPosition = floor(coord);
   float3 localPosition = coord - cellPosition;
@@ -477,9 +483,9 @@ ccl_device void voronoi_smooth_f1_3d(float3 coord,
                                      float exponent,
                                      float randomness,
                                      NodeVoronoiDistanceMetric metric,
-                                     float *outDistance,
-                                     float3 *outColor,
-                                     float3 *outPosition)
+                                     ccl_private float *outDistance,
+                                     ccl_private float3 *outColor,
+                                     ccl_private float3 *outPosition)
 {
   float3 cellPosition = floor(coord);
   float3 localPosition = coord - cellPosition;
@@ -515,9 +521,9 @@ ccl_device void voronoi_f2_3d(float3 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float3 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float3 *outPosition)
 {
   float3 cellPosition = floor(coord);
   float3 localPosition = coord - cellPosition;
@@ -557,7 +563,9 @@ ccl_device void voronoi_f2_3d(float3 coord,
   *outPosition = positionF2 + cellPosition;
 }
 
-ccl_device void voronoi_distance_to_edge_3d(float3 coord, float randomness, float *outDistance)
+ccl_device void voronoi_distance_to_edge_3d(float3 coord,
+                                            float randomness,
+                                            ccl_private float *outDistance)
 {
   float3 cellPosition = floor(coord);
   float3 localPosition = coord - cellPosition;
@@ -600,7 +608,9 @@ ccl_device void voronoi_distance_to_edge_3d(float3 coord, float randomness, floa
   *outDistance = minDistance;
 }
 
-ccl_device void voronoi_n_sphere_radius_3d(float3 coord, float randomness, float *outRadius)
+ccl_device void voronoi_n_sphere_radius_3d(float3 coord,
+                                           float randomness,
+                                           ccl_private float *outRadius)
 {
   float3 cellPosition = floor(coord);
   float3 localPosition = coord - cellPosition;
@@ -676,9 +686,9 @@ ccl_device void voronoi_f1_4d(float4 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float4 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float4 *outPosition)
 {
   float4 cellPosition = floor(coord);
   float4 localPosition = coord - cellPosition;
@@ -715,9 +725,9 @@ ccl_device void voronoi_smooth_f1_4d(float4 coord,
                                      float exponent,
                                      float randomness,
                                      NodeVoronoiDistanceMetric metric,
-                                     float *outDistance,
-                                     float3 *outColor,
-                                     float4 *outPosition)
+                                     ccl_private float *outDistance,
+                                     ccl_private float3 *outColor,
+                                     ccl_private float4 *outPosition)
 {
   float4 cellPosition = floor(coord);
   float4 localPosition = coord - cellPosition;
@@ -756,9 +766,9 @@ ccl_device void voronoi_f2_4d(float4 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float4 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float4 *outPosition)
 {
   float4 cellPosition = floor(coord);
   float4 localPosition = coord - cellPosition;
@@ -801,7 +811,9 @@ ccl_device void voronoi_f2_4d(float4 coord,
   *outPosition = positionF2 + cellPosition;
 }
 
-ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, float *outDistance)
+ccl_device void voronoi_distance_to_edge_4d(float4 coord,
+                                            float randomness,
+                                            ccl_private float *outDistance)
 {
   float4 cellPosition = floor(coord);
   float4 localPosition = coord - cellPosition;
@@ -850,7 +862,9 @@ ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, floa
   *outDistance = minDistance;
 }
 
-ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float *outRadius)
+ccl_device void voronoi_n_sphere_radius_4d(float4 coord,
+                                           float randomness,
+                                           ccl_private float *outRadius)
 {
   float4 cellPosition = floor(coord);
   float4 localPosition = coord - cellPosition;
@@ -903,9 +917,9 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float
 }
 
 template<uint node_feature_mask>
-ccl_device_noinline int svm_node_tex_voronoi(const KernelGlobals *kg,
-                                             ShaderData *sd,
-                                             float *stack,
+ccl_device_noinline int svm_node_tex_voronoi(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             ccl_private float *stack,
                                              uint dimensions,
                                              uint feature,
                                              uint metric,
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 78b75405356..764fb71ba72 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -19,8 +19,11 @@ CCL_NAMESPACE_BEGIN
 /* TODO(sergey): Think of making it more generic volume-type attribute
  * sampler.
  */
-ccl_device_noinline int svm_node_tex_voxel(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_voxel(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
+                                           uint4 node,
+                                           int offset)
 {
   uint co_offset, density_out_offset, color_out_offset, space;
   svm_unpack_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index 00f980c16df..1ac130e2006 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -82,8 +82,11 @@ ccl_device_noinline_cpu float svm_wave(NodeWaveType type,
   }
 }
 
-ccl_device_noinline int svm_node_tex_wave(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_wave(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
+                                          uint4 node,
+                                          int offset)
 {
   uint4 node2 = read_node(kg, &offset);
   uint4 node3 = read_node(kg, &offset);
diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h
index aa291fd2741..e891744f276 100644
--- a/intern/cycles/kernel/svm/svm_wavelength.h
+++ b/intern/cycles/kernel/svm/svm_wavelength.h
@@ -34,8 +34,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Wavelength to RGB */
 
-ccl_device_noinline void svm_node_wavelength(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out)
+ccl_device_noinline void svm_node_wavelength(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             ccl_private float *stack,
+                                             uint wavelength,
+                                             uint color_out)
 {
   // CIE colour matching functions xBar, yBar, and zBar for
   //   wavelengths from 380 through 780 nanometers, every 5
diff --git a/intern/cycles/kernel/svm/svm_white_noise.h b/intern/cycles/kernel/svm/svm_white_noise.h
index 0306d2e7b9c..ccc49bf1a7c 100644
--- a/intern/cycles/kernel/svm/svm_white_noise.h
+++ b/intern/cycles/kernel/svm/svm_white_noise.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_tex_white_noise(const KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  float *stack,
+ccl_device_noinline void svm_node_tex_white_noise(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
                                                   uint dimensions,
                                                   uint inputs_stack_offsets,
                                                   uint ouptuts_stack_offsets)
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 7ec913789d2..70d1211aa4a 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -34,8 +34,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Wireframe Node */
 
-ccl_device_inline float wireframe(
-    const KernelGlobals *kg, ShaderData *sd, float size, int pixel_size, float3 *P)
+ccl_device_inline float wireframe(ccl_global const KernelGlobals *kg,
+                                  ccl_private ShaderData *sd,
+                                  float size,
+                                  int pixel_size,
+                                  ccl_private float3 *P)
 {
 #ifdef __HAIR__
   if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE)
@@ -88,9 +91,9 @@ ccl_device_inline float wireframe(
   return 0.0f;
 }
 
-ccl_device_noinline void svm_node_wireframe(const KernelGlobals *kg,
-                                            ShaderData *sd,
-                                            float *stack,
+ccl_device_noinline void svm_node_wireframe(ccl_global const KernelGlobals *kg,
+                                            ccl_private ShaderData *sd,
+                                            ccl_private float *stack,
                                             uint4 node)
 {
   uint in_size = node.y;
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
index 7b67b90e44d..361c36d9061 100644
--- a/intern/cycles/util/util_color.h
+++ b/intern/cycles/util/util_color.h
@@ -277,7 +277,7 @@ ccl_device float4 color_srgb_to_linear_v4(float4 c)
 #endif
 }
 
-ccl_device float3 color_highlight_compress(float3 color, float3 *variance)
+ccl_device float3 color_highlight_compress(float3 color, ccl_private float3 *variance)
 {
   color += one_float3();
   if (variance) {
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index f36a492a1b0..81723abe1e2 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -61,7 +61,7 @@ struct half4 {
 
 #if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
 
-ccl_device_inline void float4_store_half(half *h, float4 f)
+ccl_device_inline void float4_store_half(ccl_private half *h, float4 f)
 {
   h[0] = __float2half(f.x);
   h[1] = __float2half(f.y);
@@ -71,7 +71,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f)
 
 #else
 
-ccl_device_inline void float4_store_half(half *h, float4 f)
+ccl_device_inline void float4_store_half(ccl_private half *h, float4 f)
 {
 
 #  ifndef __KERNEL_SSE2__
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index cb1e94c838c..f834011a032 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -338,7 +338,7 @@ ccl_device_inline int quick_floor_to_int(float x)
   return float_to_int(x) - ((x < 0) ? 1 : 0);
 }
 
-ccl_device_inline float floorfrac(float x, int *i)
+ccl_device_inline float floorfrac(float x, ccl_private int *i)
 {
   *i = quick_floor_to_int(x);
   return x - *i;
@@ -465,14 +465,18 @@ template<class A, class B> A lerp(const A &a, const A &b, const B &t)
 
 /* Triangle */
 
-ccl_device_inline float triangle_area(const float3 &v1, const float3 &v2, const float3 &v3)
+ccl_device_inline float triangle_area(ccl_private const float3 &v1,
+                                      ccl_private const float3 &v2,
+                                      ccl_private const float3 &v3)
 {
   return len(cross(v3 - v2, v1 - v2)) * 0.5f;
 }
 
 /* Orthonormal vectors */
 
-ccl_device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b)
+ccl_device_inline void make_orthonormals(const float3 N,
+                                         ccl_private float3 *a,
+                                         ccl_private float3 *b)
 {
 #if 0
   if (fabsf(N.y) >= 0.999f) {
diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h
index 38afa163db5..cc924f36a71 100644
--- a/intern/cycles/util/util_math_fast.h
+++ b/intern/cycles/util/util_math_fast.h
@@ -156,7 +156,7 @@ ccl_device float fast_cosf(float x)
   return u;
 }
 
-ccl_device void fast_sincosf(float x, float *sine, float *cosine)
+ccl_device void fast_sincosf(float x, ccl_private float *sine, ccl_private float *cosine)
 {
   /* Same argument reduction as fast_sin. */
   int q = fast_rint(x * M_1_PI_F);
diff --git a/intern/cycles/util/util_math_float2.h b/intern/cycles/util/util_math_float2.h
index 70b80c33544..25eda840214 100644
--- a/intern/cycles/util/util_math_float2.h
+++ b/intern/cycles/util/util_math_float2.h
@@ -207,7 +207,7 @@ ccl_device_inline float2 normalize(const float2 &a)
   return a / len(a);
 }
 
-ccl_device_inline float2 normalize_len(const float2 &a, float *t)
+ccl_device_inline float2 normalize_len(const float2 &a, ccl_private float *t)
 {
   *t = len(a);
   return a / (*t);
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index 30a1b4c3f77..c3230a8068c 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -411,7 +411,7 @@ ccl_device_inline float3 saturate3(float3 a)
   return make_float3(saturate(a.x), saturate(a.y), saturate(a.z));
 }
 
-ccl_device_inline float3 normalize_len(const float3 a, float *t)
+ccl_device_inline float3 normalize_len(const float3 a, ccl_private float *t)
 {
   *t = len(a);
   float x = 1.0f / *t;
@@ -424,7 +424,7 @@ ccl_device_inline float3 safe_normalize(const float3 a)
   return (t != 0.0f) ? a * (1.0f / t) : a;
 }
 
-ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
+ccl_device_inline float3 safe_normalize_len(const float3 a, ccl_private float *t)
 {
   *t = len(a);
   return (*t != 0.0f) ? a / (*t) : a;
diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h
index 19af5c8c638..f30a78cfc69 100644
--- a/intern/cycles/util/util_math_float4.h
+++ b/intern/cycles/util/util_math_float4.h
@@ -497,7 +497,7 @@ ccl_device_inline float4 reduce_max(const float4 &a)
 #  endif
 }
 
-ccl_device_inline float4 load_float4(const float *v)
+ccl_device_inline float4 load_float4(ccl_private const float *v)
 {
 #  ifdef __KERNEL_SSE__
   return float4(_mm_loadu_ps(v));
diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h
index fd0c9124345..0c431a36afb 100644
--- a/intern/cycles/util/util_math_intersect.h
+++ b/intern/cycles/util/util_math_intersect.h
@@ -26,8 +26,8 @@ ccl_device bool ray_sphere_intersect(float3 ray_P,
                                      float ray_t,
                                      float3 sphere_P,
                                      float sphere_radius,
-                                     float3 *isect_P,
-                                     float *isect_t)
+                                     ccl_private float3 *isect_P,
+                                     ccl_private float *isect_t)
 {
   const float3 d = sphere_P - ray_P;
   const float radiussq = sphere_radius * sphere_radius;
@@ -60,8 +60,8 @@ ccl_device bool ray_aligned_disk_intersect(float3 ray_P,
                                            float ray_t,
                                            float3 disk_P,
                                            float disk_radius,
-                                           float3 *isect_P,
-                                           float *isect_t)
+                                           ccl_private float3 *isect_P,
+                                           ccl_private float *isect_t)
 {
   /* Aligned disk normal. */
   float disk_t;
@@ -95,9 +95,9 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
                                                    const float3 tri_b,
                                                    const float3 tri_c,
 #endif
-                                                   float *isect_u,
-                                                   float *isect_v,
-                                                   float *isect_t)
+                                                   ccl_private float *isect_u,
+                                                   ccl_private float *isect_v,
+                                                   ccl_private float *isect_t)
 {
 #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
   typedef ssef float3;
@@ -207,10 +207,10 @@ ccl_device bool ray_quad_intersect(float3 ray_P,
                                    float3 quad_u,
                                    float3 quad_v,
                                    float3 quad_n,
-                                   float3 *isect_P,
-                                   float *isect_t,
-                                   float *isect_u,
-                                   float *isect_v,
+                                   ccl_private float3 *isect_P,
+                                   ccl_private float *isect_t,
+                                   ccl_private float *isect_u,
+                                   ccl_private float *isect_v,
                                    bool ellipse)
 {
   /* Perform intersection test. */
diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h
index 123736f75a6..bff7ddb4cee 100644
--- a/intern/cycles/util/util_math_matrix.h
+++ b/intern/cycles/util/util_math_matrix.h
@@ -35,14 +35,14 @@ CCL_NAMESPACE_BEGIN
 
 /* Zeroing helpers. */
 
-ccl_device_inline void math_vector_zero(float *v, int n)
+ccl_device_inline void math_vector_zero(ccl_private float *v, int n)
 {
   for (int i = 0; i < n; i++) {
     v[i] = 0.0f;
   }
 }
 
-ccl_device_inline void math_matrix_zero(float *A, int n)
+ccl_device_inline void math_matrix_zero(ccl_private float *A, int n)
 {
   for (int row = 0; row < n; row++) {
     for (int col = 0; col <= row; col++) {
@@ -53,14 +53,18 @@ ccl_device_inline void math_matrix_zero(float *A, int n)
 
 /* Elementary vector operations. */
 
-ccl_device_inline void math_vector_add(float *a, const float *ccl_restrict b, int n)
+ccl_device_inline void math_vector_add(ccl_private float *a,
+                                       ccl_private const float *ccl_restrict b,
+                                       int n)
 {
   for (int i = 0; i < n; i++) {
     a[i] += b[i];
   }
 }
 
-ccl_device_inline void math_vector_mul(float *a, const float *ccl_restrict b, int n)
+ccl_device_inline void math_vector_mul(ccl_private float *a,
+                                       ccl_private const float *ccl_restrict b,
+                                       int n)
 {
   for (int i = 0; i < n; i++) {
     a[i] *= b[i];
@@ -68,7 +72,7 @@ ccl_device_inline void math_vector_mul(float *a, const float *ccl_restrict b, in
 }
 
 ccl_device_inline void math_vector_mul_strided(ccl_global float *a,
-                                               const float *ccl_restrict b,
+                                               ccl_private const float *ccl_restrict b,
                                                int astride,
                                                int n)
 {
@@ -77,21 +81,23 @@ ccl_device_inline void math_vector_mul_strided(ccl_global float *a,
   }
 }
 
-ccl_device_inline void math_vector_scale(float *a, float b, int n)
+ccl_device_inline void math_vector_scale(ccl_private float *a, float b, int n)
 {
   for (int i = 0; i < n; i++) {
     a[i] *= b;
   }
 }
 
-ccl_device_inline void math_vector_max(float *a, const float *ccl_restrict b, int n)
+ccl_device_inline void math_vector_max(ccl_private float *a,
+                                       ccl_private const float *ccl_restrict b,
+                                       int n)
 {
   for (int i = 0; i < n; i++) {
     a[i] = max(a[i], b[i]);
   }
 }
 
-ccl_device_inline void math_vec3_add(float3 *v, int n, float *x, float3 w)
+ccl_device_inline void math_vec3_add(ccl_private float3 *v, int n, ccl_private float *x, float3 w)
 {
   for (int i = 0; i < n; i++) {
     v[i] += w * x[i];
@@ -99,7 +105,7 @@ ccl_device_inline void math_vec3_add(float3 *v, int n, float *x, float3 w)
 }
 
 ccl_device_inline void math_vec3_add_strided(
-    ccl_global float3 *v, int n, float *x, float3 w, int stride)
+    ccl_global float3 *v, int n, ccl_private float *x, float3 w, int stride)
 {
   for (int i = 0; i < n; i++) {
     ccl_global float *elem = (ccl_global float *)(v + i * stride);
@@ -125,9 +131,9 @@ ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A,
 
 /* Add Gramian matrix of v to A.
  * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */
-ccl_device_inline void math_matrix_add_gramian(float *A,
+ccl_device_inline void math_matrix_add_gramian(ccl_private float *A,
                                                int n,
-                                               const float *ccl_restrict v,
+                                               ccl_private const float *ccl_restrict v,
                                                float weight)
 {
   for (int row = 0; row < n; row++) {
@@ -140,7 +146,7 @@ ccl_device_inline void math_matrix_add_gramian(float *A,
 /* Add Gramian matrix of v to A.
  * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */
 ccl_device_inline void math_trimatrix_add_gramian_strided(
-    ccl_global float *A, int n, const float *ccl_restrict v, float weight, int stride)
+    ccl_global float *A, int n, ccl_private const float *ccl_restrict v, float weight, int stride)
 {
   for (int row = 0; row < n; row++) {
     for (int col = 0; col <= row; col++) {
@@ -151,7 +157,7 @@ ccl_device_inline void math_trimatrix_add_gramian_strided(
 
 ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A,
                                                   int n,
-                                                  const float *ccl_restrict v,
+                                                  ccl_private const float *ccl_restrict v,
                                                   float weight)
 {
   for (int row = 0; row < n; row++) {
@@ -244,7 +250,7 @@ ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A,
  * and V will contain the eigenvectors of the original A in its rows (!),
  * so that A = V^T*D*V. Therefore, the diagonal elements of D are the (sorted) eigenvalues of A.
  */
-ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
+ccl_device void math_matrix_jacobi_eigendecomposition(ccl_private float *A,
                                                       ccl_global float *V,
                                                       int n,
                                                       int v_stride)
diff --git a/intern/cycles/util/util_projection.h b/intern/cycles/util/util_projection.h
index 9c7e0061c82..04b4574d75b 100644
--- a/intern/cycles/util/util_projection.h
+++ b/intern/cycles/util/util_projection.h
@@ -45,7 +45,8 @@ typedef struct PerspectiveMotionTransform {
 
 /* Functions */
 
-ccl_device_inline float3 transform_perspective(const ProjectionTransform *t, const float3 a)
+ccl_device_inline float3 transform_perspective(ccl_private const ProjectionTransform *t,
+                                               const float3 a)
 {
   float4 b = make_float4(a.x, a.y, a.z, 1.0f);
   float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b));
@@ -54,7 +55,7 @@ ccl_device_inline float3 transform_perspective(const ProjectionTransform *t, con
   return (w != 0.0f) ? c / w : zero_float3();
 }
 
-ccl_device_inline float3 transform_perspective_direction(const ProjectionTransform *t,
+ccl_device_inline float3 transform_perspective_direction(ccl_private const ProjectionTransform *t,
                                                          const float3 a)
 {
   float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z,
diff --git a/intern/cycles/util/util_rect.h b/intern/cycles/util/util_rect.h
index 36f02a01f7b..32df9327cbd 100644
--- a/intern/cycles/util/util_rect.h
+++ b/intern/cycles/util/util_rect.h
@@ -54,7 +54,10 @@ ccl_device_inline int coord_to_local_index(int4 rect, int x, int y)
 
 /* Finds the coordinates of a pixel given by its row-major index in the rect,
  * and returns whether the pixel is inside it. */
-ccl_device_inline bool local_index_to_coord(int4 rect, int idx, int *x, int *y)
+ccl_device_inline bool local_index_to_coord(int4 rect,
+                                            int idx,
+                                            ccl_private int *x,
+                                            ccl_private int *y)
 {
   int w = rect.z - rect.x;
   *x = (idx % w) + rect.x;
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index e9cd3b0b483..fc04f9aab46 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -53,7 +53,7 @@ typedef struct DecomposedTransform {
 
 /* Functions */
 
-ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
+ccl_device_inline float3 transform_point(ccl_private const Transform *t, const float3 a)
 {
   /* TODO(sergey): Disabled for now, causes crashes in certain cases. */
 #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
@@ -82,7 +82,7 @@ ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
 #endif
 }
 
-ccl_device_inline float3 transform_direction(const Transform *t, const float3 a)
+ccl_device_inline float3 transform_direction(ccl_private const Transform *t, const float3 a)
 {
 #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
   ssef x, y, z, w, aa;
@@ -108,7 +108,8 @@ ccl_device_inline float3 transform_direction(const Transform *t, const float3 a)
 #endif
 }
 
-ccl_device_inline float3 transform_direction_transposed(const Transform *t, const float3 a)
+ccl_device_inline float3 transform_direction_transposed(ccl_private const Transform *t,
+                                                        const float3 a)
 {
   float3 x = make_float3(t->x.x, t->y.x, t->z.x);
   float3 y = make_float3(t->x.y, t->y.y, t->z.y);
@@ -409,7 +410,8 @@ ccl_device_inline Transform transform_quick_inverse(Transform M)
   return R;
 }
 
-ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransform *decomp)
+ccl_device_inline void transform_compose(ccl_private Transform *tfm,
+                                         ccl_private const DecomposedTransform *decomp)
 {
   /* rotation */
   float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc;
@@ -449,7 +451,7 @@ ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransfo
 
 /* Interpolate from array of decomposed transforms. */
 ccl_device void transform_motion_array_interpolate(Transform *tfm,
-                                                   const ccl_global DecomposedTransform *motion,
+                                                   const DecomposedTransform *motion,
                                                    uint numsteps,
                                                    float time)
 {
@@ -458,8 +460,8 @@ ccl_device void transform_motion_array_interpolate(Transform *tfm,
   int step = min((int)(time * maxstep), maxstep - 1);
   float t = time * maxstep - step;
 
-  const ccl_global DecomposedTransform *a = motion + step;
-  const ccl_global DecomposedTransform *b = motion + step + 1;
+  const DecomposedTransform *a = motion + step;
+  const DecomposedTransform *b = motion + step + 1;
 
   /* Interpolate rotation, translation and scale. */
   DecomposedTransform decomp;
@@ -472,12 +474,12 @@ ccl_device void transform_motion_array_interpolate(Transform *tfm,
   transform_compose(tfm, &decomp);
 }
 
-ccl_device_inline bool transform_isfinite_safe(Transform *tfm)
+ccl_device_inline bool transform_isfinite_safe(ccl_private Transform *tfm)
 {
   return isfinite4_safe(tfm->x) && isfinite4_safe(tfm->y) && isfinite4_safe(tfm->z);
 }
 
-ccl_device_inline bool transform_decomposed_isfinite_safe(DecomposedTransform *decomp)
+ccl_device_inline bool transform_decomposed_isfinite_safe(ccl_private DecomposedTransform *decomp)
 {
   return isfinite4_safe(decomp->x) && isfinite4_safe(decomp->y) && isfinite4_safe(decomp->z) &&
          isfinite4_safe(decomp->w);
author	Michael Jones <michael_p_jones@apple.com>	2021-10-14 15:53:40 +0300
committer	Michael Jones <michael_p_jones@apple.com>	2021-10-14 18:14:43 +0300
commit	a0f269f682dab848afc80cd322d04a0c4a815cae (patch)
tree	0978b1888273fbaa2d14550bde484c5247fa89ff /intern/cycles
parent	47caeb8c26686e24ea7e694f94fabee44f3d2dca (diff)