148 files changed, 2146 insertions, 1648 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 0b44cc5db34..8f6dcd0adb9 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -139,7 +139,7 @@ CCL_NAMESPACE_BEGIN
 
 #endif /* __KERNEL_OPTIX__ */
 
-ccl_device_inline bool scene_intersect_valid(const Ray *ray)
+ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
 {
   /* NOTE: Due to some vectorization code  non-finite origin point might
    * cause lots of false-positive intersections which will overflow traversal
@@ -154,10 +154,10 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray)
   return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
 }
 
-ccl_device_intersect bool scene_intersect(const KernelGlobals *kg,
-                                          const Ray *ray,
+ccl_device_intersect bool scene_intersect(ccl_global const KernelGlobals *kg,
+                                          ccl_private const Ray *ray,
                                           const uint visibility,
-                                          Intersection *isect)
+                                          ccl_private Intersection *isect)
 {
 #ifdef __KERNEL_OPTIX__
   uint p0 = 0;
@@ -248,11 +248,11 @@ ccl_device_intersect bool scene_intersect(const KernelGlobals *kg,
 }
 
 #ifdef __BVH_LOCAL__
-ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
-                                                const Ray *ray,
-                                                LocalIntersection *local_isect,
+ccl_device_intersect bool scene_intersect_local(ccl_global const KernelGlobals *kg,
+                                                ccl_private const Ray *ray,
+                                                ccl_private LocalIntersection *local_isect,
                                                 int local_object,
-                                                uint *lcg_state,
+                                                ccl_private uint *lcg_state,
                                                 int max_hits)
 {
 #  ifdef __KERNEL_OPTIX__
@@ -360,12 +360,12 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
 #endif
 
 #ifdef __SHADOW_RECORD_ALL__
-ccl_device_intersect bool scene_intersect_shadow_all(const KernelGlobals *kg,
-                                                     const Ray *ray,
-                                                     Intersection *isect,
+ccl_device_intersect bool scene_intersect_shadow_all(ccl_global const KernelGlobals *kg,
+                                                     ccl_private const Ray *ray,
+                                                     ccl_private Intersection *isect,
                                                      uint visibility,
                                                      uint max_hits,
-                                                     uint *num_hits)
+                                                     ccl_private uint *num_hits)
 {
 #  ifdef __KERNEL_OPTIX__
   uint p0 = ((uint64_t)isect) & 0xFFFFFFFF;
@@ -445,9 +445,9 @@ ccl_device_intersect bool scene_intersect_shadow_all(const KernelGlobals *kg,
 #endif /* __SHADOW_RECORD_ALL__ */
 
 #ifdef __VOLUME__
-ccl_device_intersect bool scene_intersect_volume(const KernelGlobals *kg,
-                                                 const Ray *ray,
-                                                 Intersection *isect,
+ccl_device_intersect bool scene_intersect_volume(ccl_global const KernelGlobals *kg,
+                                                 ccl_private const Ray *ray,
+                                                 ccl_private Intersection *isect,
                                                  const uint visibility)
 {
 #  ifdef __KERNEL_OPTIX__
@@ -507,9 +507,9 @@ ccl_device_intersect bool scene_intersect_volume(const KernelGlobals *kg,
 #endif /* __VOLUME__ */
 
 #ifdef __VOLUME_RECORD_ALL__
-ccl_device_intersect uint scene_intersect_volume_all(const KernelGlobals *kg,
-                                                     const Ray *ray,
-                                                     Intersection *isect,
+ccl_device_intersect uint scene_intersect_volume_all(ccl_global const KernelGlobals *kg,
+                                                     ccl_private const Ray *ray,
+                                                     ccl_private Intersection *isect,
                                                      const uint max_hits,
                                                      const uint visibility)
 {
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 90b9f410b29..78ad4a34da9 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -36,11 +36,11 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
-                                     LocalIntersection *local_isect,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
+                                     ccl_private LocalIntersection *local_isect,
                                      int local_object,
-                                     uint *lcg_state,
+                                     ccl_private uint *lcg_state,
                                      int max_hits)
 {
   /* todo:
@@ -196,11 +196,11 @@ ccl_device_inline
   return false;
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         LocalIntersection *local_isect,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private LocalIntersection *local_isect,
                                          int local_object,
-                                         uint *lcg_state,
+                                         ccl_private uint *lcg_state,
                                          int max_hits)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index 15cd0f22213..49b37f39671 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -16,7 +16,7 @@
 
 // TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and
 // 3-vector which might be faster.
-ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(const KernelGlobals *kg,
+ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(ccl_global const KernelGlobals *kg,
                                                                 int node_addr,
                                                                 int child)
 {
@@ -28,7 +28,7 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(const KernelGlob
   return space;
 }
 
-ccl_device_forceinline int bvh_aligned_node_intersect(const KernelGlobals *kg,
+ccl_device_forceinline int bvh_aligned_node_intersect(ccl_global const KernelGlobals *kg,
                                                       const float3 P,
                                                       const float3 idir,
                                                       const float t,
@@ -76,7 +76,7 @@ ccl_device_forceinline int bvh_aligned_node_intersect(const KernelGlobals *kg,
 #endif
 }
 
-ccl_device_forceinline bool bvh_unaligned_node_intersect_child(const KernelGlobals *kg,
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child(ccl_global const KernelGlobals *kg,
                                                                const float3 P,
                                                                const float3 dir,
                                                                const float t,
@@ -102,7 +102,7 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(const KernelGloba
   return tnear <= tfar;
 }
 
-ccl_device_forceinline int bvh_unaligned_node_intersect(const KernelGlobals *kg,
+ccl_device_forceinline int bvh_unaligned_node_intersect(ccl_global const KernelGlobals *kg,
                                                         const float3 P,
                                                         const float3 dir,
                                                         const float3 idir,
@@ -134,7 +134,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(const KernelGlobals *kg,
   return mask;
 }
 
-ccl_device_forceinline int bvh_node_intersect(const KernelGlobals *kg,
+ccl_device_forceinline int bvh_node_intersect(ccl_global const KernelGlobals *kg,
                                               const float3 P,
                                               const float3 dir,
                                               const float3 idir,
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index 82c7c1a8a6c..c67c820edbc 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -36,12 +36,12 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
-                                     Intersection *isect_array,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
+                                     ccl_private Intersection *isect_array,
                                      const uint visibility,
                                      const uint max_hits,
-                                     uint *num_hits)
+                                     ccl_private uint *num_hits)
 {
   /* todo:
    * - likely and unlikely for if() statements
@@ -71,7 +71,7 @@ ccl_device_inline
   float t_world_to_instance = 1.0f;
 
   *num_hits = 0;
-  Intersection *isect = isect_array;
+  ccl_private Intersection *isect = isect_array;
 
   /* traversal loop */
   do {
@@ -284,12 +284,12 @@ ccl_device_inline
   return false;
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         Intersection *isect_array,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private Intersection *isect_array,
                                          const uint visibility,
                                          const uint max_hits,
-                                         uint *num_hits)
+                                         ccl_private uint *num_hits)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
 }
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 2feff593c10..a46c45d3529 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -31,9 +31,9 @@
  * BVH_MOTION: motion blur rendering
  */
 
-ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                                     const Ray *ray,
-                                                     Intersection *isect,
+ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                                     ccl_private const Ray *ray,
+                                                     ccl_private Intersection *isect,
                                                      const uint visibility)
 {
   /* todo:
@@ -226,9 +226,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
   return (isect->prim != PRIM_NONE);
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         Intersection *isect,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private Intersection *isect,
                                          const uint visibility)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h
index d143fe4aeab..fb546f568f3 100644
--- a/intern/cycles/kernel/bvh/bvh_util.h
+++ b/intern/cycles/kernel/bvh/bvh_util.h
@@ -88,7 +88,7 @@ ccl_device int intersections_compare(const void *a, const void *b)
 #endif
 
 #if defined(__SHADOW_RECORD_ALL__)
-ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
+ccl_device_inline void sort_intersections(ccl_private Intersection *hits, uint num_hits)
 {
   kernel_assert(num_hits > 0);
 
@@ -115,8 +115,8 @@ ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
 
 /* For subsurface scattering, only sorting a small amount of intersections
  * so bubble sort is fine for CPU and GPU. */
-ccl_device_inline void sort_intersections_and_normals(Intersection *hits,
-                                                      float3 *Ng,
+ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *hits,
+                                                      ccl_private float3 *Ng,
                                                       uint num_hits)
 {
   bool swapped;
@@ -139,8 +139,9 @@ ccl_device_inline void sort_intersections_and_normals(Intersection *hits,
 
 /* Utility to quickly get flags from an intersection. */
 
-ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *ccl_restrict kg,
-                                                         const Intersection *ccl_restrict isect)
+ccl_device_forceinline int intersection_get_shader_flags(
+    ccl_global const KernelGlobals *ccl_restrict kg,
+    ccl_private const Intersection *ccl_restrict isect)
 {
   const int prim = isect->prim;
   int shader = 0;
@@ -161,7 +162,7 @@ ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *cc
 }
 
 ccl_device_forceinline int intersection_get_shader_from_isect_prim(
-    const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type)
+    ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type)
 {
   int shader = 0;
 
@@ -180,14 +181,16 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim(
   return shader & SHADER_MASK;
 }
 
-ccl_device_forceinline int intersection_get_shader(const KernelGlobals *ccl_restrict kg,
-                                                   const Intersection *ccl_restrict isect)
+ccl_device_forceinline int intersection_get_shader(ccl_global const KernelGlobals *ccl_restrict kg,
+                                                   ccl_private const Intersection *ccl_restrict
+                                                       isect)
 {
   return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type);
 }
 
-ccl_device_forceinline int intersection_get_object_flags(const KernelGlobals *ccl_restrict kg,
-                                                         const Intersection *ccl_restrict isect)
+ccl_device_forceinline int intersection_get_object_flags(
+    ccl_global const KernelGlobals *ccl_restrict kg,
+    ccl_private const Intersection *ccl_restrict isect)
 {
   return kernel_tex_fetch(__object_flag, isect->object);
 }
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index 0411d9c522d..d3bfce2d96b 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -35,9 +35,9 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
-                                     Intersection *isect,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
+                                     ccl_private Intersection *isect,
                                      const uint visibility)
 {
   /* todo:
@@ -221,9 +221,9 @@ ccl_device_inline
   return (isect->prim != PRIM_NONE);
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         Intersection *isect,
+ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private Intersection *isect,
                                          const uint visibility)
 {
   return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index 4874270f15d..f0fe95924cf 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -35,8 +35,8 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    uint BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
-                                     const Ray *ray,
-                                     Intersection *isect_array,
+    uint BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+                                     ccl_private const Ray *ray,
+                                     ccl_private Intersection *isect_array,
                                      const uint max_hits,
                                      const uint visibility)
@@ -289,8 +289,8 @@ ccl_device_inline
   return num_hits;
 }
 
-ccl_device_inline uint BVH_FUNCTION_NAME(const KernelGlobals *kg,
-                                         const Ray *ray,
-                                         Intersection *isect_array,
+ccl_device_inline uint BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+                                         ccl_private const Ray *ray,
+                                         ccl_private Intersection *isect_array,
                                          const uint max_hits,
                                          const uint visibility)
diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
index 72a8c2ba090..211eedbddbd 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -18,14 +18,17 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType type, float3 weight)
+ccl_device ccl_private ShaderClosure *closure_alloc(ccl_private ShaderData *sd,
+                                                    int size,
+                                                    ClosureType type,
+                                                    float3 weight)
 {
   kernel_assert(size <= sizeof(ShaderClosure));
 
   if (sd->num_closure_left == 0)
     return NULL;
 
-  ShaderClosure *sc = &sd->closure[sd->num_closure];
+  ccl_private ShaderClosure *sc = &sd->closure[sd->num_closure];
 
   sc->type = type;
   sc->weight = weight;
@@ -36,7 +39,7 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType ty
   return sc;
 }
 
-ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
+ccl_device ccl_private void *closure_alloc_extra(ccl_private ShaderData *sd, int size)
 {
   /* Allocate extra space for closure that need more parameters. We allocate
    * in chunks of sizeof(ShaderClosure) starting from the end of the closure
@@ -54,10 +57,12 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
   }
 
   sd->num_closure_left -= num_extra;
-  return (ccl_addr_space void *)(sd->closure + sd->num_closure + sd->num_closure_left);
+  return (ccl_private void *)(sd->closure + sd->num_closure + sd->num_closure_left);
 }
 
-ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
+ccl_device_inline ccl_private ShaderClosure *bsdf_alloc(ccl_private ShaderData *sd,
+                                                        int size,
+                                                        float3 weight)
 {
   kernel_assert(isfinite3_safe(weight));
 
@@ -66,7 +71,7 @@ ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 wei
   /* Use comparison this way to help dealing with non-finite weight: if the average is not finite
    * we will not allocate new closure. */
   if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) {
-    ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+    ccl_private ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
     if (sc == NULL) {
       return NULL;
     }
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index bb80b9636bb..e115bef3170 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -41,32 +41,32 @@ CCL_NAMESPACE_BEGIN
 
 /* Returns the square of the roughness of the closure if it has roughness,
  * 0 for singular closures and 1 otherwise. */
-ccl_device_inline float bsdf_get_specular_roughness_squared(const ShaderClosure *sc)
+ccl_device_inline float bsdf_get_specular_roughness_squared(ccl_private const ShaderClosure *sc)
 {
   if (CLOSURE_IS_BSDF_SINGULAR(sc->type)) {
     return 0.0f;
   }
 
   if (CLOSURE_IS_BSDF_MICROFACET(sc->type)) {
-    MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+    ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
     return bsdf->alpha_x * bsdf->alpha_y;
   }
 
   return 1.0f;
 }
 
-ccl_device_inline float bsdf_get_roughness_squared(const ShaderClosure *sc)
+ccl_device_inline float bsdf_get_roughness_squared(ccl_private const ShaderClosure *sc)
 {
   /* This version includes diffuse, mainly for baking Principled BSDF
    * where specular and metallic zero otherwise does not bake the
    * specified roughness parameter. */
   if (sc->type == CLOSURE_BSDF_OREN_NAYAR_ID) {
-    OrenNayarBsdf *bsdf = (OrenNayarBsdf *)sc;
+    ccl_private OrenNayarBsdf *bsdf = (ccl_private OrenNayarBsdf *)sc;
     return sqr(sqr(bsdf->roughness));
   }
 
   if (sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) {
-    PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)sc;
+    ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)sc;
     return sqr(sqr(bsdf->roughness));
   }
 
@@ -111,15 +111,15 @@ ccl_device_inline float shift_cos_in(float cos_in, const float frequency_multipl
   return val;
 }
 
-ccl_device_inline int bsdf_sample(const KernelGlobals *kg,
-                                  ShaderData *sd,
-                                  const ShaderClosure *sc,
+ccl_device_inline int bsdf_sample(ccl_global const KernelGlobals *kg,
+                                  ccl_private ShaderData *sd,
+                                  ccl_private const ShaderClosure *sc,
                                   float randu,
                                   float randv,
-                                  float3 *eval,
-                                  float3 *omega_in,
-                                  differential3 *domega_in,
-                                  float *pdf)
+                                  ccl_private float3 *eval,
+                                  ccl_private float3 *omega_in,
+                                  ccl_private differential3 *domega_in,
+                                  ccl_private float *pdf)
 {
   /* For curves use the smooth normal, particularly for ribbons the geometric
    * normal gives too much darkening otherwise. */
@@ -467,12 +467,12 @@ ccl_device
 ccl_device_inline
 #endif
     float3
-    bsdf_eval(const KernelGlobals *kg,
-              ShaderData *sd,
-              const ShaderClosure *sc,
+    bsdf_eval(ccl_global const KernelGlobals *kg,
+              ccl_private ShaderData *sd,
+              ccl_private const ShaderClosure *sc,
               const float3 omega_in,
               const bool is_transmission,
-              float *pdf)
+              ccl_private float *pdf)
 {
   float3 eval = zero_float3();
 
@@ -652,7 +652,9 @@ ccl_device_inline
   return eval;
 }
 
-ccl_device void bsdf_blur(const KernelGlobals *kg, ShaderClosure *sc, float roughness)
+ccl_device void bsdf_blur(ccl_global const KernelGlobals *kg,
+                          ccl_private ShaderClosure *sc,
+                          float roughness)
 {
   /* TODO: do we want to blur volume closures? */
 #ifdef __SVM__
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index be6383e521a..6cd8739ce39 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -30,7 +30,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device int bsdf_ashikhmin_shirley_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_ashikhmin_shirley_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
   bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
@@ -39,9 +39,9 @@ ccl_device int bsdf_ashikhmin_shirley_setup(MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device void bsdf_ashikhmin_shirley_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_ashikhmin_shirley_blur(ccl_private ShaderClosure *sc, float roughness)
 {
-  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+  ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
 
   bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
   bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
@@ -52,12 +52,13 @@ ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float rough
   return 2.0f / (roughness * roughness) - 2.0f;
 }
 
-ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc,
-                                                                  const float3 I,
-                                                                  const float3 omega_in,
-                                                                  float *pdf)
+ccl_device_forceinline float3
+bsdf_ashikhmin_shirley_eval_reflect(ccl_private const ShaderClosure *sc,
+                                    const float3 I,
+                                    const float3 omega_in,
+                                    ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float3 N = bsdf->N;
 
   float NdotI = dot(N, I);        /* in Cycles/OSL convention I is omega_out */
@@ -119,16 +120,20 @@ ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderCl
   return make_float3(out, out, out);
 }
 
-ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(ccl_private const ShaderClosure *sc,
                                                        const float3 I,
                                                        const float3 omega_in,
-                                                       float *pdf)
+                                                       ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(
-    float n_x, float n_y, float randu, float randv, float *phi, float *cos_theta)
+ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x,
+                                                                    float n_y,
+                                                                    float randu,
+                                                                    float randv,
+                                                                    ccl_private float *phi,
+                                                                    ccl_private float *cos_theta)
 {
   *phi = atanf(sqrtf((n_x + 1.0f) / (n_y + 1.0f)) * tanf(M_PI_2_F * randu));
   float cos_phi = cosf(*phi);
@@ -136,20 +141,20 @@ ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(
   *cos_theta = powf(randv, 1.0f / (n_x * cos_phi * cos_phi + n_y * sin_phi * sin_phi + 1.0f));
 }
 
-ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc,
+ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc,
                                              float3 Ng,
                                              float3 I,
                                              float3 dIdx,
                                              float3 dIdy,
                                              float randu,
                                              float randv,
-                                             float3 *eval,
-                                             float3 *omega_in,
-                                             float3 *domega_in_dx,
-                                             float3 *domega_in_dy,
-                                             float *pdf)
+                                             ccl_private float3 *eval,
+                                             ccl_private float3 *omega_in,
+                                             ccl_private float3 *domega_in_dx,
+                                             ccl_private float3 *domega_in_dy,
+                                             ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float3 N = bsdf->N;
   int label = LABEL_REFLECT | LABEL_GLOSSY;
 
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
index f51027f5701..c00890be54c 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
@@ -36,7 +36,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct VelvetBsdf {
+typedef struct VelvetBsdf {
   SHADER_CLOSURE_BASE;
 
   float sigma;
@@ -45,7 +45,7 @@ typedef ccl_addr_space struct VelvetBsdf {
 
 static_assert(sizeof(ShaderClosure) >= sizeof(VelvetBsdf), "VelvetBsdf is too large!");
 
-ccl_device int bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf)
+ccl_device int bsdf_ashikhmin_velvet_setup(ccl_private VelvetBsdf *bsdf)
 {
   float sigma = fmaxf(bsdf->sigma, 0.01f);
   bsdf->invsigma2 = 1.0f / (sigma * sigma);
@@ -55,12 +55,12 @@ ccl_device int bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(ccl_private const ShaderClosure *sc,
                                                      const float3 I,
                                                      const float3 omega_in,
-                                                     float *pdf)
+                                                     ccl_private float *pdf)
 {
-  const VelvetBsdf *bsdf = (const VelvetBsdf *)sc;
+  ccl_private const VelvetBsdf *bsdf = (ccl_private const VelvetBsdf *)sc;
   float m_invsigma2 = bsdf->invsigma2;
   float3 N = bsdf->N;
 
@@ -97,28 +97,28 @@ ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(ccl_private const ShaderClosure *sc,
                                                       const float3 I,
                                                       const float3 omega_in,
-                                                      float *pdf)
+                                                      ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc,
+ccl_device int bsdf_ashikhmin_velvet_sample(ccl_private const ShaderClosure *sc,
                                             float3 Ng,
                                             float3 I,
                                             float3 dIdx,
                                             float3 dIdy,
                                             float randu,
                                             float randv,
-                                            float3 *eval,
-                                            float3 *omega_in,
-                                            float3 *domega_in_dx,
-                                            float3 *domega_in_dy,
-                                            float *pdf)
+                                            ccl_private float3 *eval,
+                                            ccl_private float3 *omega_in,
+                                            ccl_private float3 *domega_in_dx,
+                                            ccl_private float3 *domega_in_dy,
+                                            ccl_private float *pdf)
 {
-  const VelvetBsdf *bsdf = (const VelvetBsdf *)sc;
+  ccl_private const VelvetBsdf *bsdf = (ccl_private const VelvetBsdf *)sc;
   float m_invsigma2 = bsdf->invsigma2;
   float3 N = bsdf->N;
 
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h
index 1555aa30304..16c9b428004 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse.h
@@ -34,7 +34,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct DiffuseBsdf {
+typedef struct DiffuseBsdf {
   SHADER_CLOSURE_BASE;
 } DiffuseBsdf;
 
@@ -42,18 +42,18 @@ static_assert(sizeof(ShaderClosure) >= sizeof(DiffuseBsdf), "DiffuseBsdf is too
 
 /* DIFFUSE */
 
-ccl_device int bsdf_diffuse_setup(DiffuseBsdf *bsdf)
+ccl_device int bsdf_diffuse_setup(ccl_private DiffuseBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_DIFFUSE_ID;
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_eval_reflect(ccl_private const ShaderClosure *sc,
                                             const float3 I,
                                             const float3 omega_in,
-                                            float *pdf)
+                                            ccl_private float *pdf)
 {
-  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
   float3 N = bsdf->N;
 
   float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
@@ -61,28 +61,28 @@ ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc,
   return make_float3(cos_pi, cos_pi, cos_pi);
 }
 
-ccl_device float3 bsdf_diffuse_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_eval_transmit(ccl_private const ShaderClosure *sc,
                                              const float3 I,
                                              const float3 omega_in,
-                                             float *pdf)
+                                             ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc,
+ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc,
                                    float3 Ng,
                                    float3 I,
                                    float3 dIdx,
                                    float3 dIdy,
                                    float randu,
                                    float randv,
-                                   float3 *eval,
-                                   float3 *omega_in,
-                                   float3 *domega_in_dx,
-                                   float3 *domega_in_dy,
-                                   float *pdf)
+                                   ccl_private float3 *eval,
+                                   ccl_private float3 *omega_in,
+                                   ccl_private float3 *domega_in_dx,
+                                   ccl_private float3 *domega_in_dy,
+                                   ccl_private float *pdf)
 {
-  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
   float3 N = bsdf->N;
 
   // distribution over the hemisphere
@@ -104,26 +104,26 @@ ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc,
 
 /* TRANSLUCENT */
 
-ccl_device int bsdf_translucent_setup(DiffuseBsdf *bsdf)
+ccl_device int bsdf_translucent_setup(ccl_private DiffuseBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID;
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_translucent_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_translucent_eval_reflect(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_translucent_eval_transmit(ccl_private const ShaderClosure *sc,
                                                  const float3 I,
                                                  const float3 omega_in,
-                                                 float *pdf)
+                                                 ccl_private float *pdf)
 {
-  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
   float3 N = bsdf->N;
 
   float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F;
@@ -131,20 +131,20 @@ ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc,
   return make_float3(cos_pi, cos_pi, cos_pi);
 }
 
-ccl_device int bsdf_translucent_sample(const ShaderClosure *sc,
+ccl_device int bsdf_translucent_sample(ccl_private const ShaderClosure *sc,
                                        float3 Ng,
                                        float3 I,
                                        float3 dIdx,
                                        float3 dIdy,
                                        float randu,
                                        float randv,
-                                       float3 *eval,
-                                       float3 *omega_in,
-                                       float3 *domega_in_dx,
-                                       float3 *domega_in_dy,
-                                       float *pdf)
+                                       ccl_private float3 *eval,
+                                       ccl_private float3 *omega_in,
+                                       ccl_private float3 *domega_in_dx,
+                                       ccl_private float3 *domega_in_dy,
+                                       ccl_private float *pdf)
 {
-  const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+  ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
   float3 N = bsdf->N;
 
   // we are viewing the surface from the right side - send a ray out with cosine
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
index b06dd196b9e..8bff7709a32 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
@@ -36,10 +36,10 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __OSL__
 
-typedef ccl_addr_space struct DiffuseRampBsdf {
+typedef struct DiffuseRampBsdf {
   SHADER_CLOSURE_BASE;
 
-  float3 *colors;
+  ccl_private float3 *colors;
 } DiffuseRampBsdf;
 
 static_assert(sizeof(ShaderClosure) >= sizeof(DiffuseRampBsdf), "DiffuseRampBsdf is too large!");
@@ -64,14 +64,14 @@ ccl_device int bsdf_diffuse_ramp_setup(DiffuseRampBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device void bsdf_diffuse_ramp_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_diffuse_ramp_blur(ccl_private ShaderClosure *sc, float roughness)
 {
 }
 
-ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_ramp_eval_reflect(ccl_private const ShaderClosure *sc,
                                                  const float3 I,
                                                  const float3 omega_in,
-                                                 float *pdf)
+                                                 ccl_private float *pdf)
 {
   const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
   float3 N = bsdf->N;
@@ -81,26 +81,26 @@ ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc,
   return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F;
 }
 
-ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_ramp_eval_transmit(ccl_private const ShaderClosure *sc,
                                                   const float3 I,
                                                   const float3 omega_in,
-                                                  float *pdf)
+                                                  ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc,
+ccl_device int bsdf_diffuse_ramp_sample(ccl_private const ShaderClosure *sc,
                                         float3 Ng,
                                         float3 I,
                                         float3 dIdx,
                                         float3 dIdy,
                                         float randu,
                                         float randv,
-                                        float3 *eval,
-                                        float3 *omega_in,
-                                        float3 *domega_in_dx,
-                                        float3 *domega_in_dy,
-                                        float *pdf)
+                                        ccl_private float3 *eval,
+                                        ccl_private float3 *omega_in,
+                                        ccl_private float3 *domega_in_dx,
+                                        ccl_private float3 *domega_in_dy,
+                                        ccl_private float *pdf)
 {
   const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
   float3 N = bsdf->N;
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index f56f78aa1f0..449a314a90e 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -34,7 +34,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct HairBsdf {
+typedef struct HairBsdf {
   SHADER_CLOSURE_BASE;
 
   float3 T;
@@ -45,7 +45,7 @@ typedef ccl_addr_space struct HairBsdf {
 
 static_assert(sizeof(ShaderClosure) >= sizeof(HairBsdf), "HairBsdf is too large!");
 
-ccl_device int bsdf_hair_reflection_setup(HairBsdf *bsdf)
+ccl_device int bsdf_hair_reflection_setup(ccl_private HairBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_HAIR_REFLECTION_ID;
   bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
@@ -53,7 +53,7 @@ ccl_device int bsdf_hair_reflection_setup(HairBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_hair_transmission_setup(HairBsdf *bsdf)
+ccl_device int bsdf_hair_transmission_setup(ccl_private HairBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
   bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
@@ -61,12 +61,12 @@ ccl_device int bsdf_hair_transmission_setup(HairBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_hair_reflection_eval_reflect(ccl_private const ShaderClosure *sc,
                                                     const float3 I,
                                                     const float3 omega_in,
-                                                    float *pdf)
+                                                    ccl_private float *pdf)
 {
-  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
   float offset = bsdf->offset;
   float3 Tg = bsdf->T;
   float roughness1 = bsdf->roughness1;
@@ -108,28 +108,28 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc,
   return make_float3(*pdf, *pdf, *pdf);
 }
 
-ccl_device float3 bsdf_hair_transmission_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_hair_transmission_eval_reflect(ccl_private const ShaderClosure *sc,
                                                       const float3 I,
                                                       const float3 omega_in,
-                                                      float *pdf)
+                                                      ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_hair_reflection_eval_transmit(ccl_private const ShaderClosure *sc,
                                                      const float3 I,
                                                      const float3 omega_in,
-                                                     float *pdf)
+                                                     ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_hair_transmission_eval_transmit(ccl_private const ShaderClosure *sc,
                                                        const float3 I,
                                                        const float3 omega_in,
-                                                       float *pdf)
+                                                       ccl_private float *pdf)
 {
-  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
   float offset = bsdf->offset;
   float3 Tg = bsdf->T;
   float roughness1 = bsdf->roughness1;
@@ -170,20 +170,20 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc,
   return make_float3(*pdf, *pdf, *pdf);
 }
 
-ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc,
+ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
                                            float3 Ng,
                                            float3 I,
                                            float3 dIdx,
                                            float3 dIdy,
                                            float randu,
                                            float randv,
-                                           float3 *eval,
-                                           float3 *omega_in,
-                                           float3 *domega_in_dx,
-                                           float3 *domega_in_dy,
-                                           float *pdf)
+                                           ccl_private float3 *eval,
+                                           ccl_private float3 *omega_in,
+                                           ccl_private float3 *domega_in_dx,
+                                           ccl_private float3 *domega_in_dy,
+                                           ccl_private float *pdf)
 {
-  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
   float offset = bsdf->offset;
   float3 Tg = bsdf->T;
   float roughness1 = bsdf->roughness1;
@@ -231,20 +231,20 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc,
   return LABEL_REFLECT | LABEL_GLOSSY;
 }
 
-ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc,
+ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc,
                                              float3 Ng,
                                              float3 I,
                                              float3 dIdx,
                                              float3 dIdy,
                                              float randu,
                                              float randv,
-                                             float3 *eval,
-                                             float3 *omega_in,
-                                             float3 *domega_in_dx,
-                                             float3 *domega_in_dy,
-                                             float *pdf)
+                                             ccl_private float3 *eval,
+                                             ccl_private float3 *omega_in,
+                                             ccl_private float3 *domega_in_dx,
+                                             ccl_private float3 *domega_in_dy,
+                                             ccl_private float *pdf)
 {
-  const HairBsdf *bsdf = (const HairBsdf *)sc;
+  ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
   float offset = bsdf->offset;
   float3 Tg = bsdf->T;
   float roughness1 = bsdf->roughness1;
diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h
index bfe56e5ab0e..17097b0739b 100644
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -24,12 +24,12 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct PrincipledHairExtra {
+typedef struct PrincipledHairExtra {
   /* Geometry data. */
   float4 geom;
 } PrincipledHairExtra;
 
-typedef ccl_addr_space struct PrincipledHairBSDF {
+typedef struct PrincipledHairBSDF {
   SHADER_CLOSURE_BASE;
 
   /* Absorption coefficient. */
@@ -46,7 +46,7 @@ typedef ccl_addr_space struct PrincipledHairBSDF {
   float m0_roughness;
 
   /* Extra closure. */
-  PrincipledHairExtra *extra;
+  ccl_private PrincipledHairExtra *extra;
 } PrincipledHairBSDF;
 
 static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairBSDF),
@@ -180,14 +180,15 @@ ccl_device_inline float longitudinal_scattering(
 }
 
 /* Combine the three values using their luminances. */
-ccl_device_inline float4 combine_with_energy(const KernelGlobals *kg, float3 c)
+ccl_device_inline float4 combine_with_energy(ccl_global const KernelGlobals *kg, float3 c)
 {
   return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c));
 }
 
 #ifdef __HAIR__
 /* Set up the hair closure. */
-ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bsdf)
+ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd,
+                                          ccl_private PrincipledHairBSDF *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_HAIR_PRINCIPLED_ID;
   bsdf->v = clamp(bsdf->v, 0.001f, 1.0f);
@@ -228,7 +229,10 @@ ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bs
 #endif /* __HAIR__ */
 
 /* Given the Fresnel term and transmittance, generate the attenuation terms for each bounce. */
-ccl_device_inline void hair_attenuation(const KernelGlobals *kg, float f, float3 T, float4 *Ap)
+ccl_device_inline void hair_attenuation(ccl_global const KernelGlobals *kg,
+                                        float f,
+                                        float3 T,
+                                        ccl_private float4 *Ap)
 {
   /* Primary specular (R). */
   Ap[0] = make_float4(f, f, f, f);
@@ -259,7 +263,7 @@ ccl_device_inline void hair_attenuation(const KernelGlobals *kg, float f, float3
 ccl_device_inline void hair_alpha_angles(float sin_theta_i,
                                          float cos_theta_i,
                                          float alpha,
-                                         float *angles)
+                                         ccl_private float *angles)
 {
   float sin_1alpha = sinf(alpha);
   float cos_1alpha = cos_from_sin(sin_1alpha);
@@ -277,15 +281,15 @@ ccl_device_inline void hair_alpha_angles(float sin_theta_i,
 }
 
 /* Evaluation function for our shader. */
-ccl_device float3 bsdf_principled_hair_eval(const KernelGlobals *kg,
-                                            const ShaderData *sd,
-                                            const ShaderClosure *sc,
+ccl_device float3 bsdf_principled_hair_eval(ccl_global const KernelGlobals *kg,
+                                            ccl_private const ShaderData *sd,
+                                            ccl_private const ShaderClosure *sc,
                                             const float3 omega_in,
-                                            float *pdf)
+                                            ccl_private float *pdf)
 {
   kernel_assert(isfinite3_safe(sd->P) && isfinite_safe(sd->ray_length));
 
-  const PrincipledHairBSDF *bsdf = (const PrincipledHairBSDF *)sc;
+  ccl_private const PrincipledHairBSDF *bsdf = (ccl_private const PrincipledHairBSDF *)sc;
   float3 Y = float4_to_float3(bsdf->extra->geom);
 
   float3 X = safe_normalize(sd->dPdu);
@@ -355,18 +359,18 @@ ccl_device float3 bsdf_principled_hair_eval(const KernelGlobals *kg,
 }
 
 /* Sampling function for the hair shader. */
-ccl_device int bsdf_principled_hair_sample(const KernelGlobals *kg,
-                                           const ShaderClosure *sc,
-                                           ShaderData *sd,
+ccl_device int bsdf_principled_hair_sample(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderClosure *sc,
+                                           ccl_private ShaderData *sd,
                                            float randu,
                                            float randv,
-                                           float3 *eval,
-                                           float3 *omega_in,
-                                           float3 *domega_in_dx,
-                                           float3 *domega_in_dy,
-                                           float *pdf)
+                                           ccl_private float3 *eval,
+                                           ccl_private float3 *omega_in,
+                                           ccl_private float3 *domega_in_dx,
+                                           ccl_private float3 *domega_in_dy,
+                                           ccl_private float *pdf)
 {
-  PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
+  ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
 
   float3 Y = float4_to_float3(bsdf->extra->geom);
 
@@ -378,8 +382,8 @@ ccl_device int bsdf_principled_hair_sample(const KernelGlobals *kg,
 
   float2 u[2];
   u[0] = make_float2(randu, randv);
-  u[1].x = lcg_step_float_addrspace(&sd->lcg_state);
-  u[1].y = lcg_step_float_addrspace(&sd->lcg_state);
+  u[1].x = lcg_step_float(&sd->lcg_state);
+  u[1].y = lcg_step_float(&sd->lcg_state);
 
   float sin_theta_o = wo.x;
   float cos_theta_o = cos_from_sin(sin_theta_o);
@@ -482,9 +486,9 @@ ccl_device int bsdf_principled_hair_sample(const KernelGlobals *kg,
 }
 
 /* Implements Filter Glossy by capping the effective roughness. */
-ccl_device void bsdf_principled_hair_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_principled_hair_blur(ccl_private ShaderClosure *sc, float roughness)
 {
-  PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
+  ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
 
   bsdf->v = fmaxf(roughness, bsdf->v);
   bsdf->s = fmaxf(roughness, bsdf->s);
@@ -500,9 +504,9 @@ ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale(
   return (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f;
 }
 
-ccl_device float3 bsdf_principled_hair_albedo(const ShaderClosure *sc)
+ccl_device float3 bsdf_principled_hair_albedo(ccl_private const ShaderClosure *sc)
 {
-  PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc;
+  ccl_private const PrincipledHairBSDF *bsdf = (ccl_private const PrincipledHairBSDF *)sc;
   return exp3(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
 }
 
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 227cb448b47..41c35867a6b 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -37,17 +37,17 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct MicrofacetExtra {
+typedef struct MicrofacetExtra {
   float3 color, cspec0;
   float3 fresnel_color;
   float clearcoat;
 } MicrofacetExtra;
 
-typedef ccl_addr_space struct MicrofacetBsdf {
+typedef struct MicrofacetBsdf {
   SHADER_CLOSURE_BASE;
 
   float alpha_x, alpha_y, ior;
-  MicrofacetExtra *extra;
+  ccl_private MicrofacetExtra *extra;
   float3 T;
 } MicrofacetBsdf;
 
@@ -55,14 +55,14 @@ static_assert(sizeof(ShaderClosure) >= sizeof(MicrofacetBsdf), "MicrofacetBsdf i
 
 /* Beckmann and GGX microfacet importance sampling. */
 
-ccl_device_inline void microfacet_beckmann_sample_slopes(const KernelGlobals *kg,
+ccl_device_inline void microfacet_beckmann_sample_slopes(ccl_global const KernelGlobals *kg,
                                                          const float cos_theta_i,
                                                          const float sin_theta_i,
                                                          float randu,
                                                          float randv,
-                                                         float *slope_x,
-                                                         float *slope_y,
-                                                         float *G1i)
+                                                         ccl_private float *slope_x,
+                                                         ccl_private float *slope_y,
+                                                         ccl_private float *G1i)
 {
   /* special case (normal incidence) */
   if (cos_theta_i >= 0.99999f) {
@@ -146,9 +146,9 @@ ccl_device_inline void microfacet_ggx_sample_slopes(const float cos_theta_i,
                                                     const float sin_theta_i,
                                                     float randu,
                                                     float randv,
-                                                    float *slope_x,
-                                                    float *slope_y,
-                                                    float *G1i)
+                                                    ccl_private float *slope_x,
+                                                    ccl_private float *slope_y,
+                                                    ccl_private float *G1i)
 {
   /* special case (normal incidence) */
   if (cos_theta_i >= 0.99999f) {
@@ -195,14 +195,14 @@ ccl_device_inline void microfacet_ggx_sample_slopes(const float cos_theta_i,
   *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x) * (*slope_x));
 }
 
-ccl_device_forceinline float3 microfacet_sample_stretched(const KernelGlobals *kg,
+ccl_device_forceinline float3 microfacet_sample_stretched(ccl_global const KernelGlobals *kg,
                                                           const float3 omega_i,
                                                           const float alpha_x,
                                                           const float alpha_y,
                                                           const float randu,
                                                           const float randv,
                                                           bool beckmann,
-                                                          float *G1i)
+                                                          ccl_private float *G1i)
 {
   /* 1. stretch omega_i */
   float3 omega_i_ = make_float3(alpha_x * omega_i.x, alpha_y * omega_i.y, omega_i.z);
@@ -254,7 +254,9 @@ ccl_device_forceinline float3 microfacet_sample_stretched(const KernelGlobals *k
  *
  * Else it is simply white
  */
-ccl_device_forceinline float3 reflection_color(const MicrofacetBsdf *bsdf, float3 L, float3 H)
+ccl_device_forceinline float3 reflection_color(ccl_private const MicrofacetBsdf *bsdf,
+                                               float3 L,
+                                               float3 H)
 {
   float3 F = make_float3(1.0f, 1.0f, 1.0f);
   bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID ||
@@ -277,8 +279,8 @@ ccl_device_forceinline float D_GTR1(float NdotH, float alpha)
   return (alpha2 - 1.0f) / (M_PI_F * logf(alpha2) * t);
 }
 
-ccl_device_forceinline void bsdf_microfacet_fresnel_color(const ShaderData *sd,
-                                                          MicrofacetBsdf *bsdf)
+ccl_device_forceinline void bsdf_microfacet_fresnel_color(ccl_private const ShaderData *sd,
+                                                          ccl_private MicrofacetBsdf *bsdf)
 {
   kernel_assert(CLOSURE_IS_BSDF_MICROFACET_FRESNEL(bsdf->type));
 
@@ -306,7 +308,7 @@ ccl_device_forceinline void bsdf_microfacet_fresnel_color(const ShaderData *sd,
  * Anisotropy is only supported for reflection currently, but adding it for
  * transmission is just a matter of copying code from reflection if needed. */
 
-ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->extra = NULL;
 
@@ -319,14 +321,15 @@ ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf)
 }
 
 /* Required to maintain OSL interface. */
-ccl_device int bsdf_microfacet_ggx_isotropic_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_isotropic_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_y = bsdf->alpha_x;
 
   return bsdf_microfacet_ggx_setup(bsdf);
 }
 
-ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+ccl_device int bsdf_microfacet_ggx_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,
+                                                 ccl_private const ShaderData *sd)
 {
   bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0);
 
@@ -340,7 +343,8 @@ ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const Sha
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+ccl_device int bsdf_microfacet_ggx_clearcoat_setup(ccl_private MicrofacetBsdf *bsdf,
+                                                   ccl_private const ShaderData *sd)
 {
   bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0);
 
@@ -354,7 +358,7 @@ ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const S
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device int bsdf_microfacet_ggx_refraction_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_ggx_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->extra = NULL;
 
@@ -366,20 +370,20 @@ ccl_device int bsdf_microfacet_ggx_refraction_setup(MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device void bsdf_microfacet_ggx_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_microfacet_ggx_blur(ccl_private ShaderClosure *sc, float roughness)
 {
-  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+  ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
 
   bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
   bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
 }
 
-ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_ggx_eval_reflect(ccl_private const ShaderClosure *sc,
                                                    const float3 I,
                                                    const float3 omega_in,
-                                                   float *pdf)
+                                                   ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
@@ -487,12 +491,12 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_ggx_eval_transmit(ccl_private const ShaderClosure *sc,
                                                     const float3 I,
                                                     const float3 omega_in,
-                                                    float *pdf)
+                                                    ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   float m_eta = bsdf->ior;
@@ -545,21 +549,21 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc,
   return make_float3(out, out, out);
 }
 
-ccl_device int bsdf_microfacet_ggx_sample(const KernelGlobals *kg,
-                                          const ShaderClosure *sc,
+ccl_device int bsdf_microfacet_ggx_sample(ccl_global const KernelGlobals *kg,
+                                          ccl_private const ShaderClosure *sc,
                                           float3 Ng,
                                           float3 I,
                                           float3 dIdx,
                                           float3 dIdy,
                                           float randu,
                                           float randv,
-                                          float3 *eval,
-                                          float3 *omega_in,
-                                          float3 *domega_in_dx,
-                                          float3 *domega_in_dy,
-                                          float *pdf)
+                                          ccl_private float3 *eval,
+                                          ccl_private float3 *omega_in,
+                                          ccl_private float3 *domega_in_dx,
+                                          ccl_private float3 *domega_in_dy,
+                                          ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
@@ -774,7 +778,7 @@ ccl_device int bsdf_microfacet_ggx_sample(const KernelGlobals *kg,
  * Microfacet Models for Refraction through Rough Surfaces
  * B. Walter, S. R. Marschner, H. Li, K. E. Torrance, EGSR 2007 */
 
-ccl_device int bsdf_microfacet_beckmann_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_beckmann_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_x = saturate(bsdf->alpha_x);
   bsdf->alpha_y = saturate(bsdf->alpha_y);
@@ -784,14 +788,14 @@ ccl_device int bsdf_microfacet_beckmann_setup(MicrofacetBsdf *bsdf)
 }
 
 /* Required to maintain OSL interface. */
-ccl_device int bsdf_microfacet_beckmann_isotropic_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_beckmann_isotropic_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_y = bsdf->alpha_x;
 
   return bsdf_microfacet_beckmann_setup(bsdf);
 }
 
-ccl_device int bsdf_microfacet_beckmann_refraction_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_beckmann_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_x = saturate(bsdf->alpha_x);
   bsdf->alpha_y = bsdf->alpha_x;
@@ -800,9 +804,9 @@ ccl_device int bsdf_microfacet_beckmann_refraction_setup(MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device void bsdf_microfacet_beckmann_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_microfacet_beckmann_blur(ccl_private ShaderClosure *sc, float roughness)
 {
-  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+  ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
 
   bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
   bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
@@ -839,12 +843,12 @@ ccl_device_inline float bsdf_beckmann_aniso_G1(
   return ((2.181f * a + 3.535f) * a) / ((2.577f * a + 2.276f) * a + 1.0f);
 }
 
-ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(ccl_private const ShaderClosure *sc,
                                                         const float3 I,
                                                         const float3 omega_in,
-                                                        float *pdf)
+                                                        ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
@@ -918,12 +922,12 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(ccl_private const ShaderClosure *sc,
                                                          const float3 I,
                                                          const float3 omega_in,
-                                                         float *pdf)
+                                                         ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   float m_eta = bsdf->ior;
@@ -973,21 +977,21 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc
   return make_float3(out, out, out);
 }
 
-ccl_device int bsdf_microfacet_beckmann_sample(const KernelGlobals *kg,
-                                               const ShaderClosure *sc,
+ccl_device int bsdf_microfacet_beckmann_sample(ccl_global const KernelGlobals *kg,
+                                               ccl_private const ShaderClosure *sc,
                                                float3 Ng,
                                                float3 I,
                                                float3 dIdx,
                                                float3 dIdy,
                                                float randu,
                                                float randv,
-                                               float3 *eval,
-                                               float3 *omega_in,
-                                               float3 *domega_in_dx,
-                                               float3 *domega_in_dy,
-                                               float *pdf)
+                                               ccl_private float3 *eval,
+                                               ccl_private float3 *omega_in,
+                                               ccl_private float3 *domega_in_dx,
+                                               ccl_private float3 *domega_in_dy,
+                                               ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float alpha_x = bsdf->alpha_x;
   float alpha_y = bsdf->alpha_y;
   bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
index 68d5071dbce..6ee1139ddbb 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -105,7 +105,7 @@ ccl_device_forceinline float3 mf_sample_vndf(const float3 wi,
 
 /* Phase function for reflective materials. */
 ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi,
-                                                     float3 *weight,
+                                                     ccl_private float3 *weight,
                                                      const float3 wm)
 {
   return -wi + 2.0f * wm * dot(wi, wm);
@@ -140,8 +140,11 @@ ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w,
 
 /* Phase function for dielectric transmissive materials, including both reflection and refraction
  * according to the dielectric fresnel term. */
-ccl_device_forceinline float3 mf_sample_phase_glass(
-    const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
+ccl_device_forceinline float3 mf_sample_phase_glass(const float3 wi,
+                                                    const float eta,
+                                                    const float3 wm,
+                                                    const float randV,
+                                                    ccl_private bool *outside)
 {
   float cosI = dot(wi, wm);
   float f = fresnel_dielectric_cos(cosI, eta);
@@ -234,8 +237,12 @@ ccl_device_forceinline float mf_G1(const float3 w, const float C1, const float l
 
 /* Sampling from the visible height distribution (based on page 17 of the supplemental
  * implementation). */
-ccl_device_forceinline bool mf_sample_height(
-    const float3 w, float *h, float *C1, float *G1, float *lambda, const float U)
+ccl_device_forceinline bool mf_sample_height(const float3 w,
+                                             ccl_private float *h,
+                                             ccl_private float *C1,
+                                             ccl_private float *G1,
+                                             ccl_private float *lambda,
+                                             const float U)
 {
   if (w.z > 0.9999f)
     return false;
@@ -364,9 +371,9 @@ ccl_device_forceinline float mf_glass_pdf(const float3 wi,
 #define MF_MULTI_GLOSSY
 #include "kernel/closure/bsdf_microfacet_multi_impl.h"
 
-ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughness)
+ccl_device void bsdf_microfacet_multi_ggx_blur(ccl_private ShaderClosure *sc, float roughness)
 {
-  MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+  ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
 
   bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x);
   bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
@@ -376,7 +383,7 @@ ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughnes
 
 /* Multiscattering GGX Glossy closure */
 
-ccl_device int bsdf_microfacet_multi_ggx_common_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_common_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
   bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
@@ -386,7 +393,7 @@ ccl_device int bsdf_microfacet_multi_ggx_common_setup(MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   if (is_zero(bsdf->T))
     bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
@@ -396,7 +403,8 @@ ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf)
   return bsdf_microfacet_multi_ggx_common_setup(bsdf);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd)
+ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,
+                                                       ccl_private const ShaderData *sd)
 {
   if (is_zero(bsdf->T))
     bsdf->T = make_float3(1.0f, 0.0f, 0.0f);
@@ -408,7 +416,7 @@ ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, con
   return bsdf_microfacet_multi_ggx_common_setup(bsdf);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_y = bsdf->alpha_x;
 
@@ -417,23 +425,23 @@ ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(MicrofacetBsdf *bsdf)
   return bsdf_microfacet_multi_ggx_common_setup(bsdf);
 }
 
-ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(ccl_private const ShaderClosure *sc,
                                                           const float3 I,
                                                           const float3 omega_in,
-                                                          float *pdf,
-                                                          ccl_addr_space uint *lcg_state)
+                                                          ccl_private float *pdf,
+                                                          ccl_private uint *lcg_state)
 {
   *pdf = 0.0f;
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(ccl_private const ShaderClosure *sc,
                                                          const float3 I,
                                                          const float3 omega_in,
-                                                         float *pdf,
-                                                         ccl_addr_space uint *lcg_state)
+                                                         ccl_private float *pdf,
+                                                         ccl_private uint *lcg_state)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
 
   if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -468,22 +476,22 @@ ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc
                         bsdf->extra->cspec0);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_sample(const KernelGlobals *kg,
-                                                const ShaderClosure *sc,
+ccl_device int bsdf_microfacet_multi_ggx_sample(ccl_global const KernelGlobals *kg,
+                                                ccl_private const ShaderClosure *sc,
                                                 float3 Ng,
                                                 float3 I,
                                                 float3 dIdx,
                                                 float3 dIdy,
                                                 float randu,
                                                 float randv,
-                                                float3 *eval,
-                                                float3 *omega_in,
-                                                float3 *domega_in_dx,
-                                                float3 *domega_in_dy,
-                                                float *pdf,
-                                                ccl_addr_space uint *lcg_state)
+                                                ccl_private float3 *eval,
+                                                ccl_private float3 *omega_in,
+                                                ccl_private float3 *domega_in_dx,
+                                                ccl_private float3 *domega_in_dy,
+                                                ccl_private float *pdf,
+                                                ccl_private uint *lcg_state)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
 
   float3 X, Y, Z;
   Z = bsdf->N;
@@ -536,7 +544,7 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(const KernelGlobals *kg,
 
 /* Multiscattering GGX Glass closure */
 
-ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_microfacet_multi_ggx_glass_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
   bsdf->alpha_y = bsdf->alpha_x;
@@ -548,8 +556,8 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf,
-                                                             const ShaderData *sd)
+ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,
+                                                             ccl_private const ShaderData *sd)
 {
   bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
   bsdf->alpha_y = bsdf->alpha_x;
@@ -564,13 +572,14 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsd
   return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }
 
-ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc,
-                                                                const float3 I,
-                                                                const float3 omega_in,
-                                                                float *pdf,
-                                                                ccl_addr_space uint *lcg_state)
+ccl_device float3
+bsdf_microfacet_multi_ggx_glass_eval_transmit(ccl_private const ShaderClosure *sc,
+                                              const float3 I,
+                                              const float3 omega_in,
+                                              ccl_private float *pdf,
+                                              ccl_private uint *lcg_state)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
 
   if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -596,13 +605,13 @@ ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClos
                        bsdf->extra->color);
 }
 
-ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(ccl_private const ShaderClosure *sc,
                                                                const float3 I,
                                                                const float3 omega_in,
-                                                               float *pdf,
-                                                               ccl_addr_space uint *lcg_state)
+                                                               ccl_private float *pdf,
+                                                               ccl_private uint *lcg_state)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
 
   if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -630,22 +639,22 @@ ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosu
                        bsdf->extra->cspec0);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_glass_sample(const KernelGlobals *kg,
-                                                      const ShaderClosure *sc,
+ccl_device int bsdf_microfacet_multi_ggx_glass_sample(ccl_global const KernelGlobals *kg,
+                                                      ccl_private const ShaderClosure *sc,
                                                       float3 Ng,
                                                       float3 I,
                                                       float3 dIdx,
                                                       float3 dIdy,
                                                       float randu,
                                                       float randv,
-                                                      float3 *eval,
-                                                      float3 *omega_in,
-                                                      float3 *domega_in_dx,
-                                                      float3 *domega_in_dy,
-                                                      float *pdf,
-                                                      ccl_addr_space uint *lcg_state)
+                                                      ccl_private float3 *eval,
+                                                      ccl_private float3 *omega_in,
+                                                      ccl_private float3 *domega_in_dx,
+                                                      ccl_private float3 *domega_in_dy,
+                                                      ccl_private float *pdf,
+                                                      ccl_private uint *lcg_state)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
 
   float3 X, Y, Z;
   Z = bsdf->N;
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
index 04d9b22d7d2..d23cc16cff3 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
@@ -31,7 +31,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
                                                              const float3 color,
                                                              const float alpha_x,
                                                              const float alpha_y,
-                                                             ccl_addr_space uint *lcg_state,
+                                                             ccl_private uint *lcg_state,
                                                              const float eta,
                                                              bool use_fresnel,
                                                              const float3 cspec0)
@@ -101,12 +101,12 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
 
   for (int order = 0; order < 10; order++) {
     /* Sample microfacet height. */
-    float height_rand = lcg_step_float_addrspace(lcg_state);
+    float height_rand = lcg_step_float(lcg_state);
     if (!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand))
       break;
     /* Sample microfacet normal. */
-    float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
-    float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
+    float vndf_rand_y = lcg_step_float(lcg_state);
+    float vndf_rand_x = lcg_step_float(lcg_state);
     float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
 
 #ifdef MF_MULTI_GLASS
@@ -145,7 +145,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
 #ifdef MF_MULTI_GLASS
       bool next_outside;
       float3 wi_prev = -wr;
-      float phase_rand = lcg_step_float_addrspace(lcg_state);
+      float phase_rand = lcg_step_float(lcg_state);
       wr = mf_sample_phase_glass(-wr, outside ? eta : 1.0f / eta, wm, phase_rand, &next_outside);
       if (!next_outside) {
         outside = !outside;
@@ -186,11 +186,11 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi,
  * reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
  */
 ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi,
-                                                               float3 *wo,
+                                                               ccl_private float3 *wo,
                                                                const float3 color,
                                                                const float alpha_x,
                                                                const float alpha_y,
-                                                               ccl_addr_space uint *lcg_state,
+                                                               ccl_private uint *lcg_state,
                                                                const float eta,
                                                                bool use_fresnel,
                                                                const float3 cspec0)
@@ -213,15 +213,15 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi,
   int order;
   for (order = 0; order < 10; order++) {
     /* Sample microfacet height. */
-    float height_rand = lcg_step_float_addrspace(lcg_state);
+    float height_rand = lcg_step_float(lcg_state);
     if (!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand)) {
       /* The random walk has left the surface. */
       *wo = outside ? wr : -wr;
       return throughput;
     }
     /* Sample microfacet normal. */
-    float vndf_rand_y = lcg_step_float_addrspace(lcg_state);
-    float vndf_rand_x = lcg_step_float_addrspace(lcg_state);
+    float vndf_rand_y = lcg_step_float(lcg_state);
+    float vndf_rand_x = lcg_step_float(lcg_state);
     float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y);
 
     /* First-bounce color is already accounted for in mix weight. */
@@ -232,7 +232,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi,
 #ifdef MF_MULTI_GLASS
     bool next_outside;
     float3 wi_prev = -wr;
-    float phase_rand = lcg_step_float_addrspace(lcg_state);
+    float phase_rand = lcg_step_float(lcg_state);
     wr = mf_sample_phase_glass(-wr, outside ? eta : 1.0f / eta, wm, phase_rand, &next_outside);
     if (!next_outside) {
       hr = -hr;
diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
index be12d47f0ea..00c2678f0a0 100644
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@@ -18,7 +18,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct OrenNayarBsdf {
+typedef struct OrenNayarBsdf {
   SHADER_CLOSURE_BASE;
 
   float roughness;
@@ -28,12 +28,12 @@ typedef ccl_addr_space struct OrenNayarBsdf {
 
 static_assert(sizeof(ShaderClosure) >= sizeof(OrenNayarBsdf), "OrenNayarBsdf is too large!");
 
-ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc,
+ccl_device float3 bsdf_oren_nayar_get_intensity(ccl_private const ShaderClosure *sc,
                                                 float3 n,
                                                 float3 v,
                                                 float3 l)
 {
-  const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+  ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
   float nl = max(dot(n, l), 0.0f);
   float nv = max(dot(n, v), 0.0f);
   float t = dot(l, v) - nl * nv;
@@ -44,7 +44,7 @@ ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc,
   return make_float3(is, is, is);
 }
 
-ccl_device int bsdf_oren_nayar_setup(OrenNayarBsdf *bsdf)
+ccl_device int bsdf_oren_nayar_setup(ccl_private OrenNayarBsdf *bsdf)
 {
   float sigma = bsdf->roughness;
 
@@ -60,12 +60,12 @@ ccl_device int bsdf_oren_nayar_setup(OrenNayarBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_oren_nayar_eval_reflect(ccl_private const ShaderClosure *sc,
                                                const float3 I,
                                                const float3 omega_in,
-                                               float *pdf)
+                                               ccl_private float *pdf)
 {
-  const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+  ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
   if (dot(bsdf->N, omega_in) > 0.0f) {
     *pdf = 0.5f * M_1_PI_F;
     return bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, omega_in);
@@ -76,28 +76,28 @@ ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc,
   }
 }
 
-ccl_device float3 bsdf_oren_nayar_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_oren_nayar_eval_transmit(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc,
+ccl_device int bsdf_oren_nayar_sample(ccl_private const ShaderClosure *sc,
                                       float3 Ng,
                                       float3 I,
                                       float3 dIdx,
                                       float3 dIdy,
                                       float randu,
                                       float randv,
-                                      float3 *eval,
-                                      float3 *omega_in,
-                                      float3 *domega_in_dx,
-                                      float3 *domega_in_dy,
-                                      float *pdf)
+                                      ccl_private float3 *eval,
+                                      ccl_private float3 *omega_in,
+                                      ccl_private float3 *domega_in_dx,
+                                      ccl_private float3 *domega_in_dy,
+                                      ccl_private float *pdf)
 {
-  const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc;
+  ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
   sample_uniform_hemisphere(bsdf->N, randu, randv, omega_in, pdf);
 
   if (dot(Ng, *omega_in) > 0.0f) {
diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
index 43f8cf71c59..74cc62d917b 100644
--- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
@@ -36,11 +36,11 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __OSL__
 
-typedef ccl_addr_space struct PhongRampBsdf {
+typedef struct PhongRampBsdf {
   SHADER_CLOSURE_BASE;
 
   float exponent;
-  float3 *colors;
+  ccl_private float3 *colors;
 } PhongRampBsdf;
 
 static_assert(sizeof(ShaderClosure) >= sizeof(PhongRampBsdf), "PhongRampBsdf is too large!");
@@ -59,19 +59,19 @@ ccl_device float3 bsdf_phong_ramp_get_color(const float3 colors[8], float pos)
   return colors[ipos] * (1.0f - offset) + colors[ipos + 1] * offset;
 }
 
-ccl_device int bsdf_phong_ramp_setup(PhongRampBsdf *bsdf)
+ccl_device int bsdf_phong_ramp_setup(ccl_private PhongRampBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_PHONG_RAMP_ID;
   bsdf->exponent = max(bsdf->exponent, 0.0f);
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_phong_ramp_eval_reflect(ccl_private const ShaderClosure *sc,
                                                const float3 I,
                                                const float3 omega_in,
-                                               float *pdf)
+                                               ccl_private float *pdf)
 {
-  const PhongRampBsdf *bsdf = (const PhongRampBsdf *)sc;
+  ccl_private const PhongRampBsdf *bsdf = (ccl_private const PhongRampBsdf *)sc;
   float m_exponent = bsdf->exponent;
   float cosNI = dot(bsdf->N, omega_in);
   float cosNO = dot(bsdf->N, I);
@@ -92,28 +92,28 @@ ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_phong_ramp_eval_transmit(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc,
+ccl_device int bsdf_phong_ramp_sample(ccl_private const ShaderClosure *sc,
                                       float3 Ng,
                                       float3 I,
                                       float3 dIdx,
                                       float3 dIdy,
                                       float randu,
                                       float randv,
-                                      float3 *eval,
-                                      float3 *omega_in,
-                                      float3 *domega_in_dx,
-                                      float3 *domega_in_dy,
-                                      float *pdf)
+                                      ccl_private float3 *eval,
+                                      ccl_private float3 *omega_in,
+                                      ccl_private float3 *domega_in_dx,
+                                      ccl_private float3 *domega_in_dy,
+                                      ccl_private float *pdf)
 {
-  const PhongRampBsdf *bsdf = (const PhongRampBsdf *)sc;
+  ccl_private const PhongRampBsdf *bsdf = (ccl_private const PhongRampBsdf *)sc;
   float cosNO = dot(bsdf->N, I);
   float m_exponent = bsdf->exponent;
 
diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
index 52a37eafd9f..6d25daa2356 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
@@ -36,7 +36,7 @@ enum PrincipledDiffuseBsdfComponents {
   PRINCIPLED_DIFFUSE_RETRO_REFLECTION = 8,
 };
 
-typedef ccl_addr_space struct PrincipledDiffuseBsdf {
+typedef struct PrincipledDiffuseBsdf {
   SHADER_CLOSURE_BASE;
 
   float roughness;
@@ -46,14 +46,18 @@ typedef ccl_addr_space struct PrincipledDiffuseBsdf {
 static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledDiffuseBsdf),
               "PrincipledDiffuseBsdf is too large!");
 
-ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf)
+ccl_device int bsdf_principled_diffuse_setup(ccl_private PrincipledDiffuseBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID;
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_principled_diffuse_compute_brdf(
-    const PrincipledDiffuseBsdf *bsdf, float3 N, float3 V, float3 L, float *pdf)
+ccl_device float3
+bsdf_principled_diffuse_compute_brdf(ccl_private const PrincipledDiffuseBsdf *bsdf,
+                                     float3 N,
+                                     float3 V,
+                                     float3 L,
+                                     ccl_private float *pdf)
 {
   const float NdotL = dot(N, L);
 
@@ -102,24 +106,25 @@ ccl_device_inline float bsdf_principled_diffuse_compute_entry_fresnel(const floa
 /* Ad-hoc weight adjustment to avoid retro-reflection taking away half the
  * samples from BSSRDF. */
 ccl_device_inline float bsdf_principled_diffuse_retro_reflection_sample_weight(
-    PrincipledDiffuseBsdf *bsdf, const float3 I)
+    ccl_private PrincipledDiffuseBsdf *bsdf, const float3 I)
 {
   return bsdf->roughness * schlick_fresnel(dot(bsdf->N, I));
 }
 
-ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf, int components)
+ccl_device int bsdf_principled_diffuse_setup(ccl_private PrincipledDiffuseBsdf *bsdf,
+                                             int components)
 {
   bsdf->type = CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID;
   bsdf->components = components;
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_principled_diffuse_eval_reflect(ccl_private const ShaderClosure *sc,
                                                        const float3 I,
                                                        const float3 omega_in,
-                                                       float *pdf)
+                                                       ccl_private float *pdf)
 {
-  const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
+  ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)sc;
 
   float3 N = bsdf->N;
   float3 V = I;         // outgoing
@@ -135,28 +140,28 @@ ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc,
   }
 }
 
-ccl_device float3 bsdf_principled_diffuse_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_principled_diffuse_eval_transmit(ccl_private const ShaderClosure *sc,
                                                         const float3 I,
                                                         const float3 omega_in,
-                                                        float *pdf)
+                                                        ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_principled_diffuse_sample(const ShaderClosure *sc,
+ccl_device int bsdf_principled_diffuse_sample(ccl_private const ShaderClosure *sc,
                                               float3 Ng,
                                               float3 I,
                                               float3 dIdx,
                                               float3 dIdy,
                                               float randu,
                                               float randv,
-                                              float3 *eval,
-                                              float3 *omega_in,
-                                              float3 *domega_in_dx,
-                                              float3 *domega_in_dy,
-                                              float *pdf)
+                                              ccl_private float3 *eval,
+                                              ccl_private float3 *omega_in,
+                                              ccl_private float3 *domega_in_dx,
+                                              ccl_private float3 *domega_in_dy,
+                                              ccl_private float *pdf)
 {
-  const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc;
+  ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)sc;
 
   float3 N = bsdf->N;
 
diff --git a/intern/cycles/kernel/closure/bsdf_principled_sheen.h b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
index 60ce7e4eb75..cc0a5accb95 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_sheen.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
@@ -25,7 +25,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct PrincipledSheenBsdf {
+typedef struct PrincipledSheenBsdf {
   SHADER_CLOSURE_BASE;
   float avg_value;
 } PrincipledSheenBsdf;
@@ -46,7 +46,7 @@ ccl_device_inline float calculate_avg_principled_sheen_brdf(float3 N, float3 I)
 }
 
 ccl_device float3
-calculate_principled_sheen_brdf(float3 N, float3 V, float3 L, float3 H, float *pdf)
+calculate_principled_sheen_brdf(float3 N, float3 V, float3 L, float3 H, ccl_private float *pdf)
 {
   float NdotL = dot(N, L);
   float NdotV = dot(N, V);
@@ -63,7 +63,8 @@ calculate_principled_sheen_brdf(float3 N, float3 V, float3 L, float3 H, float *p
   return make_float3(value, value, value);
 }
 
-ccl_device int bsdf_principled_sheen_setup(const ShaderData *sd, PrincipledSheenBsdf *bsdf)
+ccl_device int bsdf_principled_sheen_setup(ccl_private const ShaderData *sd,
+                                           ccl_private PrincipledSheenBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_PRINCIPLED_SHEEN_ID;
   bsdf->avg_value = calculate_avg_principled_sheen_brdf(bsdf->N, sd->I);
@@ -71,12 +72,12 @@ ccl_device int bsdf_principled_sheen_setup(const ShaderData *sd, PrincipledSheen
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_principled_sheen_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_principled_sheen_eval_reflect(ccl_private const ShaderClosure *sc,
                                                      const float3 I,
                                                      const float3 omega_in,
-                                                     float *pdf)
+                                                     ccl_private float *pdf)
 {
-  const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
+  ccl_private const PrincipledSheenBsdf *bsdf = (ccl_private const PrincipledSheenBsdf *)sc;
 
   float3 N = bsdf->N;
   float3 V = I;         // outgoing
@@ -93,28 +94,28 @@ ccl_device float3 bsdf_principled_sheen_eval_reflect(const ShaderClosure *sc,
   }
 }
 
-ccl_device float3 bsdf_principled_sheen_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_principled_sheen_eval_transmit(ccl_private const ShaderClosure *sc,
                                                       const float3 I,
                                                       const float3 omega_in,
-                                                      float *pdf)
+                                                      ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_principled_sheen_sample(const ShaderClosure *sc,
+ccl_device int bsdf_principled_sheen_sample(ccl_private const ShaderClosure *sc,
                                             float3 Ng,
                                             float3 I,
                                             float3 dIdx,
                                             float3 dIdy,
                                             float randu,
                                             float randv,
-                                            float3 *eval,
-                                            float3 *omega_in,
-                                            float3 *domega_in_dx,
-                                            float3 *domega_in_dy,
-                                            float *pdf)
+                                            ccl_private float3 *eval,
+                                            ccl_private float3 *omega_in,
+                                            ccl_private float3 *domega_in_dx,
+                                            ccl_private float3 *domega_in_dy,
+                                            ccl_private float *pdf)
 {
-  const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc;
+  ccl_private const PrincipledSheenBsdf *bsdf = (ccl_private const PrincipledSheenBsdf *)sc;
 
   float3 N = bsdf->N;
 
diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h
index 31283971d5a..758bfd2b2d0 100644
--- a/intern/cycles/kernel/closure/bsdf_reflection.h
+++ b/intern/cycles/kernel/closure/bsdf_reflection.h
@@ -36,42 +36,42 @@ CCL_NAMESPACE_BEGIN
 
 /* REFLECTION */
 
-ccl_device int bsdf_reflection_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_reflection_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_REFLECTION_ID;
   return SD_BSDF;
 }
 
-ccl_device float3 bsdf_reflection_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_reflection_eval_reflect(ccl_private const ShaderClosure *sc,
                                                const float3 I,
                                                const float3 omega_in,
-                                               float *pdf)
+                                               ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_reflection_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_reflection_eval_transmit(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_reflection_sample(const ShaderClosure *sc,
+ccl_device int bsdf_reflection_sample(ccl_private const ShaderClosure *sc,
                                       float3 Ng,
                                       float3 I,
                                       float3 dIdx,
                                       float3 dIdy,
                                       float randu,
                                       float randv,
-                                      float3 *eval,
-                                      float3 *omega_in,
-                                      float3 *domega_in_dx,
-                                      float3 *domega_in_dy,
-                                      float *pdf)
+                                      ccl_private float3 *eval,
+                                      ccl_private float3 *omega_in,
+                                      ccl_private float3 *domega_in_dx,
+                                      ccl_private float3 *domega_in_dy,
+                                      ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float3 N = bsdf->N;
 
   // only one direction is possible
diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h
index cfedb5dfe2c..74e149b059e 100644
--- a/intern/cycles/kernel/closure/bsdf_refraction.h
+++ b/intern/cycles/kernel/closure/bsdf_refraction.h
@@ -36,42 +36,42 @@ CCL_NAMESPACE_BEGIN
 
 /* REFRACTION */
 
-ccl_device int bsdf_refraction_setup(MicrofacetBsdf *bsdf)
+ccl_device int bsdf_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_REFRACTION_ID;
   return SD_BSDF;
 }
 
-ccl_device float3 bsdf_refraction_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_refraction_eval_reflect(ccl_private const ShaderClosure *sc,
                                                const float3 I,
                                                const float3 omega_in,
-                                               float *pdf)
+                                               ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_refraction_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_refraction_eval_transmit(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_refraction_sample(const ShaderClosure *sc,
+ccl_device int bsdf_refraction_sample(ccl_private const ShaderClosure *sc,
                                       float3 Ng,
                                       float3 I,
                                       float3 dIdx,
                                       float3 dIdy,
                                       float randu,
                                       float randv,
-                                      float3 *eval,
-                                      float3 *omega_in,
-                                      float3 *domega_in_dx,
-                                      float3 *domega_in_dy,
-                                      float *pdf)
+                                      ccl_private float3 *eval,
+                                      ccl_private float3 *omega_in,
+                                      ccl_private float3 *domega_in_dx,
+                                      ccl_private float3 *domega_in_dy,
+                                      ccl_private float *pdf)
 {
-  const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc;
+  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
   float m_eta = bsdf->ior;
   float3 N = bsdf->N;
 
diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h
index acdafe0f735..7f20a328b5e 100644
--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@@ -34,7 +34,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct ToonBsdf {
+typedef struct ToonBsdf {
   SHADER_CLOSURE_BASE;
 
   float size;
@@ -45,7 +45,7 @@ static_assert(sizeof(ShaderClosure) >= sizeof(ToonBsdf), "ToonBsdf is too large!
 
 /* DIFFUSE TOON */
 
-ccl_device int bsdf_diffuse_toon_setup(ToonBsdf *bsdf)
+ccl_device int bsdf_diffuse_toon_setup(ccl_private ToonBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
   bsdf->size = saturate(bsdf->size);
@@ -73,12 +73,12 @@ ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth)
   return fminf(max_angle + smooth, M_PI_2_F);
 }
 
-ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_toon_eval_reflect(ccl_private const ShaderClosure *sc,
                                                  const float3 I,
                                                  const float3 omega_in,
-                                                 float *pdf)
+                                                 ccl_private float *pdf)
 {
-  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
   float max_angle = bsdf->size * M_PI_2_F;
   float smooth = bsdf->smooth * M_PI_2_F;
   float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f));
@@ -95,28 +95,28 @@ ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_diffuse_toon_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_diffuse_toon_eval_transmit(ccl_private const ShaderClosure *sc,
                                                   const float3 I,
                                                   const float3 omega_in,
-                                                  float *pdf)
+                                                  ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc,
+ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc,
                                         float3 Ng,
                                         float3 I,
                                         float3 dIdx,
                                         float3 dIdy,
                                         float randu,
                                         float randv,
-                                        float3 *eval,
-                                        float3 *omega_in,
-                                        float3 *domega_in_dx,
-                                        float3 *domega_in_dy,
-                                        float *pdf)
+                                        ccl_private float3 *eval,
+                                        ccl_private float3 *omega_in,
+                                        ccl_private float3 *domega_in_dx,
+                                        ccl_private float3 *domega_in_dy,
+                                        ccl_private float *pdf)
 {
-  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
   float max_angle = bsdf->size * M_PI_2_F;
   float smooth = bsdf->smooth * M_PI_2_F;
   float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
@@ -143,7 +143,7 @@ ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc,
 
 /* GLOSSY TOON */
 
-ccl_device int bsdf_glossy_toon_setup(ToonBsdf *bsdf)
+ccl_device int bsdf_glossy_toon_setup(ccl_private ToonBsdf *bsdf)
 {
   bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID;
   bsdf->size = saturate(bsdf->size);
@@ -152,12 +152,12 @@ ccl_device int bsdf_glossy_toon_setup(ToonBsdf *bsdf)
   return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
 
-ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_glossy_toon_eval_reflect(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
-  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
   float max_angle = bsdf->size * M_PI_2_F;
   float smooth = bsdf->smooth * M_PI_2_F;
   float cosNI = dot(bsdf->N, omega_in);
@@ -180,28 +180,28 @@ ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc,
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_glossy_toon_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_glossy_toon_eval_transmit(ccl_private const ShaderClosure *sc,
                                                  const float3 I,
                                                  const float3 omega_in,
-                                                 float *pdf)
+                                                 ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc,
+ccl_device int bsdf_glossy_toon_sample(ccl_private const ShaderClosure *sc,
                                        float3 Ng,
                                        float3 I,
                                        float3 dIdx,
                                        float3 dIdy,
                                        float randu,
                                        float randv,
-                                       float3 *eval,
-                                       float3 *omega_in,
-                                       float3 *domega_in_dx,
-                                       float3 *domega_in_dy,
-                                       float *pdf)
+                                       ccl_private float3 *eval,
+                                       ccl_private float3 *omega_in,
+                                       ccl_private float3 *domega_in_dx,
+                                       ccl_private float3 *domega_in_dy,
+                                       ccl_private float *pdf)
 {
-  const ToonBsdf *bsdf = (const ToonBsdf *)sc;
+  ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
   float max_angle = bsdf->size * M_PI_2_F;
   float smooth = bsdf->smooth * M_PI_2_F;
   float cosNO = dot(bsdf->N, I);
diff --git a/intern/cycles/kernel/closure/bsdf_transparent.h b/intern/cycles/kernel/closure/bsdf_transparent.h
index f1dc7efb345..8313ab964d7 100644
--- a/intern/cycles/kernel/closure/bsdf_transparent.h
+++ b/intern/cycles/kernel/closure/bsdf_transparent.h
@@ -34,7 +34,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void bsdf_transparent_setup(ShaderData *sd, const float3 weight, int path_flag)
+ccl_device void bsdf_transparent_setup(ccl_private ShaderData *sd,
+                                       const float3 weight,
+                                       int path_flag)
 {
   /* Check cutoff weight. */
   float sample_weight = fabsf(average(weight));
@@ -47,7 +49,7 @@ ccl_device void bsdf_transparent_setup(ShaderData *sd, const float3 weight, int
 
     /* Add weight to existing transparent BSDF. */
     for (int i = 0; i < sd->num_closure; i++) {
-      ShaderClosure *sc = &sd->closure[i];
+      ccl_private ShaderClosure *sc = &sd->closure[i];
 
       if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
         sc->weight += weight;
@@ -68,7 +70,7 @@ ccl_device void bsdf_transparent_setup(ShaderData *sd, const float3 weight, int
     }
 
     /* Create new transparent BSDF. */
-    ShaderClosure *bsdf = closure_alloc(
+    ccl_private ShaderClosure *bsdf = closure_alloc(
         sd, sizeof(ShaderClosure), CLOSURE_BSDF_TRANSPARENT_ID, weight);
 
     if (bsdf) {
@@ -81,34 +83,34 @@ ccl_device void bsdf_transparent_setup(ShaderData *sd, const float3 weight, int
   }
 }
 
-ccl_device float3 bsdf_transparent_eval_reflect(const ShaderClosure *sc,
+ccl_device float3 bsdf_transparent_eval_reflect(ccl_private const ShaderClosure *sc,
                                                 const float3 I,
                                                 const float3 omega_in,
-                                                float *pdf)
+                                                ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 bsdf_transparent_eval_transmit(const ShaderClosure *sc,
+ccl_device float3 bsdf_transparent_eval_transmit(ccl_private const ShaderClosure *sc,
                                                  const float3 I,
                                                  const float3 omega_in,
-                                                 float *pdf)
+                                                 ccl_private float *pdf)
 {
   return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device int bsdf_transparent_sample(const ShaderClosure *sc,
+ccl_device int bsdf_transparent_sample(ccl_private const ShaderClosure *sc,
                                        float3 Ng,
                                        float3 I,
                                        float3 dIdx,
                                        float3 dIdy,
                                        float randu,
                                        float randv,
-                                       float3 *eval,
-                                       float3 *omega_in,
-                                       float3 *domega_in_dx,
-                                       float3 *domega_in_dy,
-                                       float *pdf)
+                                       ccl_private float3 *eval,
+                                       ccl_private float3 *omega_in,
+                                       ccl_private float3 *domega_in_dx,
+                                       ccl_private float3 *domega_in_dy,
+                                       ccl_private float *pdf)
 {
   // only one direction is possible
   *omega_in = -I;
diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h
index beec5f768a1..873494c1e03 100644
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -37,17 +37,17 @@ CCL_NAMESPACE_BEGIN
 ccl_device float fresnel_dielectric(float eta,
                                     const float3 N,
                                     const float3 I,
-                                    float3 *R,
-                                    float3 *T,
+                                    ccl_private float3 *R,
+                                    ccl_private float3 *T,
 #ifdef __RAY_DIFFERENTIALS__
                                     const float3 dIdx,
                                     const float3 dIdy,
-                                    float3 *dRdx,
-                                    float3 *dRdy,
-                                    float3 *dTdx,
-                                    float3 *dTdy,
+                                    ccl_private float3 *dRdx,
+                                    ccl_private float3 *dRdy,
+                                    ccl_private float3 *dTdx,
+                                    ccl_private float3 *dTdy,
 #endif
-                                    bool *is_inside)
+                                    ccl_private bool *is_inside)
 {
   float cos = dot(N, I), neta;
   float3 Nn;
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index 07415c53ec5..9df69e073c1 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -18,7 +18,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-typedef ccl_addr_space struct Bssrdf {
+typedef struct Bssrdf {
   SHADER_CLOSURE_BASE;
 
   float3 radius;
@@ -66,7 +66,9 @@ ccl_device float bssrdf_dipole_compute_alpha_prime(float rd, float fourthirdA)
   return xmid;
 }
 
-ccl_device void bssrdf_setup_radius(Bssrdf *bssrdf, const ClosureType type, const float eta)
+ccl_device void bssrdf_setup_radius(ccl_private Bssrdf *bssrdf,
+                                    const ClosureType type,
+                                    const float eta)
 {
   if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) {
     /* Scale mean free path length so it gives similar looking result to older
@@ -114,7 +116,7 @@ ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r)
   return 0.25f * M_1_PI_F * r;
 }
 
-ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf)
+ccl_device void bssrdf_burley_setup(ccl_private Bssrdf *bssrdf)
 {
   /* Mean free path length. */
   const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
@@ -195,7 +197,10 @@ ccl_device_forceinline float bssrdf_burley_root_find(float xi)
   return r;
 }
 
-ccl_device void bssrdf_burley_sample(const float d, float xi, float *r, float *h)
+ccl_device void bssrdf_burley_sample(const float d,
+                                     float xi,
+                                     ccl_private float *r,
+                                     ccl_private float *h)
 {
   const float Rm = BURLEY_TRUNCATE * d;
   const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
@@ -221,7 +226,10 @@ ccl_device float bssrdf_num_channels(const float3 radius)
   return channels;
 }
 
-ccl_device void bssrdf_sample(const float3 radius, float xi, float *r, float *h)
+ccl_device void bssrdf_sample(const float3 radius,
+                              float xi,
+                              ccl_private float *r,
+                              ccl_private float *h)
 {
   const float num_channels = bssrdf_num_channels(radius);
   float sampled_radius;
@@ -261,9 +269,10 @@ ccl_device_forceinline float bssrdf_pdf(const float3 radius, float r)
 
 /* Setup */
 
-ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
+ccl_device_inline ccl_private Bssrdf *bssrdf_alloc(ccl_private ShaderData *sd, float3 weight)
 {
-  Bssrdf *bssrdf = (Bssrdf *)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
+  ccl_private Bssrdf *bssrdf = (ccl_private Bssrdf *)closure_alloc(
+      sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
 
   if (bssrdf == NULL) {
     return NULL;
@@ -274,13 +283,16 @@ ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
   return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL;
 }
 
-ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type, const float ior)
+ccl_device int bssrdf_setup(ccl_private ShaderData *sd,
+                            ccl_private Bssrdf *bssrdf,
+                            ClosureType type,
+                            const float ior)
 {
   int flag = 0;
 
   /* Add retro-reflection component as separate diffuse BSDF. */
   if (bssrdf->roughness != FLT_MAX) {
-    PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+    ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
         sd, sizeof(PrincipledDiffuseBsdf), bssrdf->weight);
 
     if (bsdf) {
@@ -321,7 +333,7 @@ ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type, co
     /* Add diffuse BSDF if any radius too small. */
 #ifdef __PRINCIPLED__
     if (bssrdf->roughness != FLT_MAX) {
-      PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+      ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
           sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
 
       if (bsdf) {
@@ -333,7 +345,8 @@ ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type, co
     else
 #endif /* __PRINCIPLED__ */
     {
-      DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight);
+      ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
+          sd, sizeof(DiffuseBsdf), diffuse_weight);
 
       if (bsdf) {
         bsdf->N = bssrdf->N;
diff --git a/intern/cycles/kernel/closure/emissive.h b/intern/cycles/kernel/closure/emissive.h
index a2519d97618..3d56e989522 100644
--- a/intern/cycles/kernel/closure/emissive.h
+++ b/intern/cycles/kernel/closure/emissive.h
@@ -36,7 +36,7 @@ CCL_NAMESPACE_BEGIN
 
 /* BACKGROUND CLOSURE */
 
-ccl_device void background_setup(ShaderData *sd, const float3 weight)
+ccl_device void background_setup(ccl_private ShaderData *sd, const float3 weight)
 {
   if (sd->flag & SD_EMISSION) {
     sd->closure_emission_background += weight;
@@ -49,7 +49,7 @@ ccl_device void background_setup(ShaderData *sd, const float3 weight)
 
 /* EMISSION CLOSURE */
 
-ccl_device void emission_setup(ShaderData *sd, const float3 weight)
+ccl_device void emission_setup(ccl_private ShaderData *sd, const float3 weight)
 {
   if (sd->flag & SD_EMISSION) {
     sd->closure_emission_background += weight;
@@ -69,8 +69,11 @@ ccl_device float emissive_pdf(const float3 Ng, const float3 I)
   return (cosNO > 0.0f) ? 1.0f : 0.0f;
 }
 
-ccl_device void emissive_sample(
-    const float3 Ng, float randu, float randv, float3 *omega_out, float *pdf)
+ccl_device void emissive_sample(const float3 Ng,
+                                float randu,
+                                float randv,
+                                ccl_private float3 *omega_out,
+                                ccl_private float *pdf)
 {
   /* todo: not implemented and used yet */
 }
diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h
index 69959a3f21b..023fb3ac4ea 100644
--- a/intern/cycles/kernel/closure/volume.h
+++ b/intern/cycles/kernel/closure/volume.h
@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
 
 /* VOLUME EXTINCTION */
 
-ccl_device void volume_extinction_setup(ShaderData *sd, float3 weight)
+ccl_device void volume_extinction_setup(ccl_private ShaderData *sd, float3 weight)
 {
   if (sd->flag & SD_EXTINCTION) {
     sd->closure_transparent_extinction += weight;
@@ -33,7 +33,7 @@ ccl_device void volume_extinction_setup(ShaderData *sd, float3 weight)
 
 /* HENYEY-GREENSTEIN CLOSURE */
 
-typedef ccl_addr_space struct HenyeyGreensteinVolume {
+typedef struct HenyeyGreensteinVolume {
   SHADER_CLOSURE_BASE;
 
   float g;
@@ -51,7 +51,7 @@ ccl_device float single_peaked_henyey_greenstein(float cos_theta, float g)
          (M_1_PI_F * 0.25f);
 };
 
-ccl_device int volume_henyey_greenstein_setup(HenyeyGreensteinVolume *volume)
+ccl_device int volume_henyey_greenstein_setup(ccl_private HenyeyGreensteinVolume *volume)
 {
   volume->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID;
 
@@ -61,10 +61,10 @@ ccl_device int volume_henyey_greenstein_setup(HenyeyGreensteinVolume *volume)
   return SD_SCATTER;
 }
 
-ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderVolumeClosure *svc,
+ccl_device float3 volume_henyey_greenstein_eval_phase(ccl_private const ShaderVolumeClosure *svc,
                                                       const float3 I,
                                                       float3 omega_in,
-                                                      float *pdf)
+                                                      ccl_private float *pdf)
 {
   float g = svc->g;
 
@@ -81,7 +81,7 @@ ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderVolumeClosure
 }
 
 ccl_device float3
-henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pdf)
+henyey_greenstrein_sample(float3 D, float g, float randu, float randv, ccl_private float *pdf)
 {
   /* match pdf for small g */
   float cos_theta;
@@ -112,17 +112,17 @@ henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pd
   return dir;
 }
 
-ccl_device int volume_henyey_greenstein_sample(const ShaderVolumeClosure *svc,
+ccl_device int volume_henyey_greenstein_sample(ccl_private const ShaderVolumeClosure *svc,
                                                float3 I,
                                                float3 dIdx,
                                                float3 dIdy,
                                                float randu,
                                                float randv,
-                                               float3 *eval,
-                                               float3 *omega_in,
-                                               float3 *domega_in_dx,
-                                               float3 *domega_in_dy,
-                                               float *pdf)
+                                               ccl_private float3 *eval,
+                                               ccl_private float3 *omega_in,
+                                               ccl_private float3 *domega_in_dx,
+                                               ccl_private float3 *domega_in_dy,
+                                               ccl_private float *pdf)
 {
   float g = svc->g;
 
@@ -141,22 +141,22 @@ ccl_device int volume_henyey_greenstein_sample(const ShaderVolumeClosure *svc,
 
 /* VOLUME CLOSURE */
 
-ccl_device float3 volume_phase_eval(const ShaderData *sd,
-                                    const ShaderVolumeClosure *svc,
+ccl_device float3 volume_phase_eval(ccl_private const ShaderData *sd,
+                                    ccl_private const ShaderVolumeClosure *svc,
                                     float3 omega_in,
-                                    float *pdf)
+                                    ccl_private float *pdf)
 {
   return volume_henyey_greenstein_eval_phase(svc, sd->I, omega_in, pdf);
 }
 
-ccl_device int volume_phase_sample(const ShaderData *sd,
-                                   const ShaderVolumeClosure *svc,
+ccl_device int volume_phase_sample(ccl_private const ShaderData *sd,
+                                   ccl_private const ShaderVolumeClosure *svc,
                                    float randu,
                                    float randv,
-                                   float3 *eval,
-                                   float3 *omega_in,
-                                   differential3 *domega_in,
-                                   float *pdf)
+                                   ccl_private float3 *eval,
+                                   ccl_private float3 *omega_in,
+                                   ccl_private differential3 *domega_in,
+                                   ccl_private float *pdf)
 {
   return volume_henyey_greenstein_sample(svc,
                                          sd->I,
@@ -187,7 +187,10 @@ ccl_device float volume_channel_get(float3 value, int channel)
   return (channel == 0) ? value.x : ((channel == 1) ? value.y : value.z);
 }
 
-ccl_device int volume_sample_channel(float3 albedo, float3 throughput, float rand, float3 *pdf)
+ccl_device int volume_sample_channel(float3 albedo,
+                                     float3 throughput,
+                                     float rand,
+                                     ccl_private float3 *pdf)
 {
   /* Sample color channel proportional to throughput and single scattering
    * albedo, to significantly reduce noise with many bounce, following:
diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h
index bfd936c7bbd..888c0d5d872 100644
--- a/intern/cycles/kernel/device/cpu/compat.h
+++ b/intern/cycles/kernel/device/cpu/compat.h
@@ -32,8 +32,6 @@
 #include "util/util_texture.h"
 #include "util/util_types.h"
 
-#define ccl_addr_space
-
 /* On x86_64, versions of glibc < 2.16 have an issue where expf is
  * much slower than the double version.  This was fixed in glibc 2.16.
  */
diff --git a/intern/cycles/kernel/device/cuda/compat.h b/intern/cycles/kernel/device/cuda/compat.h
index 3c85a8e7bd2..685c7a5b753 100644
--- a/intern/cycles/kernel/device/cuda/compat.h
+++ b/intern/cycles/kernel/device/cuda/compat.h
@@ -59,7 +59,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_shared __shared__
 #define ccl_private
 #define ccl_may_alias
-#define ccl_addr_space
 #define ccl_restrict __restrict__
 #define ccl_loop_no_unroll
 #define ccl_align(n) __align__(n)
diff --git a/intern/cycles/kernel/device/hip/compat.h b/intern/cycles/kernel/device/hip/compat.h
index 95338fe7d6e..089976d84e4 100644
--- a/intern/cycles/kernel/device/hip/compat.h
+++ b/intern/cycles/kernel/device/hip/compat.h
@@ -52,7 +52,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_shared __shared__
 #define ccl_private
 #define ccl_may_alias
-#define ccl_addr_space
 #define ccl_restrict __restrict__
 #define ccl_loop_no_unroll
 #define ccl_align(n) __align__(n)
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
new file mode 100644
index 00000000000..77cea30914c
--- /dev/null
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#define __KERNEL_GPU__
+#define __KERNEL_METAL__
+#define CCL_NAMESPACE_BEGIN
+#define CCL_NAMESPACE_END
+
+#ifndef ATTR_FALLTHROUGH
+#  define ATTR_FALLTHROUGH
+#endif
+
+#include <metal_atomic>
+#include <metal_pack>
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wsign-compare"
+
+/* Qualifiers: map the Cycles ccl_* qualifiers onto Metal address spaces and attributes. */
+
+#define ccl_device
+#define ccl_device_inline ccl_device
+#define ccl_device_forceinline ccl_device
+#define ccl_device_noinline ccl_device __attribute__((noinline))
+#define ccl_device_noinline_cpu ccl_device
+#define ccl_global device
+#define ccl_static_constant static constant constexpr
+#define ccl_device_constant constant
+#define ccl_constant const device
+#define ccl_gpu_shared threadgroup
+#define ccl_private thread
+#define ccl_may_alias
+#define ccl_restrict __restrict
+#define ccl_loop_no_unroll
+#define ccl_align(n) alignas(n)
+#define ccl_optional_struct_init
+
+/* Assertions are not supported on Metal, so kernel_assert() expands to nothing. */
+
+#define kernel_assert(cond)
+
+/* Redefine the make_<type>() helpers as Metal-style constructor calls. */
+#ifdef make_float2
+#  undef make_float2
+#endif
+#ifdef make_float3
+#  undef make_float3
+#endif
+#ifdef make_float4
+#  undef make_float4
+#endif
+#ifdef make_int2
+#  undef make_int2
+#endif
+#ifdef make_int3
+#  undef make_int3
+#endif
+#ifdef make_int4
+#  undef make_int4
+#endif
+#ifdef make_uchar4
+#  undef make_uchar4
+#endif
+
+#define make_float2(x, y) float2(x, y)
+#define make_float3(x, y, z) float3(x, y, z)
+#define make_float4(x, y, z, w) float4(x, y, z, w)
+#define make_int2(x, y) int2(x, y)
+#define make_int3(x, y, z) int3(x, y, z)
+#define make_int4(x, y, z, w) int4(x, y, z, w)
+#define make_uchar4(x, y, z, w) uchar4(x, y, z, w)
+
+/* Math functions */
+
+#define __uint_as_float(x) as_type<float>(x)
+#define __float_as_uint(x) as_type<uint>(x)
+#define __int_as_float(x) as_type<float>(x)
+#define __float_as_int(x) as_type<int>(x)
+#define __float2half(x) half(x)
+#define powf(x, y) pow(float(x), float(y))
+#define fabsf(x) fabs(float(x))
+#define copysignf(x, y) copysign(float(x), float(y))
+#define asinf(x) asin(float(x))
+#define acosf(x) acos(float(x))
+#define atanf(x) atan(float(x))
+#define floorf(x) floor(float(x))
+#define ceilf(x) ceil(float(x))
+#define hypotf(x, y) hypot(float(x), float(y))
+#define atan2f(x, y) atan2(float(x), float(y))
+#define fmaxf(x, y) fmax(float(x), float(y))
+#define fminf(x, y) fmin(float(x), float(y))
+#define fmodf(x, y) fmod(float(x), float(y))
+#define sinhf(x) sinh(float(x))
+#define coshf(x) cosh(float(x))
+#define tanhf(x) tanh(float(x))
+
+/* Use the fast-math variants (`fast::` namespace), which may have lower precision,
+ * for performance; no issues found so far. */
+#define trigmode fast
+#define sinf(x) trigmode::sin(float(x))
+#define cosf(x) trigmode::cos(float(x))
+#define tanf(x) trigmode::tan(float(x))
+#define expf(x) trigmode::exp(float(x))
+#define sqrtf(x) trigmode::sqrt(float(x))
+#define logf(x) trigmode::log(float(x))
+
+#define NULL 0
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h
index fb9e094b535..c9ec9be05df 100644
--- a/intern/cycles/kernel/device/optix/compat.h
+++ b/intern/cycles/kernel/device/optix/compat.h
@@ -58,7 +58,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_shared __shared__
 #define ccl_private
 #define ccl_may_alias
-#define ccl_addr_space
 #define ccl_restrict __restrict__
 #define ccl_loop_no_unroll
 #define ccl_align(n) __align__(n)
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
index 9532a21fec7..850ac44e6e0 100644
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -27,9 +27,11 @@ CCL_NAMESPACE_BEGIN
  * Lookup of attributes is different between OSL and SVM, as OSL is ustring
  * based while for SVM we use integer ids. */
 
-ccl_device_inline uint subd_triangle_patch(const KernelGlobals *kg, const ShaderData *sd);
+ccl_device_inline uint subd_triangle_patch(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderData *sd);
 
-ccl_device_inline uint attribute_primitive_type(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline uint attribute_primitive_type(ccl_global const KernelGlobals *kg,
+                                                ccl_private const ShaderData *sd)
 {
   if ((sd->type & PRIMITIVE_ALL_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) {
     return ATTR_PRIM_SUBD;
@@ -48,13 +50,13 @@ ccl_device_inline AttributeDescriptor attribute_not_found()
 
 /* Find attribute based on ID */
 
-ccl_device_inline uint object_attribute_map_offset(const KernelGlobals *kg, int object)
+ccl_device_inline uint object_attribute_map_offset(ccl_global const KernelGlobals *kg, int object)
 {
   return kernel_tex_fetch(__objects, object).attribute_map_offset;
 }
 
-ccl_device_inline AttributeDescriptor find_attribute(const KernelGlobals *kg,
-                                                     const ShaderData *sd,
+ccl_device_inline AttributeDescriptor find_attribute(ccl_global const KernelGlobals *kg,
+                                                     ccl_private const ShaderData *sd,
                                                      uint id)
 {
   if (sd->object == OBJECT_NONE) {
@@ -100,8 +102,8 @@ ccl_device_inline AttributeDescriptor find_attribute(const KernelGlobals *kg,
 
 /* Transform matrix attribute on meshes */
 
-ccl_device Transform primitive_attribute_matrix(const KernelGlobals *kg,
-                                                const ShaderData *sd,
+ccl_device Transform primitive_attribute_matrix(ccl_global const KernelGlobals *kg,
+                                                ccl_private const ShaderData *sd,
                                                 const AttributeDescriptor desc)
 {
   Transform tfm;
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 811558edae9..07f218d781b 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -27,11 +27,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Reading attributes on various curve elements */
 
-ccl_device float curve_attribute_float(const KernelGlobals *kg,
-                                       const ShaderData *sd,
+ccl_device float curve_attribute_float(ccl_global const KernelGlobals *kg,
+                                       ccl_private const ShaderData *sd,
                                        const AttributeDescriptor desc,
-                                       float *dx,
-                                       float *dy)
+                                       ccl_private float *dx,
+                                       ccl_private float *dy)
 {
   if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
     KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
@@ -69,11 +69,11 @@ ccl_device float curve_attribute_float(const KernelGlobals *kg,
   }
 }
 
-ccl_device float2 curve_attribute_float2(const KernelGlobals *kg,
-                                         const ShaderData *sd,
+ccl_device float2 curve_attribute_float2(ccl_global const KernelGlobals *kg,
+                                         ccl_private const ShaderData *sd,
                                          const AttributeDescriptor desc,
-                                         float2 *dx,
-                                         float2 *dy)
+                                         ccl_private float2 *dx,
+                                         ccl_private float2 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
     KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
@@ -115,11 +115,11 @@ ccl_device float2 curve_attribute_float2(const KernelGlobals *kg,
   }
 }
 
-ccl_device float3 curve_attribute_float3(const KernelGlobals *kg,
-                                         const ShaderData *sd,
+ccl_device float3 curve_attribute_float3(ccl_global const KernelGlobals *kg,
+                                         ccl_private const ShaderData *sd,
                                          const AttributeDescriptor desc,
-                                         float3 *dx,
-                                         float3 *dy)
+                                         ccl_private float3 *dx,
+                                         ccl_private float3 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
     KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
@@ -157,11 +157,11 @@ ccl_device float3 curve_attribute_float3(const KernelGlobals *kg,
   }
 }
 
-ccl_device float4 curve_attribute_float4(const KernelGlobals *kg,
-                                         const ShaderData *sd,
+ccl_device float4 curve_attribute_float4(ccl_global const KernelGlobals *kg,
+                                         ccl_private const ShaderData *sd,
                                          const AttributeDescriptor desc,
-                                         float4 *dx,
-                                         float4 *dy)
+                                         ccl_private float4 *dx,
+                                         ccl_private float4 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
     KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
@@ -201,7 +201,8 @@ ccl_device float4 curve_attribute_float4(const KernelGlobals *kg,
 
 /* Curve thickness */
 
-ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float curve_thickness(ccl_global const KernelGlobals *kg,
+                                 ccl_private const ShaderData *sd)
 {
   float r = 0.0f;
 
@@ -229,7 +230,8 @@ ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd)
 /* Curve location for motion pass, linear interpolation between keys and
  * ignoring radius because we do the same for the motion keys */
 
-ccl_device float3 curve_motion_center_location(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 curve_motion_center_location(ccl_global const KernelGlobals *kg,
+                                               ccl_private const ShaderData *sd)
 {
   KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
   int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
@@ -245,7 +247,8 @@ ccl_device float3 curve_motion_center_location(const KernelGlobals *kg, const Sh
 
 /* Curve tangent normal */
 
-ccl_device float3 curve_tangent_normal(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 curve_tangent_normal(ccl_global const KernelGlobals *kg,
+                                       ccl_private const ShaderData *sd)
 {
   float3 tgN = make_float3(0.0f, 0.0f, 0.0f);
 
@@ -265,12 +268,12 @@ ccl_device float3 curve_tangent_normal(const KernelGlobals *kg, const ShaderData
 
 /* Curve bounds utility function */
 
-ccl_device_inline void curvebounds(float *lower,
-                                   float *upper,
-                                   float *extremta,
-                                   float *extrema,
-                                   float *extremtb,
-                                   float *extremb,
+ccl_device_inline void curvebounds(ccl_private float *lower,
+                                   ccl_private float *upper,
+                                   ccl_private float *extremta,
+                                   ccl_private float *extrema,
+                                   ccl_private float *extremtb,
+                                   ccl_private float *extremb,
                                    float p0,
                                    float p1,
                                    float p2,
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index 30addb9616d..04af8ea1421 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -86,11 +86,11 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
                                           const float3 cylinder_end,
                                           const float cylinder_radius,
                                           const float3 ray_dir,
-                                          float2 *t_o,
-                                          float *u0_o,
-                                          float3 *Ng0_o,
-                                          float *u1_o,
-                                          float3 *Ng1_o)
+                                          ccl_private float2 *t_o,
+                                          ccl_private float *u0_o,
+                                          ccl_private float3 *Ng0_o,
+                                          ccl_private float *u1_o,
+                                          ccl_private float3 *Ng1_o)
 {
   /* Calculate quadratic equation to solve. */
   const float rl = 1.0f / len(cylinder_end - cylinder_start);
@@ -169,13 +169,13 @@ ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, co
 }
 
 ccl_device bool curve_intersect_iterative(const float3 ray_dir,
-                                          float *ray_tfar,
+                                          ccl_private float *ray_tfar,
                                           const float dt,
                                           const float4 curve[4],
                                           float u,
                                           float t,
                                           const bool use_backfacing,
-                                          Intersection *isect)
+                                          ccl_private Intersection *isect)
 {
   const float length_ray_dir = len(ray_dir);
 
@@ -265,7 +265,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
                                           const float3 ray_dir,
                                           float ray_tfar,
                                           float4 curve[4],
-                                          Intersection *isect)
+                                          ccl_private Intersection *isect)
 {
   /* Move ray closer to make intersection stable. */
   const float3 center = float4_to_float3(0.25f * (curve[0] + curve[1] + curve[2] + curve[3]));
@@ -474,9 +474,9 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar,
                                              const float3 quad_v1,
                                              const float3 quad_v2,
                                              const float3 quad_v3,
-                                             float *u_o,
-                                             float *v_o,
-                                             float *t_o)
+                                             ccl_private float *u_o,
+                                             ccl_private float *v_o,
+                                             ccl_private float *t_o)
 {
   /* Calculate vertices relative to ray origin? */
   const float3 O = make_float3(0.0f, 0.0f, 0.0f);
@@ -550,7 +550,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
                                         float ray_tfar,
                                         const int N,
                                         float4 curve[4],
-                                        Intersection *isect)
+                                        ccl_private Intersection *isect)
 {
   /* Transform control points into ray space. */
   float3 ray_space[3];
@@ -625,8 +625,8 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
   return false;
 }
 
-ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg,
-                                            Intersection *isect,
+ccl_device_forceinline bool curve_intersect(ccl_global const KernelGlobals *kg,
+                                            ccl_private Intersection *isect,
                                             const float3 P,
                                             const float3 dir,
                                             const float tmax,
@@ -679,8 +679,8 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg,
   }
 }
 
-ccl_device_inline void curve_shader_setup(const KernelGlobals *kg,
-                                          ShaderData *sd,
+ccl_device_inline void curve_shader_setup(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
                                           float3 P,
                                           float3 D,
                                           float t,
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 5294da03145..8e32df439cd 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -27,10 +27,10 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __HAIR__
 
-ccl_device_inline int find_attribute_curve_motion(const KernelGlobals *kg,
+ccl_device_inline int find_attribute_curve_motion(ccl_global const KernelGlobals *kg,
                                                   int object,
                                                   uint id,
-                                                  AttributeElement *elem)
+                                                  ccl_private AttributeElement *elem)
 {
   /* todo: find a better (faster) solution for this, maybe store offset per object.
    *
@@ -52,7 +52,7 @@ ccl_device_inline int find_attribute_curve_motion(const KernelGlobals *kg,
   return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
 }
 
-ccl_device_inline void motion_curve_keys_for_step_linear(const KernelGlobals *kg,
+ccl_device_inline void motion_curve_keys_for_step_linear(ccl_global const KernelGlobals *kg,
                                                          int offset,
                                                          int numkeys,
                                                          int numsteps,
@@ -79,8 +79,13 @@ ccl_device_inline void motion_curve_keys_for_step_linear(const KernelGlobals *kg
 }
 
 /* return 2 curve key locations */
-ccl_device_inline void motion_curve_keys_linear(
-    const KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2])
+ccl_device_inline void motion_curve_keys_linear(ccl_global const KernelGlobals *kg,
+                                                int object,
+                                                int prim,
+                                                float time,
+                                                int k0,
+                                                int k1,
+                                                float4 keys[2])
 {
   /* get motion info */
   int numsteps, numkeys;
@@ -107,7 +112,7 @@ ccl_device_inline void motion_curve_keys_linear(
   keys[1] = (1.0f - t) * keys[1] + t * next_keys[1];
 }
 
-ccl_device_inline void motion_curve_keys_for_step(const KernelGlobals *kg,
+ccl_device_inline void motion_curve_keys_for_step(ccl_global const KernelGlobals *kg,
                                                   int offset,
                                                   int numkeys,
                                                   int numsteps,
@@ -140,7 +145,7 @@ ccl_device_inline void motion_curve_keys_for_step(const KernelGlobals *kg,
 }
 
 /* return 2 curve key locations */
-ccl_device_inline void motion_curve_keys(const KernelGlobals *kg,
+ccl_device_inline void motion_curve_keys(ccl_global const KernelGlobals *kg,
                                          int object,
                                          int prim,
                                          float time,
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index b7f182090aa..161b358110d 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -31,10 +31,10 @@ CCL_NAMESPACE_BEGIN
 
 /* Time interpolation of vertex positions and normals */
 
-ccl_device_inline int find_attribute_motion(const KernelGlobals *kg,
+ccl_device_inline int find_attribute_motion(ccl_global const KernelGlobals *kg,
                                             int object,
                                             uint id,
-                                            AttributeElement *elem)
+                                            ccl_private AttributeElement *elem)
 {
   /* todo: find a better (faster) solution for this, maybe store offset per object */
   uint attr_offset = object_attribute_map_offset(kg, object);
@@ -62,7 +62,7 @@ ccl_device_inline int find_attribute_motion(const KernelGlobals *kg,
   return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
 }
 
-ccl_device_inline void motion_triangle_verts_for_step(const KernelGlobals *kg,
+ccl_device_inline void motion_triangle_verts_for_step(ccl_global const KernelGlobals *kg,
                                                       uint4 tri_vindex,
                                                       int offset,
                                                       int numverts,
@@ -89,7 +89,7 @@ ccl_device_inline void motion_triangle_verts_for_step(const KernelGlobals *kg,
   }
 }
 
-ccl_device_inline void motion_triangle_normals_for_step(const KernelGlobals *kg,
+ccl_device_inline void motion_triangle_normals_for_step(ccl_global const KernelGlobals *kg,
                                                         uint4 tri_vindex,
                                                         int offset,
                                                         int numverts,
@@ -117,7 +117,7 @@ ccl_device_inline void motion_triangle_normals_for_step(const KernelGlobals *kg,
 }
 
 ccl_device_inline void motion_triangle_vertices(
-    const KernelGlobals *kg, int object, int prim, float time, float3 verts[3])
+    ccl_global const KernelGlobals *kg, int object, int prim, float time, float3 verts[3])
 {
   /* get motion info */
   int numsteps, numverts;
@@ -146,8 +146,13 @@ ccl_device_inline void motion_triangle_vertices(
   verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
 }
 
-ccl_device_inline float3 motion_triangle_smooth_normal(
-    const KernelGlobals *kg, float3 Ng, int object, int prim, float u, float v, float time)
+ccl_device_inline float3 motion_triangle_smooth_normal(ccl_global const KernelGlobals *kg,
+                                                       float3 Ng,
+                                                       int object,
+                                                       int prim,
+                                                       float u,
+                                                       float v,
+                                                       float time)
 {
   /* get motion info */
   int numsteps, numverts;
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
index 6fb9756ff92..94d00875f0a 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -34,8 +34,8 @@ CCL_NAMESPACE_BEGIN
  * a closer distance.
  */
 
-ccl_device_inline float3 motion_triangle_refine(const KernelGlobals *kg,
-                                                ShaderData *sd,
+ccl_device_inline float3 motion_triangle_refine(ccl_global const KernelGlobals *kg,
+                                                ccl_private ShaderData *sd,
                                                 float3 P,
                                                 float3 D,
                                                 float t,
@@ -92,8 +92,8 @@ ccl_device_noinline
 ccl_device_inline
 #  endif
     float3
-    motion_triangle_refine_local(const KernelGlobals *kg,
-                                 ShaderData *sd,
+    motion_triangle_refine_local(ccl_global const KernelGlobals *kg,
+                                 ccl_private ShaderData *sd,
                                  float3 P,
                                  float3 D,
                                  float t,
@@ -145,8 +145,8 @@ ccl_device_inline
  * time and do a ray intersection with the resulting triangle.
  */
 
-ccl_device_inline bool motion_triangle_intersect(const KernelGlobals *kg,
-                                                 Intersection *isect,
+ccl_device_inline bool motion_triangle_intersect(ccl_global const KernelGlobals *kg,
+                                                 ccl_private Intersection *isect,
                                                  float3 P,
                                                  float3 dir,
                                                  float tmax,
@@ -202,8 +202,8 @@ ccl_device_inline bool motion_triangle_intersect(const KernelGlobals *kg,
  * Returns whether traversal should be stopped.
  */
 #ifdef __BVH_LOCAL__
-ccl_device_inline bool motion_triangle_intersect_local(const KernelGlobals *kg,
-                                                       LocalIntersection *local_isect,
+ccl_device_inline bool motion_triangle_intersect_local(ccl_global const KernelGlobals *kg,
+                                                       ccl_private LocalIntersection *local_isect,
                                                        float3 P,
                                                        float3 dir,
                                                        float time,
@@ -211,7 +211,7 @@ ccl_device_inline bool motion_triangle_intersect_local(const KernelGlobals *kg,
                                                        int local_object,
                                                        int prim_addr,
                                                        float tmax,
-                                                       uint *lcg_state,
+                                                       ccl_private uint *lcg_state,
                                                        int max_hits)
 {
   /* Only intersect with matching object, for instanced objects we
@@ -285,7 +285,7 @@ ccl_device_inline bool motion_triangle_intersect_local(const KernelGlobals *kg,
   }
 
   /* Record intersection. */
-  Intersection *isect = &local_isect->hits[hit];
+  ccl_private Intersection *isect = &local_isect->hits[hit];
   isect->t = t;
   isect->u = u;
   isect->v = v;
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
index 85c4f0ca522..03bb1fba2a2 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
@@ -34,8 +34,8 @@ CCL_NAMESPACE_BEGIN
  * normals */
 
 /* return 3 triangle vertex normals */
-ccl_device_noinline void motion_triangle_shader_setup(const KernelGlobals *kg,
-                                                      ShaderData *sd,
+ccl_device_noinline void motion_triangle_shader_setup(ccl_global const KernelGlobals *kg,
+                                                      ccl_private ShaderData *sd,
                                                       const float3 P,
                                                       const float3 D,
                                                       const float ray_t,
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 7d6ad7b4fe3..730c01d4709 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -37,7 +37,7 @@ enum ObjectVectorTransform { OBJECT_PASS_MOTION_PRE = 0, OBJECT_PASS_MOTION_POST
 
 /* Object to world space transformation */
 
-ccl_device_inline Transform object_fetch_transform(const KernelGlobals *kg,
+ccl_device_inline Transform object_fetch_transform(ccl_global const KernelGlobals *kg,
                                                    int object,
                                                    enum ObjectTransform type)
 {
@@ -51,7 +51,9 @@ ccl_device_inline Transform object_fetch_transform(const KernelGlobals *kg,
 
 /* Lamp to world space transformation */
 
-ccl_device_inline Transform lamp_fetch_transform(const KernelGlobals *kg, int lamp, bool inverse)
+ccl_device_inline Transform lamp_fetch_transform(ccl_global const KernelGlobals *kg,
+                                                 int lamp,
+                                                 bool inverse)
 {
   if (inverse) {
     return kernel_tex_fetch(__lights, lamp).itfm;
@@ -63,7 +65,7 @@ ccl_device_inline Transform lamp_fetch_transform(const KernelGlobals *kg, int la
 
 /* Object to world space transformation for motion vectors */
 
-ccl_device_inline Transform object_fetch_motion_pass_transform(const KernelGlobals *kg,
+ccl_device_inline Transform object_fetch_motion_pass_transform(ccl_global const KernelGlobals *kg,
                                                                int object,
                                                                enum ObjectVectorTransform type)
 {
@@ -74,12 +76,12 @@ ccl_device_inline Transform object_fetch_motion_pass_transform(const KernelGloba
 /* Motion blurred object transformations */
 
 #ifdef __OBJECT_MOTION__
-ccl_device_inline Transform object_fetch_transform_motion(const KernelGlobals *kg,
+ccl_device_inline Transform object_fetch_transform_motion(ccl_global const KernelGlobals *kg,
                                                           int object,
                                                           float time)
 {
   const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset;
-  const ccl_global DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset);
+  ccl_global const DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset);
   const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1;
 
   Transform tfm;
@@ -88,10 +90,10 @@ ccl_device_inline Transform object_fetch_transform_motion(const KernelGlobals *k
   return tfm;
 }
 
-ccl_device_inline Transform object_fetch_transform_motion_test(const KernelGlobals *kg,
+ccl_device_inline Transform object_fetch_transform_motion_test(ccl_global const KernelGlobals *kg,
                                                                int object,
                                                                float time,
-                                                               Transform *itfm)
+                                                               ccl_private Transform *itfm)
 {
   int object_flag = kernel_tex_fetch(__object_flag, object);
   if (object_flag & SD_OBJECT_MOTION) {
@@ -115,7 +117,8 @@ ccl_device_inline Transform object_fetch_transform_motion_test(const KernelGloba
 
 /* Get transform matrix for shading point. */
 
-ccl_device_inline Transform object_get_transform(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline Transform object_get_transform(ccl_global const KernelGlobals *kg,
+                                                 ccl_private const ShaderData *sd)
 {
 #ifdef __OBJECT_MOTION__
   return (sd->object_flag & SD_OBJECT_MOTION) ?
@@ -126,8 +129,8 @@ ccl_device_inline Transform object_get_transform(const KernelGlobals *kg, const
 #endif
 }
 
-ccl_device_inline Transform object_get_inverse_transform(const KernelGlobals *kg,
-                                                         const ShaderData *sd)
+ccl_device_inline Transform object_get_inverse_transform(ccl_global const KernelGlobals *kg,
+                                                         ccl_private const ShaderData *sd)
 {
 #ifdef __OBJECT_MOTION__
   return (sd->object_flag & SD_OBJECT_MOTION) ?
@@ -139,9 +142,9 @@ ccl_device_inline Transform object_get_inverse_transform(const KernelGlobals *kg
 }
 /* Transform position from object to world space */
 
-ccl_device_inline void object_position_transform(const KernelGlobals *kg,
-                                                 const ShaderData *sd,
-                                                 float3 *P)
+ccl_device_inline void object_position_transform(ccl_global const KernelGlobals *kg,
+                                                 ccl_private const ShaderData *sd,
+                                                 ccl_private float3 *P)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -156,9 +159,9 @@ ccl_device_inline void object_position_transform(const KernelGlobals *kg,
 
 /* Transform position from world to object space */
 
-ccl_device_inline void object_inverse_position_transform(const KernelGlobals *kg,
-                                                         const ShaderData *sd,
-                                                         float3 *P)
+ccl_device_inline void object_inverse_position_transform(ccl_global const KernelGlobals *kg,
+                                                         ccl_private const ShaderData *sd,
+                                                         ccl_private float3 *P)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -173,9 +176,9 @@ ccl_device_inline void object_inverse_position_transform(const KernelGlobals *kg
 
 /* Transform normal from world to object space */
 
-ccl_device_inline void object_inverse_normal_transform(const KernelGlobals *kg,
-                                                       const ShaderData *sd,
-                                                       float3 *N)
+ccl_device_inline void object_inverse_normal_transform(ccl_global const KernelGlobals *kg,
+                                                       ccl_private const ShaderData *sd,
+                                                       ccl_private float3 *N)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -198,9 +201,9 @@ ccl_device_inline void object_inverse_normal_transform(const KernelGlobals *kg,
 
 /* Transform normal from object to world space */
 
-ccl_device_inline void object_normal_transform(const KernelGlobals *kg,
-                                               const ShaderData *sd,
-                                               float3 *N)
+ccl_device_inline void object_normal_transform(ccl_global const KernelGlobals *kg,
+                                               ccl_private const ShaderData *sd,
+                                               ccl_private float3 *N)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -215,9 +218,9 @@ ccl_device_inline void object_normal_transform(const KernelGlobals *kg,
 
 /* Transform direction vector from object to world space */
 
-ccl_device_inline void object_dir_transform(const KernelGlobals *kg,
-                                            const ShaderData *sd,
-                                            float3 *D)
+ccl_device_inline void object_dir_transform(ccl_global const KernelGlobals *kg,
+                                            ccl_private const ShaderData *sd,
+                                            ccl_private float3 *D)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -232,9 +235,9 @@ ccl_device_inline void object_dir_transform(const KernelGlobals *kg,
 
 /* Transform direction vector from world to object space */
 
-ccl_device_inline void object_inverse_dir_transform(const KernelGlobals *kg,
-                                                    const ShaderData *sd,
-                                                    float3 *D)
+ccl_device_inline void object_inverse_dir_transform(ccl_global const KernelGlobals *kg,
+                                                    ccl_private const ShaderData *sd,
+                                                    ccl_private float3 *D)
 {
 #ifdef __OBJECT_MOTION__
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -249,7 +252,8 @@ ccl_device_inline void object_inverse_dir_transform(const KernelGlobals *kg,
 
 /* Object center position */
 
-ccl_device_inline float3 object_location(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline float3 object_location(ccl_global const KernelGlobals *kg,
+                                         ccl_private const ShaderData *sd)
 {
   if (sd->object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -266,18 +270,18 @@ ccl_device_inline float3 object_location(const KernelGlobals *kg, const ShaderDa
 
 /* Color of the object */
 
-ccl_device_inline float3 object_color(const KernelGlobals *kg, int object)
+ccl_device_inline float3 object_color(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
 
-  const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+  ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object);
   return make_float3(kobject->color[0], kobject->color[1], kobject->color[2]);
 }
 
 /* Pass ID number of object */
 
-ccl_device_inline float object_pass_id(const KernelGlobals *kg, int object)
+ccl_device_inline float object_pass_id(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0.0f;
@@ -287,7 +291,7 @@ ccl_device_inline float object_pass_id(const KernelGlobals *kg, int object)
 
 /* Per lamp random number for shader variation */
 
-ccl_device_inline float lamp_random_number(const KernelGlobals *kg, int lamp)
+ccl_device_inline float lamp_random_number(ccl_global const KernelGlobals *kg, int lamp)
 {
   if (lamp == LAMP_NONE)
     return 0.0f;
@@ -297,7 +301,7 @@ ccl_device_inline float lamp_random_number(const KernelGlobals *kg, int lamp)
 
 /* Per object random number for shader variation */
 
-ccl_device_inline float object_random_number(const KernelGlobals *kg, int object)
+ccl_device_inline float object_random_number(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0.0f;
@@ -307,7 +311,7 @@ ccl_device_inline float object_random_number(const KernelGlobals *kg, int object
 
 /* Particle ID from which this object was generated */
 
-ccl_device_inline int object_particle_id(const KernelGlobals *kg, int object)
+ccl_device_inline int object_particle_id(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0;
@@ -317,31 +321,34 @@ ccl_device_inline int object_particle_id(const KernelGlobals *kg, int object)
 
 /* Generated texture coordinate on surface from where object was instanced */
 
-ccl_device_inline float3 object_dupli_generated(const KernelGlobals *kg, int object)
+ccl_device_inline float3 object_dupli_generated(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
 
-  const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+  ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object);
   return make_float3(
       kobject->dupli_generated[0], kobject->dupli_generated[1], kobject->dupli_generated[2]);
 }
 
 /* UV texture coordinate on surface from where object was instanced */
 
-ccl_device_inline float3 object_dupli_uv(const KernelGlobals *kg, int object)
+ccl_device_inline float3 object_dupli_uv(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
 
-  const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+  ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object);
   return make_float3(kobject->dupli_uv[0], kobject->dupli_uv[1], 0.0f);
 }
 
 /* Information about mesh for motion blurred triangles and curves */
 
-ccl_device_inline void object_motion_info(
-    const KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys)
+ccl_device_inline void object_motion_info(ccl_global const KernelGlobals *kg,
+                                          int object,
+                                          ccl_private int *numsteps,
+                                          ccl_private int *numverts,
+                                          ccl_private int *numkeys)
 {
   if (numkeys) {
     *numkeys = kernel_tex_fetch(__objects, object).numkeys;
@@ -355,7 +362,7 @@ ccl_device_inline void object_motion_info(
 
 /* Offset to an objects patch map */
 
-ccl_device_inline uint object_patch_map_offset(const KernelGlobals *kg, int object)
+ccl_device_inline uint object_patch_map_offset(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0;
@@ -365,7 +372,7 @@ ccl_device_inline uint object_patch_map_offset(const KernelGlobals *kg, int obje
 
 /* Volume step size */
 
-ccl_device_inline float object_volume_density(const KernelGlobals *kg, int object)
+ccl_device_inline float object_volume_density(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE) {
     return 1.0f;
@@ -374,7 +381,7 @@ ccl_device_inline float object_volume_density(const KernelGlobals *kg, int objec
   return kernel_tex_fetch(__objects, object).volume_density;
 }
 
-ccl_device_inline float object_volume_step_size(const KernelGlobals *kg, int object)
+ccl_device_inline float object_volume_step_size(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE) {
     return kernel_data.background.volume_step_size;
@@ -385,14 +392,14 @@ ccl_device_inline float object_volume_step_size(const KernelGlobals *kg, int obj
 
 /* Pass ID for shader */
 
-ccl_device int shader_pass_id(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device int shader_pass_id(ccl_global const KernelGlobals *kg, ccl_private const ShaderData *sd)
 {
   return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id;
 }
 
 /* Cryptomatte ID */
 
-ccl_device_inline float object_cryptomatte_id(const KernelGlobals *kg, int object)
+ccl_device_inline float object_cryptomatte_id(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0.0f;
@@ -400,7 +407,7 @@ ccl_device_inline float object_cryptomatte_id(const KernelGlobals *kg, int objec
   return kernel_tex_fetch(__objects, object).cryptomatte_object;
 }
 
-ccl_device_inline float object_cryptomatte_asset_id(const KernelGlobals *kg, int object)
+ccl_device_inline float object_cryptomatte_asset_id(ccl_global const KernelGlobals *kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0;
@@ -410,42 +417,42 @@ ccl_device_inline float object_cryptomatte_asset_id(const KernelGlobals *kg, int
 
 /* Particle data from which object was instanced */
 
-ccl_device_inline uint particle_index(const KernelGlobals *kg, int particle)
+ccl_device_inline uint particle_index(ccl_global const KernelGlobals *kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).index;
 }
 
-ccl_device float particle_age(const KernelGlobals *kg, int particle)
+ccl_device float particle_age(ccl_global const KernelGlobals *kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).age;
 }
 
-ccl_device float particle_lifetime(const KernelGlobals *kg, int particle)
+ccl_device float particle_lifetime(ccl_global const KernelGlobals *kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).lifetime;
 }
 
-ccl_device float particle_size(const KernelGlobals *kg, int particle)
+ccl_device float particle_size(ccl_global const KernelGlobals *kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).size;
 }
 
-ccl_device float4 particle_rotation(const KernelGlobals *kg, int particle)
+ccl_device float4 particle_rotation(ccl_global const KernelGlobals *kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).rotation;
 }
 
-ccl_device float3 particle_location(const KernelGlobals *kg, int particle)
+ccl_device float3 particle_location(ccl_global const KernelGlobals *kg, int particle)
 {
   return float4_to_float3(kernel_tex_fetch(__particles, particle).location);
 }
 
-ccl_device float3 particle_velocity(const KernelGlobals *kg, int particle)
+ccl_device float3 particle_velocity(ccl_global const KernelGlobals *kg, int particle)
 {
   return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity);
 }
 
-ccl_device float3 particle_angular_velocity(const KernelGlobals *kg, int particle)
+ccl_device float3 particle_angular_velocity(ccl_global const KernelGlobals *kg, int particle)
 {
   return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity);
 }
@@ -467,8 +474,12 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir)
 
 /* Transform ray into object space to enter static object in BVH */
 
-ccl_device_inline float bvh_instance_push(
-    const KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir)
+ccl_device_inline float bvh_instance_push(ccl_global const KernelGlobals *kg,
+                                          int object,
+                                          ccl_private const Ray *ray,
+                                          ccl_private float3 *P,
+                                          ccl_private float3 *dir,
+                                          ccl_private float3 *idir)
 {
   Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 
@@ -483,12 +494,12 @@ ccl_device_inline float bvh_instance_push(
 
 /* Transform ray to exit static object in BVH. */
 
-ccl_device_inline float bvh_instance_pop(const KernelGlobals *kg,
+ccl_device_inline float bvh_instance_pop(ccl_global const KernelGlobals *kg,
                                          int object,
-                                         const Ray *ray,
-                                         float3 *P,
-                                         float3 *dir,
-                                         float3 *idir,
+                                         ccl_private const Ray *ray,
+                                         ccl_private float3 *P,
+                                         ccl_private float3 *dir,
+                                         ccl_private float3 *idir,
                                          float t)
 {
   if (t != FLT_MAX) {
@@ -505,13 +516,13 @@ ccl_device_inline float bvh_instance_pop(const KernelGlobals *kg,
 
 /* Same as above, but returns scale factor to apply to multiple intersection distances */
 
-ccl_device_inline void bvh_instance_pop_factor(const KernelGlobals *kg,
+ccl_device_inline void bvh_instance_pop_factor(ccl_global const KernelGlobals *kg,
                                                int object,
-                                               const Ray *ray,
-                                               float3 *P,
-                                               float3 *dir,
-                                               float3 *idir,
-                                               float *t_fac)
+                                               ccl_private const Ray *ray,
+                                               ccl_private float3 *P,
+                                               ccl_private float3 *dir,
+                                               ccl_private float3 *idir,
+                                               ccl_private float *t_fac)
 {
   Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
   *t_fac = 1.0f / len(transform_direction(&tfm, ray->D));
@@ -524,13 +535,13 @@ ccl_device_inline void bvh_instance_pop_factor(const KernelGlobals *kg,
 #ifdef __OBJECT_MOTION__
 /* Transform ray into object space to enter motion blurred object in BVH */
 
-ccl_device_inline float bvh_instance_motion_push(const KernelGlobals *kg,
+ccl_device_inline float bvh_instance_motion_push(ccl_global const KernelGlobals *kg,
                                                  int object,
-                                                 const Ray *ray,
-                                                 float3 *P,
-                                                 float3 *dir,
-                                                 float3 *idir,
-                                                 Transform *itfm)
+                                                 ccl_private const Ray *ray,
+                                                 ccl_private float3 *P,
+                                                 ccl_private float3 *dir,
+                                                 ccl_private float3 *idir,
+                                                 ccl_private Transform *itfm)
 {
   object_fetch_transform_motion_test(kg, object, ray->time, itfm);
 
@@ -545,14 +556,14 @@ ccl_device_inline float bvh_instance_motion_push(const KernelGlobals *kg,
 
 /* Transform ray to exit motion blurred object in BVH. */
 
-ccl_device_inline float bvh_instance_motion_pop(const KernelGlobals *kg,
+ccl_device_inline float bvh_instance_motion_pop(ccl_global const KernelGlobals *kg,
                                                 int object,
-                                                const Ray *ray,
-                                                float3 *P,
-                                                float3 *dir,
-                                                float3 *idir,
+                                                ccl_private const Ray *ray,
+                                                ccl_private float3 *P,
+                                                ccl_private float3 *dir,
+                                                ccl_private float3 *idir,
                                                 float t,
-                                                Transform *itfm)
+                                                ccl_private Transform *itfm)
 {
   if (t != FLT_MAX) {
     t /= len(transform_direction(itfm, ray->D));
@@ -567,14 +578,14 @@ ccl_device_inline float bvh_instance_motion_pop(const KernelGlobals *kg,
 
 /* Same as above, but returns scale factor to apply to multiple intersection distances */
 
-ccl_device_inline void bvh_instance_motion_pop_factor(const KernelGlobals *kg,
+ccl_device_inline void bvh_instance_motion_pop_factor(ccl_global const KernelGlobals *kg,
                                                       int object,
-                                                      const Ray *ray,
-                                                      float3 *P,
-                                                      float3 *dir,
-                                                      float3 *idir,
-                                                      float *t_fac,
-                                                      Transform *itfm)
+                                                      ccl_private const Ray *ray,
+                                                      ccl_private float3 *P,
+                                                      ccl_private float3 *dir,
+                                                      ccl_private float3 *idir,
+                                                      ccl_private float *t_fac,
+                                                      ccl_private Transform *itfm)
 {
   *t_fac = 1.0f / len(transform_direction(itfm, ray->D));
   *P = ray->P;
diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h
index ce0fc15f196..b54eafd6220 100644
--- a/intern/cycles/kernel/geom/geom_patch.h
+++ b/intern/cycles/kernel/geom/geom_patch.h
@@ -32,7 +32,9 @@ typedef struct PatchHandle {
   int array_index, patch_index, vert_index;
 } PatchHandle;
 
-ccl_device_inline int patch_map_resolve_quadrant(float median, float *u, float *v)
+ccl_device_inline int patch_map_resolve_quadrant(float median,
+                                                 ccl_private float *u,
+                                                 ccl_private float *v)
 {
   int quadrant = -1;
 
@@ -62,7 +64,7 @@ ccl_device_inline int patch_map_resolve_quadrant(float median, float *u, float *
 /* retrieve PatchHandle from patch coords */
 
 ccl_device_inline PatchHandle
-patch_map_find_patch(const KernelGlobals *kg, int object, int patch, float u, float v)
+patch_map_find_patch(ccl_global const KernelGlobals *kg, int object, int patch, float u, float v)
 {
   PatchHandle handle;
 
@@ -108,7 +110,9 @@ patch_map_find_patch(const KernelGlobals *kg, int object, int patch, float u, fl
   return handle;
 }
 
-ccl_device_inline void patch_eval_bspline_weights(float t, float *point, float *deriv)
+ccl_device_inline void patch_eval_bspline_weights(float t,
+                                                  ccl_private float *point,
+                                                  ccl_private float *deriv)
 {
   /* The four uniform cubic B-Spline basis functions evaluated at t */
   float inv_6 = 1.0f / 6.0f;
@@ -128,7 +132,9 @@ ccl_device_inline void patch_eval_bspline_weights(float t, float *point, float *
   deriv[3] = 0.5f * t2;
 }
 
-ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits, float *s, float *t)
+ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits,
+                                                          ccl_private float *s,
+                                                          ccl_private float *t)
 {
   int boundary = ((bits >> 8) & 0xf);
 
@@ -175,7 +181,9 @@ ccl_device_inline float patch_eval_param_fraction(uint patch_bits)
   }
 }
 
-ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, float *u, float *v)
+ccl_device_inline void patch_eval_normalize_coords(uint patch_bits,
+                                                   ccl_private float *u,
+                                                   ccl_private float *v)
 {
   float frac = patch_eval_param_fraction(patch_bits);
 
@@ -193,8 +201,8 @@ ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, float *u, fl
 
 /* retrieve patch control indices */
 
-ccl_device_inline int patch_eval_indices(const KernelGlobals *kg,
-                                         const PatchHandle *handle,
+ccl_device_inline int patch_eval_indices(ccl_global const KernelGlobals *kg,
+                                         ccl_private const PatchHandle *handle,
                                          int channel,
                                          int indices[PATCH_MAX_CONTROL_VERTS])
 {
@@ -210,8 +218,8 @@ ccl_device_inline int patch_eval_indices(const KernelGlobals *kg,
 
 /* evaluate patch basis functions */
 
-ccl_device_inline void patch_eval_basis(const KernelGlobals *kg,
-                                        const PatchHandle *handle,
+ccl_device_inline void patch_eval_basis(ccl_global const KernelGlobals *kg,
+                                        ccl_private const PatchHandle *handle,
                                         float u,
                                         float v,
                                         float weights[PATCH_MAX_CONTROL_VERTS],
@@ -249,7 +257,7 @@ ccl_device_inline void patch_eval_basis(const KernelGlobals *kg,
 
 /* generic function for evaluating indices and weights from patch coords */
 
-ccl_device_inline int patch_eval_control_verts(const KernelGlobals *kg,
+ccl_device_inline int patch_eval_control_verts(ccl_global const KernelGlobals *kg,
                                                int object,
                                                int patch,
                                                float u,
@@ -271,15 +279,15 @@ ccl_device_inline int patch_eval_control_verts(const KernelGlobals *kg,
 
 /* functions for evaluating attributes on patches */
 
-ccl_device float patch_eval_float(const KernelGlobals *kg,
-                                  const ShaderData *sd,
+ccl_device float patch_eval_float(ccl_global const KernelGlobals *kg,
+                                  ccl_private const ShaderData *sd,
                                   int offset,
                                   int patch,
                                   float u,
                                   float v,
                                   int channel,
-                                  float *du,
-                                  float *dv)
+                                  ccl_private float *du,
+                                  ccl_private float *dv)
 {
   int indices[PATCH_MAX_CONTROL_VERTS];
   float weights[PATCH_MAX_CONTROL_VERTS];
@@ -308,15 +316,15 @@ ccl_device float patch_eval_float(const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float2 patch_eval_float2(const KernelGlobals *kg,
-                                    const ShaderData *sd,
+ccl_device float2 patch_eval_float2(ccl_global const KernelGlobals *kg,
+                                    ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
                                     float u,
                                     float v,
                                     int channel,
-                                    float2 *du,
-                                    float2 *dv)
+                                    ccl_private float2 *du,
+                                    ccl_private float2 *dv)
 {
   int indices[PATCH_MAX_CONTROL_VERTS];
   float weights[PATCH_MAX_CONTROL_VERTS];
@@ -345,15 +353,15 @@ ccl_device float2 patch_eval_float2(const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float3 patch_eval_float3(const KernelGlobals *kg,
-                                    const ShaderData *sd,
+ccl_device float3 patch_eval_float3(ccl_global const KernelGlobals *kg,
+                                    ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
                                     float u,
                                     float v,
                                     int channel,
-                                    float3 *du,
-                                    float3 *dv)
+                                    ccl_private float3 *du,
+                                    ccl_private float3 *dv)
 {
   int indices[PATCH_MAX_CONTROL_VERTS];
   float weights[PATCH_MAX_CONTROL_VERTS];
@@ -382,15 +390,15 @@ ccl_device float3 patch_eval_float3(const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float4 patch_eval_float4(const KernelGlobals *kg,
-                                    const ShaderData *sd,
+ccl_device float4 patch_eval_float4(ccl_global const KernelGlobals *kg,
+                                    ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
                                     float u,
                                     float v,
                                     int channel,
-                                    float4 *du,
-                                    float4 *dv)
+                                    ccl_private float4 *du,
+                                    ccl_private float4 *dv)
 {
   int indices[PATCH_MAX_CONTROL_VERTS];
   float weights[PATCH_MAX_CONTROL_VERTS];
@@ -419,15 +427,15 @@ ccl_device float4 patch_eval_float4(const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float4 patch_eval_uchar4(const KernelGlobals *kg,
-                                    const ShaderData *sd,
+ccl_device float4 patch_eval_uchar4(ccl_global const KernelGlobals *kg,
+                                    ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
                                     float u,
                                     float v,
                                     int channel,
-                                    float4 *du,
-                                    float4 *dv)
+                                    ccl_private float4 *du,
+                                    ccl_private float4 *dv)
 {
   int indices[PATCH_MAX_CONTROL_VERTS];
   float weights[PATCH_MAX_CONTROL_VERTS];
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index ba31b12e817..869b911f76f 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -31,11 +31,11 @@ CCL_NAMESPACE_BEGIN
  * attributes for performance, mainly for GPU performance to avoid bringing in
  * heavy volume interpolation code. */
 
-ccl_device_inline float primitive_surface_attribute_float(const KernelGlobals *kg,
-                                                          const ShaderData *sd,
+ccl_device_inline float primitive_surface_attribute_float(ccl_global const KernelGlobals *kg,
+                                                          ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
-                                                          float *dx,
-                                                          float *dy)
+                                                          ccl_private float *dx,
+                                                          ccl_private float *dy)
 {
   if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
     if (subd_triangle_patch(kg, sd) == ~0)
@@ -57,11 +57,11 @@ ccl_device_inline float primitive_surface_attribute_float(const KernelGlobals *k
   }
 }
 
-ccl_device_inline float2 primitive_surface_attribute_float2(const KernelGlobals *kg,
-                                                            const ShaderData *sd,
+ccl_device_inline float2 primitive_surface_attribute_float2(ccl_global const KernelGlobals *kg,
+                                                            ccl_private const ShaderData *sd,
                                                             const AttributeDescriptor desc,
-                                                            float2 *dx,
-                                                            float2 *dy)
+                                                            ccl_private float2 *dx,
+                                                            ccl_private float2 *dy)
 {
   if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
     if (subd_triangle_patch(kg, sd) == ~0)
@@ -83,11 +83,11 @@ ccl_device_inline float2 primitive_surface_attribute_float2(const KernelGlobals
   }
 }
 
-ccl_device_inline float3 primitive_surface_attribute_float3(const KernelGlobals *kg,
-                                                            const ShaderData *sd,
+ccl_device_inline float3 primitive_surface_attribute_float3(ccl_global const KernelGlobals *kg,
+                                                            ccl_private const ShaderData *sd,
                                                             const AttributeDescriptor desc,
-                                                            float3 *dx,
-                                                            float3 *dy)
+                                                            ccl_private float3 *dx,
+                                                            ccl_private float3 *dy)
 {
   if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
     if (subd_triangle_patch(kg, sd) == ~0)
@@ -109,11 +109,12 @@ ccl_device_inline float3 primitive_surface_attribute_float3(const KernelGlobals
   }
 }
 
-ccl_device_forceinline float4 primitive_surface_attribute_float4(const KernelGlobals *kg,
-                                                                 const ShaderData *sd,
-                                                                 const AttributeDescriptor desc,
-                                                                 float4 *dx,
-                                                                 float4 *dy)
+ccl_device_forceinline float4
+primitive_surface_attribute_float4(ccl_global const KernelGlobals *kg,
+                                   ccl_private const ShaderData *sd,
+                                   const AttributeDescriptor desc,
+                                   ccl_private float4 *dx,
+                                   ccl_private float4 *dy)
 {
   if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
     if (subd_triangle_patch(kg, sd) == ~0)
@@ -142,14 +143,14 @@ ccl_device_forceinline float4 primitive_surface_attribute_float4(const KernelGlo
  * attributes for performance, mainly for GPU performance to avoid bringing in
  * heavy volume interpolation code. */
 
-ccl_device_inline bool primitive_is_volume_attribute(const ShaderData *sd,
+ccl_device_inline bool primitive_is_volume_attribute(ccl_private const ShaderData *sd,
                                                      const AttributeDescriptor desc)
 {
   return sd->type == PRIMITIVE_VOLUME;
 }
 
-ccl_device_inline float primitive_volume_attribute_float(const KernelGlobals *kg,
-                                                         const ShaderData *sd,
+ccl_device_inline float primitive_volume_attribute_float(ccl_global const KernelGlobals *kg,
+                                                         ccl_private const ShaderData *sd,
                                                          const AttributeDescriptor desc)
 {
   if (primitive_is_volume_attribute(sd, desc)) {
@@ -160,8 +161,8 @@ ccl_device_inline float primitive_volume_attribute_float(const KernelGlobals *kg
   }
 }
 
-ccl_device_inline float3 primitive_volume_attribute_float3(const KernelGlobals *kg,
-                                                           const ShaderData *sd,
+ccl_device_inline float3 primitive_volume_attribute_float3(ccl_global const KernelGlobals *kg,
+                                                           ccl_private const ShaderData *sd,
                                                            const AttributeDescriptor desc)
 {
   if (primitive_is_volume_attribute(sd, desc)) {
@@ -172,8 +173,8 @@ ccl_device_inline float3 primitive_volume_attribute_float3(const KernelGlobals *
   }
 }
 
-ccl_device_inline float4 primitive_volume_attribute_float4(const KernelGlobals *kg,
-                                                           const ShaderData *sd,
+ccl_device_inline float4 primitive_volume_attribute_float4(ccl_global const KernelGlobals *kg,
+                                                           ccl_private const ShaderData *sd,
                                                            const AttributeDescriptor desc)
 {
   if (primitive_is_volume_attribute(sd, desc)) {
@@ -187,7 +188,8 @@ ccl_device_inline float4 primitive_volume_attribute_float4(const KernelGlobals *
 
 /* Default UV coordinate */
 
-ccl_device_inline float3 primitive_uv(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline float3 primitive_uv(ccl_global const KernelGlobals *kg,
+                                      ccl_private const ShaderData *sd)
 {
   const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV);
 
@@ -200,7 +202,10 @@ ccl_device_inline float3 primitive_uv(const KernelGlobals *kg, const ShaderData
 
 /* Ptex coordinates */
 
-ccl_device bool primitive_ptex(const KernelGlobals *kg, ShaderData *sd, float2 *uv, int *face_id)
+ccl_device bool primitive_ptex(ccl_global const KernelGlobals *kg,
+                               ccl_private ShaderData *sd,
+                               ccl_private float2 *uv,
+                               ccl_private int *face_id)
 {
   /* storing ptex data as attributes is not memory efficient but simple for tests */
   const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID);
@@ -220,7 +225,7 @@ ccl_device bool primitive_ptex(const KernelGlobals *kg, ShaderData *sd, float2 *
 
 /* Surface tangent */
 
-ccl_device float3 primitive_tangent(const KernelGlobals *kg, ShaderData *sd)
+ccl_device float3 primitive_tangent(ccl_global const KernelGlobals *kg, ccl_private ShaderData *sd)
 {
 #ifdef __HAIR__
   if (sd->type & PRIMITIVE_ALL_CURVE)
@@ -252,7 +257,8 @@ ccl_device float3 primitive_tangent(const KernelGlobals *kg, ShaderData *sd)
 
 /* Motion vector for motion pass */
 
-ccl_device_inline float4 primitive_motion_vector(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline float4 primitive_motion_vector(ccl_global const KernelGlobals *kg,
+                                                 ccl_private const ShaderData *sd)
 {
   /* center position */
   float3 center;
diff --git a/intern/cycles/kernel/geom/geom_shader_data.h b/intern/cycles/kernel/geom/geom_shader_data.h
index f78d194359d..2cf60e263c3 100644
--- a/intern/cycles/kernel/geom/geom_shader_data.h
+++ b/intern/cycles/kernel/geom/geom_shader_data.h
@@ -25,8 +25,8 @@ CCL_NAMESPACE_BEGIN
 /* ShaderData setup from incoming ray */
 
 #ifdef __OBJECT_MOTION__
-ccl_device void shader_setup_object_transforms(const KernelGlobals *ccl_restrict kg,
-                                               ShaderData *ccl_restrict sd,
+ccl_device void shader_setup_object_transforms(ccl_global const KernelGlobals *ccl_restrict kg,
+                                               ccl_private ShaderData *ccl_restrict sd,
                                                float time)
 {
   if (sd->object_flag & SD_OBJECT_MOTION) {
@@ -38,10 +38,10 @@ ccl_device void shader_setup_object_transforms(const KernelGlobals *ccl_restrict
 
 /* TODO: break this up if it helps reduce register pressure to load data from
  * global memory as we write it to shader-data. */
-ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict kg,
-                                             ShaderData *ccl_restrict sd,
-                                             const Ray *ccl_restrict ray,
-                                             const Intersection *ccl_restrict isect)
+ccl_device_inline void shader_setup_from_ray(ccl_global const KernelGlobals *ccl_restrict kg,
+                                             ccl_private ShaderData *ccl_restrict sd,
+                                             ccl_private const Ray *ccl_restrict ray,
+                                             ccl_private const Intersection *ccl_restrict isect)
 {
   /* Read intersection data into shader globals.
    *
@@ -135,8 +135,8 @@ ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict k
 
 /* ShaderData setup from position sampled on mesh */
 
-ccl_device_inline void shader_setup_from_sample(const KernelGlobals *ccl_restrict kg,
-                                                ShaderData *ccl_restrict sd,
+ccl_device_inline void shader_setup_from_sample(ccl_global const KernelGlobals *ccl_restrict kg,
+                                                ccl_private ShaderData *ccl_restrict sd,
                                                 const float3 P,
                                                 const float3 Ng,
                                                 const float3 I,
@@ -247,8 +247,8 @@ ccl_device_inline void shader_setup_from_sample(const KernelGlobals *ccl_restric
 
 /* ShaderData setup for displacement */
 
-ccl_device void shader_setup_from_displace(const KernelGlobals *ccl_restrict kg,
-                                           ShaderData *ccl_restrict sd,
+ccl_device void shader_setup_from_displace(ccl_global const KernelGlobals *ccl_restrict kg,
+                                           ccl_private ShaderData *ccl_restrict sd,
                                            int object,
                                            int prim,
                                            float u,
@@ -281,8 +281,9 @@ ccl_device void shader_setup_from_displace(const KernelGlobals *ccl_restrict kg,
 
 /* ShaderData setup from ray into background */
 
-ccl_device_inline void shader_setup_from_background(const KernelGlobals *ccl_restrict kg,
-                                                    ShaderData *ccl_restrict sd,
+ccl_device_inline void shader_setup_from_background(ccl_global const KernelGlobals *ccl_restrict
+                                                        kg,
+                                                    ccl_private ShaderData *ccl_restrict sd,
                                                     const float3 ray_P,
                                                     const float3 ray_D,
                                                     const float ray_time)
@@ -325,9 +326,9 @@ ccl_device_inline void shader_setup_from_background(const KernelGlobals *ccl_res
 /* ShaderData setup from point inside volume */
 
 #ifdef __VOLUME__
-ccl_device_inline void shader_setup_from_volume(const KernelGlobals *ccl_restrict kg,
-                                                ShaderData *ccl_restrict sd,
-                                                const Ray *ccl_restrict ray)
+ccl_device_inline void shader_setup_from_volume(ccl_global const KernelGlobals *ccl_restrict kg,
+                                                ccl_private ShaderData *ccl_restrict sd,
+                                                ccl_private const Ray *ccl_restrict ray)
 {
 
   /* vectors */
diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h
index 877b2ece15b..927d630fe91 100644
--- a/intern/cycles/kernel/geom/geom_subd_triangle.h
+++ b/intern/cycles/kernel/geom/geom_subd_triangle.h
@@ -22,15 +22,16 @@ CCL_NAMESPACE_BEGIN
 
 /* Patch index for triangle, -1 if not subdivision triangle */
 
-ccl_device_inline uint subd_triangle_patch(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device_inline uint subd_triangle_patch(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderData *sd)
 {
   return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0;
 }
 
 /* UV coords of triangle within patch */
 
-ccl_device_inline void subd_triangle_patch_uv(const KernelGlobals *kg,
-                                              const ShaderData *sd,
+ccl_device_inline void subd_triangle_patch_uv(ccl_global const KernelGlobals *kg,
+                                              ccl_private const ShaderData *sd,
                                               float2 uv[3])
 {
   uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
@@ -42,7 +43,7 @@ ccl_device_inline void subd_triangle_patch_uv(const KernelGlobals *kg,
 
 /* Vertex indices of patch */
 
-ccl_device_inline uint4 subd_triangle_patch_indices(const KernelGlobals *kg, int patch)
+ccl_device_inline uint4 subd_triangle_patch_indices(ccl_global const KernelGlobals *kg, int patch)
 {
   uint4 indices;
 
@@ -56,21 +57,22 @@ ccl_device_inline uint4 subd_triangle_patch_indices(const KernelGlobals *kg, int
 
 /* Originating face for patch */
 
-ccl_device_inline uint subd_triangle_patch_face(const KernelGlobals *kg, int patch)
+ccl_device_inline uint subd_triangle_patch_face(ccl_global const KernelGlobals *kg, int patch)
 {
   return kernel_tex_fetch(__patches, patch + 4);
 }
 
 /* Number of corners on originating face */
 
-ccl_device_inline uint subd_triangle_patch_num_corners(const KernelGlobals *kg, int patch)
+ccl_device_inline uint subd_triangle_patch_num_corners(ccl_global const KernelGlobals *kg,
+                                                       int patch)
 {
   return kernel_tex_fetch(__patches, patch + 5) & 0xffff;
 }
 
 /* Indices of the four corners that are used by the patch */
 
-ccl_device_inline void subd_triangle_patch_corners(const KernelGlobals *kg,
+ccl_device_inline void subd_triangle_patch_corners(ccl_global const KernelGlobals *kg,
                                                    int patch,
                                                    int corners[4])
 {
@@ -103,11 +105,11 @@ ccl_device_inline void subd_triangle_patch_corners(const KernelGlobals *kg,
 
 /* Reading attributes on various subdivision triangle elements */
 
-ccl_device_noinline float subd_triangle_attribute_float(const KernelGlobals *kg,
-                                                        const ShaderData *sd,
+ccl_device_noinline float subd_triangle_attribute_float(ccl_global const KernelGlobals *kg,
+                                                        ccl_private const ShaderData *sd,
                                                         const AttributeDescriptor desc,
-                                                        float *dx,
-                                                        float *dy)
+                                                        ccl_private float *dx,
+                                                        ccl_private float *dy)
 {
   int patch = subd_triangle_patch(kg, sd);
 
@@ -242,11 +244,11 @@ ccl_device_noinline float subd_triangle_attribute_float(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline float2 subd_triangle_attribute_float2(const KernelGlobals *kg,
-                                                          const ShaderData *sd,
+ccl_device_noinline float2 subd_triangle_attribute_float2(ccl_global const KernelGlobals *kg,
+                                                          ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
-                                                          float2 *dx,
-                                                          float2 *dy)
+                                                          ccl_private float2 *dx,
+                                                          ccl_private float2 *dy)
 {
   int patch = subd_triangle_patch(kg, sd);
 
@@ -385,11 +387,11 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(const KernelGlobals *k
   }
 }
 
-ccl_device_noinline float3 subd_triangle_attribute_float3(const KernelGlobals *kg,
-                                                          const ShaderData *sd,
+ccl_device_noinline float3 subd_triangle_attribute_float3(ccl_global const KernelGlobals *kg,
+                                                          ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
-                                                          float3 *dx,
-                                                          float3 *dy)
+                                                          ccl_private float3 *dx,
+                                                          ccl_private float3 *dy)
 {
   int patch = subd_triangle_patch(kg, sd);
 
@@ -527,11 +529,11 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(const KernelGlobals *k
   }
 }
 
-ccl_device_noinline float4 subd_triangle_attribute_float4(const KernelGlobals *kg,
-                                                          const ShaderData *sd,
+ccl_device_noinline float4 subd_triangle_attribute_float4(ccl_global const KernelGlobals *kg,
+                                                          ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
-                                                          float4 *dx,
-                                                          float4 *dy)
+                                                          ccl_private float4 *dx,
+                                                          ccl_private float4 *dy)
 {
   int patch = subd_triangle_patch(kg, sd);
 
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 8edba46fd39..17f87b7c570 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -25,7 +25,8 @@
 CCL_NAMESPACE_BEGIN
 
 /* Normal on triangle. */
-ccl_device_inline float3 triangle_normal(const KernelGlobals *kg, ShaderData *sd)
+ccl_device_inline float3 triangle_normal(ccl_global const KernelGlobals *kg,
+                                         ccl_private ShaderData *sd)
 {
   /* load triangle vertices */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
@@ -43,14 +44,14 @@ ccl_device_inline float3 triangle_normal(const KernelGlobals *kg, ShaderData *sd
 }
 
 /* Point and normal on triangle. */
-ccl_device_inline void triangle_point_normal(const KernelGlobals *kg,
+ccl_device_inline void triangle_point_normal(ccl_global const KernelGlobals *kg,
                                              int object,
                                              int prim,
                                              float u,
                                              float v,
-                                             float3 *P,
-                                             float3 *Ng,
-                                             int *shader)
+                                             ccl_private float3 *P,
+                                             ccl_private float3 *Ng,
+                                             ccl_private int *shader)
 {
   /* load triangle vertices */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -75,7 +76,7 @@ ccl_device_inline void triangle_point_normal(const KernelGlobals *kg,
 
 /* Triangle vertex locations */
 
-ccl_device_inline void triangle_vertices(const KernelGlobals *kg, int prim, float3 P[3])
+ccl_device_inline void triangle_vertices(ccl_global const KernelGlobals *kg, int prim, float3 P[3])
 {
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
   P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
@@ -85,7 +86,7 @@ ccl_device_inline void triangle_vertices(const KernelGlobals *kg, int prim, floa
 
 /* Triangle vertex locations and vertex normals */
 
-ccl_device_inline void triangle_vertices_and_normals(const KernelGlobals *kg,
+ccl_device_inline void triangle_vertices_and_normals(ccl_global const KernelGlobals *kg,
                                                      int prim,
                                                      float3 P[3],
                                                      float3 N[3])
@@ -102,7 +103,7 @@ ccl_device_inline void triangle_vertices_and_normals(const KernelGlobals *kg,
 /* Interpolate smooth vertex normal from vertices */
 
 ccl_device_inline float3
-triangle_smooth_normal(const KernelGlobals *kg, float3 Ng, int prim, float u, float v)
+triangle_smooth_normal(ccl_global const KernelGlobals *kg, float3 Ng, int prim, float u, float v)
 {
   /* load triangle vertices */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -115,8 +116,12 @@ triangle_smooth_normal(const KernelGlobals *kg, float3 Ng, int prim, float u, fl
   return is_zero(N) ? Ng : N;
 }
 
-ccl_device_inline float3 triangle_smooth_normal_unnormalized(
-    const KernelGlobals *kg, const ShaderData *sd, float3 Ng, int prim, float u, float v)
+ccl_device_inline float3 triangle_smooth_normal_unnormalized(ccl_global const KernelGlobals *kg,
+                                                             ccl_private const ShaderData *sd,
+                                                             float3 Ng,
+                                                             int prim,
+                                                             float u,
+                                                             float v)
 {
   /* load triangle vertices */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -138,10 +143,10 @@ ccl_device_inline float3 triangle_smooth_normal_unnormalized(
 
 /* Ray differentials on triangle */
 
-ccl_device_inline void triangle_dPdudv(const KernelGlobals *kg,
+ccl_device_inline void triangle_dPdudv(ccl_global const KernelGlobals *kg,
                                        int prim,
-                                       ccl_addr_space float3 *dPdu,
-                                       ccl_addr_space float3 *dPdv)
+                                       ccl_private float3 *dPdu,
+                                       ccl_private float3 *dPdv)
 {
   /* fetch triangle vertex coordinates */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -156,11 +161,11 @@ ccl_device_inline void triangle_dPdudv(const KernelGlobals *kg,
 
 /* Reading attributes on various triangle elements */
 
-ccl_device float triangle_attribute_float(const KernelGlobals *kg,
-                                          const ShaderData *sd,
+ccl_device float triangle_attribute_float(ccl_global const KernelGlobals *kg,
+                                          ccl_private const ShaderData *sd,
                                           const AttributeDescriptor desc,
-                                          float *dx,
-                                          float *dy)
+                                          ccl_private float *dx,
+                                          ccl_private float *dy)
 {
   if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) {
     float f0, f1, f2;
@@ -206,11 +211,11 @@ ccl_device float triangle_attribute_float(const KernelGlobals *kg,
   }
 }
 
-ccl_device float2 triangle_attribute_float2(const KernelGlobals *kg,
-                                            const ShaderData *sd,
+ccl_device float2 triangle_attribute_float2(ccl_global const KernelGlobals *kg,
+                                            ccl_private const ShaderData *sd,
                                             const AttributeDescriptor desc,
-                                            float2 *dx,
-                                            float2 *dy)
+                                            ccl_private float2 *dx,
+                                            ccl_private float2 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) {
     float2 f0, f1, f2;
@@ -256,11 +261,11 @@ ccl_device float2 triangle_attribute_float2(const KernelGlobals *kg,
   }
 }
 
-ccl_device float3 triangle_attribute_float3(const KernelGlobals *kg,
-                                            const ShaderData *sd,
+ccl_device float3 triangle_attribute_float3(ccl_global const KernelGlobals *kg,
+                                            ccl_private const ShaderData *sd,
                                             const AttributeDescriptor desc,
-                                            float3 *dx,
-                                            float3 *dy)
+                                            ccl_private float3 *dx,
+                                            ccl_private float3 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) {
     float3 f0, f1, f2;
@@ -306,11 +311,11 @@ ccl_device float3 triangle_attribute_float3(const KernelGlobals *kg,
   }
 }
 
-ccl_device float4 triangle_attribute_float4(const KernelGlobals *kg,
-                                            const ShaderData *sd,
+ccl_device float4 triangle_attribute_float4(ccl_global const KernelGlobals *kg,
+                                            ccl_private const ShaderData *sd,
                                             const AttributeDescriptor desc,
-                                            float4 *dx,
-                                            float4 *dy)
+                                            ccl_private float4 *dx,
+                                            ccl_private float4 *dy)
 {
   if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER |
                       ATTR_ELEMENT_CORNER_BYTE)) {
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index b784cc75d08..f637206da19 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -26,8 +26,8 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline bool triangle_intersect(const KernelGlobals *kg,
-                                          Intersection *isect,
+ccl_device_inline bool triangle_intersect(ccl_global const KernelGlobals *kg,
+                                          ccl_private Intersection *isect,
                                           float3 P,
                                           float3 dir,
                                           float tmax,
@@ -85,15 +85,15 @@ ccl_device_inline bool triangle_intersect(const KernelGlobals *kg,
  */
 
 #ifdef __BVH_LOCAL__
-ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg,
-                                                LocalIntersection *local_isect,
+ccl_device_inline bool triangle_intersect_local(ccl_global const KernelGlobals *kg,
+                                                ccl_private LocalIntersection *local_isect,
                                                 float3 P,
                                                 float3 dir,
                                                 int object,
                                                 int local_object,
                                                 int prim_addr,
                                                 float tmax,
-                                                uint *lcg_state,
+                                                ccl_private uint *lcg_state,
                                                 int max_hits)
 {
   /* Only intersect with matching object, for instanced objects we
@@ -169,7 +169,7 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg,
   }
 
   /* Record intersection. */
-  Intersection *isect = &local_isect->hits[hit];
+  ccl_private Intersection *isect = &local_isect->hits[hit];
   isect->prim = prim;
   isect->object = local_object;
   isect->type = PRIMITIVE_TRIANGLE;
@@ -200,8 +200,8 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg,
  * http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf
  */
 
-ccl_device_inline float3 triangle_refine(const KernelGlobals *kg,
-                                         ShaderData *sd,
+ccl_device_inline float3 triangle_refine(ccl_global const KernelGlobals *kg,
+                                         ccl_private ShaderData *sd,
                                          float3 P,
                                          float3 D,
                                          float t,
@@ -256,8 +256,8 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg,
 /* Same as above, except that t is assumed to be in object space for
  * instancing.
  */
-ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg,
-                                               ShaderData *sd,
+ccl_device_inline float3 triangle_refine_local(ccl_global const KernelGlobals *kg,
+                                               ccl_private ShaderData *sd,
                                                float3 P,
                                                float3 D,
                                                float t,
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 2bcd7e56b5f..c466c3fb07a 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -31,8 +31,8 @@ CCL_NAMESPACE_BEGIN
 
 /* Return position normalized to 0..1 in mesh bounds */
 
-ccl_device_inline float3 volume_normalized_position(const KernelGlobals *kg,
-                                                    const ShaderData *sd,
+ccl_device_inline float3 volume_normalized_position(ccl_global const KernelGlobals *kg,
+                                                    ccl_private const ShaderData *sd,
                                                     float3 P)
 {
   /* todo: optimize this so it's just a single matrix multiplication when
@@ -70,8 +70,8 @@ ccl_device float3 volume_attribute_value_to_float3(const float4 value)
   }
 }
 
-ccl_device float4 volume_attribute_float4(const KernelGlobals *kg,
-                                          const ShaderData *sd,
+ccl_device float4 volume_attribute_float4(ccl_global const KernelGlobals *kg,
+                                          ccl_private const ShaderData *sd,
                                           const AttributeDescriptor desc)
 {
   if (desc.element & (ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) {
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
index 6e4e1be55fa..c822823de9c 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
@@ -44,7 +44,7 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u, float max)
  * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known
  * that the pixel did converge. */
 ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
-                                          const ccl_global KernelWorkTile *ccl_restrict tile,
+                                          ccl_global const KernelWorkTile *ccl_restrict tile,
                                           ccl_global float *render_buffer,
                                           const int x,
                                           const int y,
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_camera.h b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
index 58e7bde4c94..291f0f106f0 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_camera.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
@@ -25,12 +25,12 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline void integrate_camera_sample(const KernelGlobals *ccl_restrict kg,
+ccl_device_inline void integrate_camera_sample(ccl_global const KernelGlobals *ccl_restrict kg,
                                                const int sample,
                                                const int x,
                                                const int y,
                                                const uint rng_hash,
-                                               Ray *ray)
+                                               ccl_private Ray *ray)
 {
   /* Filter sampling. */
   float filter_u, filter_v;
@@ -64,7 +64,7 @@ ccl_device_inline void integrate_camera_sample(const KernelGlobals *ccl_restrict
  * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known
  * that the pixel did converge. */
 ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
-                                            const ccl_global KernelWorkTile *ccl_restrict tile,
+                                            ccl_global const KernelWorkTile *ccl_restrict tile,
                                             ccl_global float *render_buffer,
                                             const int x,
                                             const int y,
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
index cd9af1c62fc..760c08159e3 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
@@ -86,7 +86,7 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS
 template<uint32_t current_kernel>
 ccl_device_forceinline void integrator_intersect_shader_next_kernel(
     INTEGRATOR_STATE_ARGS,
-    const Intersection *ccl_restrict isect,
+    ccl_private const Intersection *ccl_restrict isect,
     const int shader,
     const int shader_flags)
 {
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
index 5bd9cfda4a4..00d44f0e5ed 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
@@ -32,7 +32,7 @@ ccl_device_forceinline uint integrate_intersect_shadow_visibility(INTEGRATOR_STA
 }
 
 ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS,
-                                                  const Ray *ray,
+                                                  ccl_private const Ray *ray,
                                                   const uint visibility)
 {
   /* Mask which will pick only opaque visibility bits from the `visibility`.
@@ -62,7 +62,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(INTEGRATOR_STAT
 
 #ifdef __TRANSPARENT_SHADOWS__
 ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS,
-                                                       const Ray *ray,
+                                                       ccl_private const Ray *ray,
                                                        const uint visibility)
 {
   Intersection isect[INTEGRATOR_SHADOW_ISECT_SIZE];
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
index 33a77d0fe29..192e9c6ab43 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
@@ -30,7 +30,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A
   PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
 
   ShaderDataTinyStorage stack_sd_storage;
-  ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
+  ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
 
   kernel_assert(kernel_data.integrator.use_volumes);
 
@@ -78,7 +78,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
   PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
 
   ShaderDataTinyStorage stack_sd_storage;
-  ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
+  ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
 
   Ray volume_ray ccl_optional_struct_init;
   integrator_state_read_ray(INTEGRATOR_STATE_PASS, &volume_ray);
diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h
index 234aa7cae63..a898f3fb2fc 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_background.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_background.h
@@ -49,7 +49,7 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
     /* TODO: does aliasing like this break automatic SoA in CUDA?
      * Should we instead store closures separate from ShaderData? */
     ShaderDataTinyStorage emission_sd_storage;
-    ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+    ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
 
     PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP);
     shader_setup_from_background(kg,
@@ -155,7 +155,7 @@ ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS,
       /* Evaluate light shader. */
       /* TODO: does aliasing like this break automatic SoA in CUDA? */
       ShaderDataTinyStorage emission_sd_storage;
-      ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+      ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
       float3 light_eval = light_sample_shader_eval(
           INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
       if (is_zero(light_eval)) {
diff --git a/intern/cycles/kernel/integrator/integrator_shade_light.h b/intern/cycles/kernel/integrator/integrator_shade_light.h
index 05b530f9665..d8f8da63023 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_light.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_light.h
@@ -72,7 +72,7 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
   /* Evaluate light shader. */
   /* TODO: does aliasing like this break automatic SoA in CUDA? */
   ShaderDataTinyStorage emission_sd_storage;
-  ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+  ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
   float3 light_eval = light_sample_shader_eval(INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
   if (is_zero(light_eval)) {
     return;
diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
index fd3c3ae1653..3857b522b25 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
@@ -39,7 +39,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A
    * TODO: is it better to declare this outside the loop or keep it local
    * so the compiler can see there is no dependency between iterations? */
   ShaderDataTinyStorage shadow_sd_storage;
-  ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
+  ccl_private ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
 
   /* Setup shader data at surface. */
   Intersection isect ccl_optional_struct_init;
@@ -69,13 +69,14 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A
 ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS,
                                                            const int hit,
                                                            const int num_recorded_hits,
-                                                           float3 *ccl_restrict throughput)
+                                                           ccl_private float3 *ccl_restrict
+                                                               throughput)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME);
 
   /* TODO: deduplicate with surface, or does it not matter for memory usage? */
   ShaderDataTinyStorage shadow_sd_storage;
-  ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
+  ccl_private ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
 
   /* Setup shader data. */
   Ray ray ccl_optional_struct_init;
diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h
index 27338f824c0..0d739517592 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_surface.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h
@@ -29,7 +29,7 @@
 CCL_NAMESPACE_BEGIN
 
 ccl_device_forceinline void integrate_surface_shader_setup(INTEGRATOR_STATE_CONST_ARGS,
-                                                           ShaderData *sd)
+                                                           ccl_private ShaderData *sd)
 {
   Intersection isect ccl_optional_struct_init;
   integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
@@ -42,7 +42,7 @@ ccl_device_forceinline void integrate_surface_shader_setup(INTEGRATOR_STATE_CONS
 
 #ifdef __HOLDOUT__
 ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARGS,
-                                                      ShaderData *sd,
+                                                      ccl_private ShaderData *sd,
                                                       ccl_global float *ccl_restrict render_buffer)
 {
   /* Write holdout transparency to render buffer and stop if fully holdout. */
@@ -67,7 +67,7 @@ ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARG
 
 #ifdef __EMISSION__
 ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_ARGS,
-                                                       const ShaderData *sd,
+                                                       ccl_private const ShaderData *sd,
                                                        ccl_global float *ccl_restrict
                                                            render_buffer)
 {
@@ -103,8 +103,8 @@ ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_AR
 /* Path tracing: sample point on light and evaluate light shader, then
  * queue shadow ray to be traced. */
 ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS,
-                                                           ShaderData *sd,
-                                                           const RNGState *rng_state)
+                                                           ccl_private ShaderData *sd,
+                                                           ccl_private const RNGState *rng_state)
 {
   /* Test if there is a light or BSDF that needs direct light. */
   if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) {
@@ -134,7 +134,7 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
    * the light shader. This could also move to its own kernel, for
    * non-constant light sources. */
   ShaderDataTinyStorage emission_sd_storage;
-  ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+  ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
   const float3 light_eval = light_sample_shader_eval(
       INTEGRATOR_STATE_PASS, emission_sd, &ls, sd->time);
   if (is_zero(light_eval)) {
@@ -206,9 +206,8 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
 #endif
 
 /* Path tracing: bounce off or through surface with new direction. */
-ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(INTEGRATOR_STATE_ARGS,
-                                                                ShaderData *sd,
-                                                                const RNGState *rng_state)
+ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
+    INTEGRATOR_STATE_ARGS, ccl_private ShaderData *sd, ccl_private const RNGState *rng_state)
 {
   /* Sample BSDF or BSSRDF. */
   if (!(sd->flag & (SD_BSDF | SD_BSSRDF))) {
@@ -217,7 +216,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(INTEGRATOR_STATE
 
   float bsdf_u, bsdf_v;
   path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-  const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u);
+  ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u);
 
 #ifdef __SUBSURFACE__
   /* BSSRDF closure, we schedule subsurface intersection kernel. */
@@ -281,7 +280,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(INTEGRATOR_STATE
 
 #ifdef __VOLUME__
 ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STATE_ARGS,
-                                                                 ShaderData *sd)
+                                                                 ccl_private ShaderData *sd)
 {
   if (!path_state_volume_next(INTEGRATOR_STATE_PASS)) {
     return LABEL_NONE;
@@ -304,19 +303,21 @@ ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STAT
 #endif
 
 #if defined(__AO__) && defined(__SHADER_RAYTRACE__)
-ccl_device_forceinline void integrate_surface_ao_pass(INTEGRATOR_STATE_CONST_ARGS,
-                                                      const ShaderData *ccl_restrict sd,
-                                                      const RNGState *ccl_restrict rng_state,
-                                                      ccl_global float *ccl_restrict render_buffer)
+ccl_device_forceinline void integrate_surface_ao_pass(
+    INTEGRATOR_STATE_CONST_ARGS,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const RNGState *ccl_restrict rng_state,
+    ccl_global float *ccl_restrict render_buffer)
 {
 #  ifdef __KERNEL_OPTIX__
   optixDirectCall<void>(2, INTEGRATOR_STATE_PASS, sd, rng_state, render_buffer);
 }
 
-extern "C" __device__ void __direct_callable__ao_pass(INTEGRATOR_STATE_CONST_ARGS,
-                                                      const ShaderData *ccl_restrict sd,
-                                                      const RNGState *ccl_restrict rng_state,
-                                                      ccl_global float *ccl_restrict render_buffer)
+extern "C" __device__ void __direct_callable__ao_pass(
+    INTEGRATOR_STATE_CONST_ARGS,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const RNGState *ccl_restrict rng_state,
+    ccl_global float *ccl_restrict render_buffer)
 {
 #  endif /* __KERNEL_OPTIX__ */
   float bsdf_u, bsdf_v;
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index aa4c652c037..72c609751f7 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ -71,8 +71,8 @@ typedef struct VolumeShaderCoefficients {
 
 /* Evaluate shader to get extinction coefficient at P. */
 ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
-                                                   ShaderData *ccl_restrict sd,
-                                                   float3 *ccl_restrict extinction)
+                                                   ccl_private ShaderData *ccl_restrict sd,
+                                                   ccl_private float3 *ccl_restrict extinction)
 {
   shader_eval_volume<true>(INTEGRATOR_STATE_PASS, sd, PATH_RAY_SHADOW, [=](const int i) {
     return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
@@ -89,8 +89,8 @@ ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
 
 /* Evaluate shader to get absorption, scattering and emission at P. */
 ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
-                                            ShaderData *ccl_restrict sd,
-                                            VolumeShaderCoefficients *coeff)
+                                            ccl_private ShaderData *ccl_restrict sd,
+                                            ccl_private VolumeShaderCoefficients *coeff)
 {
   const int path_flag = INTEGRATOR_STATE(path, flag);
   shader_eval_volume<false>(INTEGRATOR_STATE_PASS, sd, path_flag, [=](const int i) {
@@ -107,7 +107,7 @@ ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
 
   if (sd->flag & SD_SCATTER) {
     for (int i = 0; i < sd->num_closure; i++) {
-      const ShaderClosure *sc = &sd->closure[i];
+      ccl_private const ShaderClosure *sc = &sd->closure[i];
 
       if (CLOSURE_IS_VOLUME(sc->type)) {
         coeff->sigma_s += sc->weight;
@@ -123,14 +123,14 @@ ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
   return true;
 }
 
-ccl_device_forceinline void volume_step_init(const KernelGlobals *kg,
-                                             const RNGState *rng_state,
+ccl_device_forceinline void volume_step_init(ccl_global const KernelGlobals *kg,
+                                             ccl_private const RNGState *rng_state,
                                              const float object_step_size,
                                              float t,
-                                             float *step_size,
-                                             float *step_shade_offset,
-                                             float *steps_offset,
-                                             int *max_steps)
+                                             ccl_private float *step_size,
+                                             ccl_private float *step_shade_offset,
+                                             ccl_private float *steps_offset,
+                                             ccl_private int *max_steps)
 {
   if (object_step_size == FLT_MAX) {
     /* Homogeneous volume. */
@@ -170,9 +170,9 @@ ccl_device_forceinline void volume_step_init(const KernelGlobals *kg,
 /* homogeneous volume: assume shader evaluation at the starts gives
  * the extinction coefficient for the entire line segment */
 ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS,
-                                          Ray *ccl_restrict ray,
-                                          ShaderData *ccl_restrict sd,
-                                          float3 *ccl_restrict throughput)
+                                          ccl_private Ray *ccl_restrict ray,
+                                          ccl_private ShaderData *ccl_restrict sd,
+                                          ccl_private float3 *ccl_restrict throughput)
 {
   float3 sigma_t = zero_float3();
 
@@ -185,9 +185,9 @@ ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS,
 /* heterogeneous volume: integrate stepping through the volume until we
  * reach the end, get absorbed entirely, or run out of iterations */
 ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
-                                            Ray *ccl_restrict ray,
-                                            ShaderData *ccl_restrict sd,
-                                            float3 *ccl_restrict throughput,
+                                            ccl_private Ray *ccl_restrict ray,
+                                            ccl_private ShaderData *ccl_restrict sd,
+                                            ccl_private float3 *ccl_restrict throughput,
                                             const float object_step_size)
 {
   /* Load random number state. */
@@ -257,10 +257,10 @@ ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
 /* Equi-angular sampling as in:
  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
 
-ccl_device float volume_equiangular_sample(const Ray *ccl_restrict ray,
+ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict ray,
                                            const float3 light_P,
                                            const float xi,
-                                           float *pdf)
+                                           ccl_private float *pdf)
 {
   const float t = ray->t;
   const float delta = dot((light_P - ray->P), ray->D);
@@ -281,7 +281,7 @@ ccl_device float volume_equiangular_sample(const Ray *ccl_restrict ray,
   return min(t, delta + t_); /* min is only for float precision errors */
 }
 
-ccl_device float volume_equiangular_pdf(const Ray *ccl_restrict ray,
+ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
                                         const float3 light_P,
                                         const float sample_t)
 {
@@ -305,7 +305,7 @@ ccl_device float volume_equiangular_pdf(const Ray *ccl_restrict ray,
   return pdf;
 }
 
-ccl_device float volume_equiangular_cdf(const Ray *ccl_restrict ray,
+ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray,
                                         const float3 light_P,
                                         const float sample_t)
 {
@@ -332,8 +332,12 @@ ccl_device float volume_equiangular_cdf(const Ray *ccl_restrict ray,
 
 /* Distance sampling */
 
-ccl_device float volume_distance_sample(
-    float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
+ccl_device float volume_distance_sample(float max_t,
+                                        float3 sigma_t,
+                                        int channel,
+                                        float xi,
+                                        ccl_private float3 *transmittance,
+                                        ccl_private float3 *pdf)
 {
   /* xi is [0, 1[ so log(0) should never happen, division by zero is
    * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
@@ -363,7 +367,7 @@ ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_
 
 /* Emission */
 
-ccl_device float3 volume_emission_integrate(VolumeShaderCoefficients *coeff,
+ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff,
                                             int closure_flag,
                                             float3 transmittance,
                                             float t)
@@ -410,13 +414,13 @@ typedef struct VolumeIntegrateState {
 } VolumeIntegrateState;
 
 ccl_device_forceinline void volume_integrate_step_scattering(
-    const ShaderData *sd,
-    const Ray *ray,
+    ccl_private const ShaderData *sd,
+    ccl_private const Ray *ray,
     const float3 equiangular_light_P,
-    const VolumeShaderCoefficients &ccl_restrict coeff,
+    ccl_private const VolumeShaderCoefficients &ccl_restrict coeff,
     const float3 transmittance,
-    VolumeIntegrateState &ccl_restrict vstate,
-    VolumeIntegrateResult &ccl_restrict result)
+    ccl_private VolumeIntegrateState &ccl_restrict vstate,
+    ccl_private VolumeIntegrateResult &ccl_restrict result)
 {
   /* Pick random color channel, we use the Veach one-sample
    * model with balance heuristic for the channels. */
@@ -507,14 +511,14 @@ ccl_device_forceinline void volume_integrate_step_scattering(
  * for path tracing where we don't want to branch. */
 ccl_device_forceinline void volume_integrate_heterogeneous(
     INTEGRATOR_STATE_ARGS,
-    Ray *ccl_restrict ray,
-    ShaderData *ccl_restrict sd,
-    const RNGState *rng_state,
+    ccl_private Ray *ccl_restrict ray,
+    ccl_private ShaderData *ccl_restrict sd,
+    ccl_private const RNGState *rng_state,
     ccl_global float *ccl_restrict render_buffer,
     const float object_step_size,
     const VolumeSampleMethod direct_sample_method,
     const float3 equiangular_light_P,
-    VolumeIntegrateResult &result)
+    ccl_private VolumeIntegrateResult &result)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE);
 
@@ -666,10 +670,11 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
 #  ifdef __EMISSION__
 /* Path tracing: sample point on light and evaluate light shader, then
  * queue shadow ray to be traced. */
-ccl_device_forceinline bool integrate_volume_sample_light(INTEGRATOR_STATE_ARGS,
-                                                          const ShaderData *ccl_restrict sd,
-                                                          const RNGState *ccl_restrict rng_state,
-                                                          LightSample *ccl_restrict ls)
+ccl_device_forceinline bool integrate_volume_sample_light(
+    INTEGRATOR_STATE_ARGS,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const RNGState *ccl_restrict rng_state,
+    ccl_private LightSample *ccl_restrict ls)
 {
   /* Test if there is a light or BSDF that needs direct light. */
   if (!kernel_data.integrator.use_direct_light) {
@@ -694,14 +699,14 @@ ccl_device_forceinline bool integrate_volume_sample_light(INTEGRATOR_STATE_ARGS,
 
 /* Path tracing: sample point on light and evaluate light shader, then
  * queue shadow ray to be traced. */
-ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
-                                                          const ShaderData *ccl_restrict sd,
-                                                          const RNGState *ccl_restrict rng_state,
-                                                          const float3 P,
-                                                          const ShaderVolumePhases *ccl_restrict
-                                                              phases,
-                                                          const float3 throughput,
-                                                          LightSample *ccl_restrict ls)
+ccl_device_forceinline void integrate_volume_direct_light(
+    INTEGRATOR_STATE_ARGS,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const RNGState *ccl_restrict rng_state,
+    const float3 P,
+    ccl_private const ShaderVolumePhases *ccl_restrict phases,
+    const float3 throughput,
+    ccl_private LightSample *ccl_restrict ls)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT);
 
@@ -737,7 +742,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
    * the light shader. This could also move to its own kernel, for
    * non-constant light sources. */
   ShaderDataTinyStorage emission_sd_storage;
-  ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+  ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
   const float3 light_eval = light_sample_shader_eval(
       INTEGRATOR_STATE_PASS, emission_sd, ls, sd->time);
   if (is_zero(light_eval)) {
@@ -801,10 +806,11 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
 #  endif
 
 /* Path tracing: scatter in new direction using phase function */
-ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS,
-                                                           ShaderData *sd,
-                                                           const RNGState *rng_state,
-                                                           const ShaderVolumePhases *phases)
+ccl_device_forceinline bool integrate_volume_phase_scatter(
+    INTEGRATOR_STATE_ARGS,
+    ccl_private ShaderData *sd,
+    ccl_private const RNGState *rng_state,
+    ccl_private const ShaderVolumePhases *phases)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT);
 
@@ -865,7 +871,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS
  * between the endpoints. distance sampling is used to decide if we will
  * scatter or not. */
 ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
-                                                 Ray *ccl_restrict ray,
+                                                 ccl_private Ray *ccl_restrict ray,
                                                  ccl_global float *ccl_restrict render_buffer)
 {
   ShaderData sd;
diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h
index efc7576d95b..517e2891769 100644
--- a/intern/cycles/kernel/integrator/integrator_state.h
+++ b/intern/cycles/kernel/integrator/integrator_state.h
@@ -106,7 +106,7 @@ typedef struct IntegratorQueueCounter {
  * GPU rendering path state with SoA layout. */
 typedef struct IntegratorStateGPU {
 #define KERNEL_STRUCT_BEGIN(name) struct {
-#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type *name;
+#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) ccl_global type *name;
 #define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
 #define KERNEL_STRUCT_END(name) \
   } \
@@ -124,13 +124,13 @@ typedef struct IntegratorStateGPU {
 #undef KERNEL_STRUCT_VOLUME_STACK_SIZE
 
   /* Count number of queued kernels. */
-  IntegratorQueueCounter *queue_counter;
+  ccl_global IntegratorQueueCounter *queue_counter;
 
   /* Count number of kernels queued for specific shaders. */
-  int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM];
+  ccl_global int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM];
 
   /* Index of path which will be used by a next shadow catcher split.  */
-  int *next_shadow_catcher_path_index;
+  ccl_global int *next_shadow_catcher_path_index;
 } IntegratorStateGPU;
 
 /* Abstraction
@@ -173,9 +173,10 @@ typedef IntegratorStateCPU *ccl_restrict IntegratorState;
 
 typedef int IntegratorState;
 
-#  define INTEGRATOR_STATE_ARGS const KernelGlobals *ccl_restrict kg, const IntegratorState state
+#  define INTEGRATOR_STATE_ARGS \
+    ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state
 #  define INTEGRATOR_STATE_CONST_ARGS \
-    const KernelGlobals *ccl_restrict kg, const IntegratorState state
+    ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state
 #  define INTEGRATOR_STATE_PASS kg, state
 
 #  define INTEGRATOR_STATE_PASS_NULL kg, -1
diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h
index 037c7533943..fddd9eb5ac8 100644
--- a/intern/cycles/kernel/integrator/integrator_state_util.h
+++ b/intern/cycles/kernel/integrator/integrator_state_util.h
@@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN
 /* Ray */
 
 ccl_device_forceinline void integrator_state_write_ray(INTEGRATOR_STATE_ARGS,
-                                                       const Ray *ccl_restrict ray)
+                                                       ccl_private const Ray *ccl_restrict ray)
 {
   INTEGRATOR_STATE_WRITE(ray, P) = ray->P;
   INTEGRATOR_STATE_WRITE(ray, D) = ray->D;
@@ -35,7 +35,7 @@ ccl_device_forceinline void integrator_state_write_ray(INTEGRATOR_STATE_ARGS,
 }
 
 ccl_device_forceinline void integrator_state_read_ray(INTEGRATOR_STATE_CONST_ARGS,
-                                                      Ray *ccl_restrict ray)
+                                                      ccl_private Ray *ccl_restrict ray)
 {
   ray->P = INTEGRATOR_STATE(ray, P);
   ray->D = INTEGRATOR_STATE(ray, D);
@@ -47,8 +47,8 @@ ccl_device_forceinline void integrator_state_read_ray(INTEGRATOR_STATE_CONST_ARG
 
 /* Shadow Ray */
 
-ccl_device_forceinline void integrator_state_write_shadow_ray(INTEGRATOR_STATE_ARGS,
-                                                              const Ray *ccl_restrict ray)
+ccl_device_forceinline void integrator_state_write_shadow_ray(
+    INTEGRATOR_STATE_ARGS, ccl_private const Ray *ccl_restrict ray)
 {
   INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray->P;
   INTEGRATOR_STATE_WRITE(shadow_ray, D) = ray->D;
@@ -58,7 +58,7 @@ ccl_device_forceinline void integrator_state_write_shadow_ray(INTEGRATOR_STATE_A
 }
 
 ccl_device_forceinline void integrator_state_read_shadow_ray(INTEGRATOR_STATE_CONST_ARGS,
-                                                             Ray *ccl_restrict ray)
+                                                             ccl_private Ray *ccl_restrict ray)
 {
   ray->P = INTEGRATOR_STATE(shadow_ray, P);
   ray->D = INTEGRATOR_STATE(shadow_ray, D);
@@ -70,8 +70,8 @@ ccl_device_forceinline void integrator_state_read_shadow_ray(INTEGRATOR_STATE_CO
 
 /* Intersection */
 
-ccl_device_forceinline void integrator_state_write_isect(INTEGRATOR_STATE_ARGS,
-                                                         const Intersection *ccl_restrict isect)
+ccl_device_forceinline void integrator_state_write_isect(
+    INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect)
 {
   INTEGRATOR_STATE_WRITE(isect, t) = isect->t;
   INTEGRATOR_STATE_WRITE(isect, u) = isect->u;
@@ -84,8 +84,8 @@ ccl_device_forceinline void integrator_state_write_isect(INTEGRATOR_STATE_ARGS,
 #endif
 }
 
-ccl_device_forceinline void integrator_state_read_isect(INTEGRATOR_STATE_CONST_ARGS,
-                                                        Intersection *ccl_restrict isect)
+ccl_device_forceinline void integrator_state_read_isect(
+    INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect)
 {
   isect->prim = INTEGRATOR_STATE(isect, prim);
   isect->object = INTEGRATOR_STATE(isect, object);
@@ -124,7 +124,7 @@ ccl_device_forceinline bool integrator_state_volume_stack_is_empty(INTEGRATOR_ST
 /* Shadow Intersection */
 
 ccl_device_forceinline void integrator_state_write_shadow_isect(
-    INTEGRATOR_STATE_ARGS, const Intersection *ccl_restrict isect, const int index)
+    INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect, const int index)
 {
   INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, t) = isect->t;
   INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, u) = isect->u;
@@ -137,9 +137,8 @@ ccl_device_forceinline void integrator_state_write_shadow_isect(
 #endif
 }
 
-ccl_device_forceinline void integrator_state_read_shadow_isect(INTEGRATOR_STATE_CONST_ARGS,
-                                                               Intersection *ccl_restrict isect,
-                                                               const int index)
+ccl_device_forceinline void integrator_state_read_shadow_isect(
+    INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect, const int index)
 {
   isect->prim = INTEGRATOR_STATE_ARRAY(shadow_isect, index, prim);
   isect->object = INTEGRATOR_STATE_ARRAY(shadow_isect, index, object);
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h
index 2d15c82322a..153f9b79743 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface.h
@@ -36,14 +36,16 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __SUBSURFACE__
 
-ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS, ShaderData *sd, const ShaderClosure *sc)
+ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS,
+                                 ccl_private ShaderData *sd,
+                                 ccl_private const ShaderClosure *sc)
 {
   /* We should never have two consecutive BSSRDF bounces, the second one should
    * be converted to a diffuse BSDF to avoid this. */
   kernel_assert(!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DIFFUSE_ANCESTOR));
 
   /* Setup path state for intersect_subsurface kernel. */
-  const Bssrdf *bssrdf = (const Bssrdf *)sc;
+  ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc;
 
   /* Setup ray into surface. */
   INTEGRATOR_STATE_WRITE(ray, P) = sd->P;
@@ -89,7 +91,7 @@ ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS, ShaderData *sd, const Sh
 }
 
 ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
-                                             ShaderData *sd,
+                                             ccl_private ShaderData *sd,
                                              const uint32_t path_flag)
 {
   /* Get bump mapped normal from shader evaluation at exit point. */
@@ -107,7 +109,7 @@ ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
 
 #  ifdef __PRINCIPLED__
   if (path_flag & PATH_RAY_SUBSURFACE_USE_FRESNEL) {
-    PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+    ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
         sd, sizeof(PrincipledDiffuseBsdf), weight);
 
     if (bsdf) {
@@ -119,7 +121,8 @@ ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
   else
 #  endif /* __PRINCIPLED__ */
   {
-    DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+    ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
+        sd, sizeof(DiffuseBsdf), weight);
 
     if (bsdf) {
       bsdf->N = N;
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
index 3f685e3a2e9..788a5e9b929 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
@@ -33,8 +33,8 @@ ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r,
  * nearby points on the same object. */
 ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS,
                                        RNGState rng_state,
-                                       Ray &ray,
-                                       LocalIntersection &ss_isect)
+                                       ccl_private Ray &ray,
+                                       ccl_private LocalIntersection &ss_isect)
 
 {
   float disk_u, disk_v;
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
index d4935b0ce4a..45a43ea67a9 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
@@ -31,8 +31,11 @@ CCL_NAMESPACE_BEGIN
  * Magnus Wrenninge, Ryusuke Villemin, Christophe Hery.
  * https://graphics.pixar.com/library/PathTracedSubsurface/ */
 
-ccl_device void subsurface_random_walk_remap(
-    const float albedo, const float d, float g, float *sigma_t, float *alpha)
+ccl_device void subsurface_random_walk_remap(const float albedo,
+                                             const float d,
+                                             float g,
+                                             ccl_private float *sigma_t,
+                                             ccl_private float *alpha)
 {
   /* Compute attenuation and scattering coefficients from albedo. */
   const float g2 = g * g;
@@ -78,9 +81,9 @@ ccl_device void subsurface_random_walk_remap(
 ccl_device void subsurface_random_walk_coefficients(const float3 albedo,
                                                     const float3 radius,
                                                     const float anisotropy,
-                                                    float3 *sigma_t,
-                                                    float3 *alpha,
-                                                    float3 *throughput)
+                                                    ccl_private float3 *sigma_t,
+                                                    ccl_private float3 *alpha,
+                                                    ccl_private float3 *throughput)
 {
   float sigma_t_x, sigma_t_y, sigma_t_z;
   float alpha_x, alpha_y, alpha_z;
@@ -164,7 +167,7 @@ ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, f
 ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t,
                                                          float t,
                                                          bool hit,
-                                                         float3 *transmittance)
+                                                         ccl_private float3 *transmittance)
 {
   float3 T = volume_color_transmittance(sigma_t, t);
   if (transmittance) {
@@ -179,8 +182,8 @@ ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t,
 
 ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
                                               RNGState rng_state,
-                                              Ray &ray,
-                                              LocalIntersection &ss_isect)
+                                              ccl_private Ray &ray,
+                                              ccl_private LocalIntersection &ss_isect)
 {
   float bssrdf_u, bssrdf_v;
   path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
diff --git a/intern/cycles/kernel/integrator/integrator_volume_stack.h b/intern/cycles/kernel/integrator/integrator_volume_stack.h
index 01ebf8376b1..0c4a723de6f 100644
--- a/intern/cycles/kernel/integrator/integrator_volume_stack.h
+++ b/intern/cycles/kernel/integrator/integrator_volume_stack.h
@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN
 
 template<typename StackReadOp, typename StackWriteOp>
 ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
-                                        const ShaderData *sd,
+                                        ccl_private const ShaderData *sd,
                                         StackReadOp stack_read,
                                         StackWriteOp stack_write)
 {
@@ -84,7 +84,7 @@ ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
   }
 }
 
-ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData *sd)
+ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, ccl_private const ShaderData *sd)
 {
   volume_stack_enter_exit(
       INTEGRATOR_STATE_PASS,
@@ -95,7 +95,8 @@ ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData
       });
 }
 
-ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData *sd)
+ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
+                                               ccl_private const ShaderData *sd)
 {
   volume_stack_enter_exit(
       INTEGRATOR_STATE_PASS,
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index f4d00e4c20c..dc0aa9356f7 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -32,7 +32,9 @@ CCL_NAMESPACE_BEGIN
  * that only one of those can happen at a bounce, and so do not need to accumulate
  * them separately. */
 
-ccl_device_inline void bsdf_eval_init(BsdfEval *eval, const bool is_diffuse, float3 value)
+ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval,
+                                      const bool is_diffuse,
+                                      float3 value)
 {
   eval->diffuse = zero_float3();
   eval->glossy = zero_float3();
@@ -45,7 +47,7 @@ ccl_device_inline void bsdf_eval_init(BsdfEval *eval, const bool is_diffuse, flo
   }
 }
 
-ccl_device_inline void bsdf_eval_accum(BsdfEval *eval,
+ccl_device_inline void bsdf_eval_accum(ccl_private BsdfEval *eval,
                                        const bool is_diffuse,
                                        float3 value,
                                        float mis_weight)
@@ -60,29 +62,29 @@ ccl_device_inline void bsdf_eval_accum(BsdfEval *eval,
   }
 }
 
-ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
+ccl_device_inline bool bsdf_eval_is_zero(ccl_private BsdfEval *eval)
 {
   return is_zero(eval->diffuse) && is_zero(eval->glossy);
 }
 
-ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float value)
+ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float value)
 {
   eval->diffuse *= value;
   eval->glossy *= value;
 }
 
-ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value)
+ccl_device_inline void bsdf_eval_mul3(ccl_private BsdfEval *eval, float3 value)
 {
   eval->diffuse *= value;
   eval->glossy *= value;
 }
 
-ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval)
+ccl_device_inline float3 bsdf_eval_sum(ccl_private const BsdfEval *eval)
 {
   return eval->diffuse + eval->glossy;
 }
 
-ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(const BsdfEval *eval)
+ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEval *eval)
 {
   /* Ratio of diffuse and glossy to recover proportions for writing to render pass.
    * We assume reflection, transmission and volume scatter to be exclusive. */
@@ -96,7 +98,9 @@ ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(const BsdfEval *eval)
  * to render buffers instead of using per-thread memory, and to avoid the
  * impact of clamping on other contributions. */
 
-ccl_device_forceinline void kernel_accum_clamp(const KernelGlobals *kg, float3 *L, int bounce)
+ccl_device_forceinline void kernel_accum_clamp(ccl_global const KernelGlobals *kg,
+                                               ccl_private float3 *L,
+                                               int bounce)
 {
 #ifdef __KERNEL_DEBUG_NAN__
   if (!isfinite3_safe(*L)) {
diff --git a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h
index 7d71907effe..cdf2601f6c3 100644
--- a/intern/cycles/kernel/kernel_adaptive_sampling.h
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h
@@ -40,7 +40,7 @@ ccl_device_forceinline bool kernel_need_sample_pixel(INTEGRATOR_STATE_CONST_ARGS
 
 /* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */
 
-ccl_device bool kernel_adaptive_sampling_convergence_check(const KernelGlobals *kg,
+ccl_device bool kernel_adaptive_sampling_convergence_check(ccl_global const KernelGlobals *kg,
                                                            ccl_global float *render_buffer,
                                                            int x,
                                                            int y,
@@ -90,7 +90,7 @@ ccl_device bool kernel_adaptive_sampling_convergence_check(const KernelGlobals *
 /* This is a simple box filter in two passes.
  * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
 
-ccl_device void kernel_adaptive_sampling_filter_x(const KernelGlobals *kg,
+ccl_device void kernel_adaptive_sampling_filter_x(ccl_global const KernelGlobals *kg,
                                                   ccl_global float *render_buffer,
                                                   int y,
                                                   int start_x,
@@ -123,7 +123,7 @@ ccl_device void kernel_adaptive_sampling_filter_x(const KernelGlobals *kg,
   }
 }
 
-ccl_device void kernel_adaptive_sampling_filter_y(const KernelGlobals *kg,
+ccl_device void kernel_adaptive_sampling_filter_y(ccl_global const KernelGlobals *kg,
                                                   ccl_global float *render_buffer,
                                                   int x,
                                                   int start_y,
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index abb1ba455e6..cfff727d007 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -24,7 +24,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void kernel_displace_evaluate(const KernelGlobals *kg,
+ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg,
                                          ccl_global const KernelShaderEvalInput *input,
                                          ccl_global float4 *output,
                                          const int offset)
@@ -56,7 +56,7 @@ ccl_device void kernel_displace_evaluate(const KernelGlobals *kg,
   output[offset] += make_float4(D.x, D.y, D.z, 0.0f);
 }
 
-ccl_device void kernel_background_evaluate(const KernelGlobals *kg,
+ccl_device void kernel_background_evaluate(ccl_global const KernelGlobals *kg,
                                            ccl_global const KernelShaderEvalInput *input,
                                            ccl_global float4 *output,
                                            const int offset)
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 7be5da8fe6d..73683a15c5d 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -46,12 +46,12 @@ ccl_device float2 camera_sample_aperture(ccl_constant KernelCamera *cam, float u
   return bokeh;
 }
 
-ccl_device void camera_sample_perspective(const KernelGlobals *ccl_restrict kg,
+ccl_device void camera_sample_perspective(ccl_global const KernelGlobals *ccl_restrict kg,
                                           float raster_x,
                                           float raster_y,
                                           float lens_u,
                                           float lens_v,
-                                          ccl_addr_space Ray *ray)
+                                          ccl_private Ray *ray)
 {
   /* create ray form raster position */
   ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
@@ -185,12 +185,12 @@ ccl_device void camera_sample_perspective(const KernelGlobals *ccl_restrict kg,
 }
 
 /* Orthographic Camera */
-ccl_device void camera_sample_orthographic(const KernelGlobals *ccl_restrict kg,
+ccl_device void camera_sample_orthographic(ccl_global const KernelGlobals *ccl_restrict kg,
                                            float raster_x,
                                            float raster_y,
                                            float lens_u,
                                            float lens_v,
-                                           ccl_addr_space Ray *ray)
+                                           ccl_private Ray *ray)
 {
   /* create ray form raster position */
   ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
@@ -254,13 +254,13 @@ ccl_device void camera_sample_orthographic(const KernelGlobals *ccl_restrict kg,
 
 ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
 #ifdef __CAMERA_MOTION__
-                                              const ccl_global DecomposedTransform *cam_motion,
+                                              ccl_global const DecomposedTransform *cam_motion,
 #endif
                                               float raster_x,
                                               float raster_y,
                                               float lens_u,
                                               float lens_v,
-                                              ccl_addr_space Ray *ray)
+                                              ccl_private Ray *ray)
 {
   ProjectionTransform rastertocamera = cam->rastertocamera;
   float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
@@ -370,7 +370,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
 
 /* Common */
 
-ccl_device_inline void camera_sample(const KernelGlobals *ccl_restrict kg,
+ccl_device_inline void camera_sample(ccl_global const KernelGlobals *ccl_restrict kg,
                                      int x,
                                      int y,
                                      float filter_u,
@@ -378,7 +378,7 @@ ccl_device_inline void camera_sample(const KernelGlobals *ccl_restrict kg,
                                      float lens_u,
                                      float lens_v,
                                      float time,
-                                     ccl_addr_space Ray *ray)
+                                     ccl_private Ray *ray)
 {
   /* pixel filter */
   int filter_table_offset = kernel_data.film.filter_table_offset;
@@ -434,7 +434,7 @@ ccl_device_inline void camera_sample(const KernelGlobals *ccl_restrict kg,
   }
   else {
 #ifdef __CAMERA_MOTION__
-    const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion);
+    ccl_global const DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion);
     camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray);
 #else
     camera_sample_panorama(&kernel_data.cam, raster_x, raster_y, lens_u, lens_v, ray);
@@ -444,13 +444,13 @@ ccl_device_inline void camera_sample(const KernelGlobals *ccl_restrict kg,
 
 /* Utilities */
 
-ccl_device_inline float3 camera_position(const KernelGlobals *kg)
+ccl_device_inline float3 camera_position(ccl_global const KernelGlobals *kg)
 {
   Transform cameratoworld = kernel_data.cam.cameratoworld;
   return make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
 }
 
-ccl_device_inline float camera_distance(const KernelGlobals *kg, float3 P)
+ccl_device_inline float camera_distance(ccl_global const KernelGlobals *kg, float3 P)
 {
   Transform cameratoworld = kernel_data.cam.cameratoworld;
   float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
@@ -464,7 +464,7 @@ ccl_device_inline float camera_distance(const KernelGlobals *kg, float3 P)
   }
 }
 
-ccl_device_inline float camera_z_depth(const KernelGlobals *kg, float3 P)
+ccl_device_inline float camera_z_depth(ccl_global const KernelGlobals *kg, float3 P)
 {
   if (kernel_data.cam.type != CAMERA_PANORAMA) {
     Transform worldtocamera = kernel_data.cam.worldtocamera;
@@ -477,7 +477,7 @@ ccl_device_inline float camera_z_depth(const KernelGlobals *kg, float3 P)
   }
 }
 
-ccl_device_inline float3 camera_direction_from_point(const KernelGlobals *kg, float3 P)
+ccl_device_inline float3 camera_direction_from_point(ccl_global const KernelGlobals *kg, float3 P)
 {
   Transform cameratoworld = kernel_data.cam.cameratoworld;
 
@@ -491,7 +491,9 @@ ccl_device_inline float3 camera_direction_from_point(const KernelGlobals *kg, fl
   }
 }
 
-ccl_device_inline float3 camera_world_to_ndc(const KernelGlobals *kg, ShaderData *sd, float3 P)
+ccl_device_inline float3 camera_world_to_ndc(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             float3 P)
 {
   if (kernel_data.cam.type != CAMERA_PANORAMA) {
     /* perspective / ortho */
diff --git a/intern/cycles/kernel/kernel_color.h b/intern/cycles/kernel/kernel_color.h
index 960774e0741..9e8e0e68b8f 100644
--- a/intern/cycles/kernel/kernel_color.h
+++ b/intern/cycles/kernel/kernel_color.h
@@ -20,14 +20,14 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float3 xyz_to_rgb(const KernelGlobals *kg, float3 xyz)
+ccl_device float3 xyz_to_rgb(ccl_global const KernelGlobals *kg, float3 xyz)
 {
   return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz),
                      dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz),
                      dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz));
 }
 
-ccl_device float linear_rgb_to_gray(const KernelGlobals *kg, float3 c)
+ccl_device float linear_rgb_to_gray(ccl_global const KernelGlobals *kg, float3 c)
 {
   return dot(c, float4_to_float3(kernel_data.film.rgb_to_y));
 }
diff --git a/intern/cycles/kernel/kernel_differential.h b/intern/cycles/kernel/kernel_differential.h
index db4e110bd10..17187083019 100644
--- a/intern/cycles/kernel/kernel_differential.h
+++ b/intern/cycles/kernel/kernel_differential.h
@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
 
 /* See "Tracing Ray Differentials", Homan Igehy, 1999. */
 
-ccl_device void differential_transfer(ccl_addr_space differential3 *surface_dP,
+ccl_device void differential_transfer(ccl_private differential3 *surface_dP,
                                       const differential3 ray_dP,
                                       float3 ray_D,
                                       const differential3 ray_dD,
@@ -38,7 +38,7 @@ ccl_device void differential_transfer(ccl_addr_space differential3 *surface_dP,
   surface_dP->dy = tmpy - dot(tmpy, surface_Ng) * tmp;
 }
 
-ccl_device void differential_incoming(ccl_addr_space differential3 *dI, const differential3 dD)
+ccl_device void differential_incoming(ccl_private differential3 *dI, const differential3 dD)
 {
   /* compute dIdx/dy at a shading point, we just need to negate the
    * differential of the ray direction */
@@ -47,8 +47,8 @@ ccl_device void differential_incoming(ccl_addr_space differential3 *dI, const di
   dI->dy = -dD.dy;
 }
 
-ccl_device void differential_dudv(ccl_addr_space differential *du,
-                                  ccl_addr_space differential *dv,
+ccl_device void differential_dudv(ccl_private differential *du,
+                                  ccl_private differential *dv,
                                   float3 dPdu,
                                   float3 dPdv,
                                   differential3 dP,
@@ -132,7 +132,7 @@ ccl_device_forceinline float differential_make_compact(const differential3 D)
   return 0.5f * (len(D.dx) + len(D.dy));
 }
 
-ccl_device_forceinline void differential_transfer_compact(ccl_addr_space differential3 *surface_dP,
+ccl_device_forceinline void differential_transfer_compact(ccl_private differential3 *surface_dP,
                                                           const float ray_dP,
                                                           const float3 /* ray_D */,
                                                           const float ray_dD,
@@ -149,7 +149,7 @@ ccl_device_forceinline void differential_transfer_compact(ccl_addr_space differe
   surface_dP->dy = dy * scale;
 }
 
-ccl_device_forceinline void differential_incoming_compact(ccl_addr_space differential3 *dI,
+ccl_device_forceinline void differential_incoming_compact(ccl_private differential3 *dI,
                                                           const float3 D,
                                                           const float dD)
 {
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index d62285d173d..015587ccbbd 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -24,10 +24,11 @@
 CCL_NAMESPACE_BEGIN
 
 /* Evaluate shader on light. */
-ccl_device_noinline_cpu float3 light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
-                                                        ShaderData *ccl_restrict emission_sd,
-                                                        LightSample *ccl_restrict ls,
-                                                        float time)
+ccl_device_noinline_cpu float3
+light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
+                         ccl_private ShaderData *ccl_restrict emission_sd,
+                         ccl_private LightSample *ccl_restrict ls,
+                         float time)
 {
   /* setup shading at emitter */
   float3 eval = zero_float3();
@@ -89,7 +90,7 @@ ccl_device_noinline_cpu float3 light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
   eval *= ls->eval_fac;
 
   if (ls->lamp != LAMP_NONE) {
-    const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp);
+    ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp);
     eval *= make_float3(klight->strength[0], klight->strength[1], klight->strength[2]);
   }
 
@@ -97,16 +98,16 @@ ccl_device_noinline_cpu float3 light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
 }
 
 /* Test if light sample is from a light or emission from geometry. */
-ccl_device_inline bool light_sample_is_light(const LightSample *ccl_restrict ls)
+ccl_device_inline bool light_sample_is_light(ccl_private const LightSample *ccl_restrict ls)
 {
   /* return if it's a lamp for shadow pass */
   return (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND);
 }
 
 /* Early path termination of shadow rays. */
-ccl_device_inline bool light_sample_terminate(const KernelGlobals *ccl_restrict kg,
-                                              const LightSample *ccl_restrict ls,
-                                              BsdfEval *ccl_restrict eval,
+ccl_device_inline bool light_sample_terminate(ccl_global const KernelGlobals *ccl_restrict kg,
+                                              ccl_private const LightSample *ccl_restrict ls,
+                                              ccl_private BsdfEval *ccl_restrict eval,
                                               const float rand_terminate)
 {
   if (bsdf_eval_is_zero(eval)) {
@@ -132,9 +133,10 @@ ccl_device_inline bool light_sample_terminate(const KernelGlobals *ccl_restrict
  * of a triangle. Surface is lifted by amount h along normal n in the incident
  * point. */
 
-ccl_device_inline float3 shadow_ray_smooth_surface_offset(const KernelGlobals *ccl_restrict kg,
-                                                          const ShaderData *ccl_restrict sd,
-                                                          float3 Ng)
+ccl_device_inline float3
+shadow_ray_smooth_surface_offset(ccl_global const KernelGlobals *ccl_restrict kg,
+                                 ccl_private const ShaderData *ccl_restrict sd,
+                                 float3 Ng)
 {
   float3 V[3], N[3];
   triangle_vertices_and_normals(kg, sd->prim, V, N);
@@ -178,8 +180,8 @@ ccl_device_inline float3 shadow_ray_smooth_surface_offset(const KernelGlobals *c
 
 /* Ray offset to avoid shadow terminator artifact. */
 
-ccl_device_inline float3 shadow_ray_offset(const KernelGlobals *ccl_restrict kg,
-                                           const ShaderData *ccl_restrict sd,
+ccl_device_inline float3 shadow_ray_offset(ccl_global const KernelGlobals *ccl_restrict kg,
+                                           ccl_private const ShaderData *ccl_restrict sd,
                                            float3 L)
 {
   float NL = dot(sd->N, L);
@@ -211,10 +213,10 @@ ccl_device_inline float3 shadow_ray_offset(const KernelGlobals *ccl_restrict kg,
   return P;
 }
 
-ccl_device_inline void shadow_ray_setup(const ShaderData *ccl_restrict sd,
-                                        const LightSample *ccl_restrict ls,
+ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restrict sd,
+                                        ccl_private const LightSample *ccl_restrict ls,
                                         const float3 P,
-                                        Ray *ray)
+                                        ccl_private Ray *ray)
 {
   if (ls->shader & SHADER_CAST_SHADOW) {
     /* setup ray */
@@ -244,21 +246,23 @@ ccl_device_inline void shadow_ray_setup(const ShaderData *ccl_restrict sd,
 }
 
 /* Create shadow ray towards light sample. */
-ccl_device_inline void light_sample_to_surface_shadow_ray(const KernelGlobals *ccl_restrict kg,
-                                                          const ShaderData *ccl_restrict sd,
-                                                          const LightSample *ccl_restrict ls,
-                                                          Ray *ray)
+ccl_device_inline void light_sample_to_surface_shadow_ray(
+    ccl_global const KernelGlobals *ccl_restrict kg,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const LightSample *ccl_restrict ls,
+    ccl_private Ray *ray)
 {
   const float3 P = shadow_ray_offset(kg, sd, ls->D);
   shadow_ray_setup(sd, ls, P, ray);
 }
 
 /* Create shadow ray towards light sample. */
-ccl_device_inline void light_sample_to_volume_shadow_ray(const KernelGlobals *ccl_restrict kg,
-                                                         const ShaderData *ccl_restrict sd,
-                                                         const LightSample *ccl_restrict ls,
-                                                         const float3 P,
-                                                         Ray *ray)
+ccl_device_inline void light_sample_to_volume_shadow_ray(
+    ccl_global const KernelGlobals *ccl_restrict kg,
+    ccl_private const ShaderData *ccl_restrict sd,
+    ccl_private const LightSample *ccl_restrict ls,
+    const float3 P,
+    ccl_private Ray *ray)
 {
   shadow_ray_setup(sd, ls, P, ray);
 }
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index e8f4a21878e..a87eff3832e 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -30,7 +30,8 @@ ccl_device_forceinline float film_transparency_to_alpha(float transparency)
   return saturate(1.0f - transparency);
 }
 
-ccl_device_inline float film_get_scale(const KernelFilmConvert *ccl_restrict kfilm_convert,
+ccl_device_inline float film_get_scale(ccl_global const KernelFilmConvert *ccl_restrict
+                                           kfilm_convert,
                                        ccl_global const float *ccl_restrict buffer)
 {
   if (kfilm_convert->pass_sample_count == PASS_UNUSED) {
@@ -38,14 +39,15 @@ ccl_device_inline float film_get_scale(const KernelFilmConvert *ccl_restrict kfi
   }
 
   if (kfilm_convert->pass_use_filter) {
-    const uint sample_count = *((const uint *)(buffer + kfilm_convert->pass_sample_count));
+    const uint sample_count = *(
+        (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count));
     return 1.0f / sample_count;
   }
 
   return 1.0f;
 }
 
-ccl_device_inline float film_get_scale_exposure(const KernelFilmConvert *ccl_restrict
+ccl_device_inline float film_get_scale_exposure(ccl_global const KernelFilmConvert *ccl_restrict
                                                     kfilm_convert,
                                                 ccl_global const float *ccl_restrict buffer)
 {
@@ -63,10 +65,10 @@ ccl_device_inline float film_get_scale_exposure(const KernelFilmConvert *ccl_res
 }
 
 ccl_device_inline bool film_get_scale_and_scale_exposure(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer,
-    float *ccl_restrict scale,
-    float *ccl_restrict scale_exposure)
+    ccl_private float *ccl_restrict scale,
+    ccl_private float *ccl_restrict scale_exposure)
 {
   if (kfilm_convert->pass_sample_count == PASS_UNUSED) {
     *scale = kfilm_convert->scale;
@@ -74,7 +76,8 @@ ccl_device_inline bool film_get_scale_and_scale_exposure(
     return true;
   }
 
-  const uint sample_count = *((const uint *)(buffer + kfilm_convert->pass_sample_count));
+  const uint sample_count = *(
+      (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count));
   if (!sample_count) {
     *scale = 0.0f;
     *scale_exposure = 0.0f;
@@ -102,33 +105,33 @@ ccl_device_inline bool film_get_scale_and_scale_exposure(
  * Float (scalar) passes.
  */
 
-ccl_device_inline void film_get_pass_pixel_depth(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_depth(ccl_global const KernelFilmConvert *ccl_restrict
                                                      kfilm_convert,
                                                  ccl_global const float *ccl_restrict buffer,
-                                                 float *ccl_restrict pixel)
+                                                 ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
   const float f = *in;
 
   pixel[0] = (f == 0.0f) ? 1e10f : f * scale_exposure;
 }
 
-ccl_device_inline void film_get_pass_pixel_mist(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_mist(ccl_global const KernelFilmConvert *ccl_restrict
                                                     kfilm_convert,
                                                 ccl_global const float *ccl_restrict buffer,
-                                                float *ccl_restrict pixel)
+                                                ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
   const float f = *in;
 
   /* Note that we accumulate 1 - mist in the kernel to avoid having to
@@ -137,9 +140,9 @@ ccl_device_inline void film_get_pass_pixel_mist(const KernelFilmConvert *ccl_res
 }
 
 ccl_device_inline void film_get_pass_pixel_sample_count(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer,
-    float *ccl_restrict pixel)
+    ccl_private float *ccl_restrict pixel)
 {
   /* TODO(sergey): Consider normalizing into the [0..1] range, so that it is possible to see
    * meaningful value when adaptive sampler stopped rendering image way before the maximum
@@ -149,23 +152,23 @@ ccl_device_inline void film_get_pass_pixel_sample_count(
   kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
   const float f = *in;
 
   pixel[0] = __float_as_uint(f) * kfilm_convert->scale;
 }
 
-ccl_device_inline void film_get_pass_pixel_float(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_float(ccl_global const KernelFilmConvert *ccl_restrict
                                                      kfilm_convert,
                                                  ccl_global const float *ccl_restrict buffer,
-                                                 float *ccl_restrict pixel)
+                                                 ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 1);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
   const float f = *in;
 
   pixel[0] = f * scale_exposure;
@@ -175,28 +178,28 @@ ccl_device_inline void film_get_pass_pixel_float(const KernelFilmConvert *ccl_re
  * Float 3 passes.
  */
 
-ccl_device_inline void film_get_pass_pixel_light_path(const KernelFilmConvert *ccl_restrict
-                                                          kfilm_convert,
-                                                      ccl_global const float *ccl_restrict buffer,
-                                                      float *ccl_restrict pixel)
+ccl_device_inline void film_get_pass_pixel_light_path(
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const float *ccl_restrict buffer,
+    ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 3);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   /* Read light pass. */
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
   float3 f = make_float3(in[0], in[1], in[2]);
 
   /* Optionally add indirect light pass. */
   if (kfilm_convert->pass_indirect != PASS_UNUSED) {
-    const float *in_indirect = buffer + kfilm_convert->pass_indirect;
+    ccl_global const float *in_indirect = buffer + kfilm_convert->pass_indirect;
     const float3 f_indirect = make_float3(in_indirect[0], in_indirect[1], in_indirect[2]);
     f += f_indirect;
   }
 
   /* Optionally divide out color. */
   if (kfilm_convert->pass_divide != PASS_UNUSED) {
-    const float *in_divide = buffer + kfilm_convert->pass_divide;
+    ccl_global const float *in_divide = buffer + kfilm_convert->pass_divide;
     const float3 f_divide = make_float3(in_divide[0], in_divide[1], in_divide[2]);
     f = safe_divide_even_color(f, f_divide);
 
@@ -213,17 +216,17 @@ ccl_device_inline void film_get_pass_pixel_light_path(const KernelFilmConvert *c
   pixel[2] = f.z;
 }
 
-ccl_device_inline void film_get_pass_pixel_float3(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_float3(ccl_global const KernelFilmConvert *ccl_restrict
                                                       kfilm_convert,
                                                   ccl_global const float *ccl_restrict buffer,
-                                                  float *ccl_restrict pixel)
+                                                  ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 3);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
 
   const float3 f = make_float3(in[0], in[1], in[2]) * scale_exposure;
 
@@ -236,17 +239,17 @@ ccl_device_inline void film_get_pass_pixel_float3(const KernelFilmConvert *ccl_r
  * Float4 passes.
  */
 
-ccl_device_inline void film_get_pass_pixel_motion(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_motion(ccl_global const KernelFilmConvert *ccl_restrict
                                                       kfilm_convert,
                                                   ccl_global const float *ccl_restrict buffer,
-                                                  float *ccl_restrict pixel)
+                                                  ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components == 4);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
   kernel_assert(kfilm_convert->pass_motion_weight != PASS_UNUSED);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
-  const float *in_weight = buffer + kfilm_convert->pass_motion_weight;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in_weight = buffer + kfilm_convert->pass_motion_weight;
 
   const float weight = in_weight[0];
   const float weight_inv = (weight > 0.0f) ? 1.0f / weight : 0.0f;
@@ -259,17 +262,17 @@ ccl_device_inline void film_get_pass_pixel_motion(const KernelFilmConvert *ccl_r
   pixel[3] = motion.w;
 }
 
-ccl_device_inline void film_get_pass_pixel_cryptomatte(const KernelFilmConvert *ccl_restrict
-                                                           kfilm_convert,
-                                                       ccl_global const float *ccl_restrict buffer,
-                                                       float *ccl_restrict pixel)
+ccl_device_inline void film_get_pass_pixel_cryptomatte(
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const float *ccl_restrict buffer,
+    ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components == 4);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
 
   const float scale = film_get_scale(kfilm_convert, buffer);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
 
   const float4 f = make_float4(in[0], in[1], in[2], in[3]);
 
@@ -281,10 +284,10 @@ ccl_device_inline void film_get_pass_pixel_cryptomatte(const KernelFilmConvert *
   pixel[3] = f.w * scale;
 }
 
-ccl_device_inline void film_get_pass_pixel_float4(const KernelFilmConvert *ccl_restrict
+ccl_device_inline void film_get_pass_pixel_float4(ccl_global const KernelFilmConvert *ccl_restrict
                                                       kfilm_convert,
                                                   ccl_global const float *ccl_restrict buffer,
-                                                  float *ccl_restrict pixel)
+                                                  ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components == 4);
   kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
@@ -292,7 +295,7 @@ ccl_device_inline void film_get_pass_pixel_float4(const KernelFilmConvert *ccl_r
   float scale, scale_exposure;
   film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure);
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
 
   const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure;
   const float alpha = in[3] * scale;
@@ -303,10 +306,10 @@ ccl_device_inline void film_get_pass_pixel_float4(const KernelFilmConvert *ccl_r
   pixel[3] = alpha;
 }
 
-ccl_device_inline void film_get_pass_pixel_combined(const KernelFilmConvert *ccl_restrict
-                                                        kfilm_convert,
-                                                    ccl_global const float *ccl_restrict buffer,
-                                                    float *ccl_restrict pixel)
+ccl_device_inline void film_get_pass_pixel_combined(
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const float *ccl_restrict buffer,
+    ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components == 4);
 
@@ -324,7 +327,7 @@ ccl_device_inline void film_get_pass_pixel_combined(const KernelFilmConvert *ccl
     return;
   }
 
-  const float *in = buffer + kfilm_convert->pass_offset;
+  ccl_global const float *in = buffer + kfilm_convert->pass_offset;
 
   const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure;
   const float alpha = in[3] * scale;
@@ -339,9 +342,9 @@ ccl_device_inline void film_get_pass_pixel_combined(const KernelFilmConvert *ccl
  * Shadow catcher.
  */
 
-ccl_device_inline float3
-film_calculate_shadow_catcher_denoised(const KernelFilmConvert *ccl_restrict kfilm_convert,
-                                       ccl_global const float *ccl_restrict buffer)
+ccl_device_inline float3 film_calculate_shadow_catcher_denoised(
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const float *ccl_restrict buffer)
 {
   kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED);
 
@@ -367,7 +370,7 @@ ccl_device_inline float3 safe_divide_shadow_catcher(float3 a, float3 b)
 }
 
 ccl_device_inline float3
-film_calculate_shadow_catcher(const KernelFilmConvert *ccl_restrict kfilm_convert,
+film_calculate_shadow_catcher(ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
                               ccl_global const float *ccl_restrict buffer)
 {
   /* For the shadow catcher pass we divide combined pass by the shadow catcher.
@@ -431,7 +434,7 @@ film_calculate_shadow_catcher(const KernelFilmConvert *ccl_restrict kfilm_conver
 }
 
 ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer)
 {
   /* The approximation of the shadow is 1 - average(shadow_catcher_pass). A better approximation
@@ -474,9 +477,9 @@ ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow(
 }
 
 ccl_device_inline void film_get_pass_pixel_shadow_catcher(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer,
-    float *ccl_restrict pixel)
+    ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components >= 3);
 
@@ -488,9 +491,9 @@ ccl_device_inline void film_get_pass_pixel_shadow_catcher(
 }
 
 ccl_device_inline void film_get_pass_pixel_shadow_catcher_matte_with_shadow(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer,
-    float *ccl_restrict pixel)
+    ccl_private float *ccl_restrict pixel)
 {
   kernel_assert(kfilm_convert->num_components == 3 || kfilm_convert->num_components == 4);
 
@@ -510,9 +513,9 @@ ccl_device_inline void film_get_pass_pixel_shadow_catcher_matte_with_shadow(
  */
 
 ccl_device_inline void film_apply_pass_pixel_overlays_rgba(
-    const KernelFilmConvert *ccl_restrict kfilm_convert,
+    ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
     ccl_global const float *ccl_restrict buffer,
-    float *ccl_restrict pixel)
+    ccl_private float *ccl_restrict pixel)
 {
   if (kfilm_convert->show_active_pixels &&
       kfilm_convert->pass_adaptive_aux_buffer != PASS_UNUSED) {
diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h
index ed01f494f98..07b96d0e1a8 100644
--- a/intern/cycles/kernel/kernel_id_passes.h
+++ b/intern/cycles/kernel/kernel_id_passes.h
@@ -92,7 +92,7 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl
 }
 
 /* post-sorting for Cryptomatte */
-ccl_device_inline void kernel_cryptomatte_post(const KernelGlobals *kg,
+ccl_device_inline void kernel_cryptomatte_post(ccl_global const KernelGlobals *kg,
                                                ccl_global float *render_buffer,
                                                int pixel_index)
 {
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index 1beaf3cc2b2..1f745ab1da9 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -72,7 +72,10 @@ ccl_device_inline float cmj_randfloat_simple(uint i, uint p)
   return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF);
 }
 
-ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension)
+ccl_device float pmj_sample_1D(ccl_global const KernelGlobals *kg,
+                               uint sample,
+                               uint rng_hash,
+                               uint dimension)
 {
   /* Perform Owen shuffle of the sample number to reorder the samples. */
 #ifdef _SIMPLE_HASH_
@@ -115,8 +118,12 @@ ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_ha
   return fx;
 }
 
-ccl_device void pmj_sample_2D(
-    const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension, float *x, float *y)
+ccl_device void pmj_sample_2D(ccl_global const KernelGlobals *kg,
+                              uint sample,
+                              uint rng_hash,
+                              uint dimension,
+                              ccl_private float *x,
+                              ccl_private float *y)
 {
   /* Perform a shuffle on the sample number to reorder the samples. */
 #ifdef _SIMPLE_HASH_
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 52f641634b9..33d0c09a32a 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -45,13 +45,13 @@ typedef struct LightSample {
 /* Regular Light */
 
 template<bool in_volume_segment>
-ccl_device_inline bool light_sample(const KernelGlobals *kg,
+ccl_device_inline bool light_sample(ccl_global const KernelGlobals *kg,
                                     const int lamp,
                                     const float randu,
                                     const float randv,
                                     const float3 P,
                                     const int path_flag,
-                                    LightSample *ls)
+                                    ccl_private LightSample *ls)
 {
   const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
   if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) {
@@ -209,9 +209,9 @@ ccl_device_inline bool light_sample(const KernelGlobals *kg,
   return (ls->pdf > 0.0f);
 }
 
-ccl_device bool lights_intersect(const KernelGlobals *ccl_restrict kg,
-                                 const Ray *ccl_restrict ray,
-                                 Intersection *ccl_restrict isect,
+ccl_device bool lights_intersect(ccl_global const KernelGlobals *ccl_restrict kg,
+                                 ccl_private const Ray *ccl_restrict ray,
+                                 ccl_private Intersection *ccl_restrict isect,
                                  const int last_prim,
                                  const int last_object,
                                  const int last_type,
@@ -298,12 +298,12 @@ ccl_device bool lights_intersect(const KernelGlobals *ccl_restrict kg,
   return isect->prim != PRIM_NONE;
 }
 
-ccl_device bool light_sample_from_distant_ray(const KernelGlobals *ccl_restrict kg,
+ccl_device bool light_sample_from_distant_ray(ccl_global const KernelGlobals *ccl_restrict kg,
                                               const float3 ray_D,
                                               const int lamp,
-                                              LightSample *ccl_restrict ls)
+                                              ccl_private LightSample *ccl_restrict ls)
 {
-  const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+  ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
   const int shader = klight->shader_id;
   const float radius = klight->distant.radius;
   const LightType type = (LightType)klight->type;
@@ -362,14 +362,14 @@ ccl_device bool light_sample_from_distant_ray(const KernelGlobals *ccl_restrict
   return true;
 }
 
-ccl_device bool light_sample_from_intersection(const KernelGlobals *ccl_restrict kg,
-                                               const Intersection *ccl_restrict isect,
+ccl_device bool light_sample_from_intersection(ccl_global const KernelGlobals *ccl_restrict kg,
+                                               ccl_private const Intersection *ccl_restrict isect,
                                                const float3 ray_P,
                                                const float3 ray_D,
-                                               LightSample *ccl_restrict ls)
+                                               ccl_private LightSample *ccl_restrict ls)
 {
   const int lamp = isect->prim;
-  const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+  ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
   LightType type = (LightType)klight->type;
   ls->type = type;
   ls->shader = klight->shader_id;
@@ -464,7 +464,7 @@ ccl_device bool light_sample_from_intersection(const KernelGlobals *ccl_restrict
 
 /* returns true if the triangle is has motion blur or an instancing transform applied */
 ccl_device_inline bool triangle_world_space_vertices(
-    const KernelGlobals *kg, int object, int prim, float time, float3 V[3])
+    ccl_global const KernelGlobals *kg, int object, int prim, float time, float3 V[3])
 {
   bool has_motion = false;
   const int object_flag = kernel_tex_fetch(__object_flag, object);
@@ -492,7 +492,7 @@ ccl_device_inline bool triangle_world_space_vertices(
   return has_motion;
 }
 
-ccl_device_inline float triangle_light_pdf_area(const KernelGlobals *kg,
+ccl_device_inline float triangle_light_pdf_area(ccl_global const KernelGlobals *kg,
                                                 const float3 Ng,
                                                 const float3 I,
                                                 float t)
@@ -506,8 +506,8 @@ ccl_device_inline float triangle_light_pdf_area(const KernelGlobals *kg,
   return t * t * pdf / cos_pi;
 }
 
-ccl_device_forceinline float triangle_light_pdf(const KernelGlobals *kg,
-                                                const ShaderData *sd,
+ccl_device_forceinline float triangle_light_pdf(ccl_global const KernelGlobals *kg,
+                                                ccl_private const ShaderData *sd,
                                                 float t)
 {
   /* A naive heuristic to decide between costly solid angle sampling
@@ -578,13 +578,13 @@ ccl_device_forceinline float triangle_light_pdf(const KernelGlobals *kg,
 }
 
 template<bool in_volume_segment>
-ccl_device_forceinline void triangle_light_sample(const KernelGlobals *kg,
+ccl_device_forceinline void triangle_light_sample(ccl_global const KernelGlobals *kg,
                                                   int prim,
                                                   int object,
                                                   float randu,
                                                   float randv,
                                                   float time,
-                                                  LightSample *ls,
+                                                  ccl_private LightSample *ls,
                                                   const float3 P)
 {
   /* A naive heuristic to decide between costly solid angle sampling
@@ -747,7 +747,8 @@ ccl_device_forceinline void triangle_light_sample(const KernelGlobals *kg,
 
 /* Light Distribution */
 
-ccl_device int light_distribution_sample(const KernelGlobals *kg, float *randu)
+ccl_device int light_distribution_sample(ccl_global const KernelGlobals *kg,
+                                         ccl_private float *randu)
 {
   /* This is basically std::upper_bound as used by PBRT, to find a point light or
    * triangle to emit from, proportional to area. a good improvement would be to
@@ -785,7 +786,7 @@ ccl_device int light_distribution_sample(const KernelGlobals *kg, float *randu)
 
 /* Generic Light */
 
-ccl_device_inline bool light_select_reached_max_bounces(const KernelGlobals *kg,
+ccl_device_inline bool light_select_reached_max_bounces(ccl_global const KernelGlobals *kg,
                                                         int index,
                                                         int bounce)
 {
@@ -793,18 +794,18 @@ ccl_device_inline bool light_select_reached_max_bounces(const KernelGlobals *kg,
 }
 
 template<bool in_volume_segment>
-ccl_device_noinline bool light_distribution_sample(const KernelGlobals *kg,
+ccl_device_noinline bool light_distribution_sample(ccl_global const KernelGlobals *kg,
                                                    float randu,
                                                    const float randv,
                                                    const float time,
                                                    const float3 P,
                                                    const int bounce,
                                                    const int path_flag,
-                                                   LightSample *ls)
+                                                   ccl_private LightSample *ls)
 {
   /* Sample light index from distribution. */
   const int index = light_distribution_sample(kg, &randu);
-  const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution,
+  ccl_global const KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution,
                                                                               index);
   const int prim = kdistribution->prim;
 
@@ -833,36 +834,37 @@ ccl_device_noinline bool light_distribution_sample(const KernelGlobals *kg,
   return light_sample<in_volume_segment>(kg, lamp, randu, randv, P, path_flag, ls);
 }
 
-ccl_device_inline bool light_distribution_sample_from_volume_segment(const KernelGlobals *kg,
-                                                                     float randu,
-                                                                     const float randv,
-                                                                     const float time,
-                                                                     const float3 P,
-                                                                     const int bounce,
-                                                                     const int path_flag,
-                                                                     LightSample *ls)
+ccl_device_inline bool light_distribution_sample_from_volume_segment(
+    ccl_global const KernelGlobals *kg,
+    float randu,
+    const float randv,
+    const float time,
+    const float3 P,
+    const int bounce,
+    const int path_flag,
+    ccl_private LightSample *ls)
 {
   return light_distribution_sample<true>(kg, randu, randv, time, P, bounce, path_flag, ls);
 }
 
-ccl_device_inline bool light_distribution_sample_from_position(const KernelGlobals *kg,
+ccl_device_inline bool light_distribution_sample_from_position(ccl_global const KernelGlobals *kg,
                                                                float randu,
                                                                const float randv,
                                                                const float time,
                                                                const float3 P,
                                                                const int bounce,
                                                                const int path_flag,
-                                                               LightSample *ls)
+                                                               ccl_private LightSample *ls)
 {
   return light_distribution_sample<false>(kg, randu, randv, time, P, bounce, path_flag, ls);
 }
 
-ccl_device_inline bool light_distribution_sample_new_position(const KernelGlobals *kg,
+ccl_device_inline bool light_distribution_sample_new_position(ccl_global const KernelGlobals *kg,
                                                               const float randu,
                                                               const float randv,
                                                               const float time,
                                                               const float3 P,
-                                                              LightSample *ls)
+                                                              ccl_private LightSample *ls)
 {
   /* Sample a new position on the same light, for volume sampling. */
   if (ls->type == LIGHT_TRIANGLE) {
diff --git a/intern/cycles/kernel/kernel_light_background.h b/intern/cycles/kernel/kernel_light_background.h
index 493ed560bc6..3669ff50455 100644
--- a/intern/cycles/kernel/kernel_light_background.h
+++ b/intern/cycles/kernel/kernel_light_background.h
@@ -24,10 +24,10 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __BACKGROUND_MIS__
 
-ccl_device float3 background_map_sample(const KernelGlobals *kg,
+ccl_device float3 background_map_sample(ccl_global const KernelGlobals *kg,
                                         float randu,
                                         float randv,
-                                        float *pdf)
+                                        ccl_private float *pdf)
 {
   /* for the following, the CDF values are actually a pair of floats, with the
    * function value as X and the actual CDF as Y.  The last entry's function
@@ -109,7 +109,7 @@ ccl_device float3 background_map_sample(const KernelGlobals *kg,
 /* TODO(sergey): Same as above, after the release we should consider using
  * 'noinline' for all devices.
  */
-ccl_device float background_map_pdf(const KernelGlobals *kg, float3 direction)
+ccl_device float background_map_pdf(ccl_global const KernelGlobals *kg, float3 direction)
 {
   float2 uv = direction_to_equirectangular(direction);
   int res_x = kernel_data.background.map_res_x;
@@ -143,7 +143,11 @@ ccl_device float background_map_pdf(const KernelGlobals *kg, float3 direction)
 }
 
 ccl_device_inline bool background_portal_data_fetch_and_check_side(
-    const KernelGlobals *kg, float3 P, int index, float3 *lightpos, float3 *dir)
+    ccl_global const KernelGlobals *kg,
+    float3 P,
+    int index,
+    ccl_private float3 *lightpos,
+    ccl_private float3 *dir)
 {
   int portal = kernel_data.background.portal_offset + index;
   const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
@@ -158,8 +162,11 @@ ccl_device_inline bool background_portal_data_fetch_and_check_side(
   return false;
 }
 
-ccl_device_inline float background_portal_pdf(
-    const KernelGlobals *kg, float3 P, float3 direction, int ignore_portal, bool *is_possible)
+ccl_device_inline float background_portal_pdf(ccl_global const KernelGlobals *kg,
+                                              float3 P,
+                                              float3 direction,
+                                              int ignore_portal,
+                                              ccl_private bool *is_possible)
 {
   float portal_pdf = 0.0f;
 
@@ -219,7 +226,7 @@ ccl_device_inline float background_portal_pdf(
   return (num_possible > 0) ? portal_pdf / num_possible : 0.0f;
 }
 
-ccl_device int background_num_possible_portals(const KernelGlobals *kg, float3 P)
+ccl_device int background_num_possible_portals(ccl_global const KernelGlobals *kg, float3 P)
 {
   int num_possible_portals = 0;
   for (int p = 0; p < kernel_data.background.num_portals; p++) {
@@ -230,13 +237,13 @@ ccl_device int background_num_possible_portals(const KernelGlobals *kg, float3 P
   return num_possible_portals;
 }
 
-ccl_device float3 background_portal_sample(const KernelGlobals *kg,
+ccl_device float3 background_portal_sample(ccl_global const KernelGlobals *kg,
                                            float3 P,
                                            float randu,
                                            float randv,
                                            int num_possible,
-                                           int *sampled_portal,
-                                           float *pdf)
+                                           ccl_private int *sampled_portal,
+                                           ccl_private float *pdf)
 {
   /* Pick a portal, then re-normalize randv. */
   randv *= num_possible;
@@ -285,10 +292,10 @@ ccl_device float3 background_portal_sample(const KernelGlobals *kg,
   return zero_float3();
 }
 
-ccl_device_inline float3 background_sun_sample(const KernelGlobals *kg,
+ccl_device_inline float3 background_sun_sample(ccl_global const KernelGlobals *kg,
                                                float randu,
                                                float randv,
-                                               float *pdf)
+                                               ccl_private float *pdf)
 {
   float3 D;
   const float3 N = float4_to_float3(kernel_data.background.sun);
@@ -297,15 +304,15 @@ ccl_device_inline float3 background_sun_sample(const KernelGlobals *kg,
   return D;
 }
 
-ccl_device_inline float background_sun_pdf(const KernelGlobals *kg, float3 D)
+ccl_device_inline float background_sun_pdf(ccl_global const KernelGlobals *kg, float3 D)
 {
   const float3 N = float4_to_float3(kernel_data.background.sun);
   const float angle = kernel_data.background.sun.w;
   return pdf_uniform_cone(N, D, angle);
 }
 
-ccl_device_inline float3
-background_light_sample(const KernelGlobals *kg, float3 P, float randu, float randv, float *pdf)
+ccl_device_inline float3 background_light_sample(
+    ccl_global const KernelGlobals *kg, float3 P, float randu, float randv, ccl_private float *pdf)
 {
   float portal_method_pdf = kernel_data.background.portal_weight;
   float sun_method_pdf = kernel_data.background.sun_weight;
@@ -405,7 +412,9 @@ background_light_sample(const KernelGlobals *kg, float3 P, float randu, float ra
   return D;
 }
 
-ccl_device float background_light_pdf(const KernelGlobals *kg, float3 P, float3 direction)
+ccl_device float background_light_pdf(ccl_global const KernelGlobals *kg,
+                                      float3 P,
+                                      float3 direction)
 {
   float portal_method_pdf = kernel_data.background.portal_weight;
   float sun_method_pdf = kernel_data.background.sun_weight;
diff --git a/intern/cycles/kernel/kernel_light_common.h b/intern/cycles/kernel/kernel_light_common.h
index 765d8f5338e..9421ac462e2 100644
--- a/intern/cycles/kernel/kernel_light_common.h
+++ b/intern/cycles/kernel/kernel_light_common.h
@@ -32,7 +32,7 @@ CCL_NAMESPACE_BEGIN
  * Note: light_p is modified when sample_coord is true.
  */
 ccl_device_inline float rect_light_sample(float3 P,
-                                          float3 *light_p,
+                                          ccl_private float3 *light_p,
                                           float3 axisu,
                                           float3 axisv,
                                           float randu,
@@ -167,9 +167,9 @@ ccl_device float light_spread_attenuation(const float3 D,
  * reduce noise with low spread. */
 ccl_device bool light_spread_clamp_area_light(const float3 P,
                                               const float3 lightNg,
-                                              float3 *lightP,
-                                              float3 *axisu,
-                                              float3 *axisv,
+                                              ccl_private float3 *lightP,
+                                              ccl_private float3 *axisu,
+                                              ccl_private float3 *axisv,
                                               const float tan_spread)
 {
   /* Closest point in area light plane and distance to that plane. */
@@ -214,7 +214,10 @@ ccl_device bool light_spread_clamp_area_light(const float3 P,
   return true;
 }
 
-ccl_device float lamp_light_pdf(const KernelGlobals *kg, const float3 Ng, const float3 I, float t)
+ccl_device float lamp_light_pdf(ccl_global const KernelGlobals *kg,
+                                const float3 Ng,
+                                const float3 I,
+                                float t)
 {
   float cos_pi = dot(Ng, I);
 
diff --git a/intern/cycles/kernel/kernel_lookup_table.h b/intern/cycles/kernel/kernel_lookup_table.h
index 33d9d5ae1f0..3c8577af417 100644
--- a/intern/cycles/kernel/kernel_lookup_table.h
+++ b/intern/cycles/kernel/kernel_lookup_table.h
@@ -20,7 +20,10 @@ CCL_NAMESPACE_BEGIN
 
 /* Interpolated lookup table access */
 
-ccl_device float lookup_table_read(const KernelGlobals *kg, float x, int offset, int size)
+ccl_device float lookup_table_read(ccl_global const KernelGlobals *kg,
+                                   float x,
+                                   int offset,
+                                   int size)
 {
   x = saturate(x) * (size - 1);
 
@@ -37,7 +40,7 @@ ccl_device float lookup_table_read(const KernelGlobals *kg, float x, int offset,
 }
 
 ccl_device float lookup_table_read_2D(
-    const KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize)
+    ccl_global const KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize)
 {
   y = saturate(y) * (ysize - 1);
 
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index b158f4c4fd3..c931aa45276 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -35,7 +35,7 @@
 CCL_NAMESPACE_BEGIN
 
 /* distribute uniform xy on [0,1] over unit disk [-1,1] */
-ccl_device void to_unit_disk(float *x, float *y)
+ccl_device void to_unit_disk(ccl_private float *x, ccl_private float *y)
 {
   float phi = M_2PI_F * (*x);
   float r = sqrtf(*y);
@@ -46,7 +46,10 @@ ccl_device void to_unit_disk(float *x, float *y)
 
 /* return an orthogonal tangent and bitangent given a normal and tangent that
  * may not be exactly orthogonal */
-ccl_device void make_orthonormals_tangent(const float3 N, const float3 T, float3 *a, float3 *b)
+ccl_device void make_orthonormals_tangent(const float3 N,
+                                          const float3 T,
+                                          ccl_private float3 *a,
+                                          ccl_private float3 *b)
 {
   *b = normalize(cross(N, T));
   *a = cross(*b, N);
@@ -54,7 +57,7 @@ ccl_device void make_orthonormals_tangent(const float3 N, const float3 T, float3
 
 /* sample direction with cosine weighted distributed in hemisphere */
 ccl_device_inline void sample_cos_hemisphere(
-    const float3 N, float randu, float randv, float3 *omega_in, float *pdf)
+    const float3 N, float randu, float randv, ccl_private float3 *omega_in, ccl_private float *pdf)
 {
   to_unit_disk(&randu, &randv);
   float costheta = sqrtf(max(1.0f - randu * randu - randv * randv, 0.0f));
@@ -66,7 +69,7 @@ ccl_device_inline void sample_cos_hemisphere(
 
 /* sample direction uniformly distributed in hemisphere */
 ccl_device_inline void sample_uniform_hemisphere(
-    const float3 N, float randu, float randv, float3 *omega_in, float *pdf)
+    const float3 N, float randu, float randv, ccl_private float3 *omega_in, ccl_private float *pdf)
 {
   float z = randu;
   float r = sqrtf(max(0.0f, 1.0f - z * z));
@@ -81,8 +84,12 @@ ccl_device_inline void sample_uniform_hemisphere(
 }
 
 /* sample direction uniformly distributed in cone */
-ccl_device_inline void sample_uniform_cone(
-    const float3 N, float angle, float randu, float randv, float3 *omega_in, float *pdf)
+ccl_device_inline void sample_uniform_cone(const float3 N,
+                                           float angle,
+                                           float randu,
+                                           float randv,
+                                           ccl_private float3 *omega_in,
+                                           ccl_private float *pdf)
 {
   float zMin = cosf(angle);
   float z = zMin - zMin * randu + randu;
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 67466b28170..b981e750dda 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -36,7 +36,9 @@ ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer(
 #ifdef __DENOISING_FEATURES__
 
 ccl_device_forceinline void kernel_write_denoising_features_surface(
-    INTEGRATOR_STATE_ARGS, const ShaderData *sd, ccl_global float *ccl_restrict render_buffer)
+    INTEGRATOR_STATE_ARGS,
+    ccl_private const ShaderData *sd,
+    ccl_global float *ccl_restrict render_buffer)
 {
   if (!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DENOISING_FEATURES)) {
     return;
@@ -55,7 +57,7 @@ ccl_device_forceinline void kernel_write_denoising_features_surface(
   float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
       continue;
@@ -71,11 +73,11 @@ ccl_device_forceinline void kernel_write_denoising_features_surface(
      * To account for this, we scale their weight by the average fresnel factor (the same is also
      * done for the sample weight in the BSDF setup, so we don't need to scale that here). */
     if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) {
-      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc;
+      ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
       closure_albedo *= bsdf->extra->fresnel_color;
     }
     else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) {
-      PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)sc;
+      ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)sc;
       closure_albedo *= bsdf->avg_value;
     }
     else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) {
@@ -151,7 +153,9 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(INTEGRATOR_ST
 
 /* Write shadow catcher passes on a bounce from the shadow catcher object. */
 ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
-    INTEGRATOR_STATE_ARGS, const ShaderData *sd, ccl_global float *ccl_restrict render_buffer)
+    INTEGRATOR_STATE_ARGS,
+    ccl_private const ShaderData *sd,
+    ccl_global float *ccl_restrict render_buffer)
 {
   if (!kernel_data.integrator.has_shadow_catcher) {
     return;
@@ -178,7 +182,7 @@ ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
 
 #endif /* __SHADOW_CATCHER__ */
 
-ccl_device_inline size_t kernel_write_id_pass(float *ccl_restrict buffer,
+ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer,
                                               size_t depth,
                                               float id,
                                               float matte_weight)
@@ -188,7 +192,7 @@ ccl_device_inline size_t kernel_write_id_pass(float *ccl_restrict buffer,
 }
 
 ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS,
-                                                const ShaderData *sd,
+                                                ccl_private const ShaderData *sd,
                                                 ccl_global float *ccl_restrict render_buffer)
 {
 #ifdef __PASSES__
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index ebb2c0df4f1..e04ed5b1cc1 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -32,7 +32,7 @@ ccl_device_inline void path_state_init_queues(INTEGRATOR_STATE_ARGS)
 /* Minimalistic initialization of the path state, which is needed for early outputs in the
  * integrator initialization to work. */
 ccl_device_inline void path_state_init(INTEGRATOR_STATE_ARGS,
-                                       const ccl_global KernelWorkTile *ccl_restrict tile,
+                                       ccl_global const KernelWorkTile *ccl_restrict tile,
                                        const int x,
                                        const int y)
 {
@@ -281,14 +281,16 @@ typedef struct RNGState {
   int sample;
 } RNGState;
 
-ccl_device_inline void path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS, RNGState *rng_state)
+ccl_device_inline void path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS,
+                                           ccl_private RNGState *rng_state)
 {
   rng_state->rng_hash = INTEGRATOR_STATE(path, rng_hash);
   rng_state->rng_offset = INTEGRATOR_STATE(path, rng_offset);
   rng_state->sample = INTEGRATOR_STATE(path, sample);
 }
 
-ccl_device_inline void shadow_path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS, RNGState *rng_state)
+ccl_device_inline void shadow_path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS,
+                                                  ccl_private RNGState *rng_state)
 {
   const uint shadow_bounces = INTEGRATOR_STATE(shadow_path, transparent_bounce) -
                               INTEGRATOR_STATE(path, transparent_bounce);
@@ -298,23 +300,26 @@ ccl_device_inline void shadow_path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS, R
   rng_state->sample = INTEGRATOR_STATE(path, sample);
 }
 
-ccl_device_inline float path_state_rng_1D(const KernelGlobals *kg,
-                                          const RNGState *rng_state,
+ccl_device_inline float path_state_rng_1D(ccl_global const KernelGlobals *kg,
+                                          ccl_private const RNGState *rng_state,
                                           int dimension)
 {
   return path_rng_1D(
       kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
 }
 
-ccl_device_inline void path_state_rng_2D(
-    const KernelGlobals *kg, const RNGState *rng_state, int dimension, float *fx, float *fy)
+ccl_device_inline void path_state_rng_2D(ccl_global const KernelGlobals *kg,
+                                         ccl_private const RNGState *rng_state,
+                                         int dimension,
+                                         ccl_private float *fx,
+                                         ccl_private float *fy)
 {
   path_rng_2D(
       kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy);
 }
 
-ccl_device_inline float path_state_rng_1D_hash(const KernelGlobals *kg,
-                                               const RNGState *rng_state,
+ccl_device_inline float path_state_rng_1D_hash(ccl_global const KernelGlobals *kg,
+                                               ccl_private const RNGState *rng_state,
                                                uint hash)
 {
   /* Use a hash instead of dimension, this is not great but avoids adding
@@ -324,8 +329,8 @@ ccl_device_inline float path_state_rng_1D_hash(const KernelGlobals *kg,
       kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset);
 }
 
-ccl_device_inline float path_branched_rng_1D(const KernelGlobals *kg,
-                                             const RNGState *rng_state,
+ccl_device_inline float path_branched_rng_1D(ccl_global const KernelGlobals *kg,
+                                             ccl_private const RNGState *rng_state,
                                              int branch,
                                              int num_branches,
                                              int dimension)
@@ -336,13 +341,13 @@ ccl_device_inline float path_branched_rng_1D(const KernelGlobals *kg,
                      rng_state->rng_offset + dimension);
 }
 
-ccl_device_inline void path_branched_rng_2D(const KernelGlobals *kg,
-                                            const RNGState *rng_state,
+ccl_device_inline void path_branched_rng_2D(ccl_global const KernelGlobals *kg,
+                                            ccl_private const RNGState *rng_state,
                                             int branch,
                                             int num_branches,
                                             int dimension,
-                                            float *fx,
-                                            float *fy)
+                                            ccl_private float *fx,
+                                            ccl_private float *fy)
 {
   path_rng_2D(kg,
               rng_state->rng_hash,
@@ -355,8 +360,8 @@ ccl_device_inline void path_branched_rng_2D(const KernelGlobals *kg,
 /* Utility functions to get light termination value,
  * since it might not be needed in many cases.
  */
-ccl_device_inline float path_state_rng_light_termination(const KernelGlobals *kg,
-                                                         const RNGState *state)
+ccl_device_inline float path_state_rng_light_termination(ccl_global const KernelGlobals *kg,
+                                                         ccl_private const RNGState *state)
 {
   if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
     return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE);
diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h
index 192bf7ca5aa..0aea82fa812 100644
--- a/intern/cycles/kernel/kernel_projection.h
+++ b/intern/cycles/kernel/kernel_projection.h
@@ -215,8 +215,8 @@ ccl_device_inline float2 direction_to_panorama(ccl_constant KernelCamera *cam, f
 }
 
 ccl_device_inline void spherical_stereo_transform(ccl_constant KernelCamera *cam,
-                                                  float3 *P,
-                                                  float3 *D)
+                                                  ccl_private float3 *P,
+                                                  ccl_private float3 *D)
 {
   float interocular_offset = cam->interocular_offset;
 
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 240c92bf9d0..7db4289acec 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -38,7 +38,7 @@ CCL_NAMESPACE_BEGIN
  */
 #  define SOBOL_SKIP 64
 
-ccl_device uint sobol_dimension(const KernelGlobals *kg, int index, int dimension)
+ccl_device uint sobol_dimension(ccl_global const KernelGlobals *kg, int index, int dimension)
 {
   uint result = 0;
   uint i = index + SOBOL_SKIP;
@@ -51,7 +51,7 @@ ccl_device uint sobol_dimension(const KernelGlobals *kg, int index, int dimensio
 
 #endif /* __SOBOL__ */
 
-ccl_device_forceinline float path_rng_1D(const KernelGlobals *kg,
+ccl_device_forceinline float path_rng_1D(ccl_global const KernelGlobals *kg,
                                          uint rng_hash,
                                          int sample,
                                          int dimension)
@@ -85,8 +85,12 @@ ccl_device_forceinline float path_rng_1D(const KernelGlobals *kg,
 #endif
 }
 
-ccl_device_forceinline void path_rng_2D(
-    const KernelGlobals *kg, uint rng_hash, int sample, int dimension, float *fx, float *fy)
+ccl_device_forceinline void path_rng_2D(ccl_global const KernelGlobals *kg,
+                                        uint rng_hash,
+                                        int sample,
+                                        int dimension,
+                                        ccl_private float *fx,
+                                        ccl_private float *fy)
 {
 #ifdef __DEBUG_CORRELATION__
   *fx = (float)drand48();
@@ -137,7 +141,7 @@ ccl_device_inline uint hash_iqnt2d(const uint x, const uint y)
   return n;
 }
 
-ccl_device_inline uint path_rng_hash_init(const KernelGlobals *ccl_restrict kg,
+ccl_device_inline uint path_rng_hash_init(ccl_global const KernelGlobals *ccl_restrict kg,
                                           const int sample,
                                           const int x,
                                           const int y)
@@ -184,13 +188,6 @@ ccl_device_inline uint lcg_state_init(const uint rng_hash,
   return lcg_init(rng_hash + rng_offset + sample * scramble);
 }
 
-ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
-{
-  /* Implicit mod 2^32 */
-  *rng = (1103515245 * (*rng) + 12345);
-  return (float)*rng * (1.0f / (float)0xFFFFFFFF);
-}
-
 ccl_device_inline bool sample_is_even(int pattern, int sample)
 {
   if (pattern == SAMPLING_PATTERN_PMJ) {
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index d1b53832793..4174a27406b 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -37,24 +37,26 @@ CCL_NAMESPACE_BEGIN
 /* Merging */
 
 #if defined(__VOLUME__)
-ccl_device_inline void shader_merge_volume_closures(ShaderData *sd)
+ccl_device_inline void shader_merge_volume_closures(ccl_private ShaderData *sd)
 {
   /* Merge identical closures to save closure space with stacked volumes. */
   for (int i = 0; i < sd->num_closure; i++) {
-    ShaderClosure *sci = &sd->closure[i];
+    ccl_private ShaderClosure *sci = &sd->closure[i];
 
     if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
       continue;
     }
 
     for (int j = i + 1; j < sd->num_closure; j++) {
-      ShaderClosure *scj = &sd->closure[j];
+      ccl_private ShaderClosure *scj = &sd->closure[j];
       if (sci->type != scj->type) {
         continue;
       }
 
-      const HenyeyGreensteinVolume *hgi = (const HenyeyGreensteinVolume *)sci;
-      const HenyeyGreensteinVolume *hgj = (const HenyeyGreensteinVolume *)scj;
+      ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *)
+          sci;
+      ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *)
+          scj;
       if (!(hgi->g == hgj->g)) {
         continue;
       }
@@ -76,17 +78,19 @@ ccl_device_inline void shader_merge_volume_closures(ShaderData *sd)
   }
 }
 
-ccl_device_inline void shader_copy_volume_phases(ShaderVolumePhases *ccl_restrict phases,
-                                                 const ShaderData *ccl_restrict sd)
+ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases *ccl_restrict
+                                                     phases,
+                                                 ccl_private const ShaderData *ccl_restrict sd)
 {
   phases->num_closure = 0;
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *from_sc = &sd->closure[i];
-    const HenyeyGreensteinVolume *from_hg = (const HenyeyGreensteinVolume *)from_sc;
+    ccl_private const ShaderClosure *from_sc = &sd->closure[i];
+    ccl_private const HenyeyGreensteinVolume *from_hg =
+        (ccl_private const HenyeyGreensteinVolume *)from_sc;
 
     if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
-      ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure];
+      ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure];
 
       to_sc->weight = from_sc->weight;
       to_sc->sample_weight = from_sc->sample_weight;
@@ -100,7 +104,8 @@ ccl_device_inline void shader_copy_volume_phases(ShaderVolumePhases *ccl_restric
 }
 #endif /* __VOLUME__ */
 
-ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd)
+ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_ARGS,
+                                                       ccl_private ShaderData *sd)
 {
   /* Defensive sampling.
    *
@@ -112,14 +117,14 @@ ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_AR
     float sum = 0.0f;
 
     for (int i = 0; i < sd->num_closure; i++) {
-      ShaderClosure *sc = &sd->closure[i];
+      ccl_private ShaderClosure *sc = &sd->closure[i];
       if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
         sum += sc->sample_weight;
       }
     }
 
     for (int i = 0; i < sd->num_closure; i++) {
-      ShaderClosure *sc = &sd->closure[i];
+      ccl_private ShaderClosure *sc = &sd->closure[i];
       if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
         sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
       }
@@ -137,7 +142,7 @@ ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_AR
       float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f;
 
       for (int i = 0; i < sd->num_closure; i++) {
-        ShaderClosure *sc = &sd->closure[i];
+        ccl_private ShaderClosure *sc = &sd->closure[i];
         if (CLOSURE_IS_BSDF(sc->type)) {
           bsdf_blur(kg, sc, blur_roughness);
         }
@@ -148,7 +153,8 @@ ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_AR
 
 /* BSDF */
 
-ccl_device_inline bool shader_bsdf_is_transmission(const ShaderData *sd, const float3 omega_in)
+ccl_device_inline bool shader_bsdf_is_transmission(ccl_private const ShaderData *sd,
+                                                   const float3 omega_in)
 {
   return dot(sd->N, omega_in) < 0.0f;
 }
@@ -176,12 +182,12 @@ ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_sh
   return false;
 }
 
-ccl_device_inline float _shader_bsdf_multi_eval(const KernelGlobals *kg,
-                                                ShaderData *sd,
+ccl_device_inline float _shader_bsdf_multi_eval(ccl_global const KernelGlobals *kg,
+                                                ccl_private ShaderData *sd,
                                                 const float3 omega_in,
                                                 const bool is_transmission,
-                                                const ShaderClosure *skip_sc,
-                                                BsdfEval *result_eval,
+                                                ccl_private const ShaderClosure *skip_sc,
+                                                ccl_private BsdfEval *result_eval,
                                                 float sum_pdf,
                                                 float sum_sample_weight,
                                                 const uint light_shader_flags)
@@ -189,7 +195,7 @@ ccl_device_inline float _shader_bsdf_multi_eval(const KernelGlobals *kg,
   /* This is the veach one-sample model with balance heuristic,
    * some PDF factors drop out when using balance heuristic weighting. */
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (sc == skip_sc) {
       continue;
@@ -220,11 +226,11 @@ ccl_device
 ccl_device_inline
 #endif
     float
-    shader_bsdf_eval(const KernelGlobals *kg,
-                     ShaderData *sd,
+    shader_bsdf_eval(ccl_global const KernelGlobals *kg,
+                     ccl_private ShaderData *sd,
                      const float3 omega_in,
                      const bool is_transmission,
-                     BsdfEval *bsdf_eval,
+                     ccl_private BsdfEval *bsdf_eval,
                      const uint light_shader_flags)
 {
   bsdf_eval_init(bsdf_eval, false, zero_float3());
@@ -234,8 +240,8 @@ ccl_device_inline
 }
 
 /* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */
-ccl_device_inline const ShaderClosure *shader_bsdf_bssrdf_pick(const ShaderData *ccl_restrict sd,
-                                                               float *randu)
+ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick(
+    ccl_private const ShaderData *ccl_restrict sd, ccl_private float *randu)
 {
   int sampled = 0;
 
@@ -244,7 +250,7 @@ ccl_device_inline const ShaderClosure *shader_bsdf_bssrdf_pick(const ShaderData
     float sum = 0.0f;
 
     for (int i = 0; i < sd->num_closure; i++) {
-      const ShaderClosure *sc = &sd->closure[i];
+      ccl_private const ShaderClosure *sc = &sd->closure[i];
 
       if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
         sum += sc->sample_weight;
@@ -255,7 +261,7 @@ ccl_device_inline const ShaderClosure *shader_bsdf_bssrdf_pick(const ShaderData
     float partial_sum = 0.0f;
 
     for (int i = 0; i < sd->num_closure; i++) {
-      const ShaderClosure *sc = &sd->closure[i];
+      ccl_private const ShaderClosure *sc = &sd->closure[i];
 
       if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
         float next_sum = partial_sum + sc->sample_weight;
@@ -277,15 +283,16 @@ ccl_device_inline const ShaderClosure *shader_bsdf_bssrdf_pick(const ShaderData
 }
 
 /* Return weight for picked BSSRDF. */
-ccl_device_inline float3 shader_bssrdf_sample_weight(const ShaderData *ccl_restrict sd,
-                                                     const ShaderClosure *ccl_restrict bssrdf_sc)
+ccl_device_inline float3
+shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd,
+                            ccl_private const ShaderClosure *ccl_restrict bssrdf_sc)
 {
   float3 weight = bssrdf_sc->weight;
 
   if (sd->num_closure > 1) {
     float sum = 0.0f;
     for (int i = 0; i < sd->num_closure; i++) {
-      const ShaderClosure *sc = &sd->closure[i];
+      ccl_private const ShaderClosure *sc = &sd->closure[i];
 
       if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
         sum += sc->sample_weight;
@@ -299,15 +306,15 @@ ccl_device_inline float3 shader_bssrdf_sample_weight(const ShaderData *ccl_restr
 
 /* Sample direction for picked BSDF, and return evaluation and pdf for all
  * BSDFs combined using MIS. */
-ccl_device int shader_bsdf_sample_closure(const KernelGlobals *kg,
-                                          ShaderData *sd,
-                                          const ShaderClosure *sc,
+ccl_device int shader_bsdf_sample_closure(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private const ShaderClosure *sc,
                                           float randu,
                                           float randv,
-                                          BsdfEval *bsdf_eval,
-                                          float3 *omega_in,
-                                          differential3 *domega_in,
-                                          float *pdf)
+                                          ccl_private BsdfEval *bsdf_eval,
+                                          ccl_private float3 *omega_in,
+                                          ccl_private differential3 *domega_in,
+                                          ccl_private float *pdf)
 {
   /* BSSRDF should already have been handled elsewhere. */
   kernel_assert(CLOSURE_IS_BSDF(sc->type));
@@ -333,13 +340,13 @@ ccl_device int shader_bsdf_sample_closure(const KernelGlobals *kg,
   return label;
 }
 
-ccl_device float shader_bsdf_average_roughness(const ShaderData *sd)
+ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd)
 {
   float roughness = 0.0f;
   float sum_weight = 0.0f;
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (CLOSURE_IS_BSDF(sc->type)) {
       /* sqrt once to undo the squaring from multiplying roughness on the
@@ -353,7 +360,8 @@ ccl_device float shader_bsdf_average_roughness(const ShaderData *sd)
   return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
 }
 
-ccl_device float3 shader_bsdf_transparency(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_transparency(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderData *sd)
 {
   if (sd->flag & SD_HAS_ONLY_VOLUME) {
     return one_float3();
@@ -366,11 +374,12 @@ ccl_device float3 shader_bsdf_transparency(const KernelGlobals *kg, const Shader
   }
 }
 
-ccl_device void shader_bsdf_disable_transparency(const KernelGlobals *kg, ShaderData *sd)
+ccl_device void shader_bsdf_disable_transparency(ccl_global const KernelGlobals *kg,
+                                                 ccl_private ShaderData *sd)
 {
   if (sd->flag & SD_TRANSPARENT) {
     for (int i = 0; i < sd->num_closure; i++) {
-      ShaderClosure *sc = &sd->closure[i];
+      ccl_private ShaderClosure *sc = &sd->closure[i];
 
       if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
         sc->sample_weight = 0.0f;
@@ -382,7 +391,8 @@ ccl_device void shader_bsdf_disable_transparency(const KernelGlobals *kg, Shader
   }
 }
 
-ccl_device float3 shader_bsdf_alpha(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_alpha(ccl_global const KernelGlobals *kg,
+                                    ccl_private const ShaderData *sd)
 {
   float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd);
 
@@ -392,12 +402,13 @@ ccl_device float3 shader_bsdf_alpha(const KernelGlobals *kg, const ShaderData *s
   return alpha;
 }
 
-ccl_device float3 shader_bsdf_diffuse(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_diffuse(ccl_global const KernelGlobals *kg,
+                                      ccl_private const ShaderData *sd)
 {
   float3 eval = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type))
       eval += sc->weight;
@@ -406,12 +417,13 @@ ccl_device float3 shader_bsdf_diffuse(const KernelGlobals *kg, const ShaderData
   return eval;
 }
 
-ccl_device float3 shader_bsdf_glossy(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_glossy(ccl_global const KernelGlobals *kg,
+                                     ccl_private const ShaderData *sd)
 {
   float3 eval = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
       eval += sc->weight;
@@ -420,12 +432,13 @@ ccl_device float3 shader_bsdf_glossy(const KernelGlobals *kg, const ShaderData *
   return eval;
 }
 
-ccl_device float3 shader_bsdf_transmission(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_transmission(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderData *sd)
 {
   float3 eval = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
       eval += sc->weight;
@@ -434,12 +447,13 @@ ccl_device float3 shader_bsdf_transmission(const KernelGlobals *kg, const Shader
   return eval;
 }
 
-ccl_device float3 shader_bsdf_average_normal(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_average_normal(ccl_global const KernelGlobals *kg,
+                                             ccl_private const ShaderData *sd)
 {
   float3 N = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
     if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
       N += sc->N * fabsf(average(sc->weight));
   }
@@ -447,14 +461,15 @@ ccl_device float3 shader_bsdf_average_normal(const KernelGlobals *kg, const Shad
   return (is_zero(N)) ? sd->N : normalize(N);
 }
 
-ccl_device float3 shader_bsdf_ao_normal(const KernelGlobals *kg, const ShaderData *sd)
+ccl_device float3 shader_bsdf_ao_normal(ccl_global const KernelGlobals *kg,
+                                        ccl_private const ShaderData *sd)
 {
   float3 N = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
     if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
-      const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc;
+      ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
       N += bsdf->N * fabsf(average(sc->weight));
     }
   }
@@ -463,15 +478,15 @@ ccl_device float3 shader_bsdf_ao_normal(const KernelGlobals *kg, const ShaderDat
 }
 
 #ifdef __SUBSURFACE__
-ccl_device float3 shader_bssrdf_normal(const ShaderData *sd)
+ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd)
 {
   float3 N = zero_float3();
 
   for (int i = 0; i < sd->num_closure; i++) {
-    const ShaderClosure *sc = &sd->closure[i];
+    ccl_private const ShaderClosure *sc = &sd->closure[i];
 
     if (CLOSURE_IS_BSSRDF(sc->type)) {
-      const Bssrdf *bssrdf = (const Bssrdf *)sc;
+      ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc;
       float avg_weight = fabsf(average(sc->weight));
 
       N += bssrdf->N * avg_weight;
@@ -484,7 +499,9 @@ ccl_device float3 shader_bssrdf_normal(const ShaderData *sd)
 
 /* Constant emission optimization */
 
-ccl_device bool shader_constant_emission_eval(const KernelGlobals *kg, int shader, float3 *eval)
+ccl_device bool shader_constant_emission_eval(ccl_global const KernelGlobals *kg,
+                                              int shader,
+                                              ccl_private float3 *eval)
 {
   int shader_index = shader & SHADER_MASK;
   int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags;
@@ -502,7 +519,7 @@ ccl_device bool shader_constant_emission_eval(const KernelGlobals *kg, int shade
 
 /* Background */
 
-ccl_device float3 shader_background_eval(const ShaderData *sd)
+ccl_device float3 shader_background_eval(ccl_private const ShaderData *sd)
 {
   if (sd->flag & SD_EMISSION) {
     return sd->closure_emission_background;
@@ -514,7 +531,7 @@ ccl_device float3 shader_background_eval(const ShaderData *sd)
 
 /* Emission */
 
-ccl_device float3 shader_emissive_eval(const ShaderData *sd)
+ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd)
 {
   if (sd->flag & SD_EMISSION) {
     return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
@@ -526,7 +543,8 @@ ccl_device float3 shader_emissive_eval(const ShaderData *sd)
 
 /* Holdout */
 
-ccl_device float3 shader_holdout_apply(const KernelGlobals *kg, ShaderData *sd)
+ccl_device float3 shader_holdout_apply(ccl_global const KernelGlobals *kg,
+                                       ccl_private ShaderData *sd)
 {
   float3 weight = zero_float3();
 
@@ -537,7 +555,7 @@ ccl_device float3 shader_holdout_apply(const KernelGlobals *kg, ShaderData *sd)
       weight = one_float3() - sd->closure_transparent_extinction;
 
       for (int i = 0; i < sd->num_closure; i++) {
-        ShaderClosure *sc = &sd->closure[i];
+        ccl_private ShaderClosure *sc = &sd->closure[i];
         if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
           sc->type = NBUILTIN_CLOSURES;
         }
@@ -551,7 +569,7 @@ ccl_device float3 shader_holdout_apply(const KernelGlobals *kg, ShaderData *sd)
   }
   else {
     for (int i = 0; i < sd->num_closure; i++) {
-      const ShaderClosure *sc = &sd->closure[i];
+      ccl_private const ShaderClosure *sc = &sd->closure[i];
       if (CLOSURE_IS_HOLDOUT(sc->type)) {
         weight += sc->weight;
       }
@@ -565,7 +583,7 @@ ccl_device float3 shader_holdout_apply(const KernelGlobals *kg, ShaderData *sd)
 
 template<uint node_feature_mask>
 ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS,
-                                    ShaderData *ccl_restrict sd,
+                                    ccl_private ShaderData *ccl_restrict sd,
                                     ccl_global float *ccl_restrict buffer,
                                     int path_flag)
 {
@@ -604,7 +622,7 @@ ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS,
       sd->flag |= SD_EMISSION;
     }
     else {
-      DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(
+      ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
           sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f));
       if (bsdf != NULL) {
         bsdf->N = sd->N;
@@ -626,19 +644,20 @@ ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS,
 
 #ifdef __VOLUME__
 
-ccl_device_inline float _shader_volume_phase_multi_eval(const ShaderData *sd,
-                                                        const ShaderVolumePhases *phases,
-                                                        const float3 omega_in,
-                                                        int skip_phase,
-                                                        BsdfEval *result_eval,
-                                                        float sum_pdf,
-                                                        float sum_sample_weight)
+ccl_device_inline float _shader_volume_phase_multi_eval(
+    ccl_private const ShaderData *sd,
+    ccl_private const ShaderVolumePhases *phases,
+    const float3 omega_in,
+    int skip_phase,
+    ccl_private BsdfEval *result_eval,
+    float sum_pdf,
+    float sum_sample_weight)
 {
   for (int i = 0; i < phases->num_closure; i++) {
     if (i == skip_phase)
       continue;
 
-    const ShaderVolumeClosure *svc = &phases->closure[i];
+    ccl_private const ShaderVolumeClosure *svc = &phases->closure[i];
     float phase_pdf = 0.0f;
     float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
 
@@ -653,26 +672,26 @@ ccl_device_inline float _shader_volume_phase_multi_eval(const ShaderData *sd,
   return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
 }
 
-ccl_device float shader_volume_phase_eval(const KernelGlobals *kg,
-                                          const ShaderData *sd,
-                                          const ShaderVolumePhases *phases,
+ccl_device float shader_volume_phase_eval(ccl_global const KernelGlobals *kg,
+                                          ccl_private const ShaderData *sd,
+                                          ccl_private const ShaderVolumePhases *phases,
                                           const float3 omega_in,
-                                          BsdfEval *phase_eval)
+                                          ccl_private BsdfEval *phase_eval)
 {
   bsdf_eval_init(phase_eval, false, zero_float3());
 
   return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f);
 }
 
-ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
-                                          const ShaderData *sd,
-                                          const ShaderVolumePhases *phases,
+ccl_device int shader_volume_phase_sample(ccl_global const KernelGlobals *kg,
+                                          ccl_private const ShaderData *sd,
+                                          ccl_private const ShaderVolumePhases *phases,
                                           float randu,
                                           float randv,
-                                          BsdfEval *phase_eval,
-                                          float3 *omega_in,
-                                          differential3 *domega_in,
-                                          float *pdf)
+                                          ccl_private BsdfEval *phase_eval,
+                                          ccl_private float3 *omega_in,
+                                          ccl_private differential3 *domega_in,
+                                          ccl_private float *pdf)
 {
   int sampled = 0;
 
@@ -681,7 +700,7 @@ ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
     float sum = 0.0f;
 
     for (sampled = 0; sampled < phases->num_closure; sampled++) {
-      const ShaderVolumeClosure *svc = &phases->closure[sampled];
+      ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
       sum += svc->sample_weight;
     }
 
@@ -689,7 +708,7 @@ ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
     float partial_sum = 0.0f;
 
     for (sampled = 0; sampled < phases->num_closure; sampled++) {
-      const ShaderVolumeClosure *svc = &phases->closure[sampled];
+      ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
       float next_sum = partial_sum + svc->sample_weight;
 
       if (r <= next_sum) {
@@ -709,7 +728,7 @@ ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
 
   /* todo: this isn't quite correct, we don't weight anisotropy properly
    * depending on color channels, even if this is perhaps not a common case */
-  const ShaderVolumeClosure *svc = &phases->closure[sampled];
+  ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
   int label;
   float3 eval = zero_float3();
 
@@ -723,15 +742,15 @@ ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
   return label;
 }
 
-ccl_device int shader_phase_sample_closure(const KernelGlobals *kg,
-                                           const ShaderData *sd,
-                                           const ShaderVolumeClosure *sc,
+ccl_device int shader_phase_sample_closure(ccl_global const KernelGlobals *kg,
+                                           ccl_private const ShaderData *sd,
+                                           ccl_private const ShaderVolumeClosure *sc,
                                            float randu,
                                            float randv,
-                                           BsdfEval *phase_eval,
-                                           float3 *omega_in,
-                                           differential3 *domega_in,
-                                           float *pdf)
+                                           ccl_private BsdfEval *phase_eval,
+                                           ccl_private float3 *omega_in,
+                                           ccl_private differential3 *domega_in,
+                                           ccl_private float *pdf)
 {
   int label;
   float3 eval = zero_float3();
@@ -749,7 +768,7 @@ ccl_device int shader_phase_sample_closure(const KernelGlobals *kg,
 
 template<const bool shadow, typename StackReadOp>
 ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS,
-                                          ShaderData *ccl_restrict sd,
+                                          ccl_private ShaderData *ccl_restrict sd,
                                           const int path_flag,
                                           StackReadOp stack_read)
 {
@@ -824,7 +843,7 @@ ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS,
 
 /* Displacement Evaluation */
 
-ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd)
+ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ccl_private ShaderData *sd)
 {
   sd->num_closure = 0;
   sd->num_closure_left = 0;
@@ -846,13 +865,14 @@ ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ShaderData
 /* Transparent Shadows */
 
 #ifdef __TRANSPARENT_SHADOWS__
-ccl_device bool shader_transparent_shadow(const KernelGlobals *kg, Intersection *isect)
+ccl_device bool shader_transparent_shadow(ccl_global const KernelGlobals *kg,
+                                          ccl_private Intersection *isect)
 {
   return (intersection_get_shader_flags(kg, isect) & SD_HAS_TRANSPARENT_SHADOW) != 0;
 }
 #endif /* __TRANSPARENT_SHADOWS__ */
 
-ccl_device float shader_cryptomatte_id(const KernelGlobals *kg, int shader)
+ccl_device float shader_cryptomatte_id(ccl_global const KernelGlobals *kg, int shader)
 {
   return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id;
 }
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 00457695e53..3a5a11d2c10 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -636,7 +636,7 @@ typedef struct AttributeDescriptor {
   float sample_weight; \
   float3 N
 
-typedef ccl_addr_space struct ccl_align(16) ShaderClosure
+typedef struct ccl_align(16) ShaderClosure
 {
   SHADER_CLOSURE_BASE;
 
@@ -747,7 +747,7 @@ enum ShaderDataObjectFlag {
                      SD_OBJECT_HAS_VOLUME_ATTRIBUTES)
 };
 
-typedef ccl_addr_space struct ccl_align(16) ShaderData
+typedef struct ccl_align(16) ShaderData
 {
   /* position */
   float3 P;
@@ -837,27 +837,28 @@ ShaderData;
 
 /* ShaderDataTinyStorage needs the same alignment as ShaderData, or else
  * the pointer cast in AS_SHADER_DATA invokes undefined behavior. */
-typedef ccl_addr_space struct ccl_align(16) ShaderDataTinyStorage
+typedef struct ccl_align(16) ShaderDataTinyStorage
 {
   char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE];
 }
 ShaderDataTinyStorage;
-#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData *)shader_data_tiny_storage)
+#define AS_SHADER_DATA(shader_data_tiny_storage) \
+  ((ccl_private ShaderData *)shader_data_tiny_storage)
 
 /* Compact volume closures storage.
  *
  * Used for decoupled direct/indirect light closure storage. */
 
-ccl_addr_space struct ShaderVolumeClosure {
+typedef struct ShaderVolumeClosure {
   float3 weight;
   float sample_weight;
   float g;
-};
+} ShaderVolumeClosure;
 
-ccl_addr_space struct ShaderVolumePhases {
+typedef struct ShaderVolumePhases {
   ShaderVolumeClosure closure[MAX_VOLUME_CLOSURE];
   int num_closure;
-};
+} ShaderVolumePhases;
 
 /* Volume Stack */
 
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index ad609b15f86..871e370123e 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -44,56 +44,56 @@ CCL_NAMESPACE_BEGIN
 
 /* Stack */
 
-ccl_device_inline float3 stack_load_float3(float *stack, uint a)
+ccl_device_inline float3 stack_load_float3(ccl_private float *stack, uint a)
 {
   kernel_assert(a + 2 < SVM_STACK_SIZE);
 
-  float *stack_a = stack + a;
+  ccl_private float *stack_a = stack + a;
   return make_float3(stack_a[0], stack_a[1], stack_a[2]);
 }
 
-ccl_device_inline void stack_store_float3(float *stack, uint a, float3 f)
+ccl_device_inline void stack_store_float3(ccl_private float *stack, uint a, float3 f)
 {
   kernel_assert(a + 2 < SVM_STACK_SIZE);
 
-  float *stack_a = stack + a;
+  ccl_private float *stack_a = stack + a;
   stack_a[0] = f.x;
   stack_a[1] = f.y;
   stack_a[2] = f.z;
 }
 
-ccl_device_inline float stack_load_float(float *stack, uint a)
+ccl_device_inline float stack_load_float(ccl_private float *stack, uint a)
 {
   kernel_assert(a < SVM_STACK_SIZE);
 
   return stack[a];
 }
 
-ccl_device_inline float stack_load_float_default(float *stack, uint a, uint value)
+ccl_device_inline float stack_load_float_default(ccl_private float *stack, uint a, uint value)
 {
   return (a == (uint)SVM_STACK_INVALID) ? __uint_as_float(value) : stack_load_float(stack, a);
 }
 
-ccl_device_inline void stack_store_float(float *stack, uint a, float f)
+ccl_device_inline void stack_store_float(ccl_private float *stack, uint a, float f)
 {
   kernel_assert(a < SVM_STACK_SIZE);
 
   stack[a] = f;
 }
 
-ccl_device_inline int stack_load_int(float *stack, uint a)
+ccl_device_inline int stack_load_int(ccl_private float *stack, uint a)
 {
   kernel_assert(a < SVM_STACK_SIZE);
 
   return __float_as_int(stack[a]);
 }
 
-ccl_device_inline int stack_load_int_default(float *stack, uint a, uint value)
+ccl_device_inline int stack_load_int_default(ccl_private float *stack, uint a, uint value)
 {
   return (a == (uint)SVM_STACK_INVALID) ? (int)value : stack_load_int(stack, a);
 }
 
-ccl_device_inline void stack_store_int(float *stack, uint a, int i)
+ccl_device_inline void stack_store_int(ccl_private float *stack, uint a, int i)
 {
   kernel_assert(a < SVM_STACK_SIZE);
 
@@ -107,14 +107,15 @@ ccl_device_inline bool stack_valid(uint a)
 
 /* Reading Nodes */
 
-ccl_device_inline uint4 read_node(const KernelGlobals *kg, int *offset)
+ccl_device_inline uint4 read_node(ccl_global const KernelGlobals *kg, ccl_private int *offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
   (*offset)++;
   return node;
 }
 
-ccl_device_inline float4 read_node_float(const KernelGlobals *kg, int *offset)
+ccl_device_inline float4 read_node_float(ccl_global const KernelGlobals *kg,
+                                         ccl_private int *offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
   float4 f = make_float4(__uint_as_float(node.x),
@@ -125,7 +126,7 @@ ccl_device_inline float4 read_node_float(const KernelGlobals *kg, int *offset)
   return f;
 }
 
-ccl_device_inline float4 fetch_node_float(const KernelGlobals *kg, int offset)
+ccl_device_inline float4 fetch_node_float(ccl_global const KernelGlobals *kg, int offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, offset);
   return make_float4(__uint_as_float(node.x),
@@ -134,20 +135,26 @@ ccl_device_inline float4 fetch_node_float(const KernelGlobals *kg, int offset)
                      __uint_as_float(node.w));
 }
 
-ccl_device_forceinline void svm_unpack_node_uchar2(uint i, uint *x, uint *y)
+ccl_device_forceinline void svm_unpack_node_uchar2(uint i,
+                                                   ccl_private uint *x,
+                                                   ccl_private uint *y)
 {
   *x = (i & 0xFF);
   *y = ((i >> 8) & 0xFF);
 }
 
-ccl_device_forceinline void svm_unpack_node_uchar3(uint i, uint *x, uint *y, uint *z)
+ccl_device_forceinline void svm_unpack_node_uchar3(uint i,
+                                                   ccl_private uint *x,
+                                                   ccl_private uint *y,
+                                                   ccl_private uint *z)
 {
   *x = (i & 0xFF);
   *y = ((i >> 8) & 0xFF);
   *z = ((i >> 16) & 0xFF);
 }
 
-ccl_device_forceinline void svm_unpack_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
+ccl_device_forceinline void svm_unpack_node_uchar4(
+    uint i, ccl_private uint *x, ccl_private uint *y, ccl_private uint *z, ccl_private uint *w)
 {
   *x = (i & 0xFF);
   *y = ((i >> 8) & 0xFF);
diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h
index 34ac2cb8fbf..092f3817fd8 100644
--- a/intern/cycles/kernel/svm/svm_ao.h
+++ b/intern/cycles/kernel/svm/svm_ao.h
@@ -25,7 +25,7 @@ extern "C" __device__ float __direct_callable__svm_node_ao(INTEGRATOR_STATE_CONS
 #  else
 ccl_device float svm_ao(INTEGRATOR_STATE_CONST_ARGS,
 #  endif
-                                                           ShaderData *sd,
+                                                           ccl_private ShaderData *sd,
                                                            float3 N,
                                                            float max_dist,
                                                            int num_samples,
@@ -96,7 +96,10 @@ ccl_device_inline
 ccl_device_noinline
 #  endif
     void
-    svm_node_ao(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd, float *stack, uint4 node)
+    svm_node_ao(INTEGRATOR_STATE_CONST_ARGS,
+                ccl_private ShaderData *sd,
+                ccl_private float *stack,
+                uint4 node)
 {
   uint flags, dist_offset, normal_offset, out_ao_offset;
   svm_unpack_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
diff --git a/intern/cycles/kernel/svm/svm_aov.h b/intern/cycles/kernel/svm/svm_aov.h
index 26dec9717b3..640bec87ac9 100644
--- a/intern/cycles/kernel/svm/svm_aov.h
+++ b/intern/cycles/kernel/svm/svm_aov.h
@@ -26,8 +26,8 @@ ccl_device_inline bool svm_node_aov_check(const int path_flag, ccl_global float
 }
 
 ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS,
-                                   ShaderData *sd,
-                                   float *stack,
+                                   ccl_private ShaderData *sd,
+                                   ccl_private float *stack,
                                    uint4 node,
                                    ccl_global float *render_buffer)
 {
@@ -44,8 +44,8 @@ ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS,
 }
 
 ccl_device void svm_node_aov_value(INTEGRATOR_STATE_CONST_ARGS,
-                                   ShaderData *sd,
-                                   float *stack,
+                                   ccl_private ShaderData *sd,
+                                   ccl_private float *stack,
                                    uint4 node,
                                    ccl_global float *render_buffer)
 {
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index 5f94b20af73..9fd401ba1c3 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -18,11 +18,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Attribute Node */
 
-ccl_device AttributeDescriptor svm_node_attr_init(const KernelGlobals *kg,
-                                                  ShaderData *sd,
+ccl_device AttributeDescriptor svm_node_attr_init(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
                                                   uint4 node,
-                                                  NodeAttributeOutputType *type,
-                                                  uint *out_offset)
+                                                  ccl_private NodeAttributeOutputType *type,
+                                                  ccl_private uint *out_offset)
 {
   *out_offset = node.z;
   *type = (NodeAttributeOutputType)node.w;
@@ -48,9 +48,9 @@ ccl_device AttributeDescriptor svm_node_attr_init(const KernelGlobals *kg,
 }
 
 template<uint node_feature_mask>
-ccl_device_noinline void svm_node_attr(const KernelGlobals *kg,
-                                       ShaderData *sd,
-                                       float *stack,
+ccl_device_noinline void svm_node_attr(ccl_global const KernelGlobals *kg,
+                                       ccl_private ShaderData *sd,
+                                       ccl_private float *stack,
                                        uint4 node)
 {
   NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
@@ -148,9 +148,9 @@ ccl_device_noinline void svm_node_attr(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline void svm_node_attr_bump_dx(const KernelGlobals *kg,
-                                               ShaderData *sd,
-                                               float *stack,
+ccl_device_noinline void svm_node_attr_bump_dx(ccl_global const KernelGlobals *kg,
+                                               ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
                                                uint4 node)
 {
   NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
@@ -244,9 +244,9 @@ ccl_device_noinline void svm_node_attr_bump_dx(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline void svm_node_attr_bump_dy(const KernelGlobals *kg,
-                                               ShaderData *sd,
-                                               float *stack,
+ccl_device_noinline void svm_node_attr_bump_dy(ccl_global const KernelGlobals *kg,
+                                               ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
                                                uint4 node)
 {
   NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT;
diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h
index 60302b8e3d7..a76584e6bc8 100644
--- a/intern/cycles/kernel/svm/svm_bevel.h
+++ b/intern/cycles/kernel/svm/svm_bevel.h
@@ -77,7 +77,10 @@ ccl_device_forceinline float svm_bevel_cubic_quintic_root_find(float xi)
   return x;
 }
 
-ccl_device void svm_bevel_cubic_sample(const float radius, float xi, float *r, float *h)
+ccl_device void svm_bevel_cubic_sample(const float radius,
+                                       float xi,
+                                       ccl_private float *r,
+                                       ccl_private float *h)
 {
   float Rm = radius;
   float r_ = svm_bevel_cubic_quintic_root_find(xi);
@@ -100,7 +103,7 @@ extern "C" __device__ float3 __direct_callable__svm_node_bevel(INTEGRATOR_STATE_
 #  else
 ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS,
 #  endif
-                                                               ShaderData *sd,
+                                                               ccl_private ShaderData *sd,
                                                                float radius,
                                                                int num_samples)
 {
@@ -284,7 +287,10 @@ ccl_device_inline
 ccl_device_noinline
 #  endif
     void
-    svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd, float *stack, uint4 node)
+    svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS,
+                   ccl_private ShaderData *sd,
+                   ccl_private float *stack,
+                   uint4 node)
 {
   uint num_samples, radius_offset, normal_offset, out_offset;
   svm_unpack_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h
index 96b3703b954..521afb42adc 100644
--- a/intern/cycles/kernel/svm/svm_blackbody.h
+++ b/intern/cycles/kernel/svm/svm_blackbody.h
@@ -34,9 +34,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Blackbody Node */
 
-ccl_device_noinline void svm_node_blackbody(const KernelGlobals *kg,
-                                            ShaderData *sd,
-                                            float *stack,
+ccl_device_noinline void svm_node_blackbody(ccl_global const KernelGlobals *kg,
+                                            ccl_private ShaderData *sd,
+                                            ccl_private float *stack,
                                             uint temperature_offset,
                                             uint col_offset)
 {
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index dca1b220dd5..29a8350f1c1 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -72,8 +72,11 @@ ccl_device_noinline_cpu float2 svm_brick(float3 p,
   return make_float2(tint, mortar);
 }
 
-ccl_device_noinline int svm_node_tex_brick(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_brick(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
+                                           uint4 node,
+                                           int offset)
 {
   uint4 node2 = read_node(kg, &offset);
   uint4 node3 = read_node(kg, &offset);
diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h
index 2ed812acd71..0a44ffe6359 100644
--- a/intern/cycles/kernel/svm/svm_brightness.h
+++ b/intern/cycles/kernel/svm/svm_brightness.h
@@ -17,7 +17,7 @@
 CCL_NAMESPACE_BEGIN
 
 ccl_device_noinline void svm_node_brightness(
-    ShaderData *sd, float *stack, uint in_color, uint out_color, uint node)
+    ccl_private ShaderData *sd, ccl_private float *stack, uint in_color, uint out_color, uint node)
 {
   uint bright_offset, contrast_offset;
   float3 color = stack_load_float3(stack, in_color);
diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h
index 8672839dbab..70935c730f4 100644
--- a/intern/cycles/kernel/svm/svm_bump.h
+++ b/intern/cycles/kernel/svm/svm_bump.h
@@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Bump Eval Nodes */
 
-ccl_device_noinline void svm_node_enter_bump_eval(const KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  float *stack,
+ccl_device_noinline void svm_node_enter_bump_eval(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
                                                   uint offset)
 {
   /* save state */
@@ -45,9 +45,9 @@ ccl_device_noinline void svm_node_enter_bump_eval(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline void svm_node_leave_bump_eval(const KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  float *stack,
+ccl_device_noinline void svm_node_leave_bump_eval(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
                                                   uint offset)
 {
   /* restore state */
diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h
index 40c0edcdad0..2b786757af8 100644
--- a/intern/cycles/kernel/svm/svm_camera.h
+++ b/intern/cycles/kernel/svm/svm_camera.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_camera(const KernelGlobals *kg,
-                                         ShaderData *sd,
-                                         float *stack,
+ccl_device_noinline void svm_node_camera(ccl_global const KernelGlobals *kg,
+                                         ccl_private ShaderData *sd,
+                                         ccl_private float *stack,
                                          uint out_vector,
                                          uint out_zdepth,
                                          uint out_distance)
diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h
index a9919c9ddc9..e22367f4f59 100644
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -32,9 +32,9 @@ ccl_device float svm_checker(float3 p)
   return ((xi % 2 == yi % 2) == (zi % 2)) ? 1.0f : 0.0f;
 }
 
-ccl_device_noinline void svm_node_tex_checker(const KernelGlobals *kg,
-                                              ShaderData *sd,
-                                              float *stack,
+ccl_device_noinline void svm_node_tex_checker(ccl_global const KernelGlobals *kg,
+                                              ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
                                               uint4 node)
 {
   uint co_offset, color1_offset, color2_offset, scale_offset;
diff --git a/intern/cycles/kernel/svm/svm_clamp.h b/intern/cycles/kernel/svm/svm_clamp.h
index 656bd31c085..cb5224aebb2 100644
--- a/intern/cycles/kernel/svm/svm_clamp.h
+++ b/intern/cycles/kernel/svm/svm_clamp.h
@@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Clamp Node */
 
-ccl_device_noinline int svm_node_clamp(const KernelGlobals *kg,
-                                       ShaderData *sd,
-                                       float *stack,
+ccl_device_noinline int svm_node_clamp(ccl_global const KernelGlobals *kg,
+                                       ccl_private ShaderData *sd,
+                                       ccl_private float *stack,
                                        uint value_stack_offset,
                                        uint parameters_stack_offsets,
                                        uint result_stack_offset,
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index e55f76a4400..87be73bb2cc 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -18,8 +18,12 @@ CCL_NAMESPACE_BEGIN
 
 /* Closure Nodes */
 
-ccl_device void svm_node_glass_setup(
-    ShaderData *sd, MicrofacetBsdf *bsdf, int type, float eta, float roughness, bool refract)
+ccl_device void svm_node_glass_setup(ccl_private ShaderData *sd,
+                                     ccl_private MicrofacetBsdf *bsdf,
+                                     int type,
+                                     float eta,
+                                     float roughness,
+                                     bool refract)
 {
   if (type == CLOSURE_BSDF_SHARP_GLASS_ID) {
     if (refract) {
@@ -58,8 +62,12 @@ ccl_device void svm_node_glass_setup(
 }
 
 template<uint node_feature_mask, ShaderType shader_type>
-ccl_device_noinline int svm_node_closure_bsdf(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int offset)
+ccl_device_noinline int svm_node_closure_bsdf(ccl_global const KernelGlobals *kg,
+                                              ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
+                                              uint4 node,
+                                              int path_flag,
+                                              int offset)
 {
   uint type, param1_offset, param2_offset;
 
@@ -213,8 +221,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
         if (subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
           float3 diff_weight = weight * base_color * diffuse_weight;
 
-          PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
-              sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
+          ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)
+              bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
 
           if (bsdf) {
             bsdf->N = N;
@@ -225,7 +233,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
           }
         }
         else if (subsurface > CLOSURE_WEIGHT_CUTOFF) {
-          Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight);
+          ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight);
 
           if (bssrdf) {
             bssrdf->radius = subsurface_radius * subsurface;
@@ -247,7 +255,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
       if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF) {
         float3 diff_weight = weight * base_color * diffuse_weight;
 
-        PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+        ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
             sd, sizeof(PrincipledDiffuseBsdf), diff_weight);
 
         if (bsdf) {
@@ -273,7 +281,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
 
         float3 sheen_weight = weight * sheen * sheen_color * diffuse_weight;
 
-        PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)bsdf_alloc(
+        ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)bsdf_alloc(
             sd, sizeof(PrincipledSheenBsdf), sheen_weight);
 
         if (bsdf) {
@@ -292,11 +300,12 @@ ccl_device_noinline int svm_node_closure_bsdf(
             (specular > CLOSURE_WEIGHT_CUTOFF || metallic > CLOSURE_WEIGHT_CUTOFF)) {
           float3 spec_weight = weight * specular_weight;
 
-          MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+          ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
               sd, sizeof(MicrofacetBsdf), spec_weight);
-          MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
-                                                        sd, sizeof(MicrofacetExtra)) :
-                                                    NULL;
+          ccl_private MicrofacetExtra *extra =
+              (bsdf != NULL) ?
+                  (ccl_private MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) :
+                  NULL;
 
           if (bsdf && extra) {
             bsdf->N = N;
@@ -355,11 +364,12 @@ ccl_device_noinline int svm_node_closure_bsdf(
             if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
 #  endif
             {
-              MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+              ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
                   sd, sizeof(MicrofacetBsdf), glass_weight * fresnel);
-              MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
-                                                            sd, sizeof(MicrofacetExtra)) :
-                                                        NULL;
+              ccl_private MicrofacetExtra *extra =
+                  (bsdf != NULL) ? (ccl_private MicrofacetExtra *)closure_alloc_extra(
+                                       sd, sizeof(MicrofacetExtra)) :
+                                   NULL;
 
               if (bsdf && extra) {
                 bsdf->N = N;
@@ -384,7 +394,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
             if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
 #  endif
             {
-              MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+              ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
                   sd, sizeof(MicrofacetBsdf), base_color * glass_weight * (1.0f - fresnel));
               if (bsdf) {
                 bsdf->N = N;
@@ -407,11 +417,12 @@ ccl_device_noinline int svm_node_closure_bsdf(
             }
           }
           else { /* use multi-scatter GGX */
-            MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+            ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
                 sd, sizeof(MicrofacetBsdf), glass_weight);
-            MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
-                                                          sd, sizeof(MicrofacetExtra)) :
-                                                      NULL;
+            ccl_private MicrofacetExtra *extra =
+                (bsdf != NULL) ? (ccl_private MicrofacetExtra *)closure_alloc_extra(
+                                     sd, sizeof(MicrofacetExtra)) :
+                                 NULL;
 
             if (bsdf && extra) {
               bsdf->N = N;
@@ -440,10 +451,12 @@ ccl_device_noinline int svm_node_closure_bsdf(
       if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) {
 #  endif
         if (clearcoat > CLOSURE_WEIGHT_CUTOFF) {
-          MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
-          MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra(
-                                                        sd, sizeof(MicrofacetExtra)) :
-                                                    NULL;
+          ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+              sd, sizeof(MicrofacetBsdf), weight);
+          ccl_private MicrofacetExtra *extra =
+              (bsdf != NULL) ?
+                  (ccl_private MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) :
+                  NULL;
 
           if (bsdf && extra) {
             bsdf->N = clearcoat_normal;
@@ -471,7 +484,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
 #endif /* __PRINCIPLED__ */
     case CLOSURE_BSDF_DIFFUSE_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
-      OrenNayarBsdf *bsdf = (OrenNayarBsdf *)bsdf_alloc(sd, sizeof(OrenNayarBsdf), weight);
+      ccl_private OrenNayarBsdf *bsdf = (ccl_private OrenNayarBsdf *)bsdf_alloc(
+          sd, sizeof(OrenNayarBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -479,7 +493,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
         float roughness = param1;
 
         if (roughness == 0.0f) {
-          sd->flag |= bsdf_diffuse_setup((DiffuseBsdf *)bsdf);
+          sd->flag |= bsdf_diffuse_setup((ccl_private DiffuseBsdf *)bsdf);
         }
         else {
           bsdf->roughness = roughness;
@@ -490,7 +504,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
     }
     case CLOSURE_BSDF_TRANSLUCENT_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
-      DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+      ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
+          sd, sizeof(DiffuseBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -513,7 +528,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
         break;
 #endif
       float3 weight = sd->svm_closure_weight * mix_weight;
-      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+      ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+          sd, sizeof(MicrofacetBsdf), weight);
 
       if (!bsdf) {
         break;
@@ -559,7 +575,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
         sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
       else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) {
         kernel_assert(stack_valid(data_node.w));
-        bsdf->extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+        bsdf->extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(sd,
+                                                                         sizeof(MicrofacetExtra));
         if (bsdf->extra) {
           bsdf->extra->color = stack_load_float3(stack, data_node.w);
           bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f);
@@ -581,7 +598,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
         break;
 #endif
       float3 weight = sd->svm_closure_weight * mix_weight;
-      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+      ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+          sd, sizeof(MicrofacetBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -639,7 +657,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
       if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0)
 #endif
       {
-        MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+        ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
             sd, sizeof(MicrofacetBsdf), weight * fresnel);
 
         if (bsdf) {
@@ -655,7 +673,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
       if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
 #endif
       {
-        MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(
+        ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
             sd, sizeof(MicrofacetBsdf), weight * (1.0f - fresnel));
 
         if (bsdf) {
@@ -675,12 +693,14 @@ ccl_device_noinline int svm_node_closure_bsdf(
         break;
 #endif
       float3 weight = sd->svm_closure_weight * mix_weight;
-      MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight);
+      ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+          sd, sizeof(MicrofacetBsdf), weight);
       if (!bsdf) {
         break;
       }
 
-      MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
+      ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(
+          sd, sizeof(MicrofacetExtra));
       if (!extra) {
         break;
       }
@@ -706,7 +726,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
     }
     case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
-      VelvetBsdf *bsdf = (VelvetBsdf *)bsdf_alloc(sd, sizeof(VelvetBsdf), weight);
+      ccl_private VelvetBsdf *bsdf = (ccl_private VelvetBsdf *)bsdf_alloc(
+          sd, sizeof(VelvetBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -724,7 +745,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
 #endif
     case CLOSURE_BSDF_DIFFUSE_TOON_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
-      ToonBsdf *bsdf = (ToonBsdf *)bsdf_alloc(sd, sizeof(ToonBsdf), weight);
+      ccl_private ToonBsdf *bsdf = (ccl_private ToonBsdf *)bsdf_alloc(
+          sd, sizeof(ToonBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -771,11 +793,11 @@ ccl_device_noinline int svm_node_closure_bsdf(
         random = stack_load_float_default(stack, random_ofs, data_node3.y);
       }
 
-      PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc(
+      ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)bsdf_alloc(
           sd, sizeof(PrincipledHairBSDF), weight);
       if (bsdf) {
-        PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra(
-            sd, sizeof(PrincipledHairExtra));
+        ccl_private PrincipledHairExtra *extra = (ccl_private PrincipledHairExtra *)
+            closure_alloc_extra(sd, sizeof(PrincipledHairExtra));
 
         if (!extra)
           break;
@@ -854,7 +876,8 @@ ccl_device_noinline int svm_node_closure_bsdf(
     case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
 
-      HairBsdf *bsdf = (HairBsdf *)bsdf_alloc(sd, sizeof(HairBsdf), weight);
+      ccl_private HairBsdf *bsdf = (ccl_private HairBsdf *)bsdf_alloc(
+          sd, sizeof(HairBsdf), weight);
 
       if (bsdf) {
         bsdf->N = N;
@@ -889,7 +912,7 @@ ccl_device_noinline int svm_node_closure_bsdf(
     case CLOSURE_BSSRDF_RANDOM_WALK_ID:
     case CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID: {
       float3 weight = sd->svm_closure_weight * mix_weight;
-      Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
+      ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
 
       if (bssrdf) {
         /* disable in case of diffuse ancestor, can't see it well then and
@@ -921,9 +944,9 @@ ccl_device_noinline int svm_node_closure_bsdf(
 }
 
 template<ShaderType shader_type>
-ccl_device_noinline void svm_node_closure_volume(const KernelGlobals *kg,
-                                                 ShaderData *sd,
-                                                 float *stack,
+ccl_device_noinline void svm_node_closure_volume(ccl_global const KernelGlobals *kg,
+                                                 ccl_private ShaderData *sd,
+                                                 ccl_private float *stack,
                                                  uint4 node)
 {
 #ifdef __VOLUME__
@@ -958,7 +981,7 @@ ccl_device_noinline void svm_node_closure_volume(const KernelGlobals *kg,
 
   /* Add closure for volume scattering. */
   if (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
-    HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc(
+    ccl_private HenyeyGreensteinVolume *volume = (ccl_private HenyeyGreensteinVolume *)bsdf_alloc(
         sd, sizeof(HenyeyGreensteinVolume), weight);
 
     if (volume) {
@@ -976,8 +999,12 @@ ccl_device_noinline void svm_node_closure_volume(const KernelGlobals *kg,
 }
 
 template<ShaderType shader_type>
-ccl_device_noinline int svm_node_principled_volume(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int offset)
+ccl_device_noinline int svm_node_principled_volume(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   ccl_private float *stack,
+                                                   uint4 node,
+                                                   int path_flag,
+                                                   int offset)
 {
 #ifdef __VOLUME__
   uint4 value_node = read_node(kg, &offset);
@@ -1023,7 +1050,7 @@ ccl_device_noinline int svm_node_principled_volume(
     }
 
     /* Add closure for volume scattering. */
-    HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc(
+    ccl_private HenyeyGreensteinVolume *volume = (ccl_private HenyeyGreensteinVolume *)bsdf_alloc(
         sd, sizeof(HenyeyGreensteinVolume), color * density);
     if (volume) {
       float anisotropy = (stack_valid(anisotropy_offset)) ?
@@ -1087,7 +1114,9 @@ ccl_device_noinline int svm_node_principled_volume(
   return offset;
 }
 
-ccl_device_noinline void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_closure_emission(ccl_private ShaderData *sd,
+                                                   ccl_private float *stack,
+                                                   uint4 node)
 {
   uint mix_weight_offset = node.y;
   float3 weight = sd->svm_closure_weight;
@@ -1104,7 +1133,9 @@ ccl_device_noinline void svm_node_closure_emission(ShaderData *sd, float *stack,
   emission_setup(sd, weight);
 }
 
-ccl_device_noinline void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_closure_background(ccl_private ShaderData *sd,
+                                                     ccl_private float *stack,
+                                                     uint4 node)
 {
   uint mix_weight_offset = node.y;
   float3 weight = sd->svm_closure_weight;
@@ -1121,7 +1152,9 @@ ccl_device_noinline void svm_node_closure_background(ShaderData *sd, float *stac
   background_setup(sd, weight);
 }
 
-ccl_device_noinline void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_closure_holdout(ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
+                                                  uint4 node)
 {
   uint mix_weight_offset = node.y;
 
@@ -1142,26 +1175,28 @@ ccl_device_noinline void svm_node_closure_holdout(ShaderData *sd, float *stack,
 
 /* Closure Nodes */
 
-ccl_device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight)
+ccl_device_inline void svm_node_closure_store_weight(ccl_private ShaderData *sd, float3 weight)
 {
   sd->svm_closure_weight = weight;
 }
 
-ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b)
+ccl_device void svm_node_closure_set_weight(ccl_private ShaderData *sd, uint r, uint g, uint b)
 {
   float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
   svm_node_closure_store_weight(sd, weight);
 }
 
-ccl_device void svm_node_closure_weight(ShaderData *sd, float *stack, uint weight_offset)
+ccl_device void svm_node_closure_weight(ccl_private ShaderData *sd,
+                                        ccl_private float *stack,
+                                        uint weight_offset)
 {
   float3 weight = stack_load_float3(stack, weight_offset);
   svm_node_closure_store_weight(sd, weight);
 }
 
-ccl_device_noinline void svm_node_emission_weight(const KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  float *stack,
+ccl_device_noinline void svm_node_emission_weight(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
                                                   uint4 node)
 {
   uint color_offset = node.y;
@@ -1173,7 +1208,9 @@ ccl_device_noinline void svm_node_emission_weight(const KernelGlobals *kg,
   svm_node_closure_store_weight(sd, weight);
 }
 
-ccl_device_noinline void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_mix_closure(ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
+                                              uint4 node)
 {
   /* fetch weight from blend input, previous mix closures,
    * and write to stack to be used by closure nodes later */
@@ -1195,8 +1232,11 @@ ccl_device_noinline void svm_node_mix_closure(ShaderData *sd, float *stack, uint
 
 /* (Bump) normal */
 
-ccl_device void svm_node_set_normal(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal)
+ccl_device void svm_node_set_normal(ccl_global const KernelGlobals *kg,
+                                    ccl_private ShaderData *sd,
+                                    ccl_private float *stack,
+                                    uint in_direction,
+                                    uint out_normal)
 {
   float3 normal = stack_load_float3(stack, in_direction);
   sd->N = normal;
diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h
index 37d40167ccc..0d53779a5c8 100644
--- a/intern/cycles/kernel/svm/svm_convert.h
+++ b/intern/cycles/kernel/svm/svm_convert.h
@@ -18,8 +18,12 @@ CCL_NAMESPACE_BEGIN
 
 /* Conversion Nodes */
 
-ccl_device_noinline void svm_node_convert(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to)
+ccl_device_noinline void svm_node_convert(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
+                                          uint type,
+                                          uint from,
+                                          uint to)
 {
   switch (type) {
     case NODE_CONVERT_FI: {
diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h
index a1d952173d8..7a3c8a6d36d 100644
--- a/intern/cycles/kernel/svm/svm_displace.h
+++ b/intern/cycles/kernel/svm/svm_displace.h
@@ -20,9 +20,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Bump Node */
 
-ccl_device_noinline void svm_node_set_bump(const KernelGlobals *kg,
-                                           ShaderData *sd,
-                                           float *stack,
+ccl_device_noinline void svm_node_set_bump(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
                                            uint4 node)
 {
 #ifdef __RAY_DIFFERENTIALS__
@@ -88,18 +88,18 @@ ccl_device_noinline void svm_node_set_bump(const KernelGlobals *kg,
 
 /* Displacement Node */
 
-ccl_device void svm_node_set_displacement(const KernelGlobals *kg,
-                                          ShaderData *sd,
-                                          float *stack,
+ccl_device void svm_node_set_displacement(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
                                           uint fac_offset)
 {
   float3 dP = stack_load_float3(stack, fac_offset);
   sd->P += dP;
 }
 
-ccl_device_noinline void svm_node_displacement(const KernelGlobals *kg,
-                                               ShaderData *sd,
-                                               float *stack,
+ccl_device_noinline void svm_node_displacement(ccl_global const KernelGlobals *kg,
+                                               ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
                                                uint4 node)
 {
   uint height_offset, midlevel_offset, scale_offset, normal_offset;
@@ -127,8 +127,11 @@ ccl_device_noinline void svm_node_displacement(const KernelGlobals *kg,
   stack_store_float3(stack, node.z, dP);
 }
 
-ccl_device_noinline int svm_node_vector_displacement(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_vector_displacement(ccl_global const KernelGlobals *kg,
+                                                     ccl_private ShaderData *sd,
+                                                     ccl_private float *stack,
+                                                     uint4 node,
+                                                     int offset)
 {
   uint4 data_node = read_node(kg, &offset);
   uint space = data_node.x;
diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h
index b5ecdbe2abf..449ec84370f 100644
--- a/intern/cycles/kernel/svm/svm_fresnel.h
+++ b/intern/cycles/kernel/svm/svm_fresnel.h
@@ -18,8 +18,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Fresnel Node */
 
-ccl_device_noinline void svm_node_fresnel(
-    ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node)
+ccl_device_noinline void svm_node_fresnel(ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
+                                          uint ior_offset,
+                                          uint ior_value,
+                                          uint node)
 {
   uint normal_offset, out_offset;
   svm_unpack_node_uchar2(node, &normal_offset, &out_offset);
@@ -37,7 +40,9 @@ ccl_device_noinline void svm_node_fresnel(
 
 /* Layer Weight Node */
 
-ccl_device_noinline void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_layer_weight(ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
+                                               uint4 node)
 {
   uint blend_offset = node.y;
   uint blend_value = node.z;
diff --git a/intern/cycles/kernel/svm/svm_gamma.h b/intern/cycles/kernel/svm/svm_gamma.h
index f6fafdee941..7ec6c31065d 100644
--- a/intern/cycles/kernel/svm/svm_gamma.h
+++ b/intern/cycles/kernel/svm/svm_gamma.h
@@ -16,8 +16,11 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_gamma(
-    ShaderData *sd, float *stack, uint in_gamma, uint in_color, uint out_color)
+ccl_device_noinline void svm_node_gamma(ccl_private ShaderData *sd,
+                                        ccl_private float *stack,
+                                        uint in_gamma,
+                                        uint in_color,
+                                        uint out_color)
 {
   float3 color = stack_load_float3(stack, in_color);
   float gamma = stack_load_float(stack, in_gamma);
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index 432529eb061..a94464d3a52 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -18,8 +18,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Geometry Node */
 
-ccl_device_noinline void svm_node_geometry(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_geometry(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
+                                           uint type,
+                                           uint out_offset)
 {
   float3 data;
 
@@ -51,8 +54,11 @@ ccl_device_noinline void svm_node_geometry(
   stack_store_float3(stack, out_offset, data);
 }
 
-ccl_device_noinline void svm_node_geometry_bump_dx(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_geometry_bump_dx(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   ccl_private float *stack,
+                                                   uint type,
+                                                   uint out_offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
   float3 data;
@@ -75,8 +81,11 @@ ccl_device_noinline void svm_node_geometry_bump_dx(
 #endif
 }
 
-ccl_device_noinline void svm_node_geometry_bump_dy(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_geometry_bump_dy(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   ccl_private float *stack,
+                                                   uint type,
+                                                   uint out_offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
   float3 data;
@@ -101,8 +110,11 @@ ccl_device_noinline void svm_node_geometry_bump_dy(
 
 /* Object Info */
 
-ccl_device_noinline void svm_node_object_info(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_object_info(ccl_global const KernelGlobals *kg,
+                                              ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
+                                              uint type,
+                                              uint out_offset)
 {
   float data;
 
@@ -140,8 +152,11 @@ ccl_device_noinline void svm_node_object_info(
 
 /* Particle Info */
 
-ccl_device_noinline void svm_node_particle_info(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_particle_info(ccl_global const KernelGlobals *kg,
+                                                ccl_private ShaderData *sd,
+                                                ccl_private float *stack,
+                                                uint type,
+                                                uint out_offset)
 {
   switch (type) {
     case NODE_INFO_PAR_INDEX: {
@@ -199,8 +214,11 @@ ccl_device_noinline void svm_node_particle_info(
 
 /* Hair Info */
 
-ccl_device_noinline void svm_node_hair_info(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_noinline void svm_node_hair_info(ccl_global const KernelGlobals *kg,
+                                            ccl_private ShaderData *sd,
+                                            ccl_private float *stack,
+                                            uint type,
+                                            uint out_offset)
 {
   float data;
   float3 data3;
diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h
index cd15f7097e7..8cc37be606f 100644
--- a/intern/cycles/kernel/svm/svm_gradient.h
+++ b/intern/cycles/kernel/svm/svm_gradient.h
@@ -60,7 +60,9 @@ ccl_device float svm_gradient(float3 p, NodeGradientType type)
   return 0.0f;
 }
 
-ccl_device_noinline void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_tex_gradient(ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
+                                               uint4 node)
 {
   uint type, co_offset, color_offset, fac_offset;
 
diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h
index 6f49a8385aa..feb85eda122 100644
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -19,9 +19,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_hsv(const KernelGlobals *kg,
-                                      ShaderData *sd,
-                                      float *stack,
+ccl_device_noinline void svm_node_hsv(ccl_global const KernelGlobals *kg,
+                                      ccl_private ShaderData *sd,
+                                      ccl_private float *stack,
                                       uint4 node)
 {
   uint in_color_offset, fac_offset, out_color_offset;
diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h
index 9c13734ecf0..7d41205c9ef 100644
--- a/intern/cycles/kernel/svm/svm_ies.h
+++ b/intern/cycles/kernel/svm/svm_ies.h
@@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN
 /* IES Light */
 
 ccl_device_inline float interpolate_ies_vertical(
-    const KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
+    ccl_global const KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
 {
   /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end
    * of v (corresponding to the north pole) would result in artifacts. The proper way of dealing
@@ -39,7 +39,7 @@ ccl_device_inline float interpolate_ies_vertical(
   return cubic_interp(a, b, c, d, v_frac);
 }
 
-ccl_device_inline float kernel_ies_interp(const KernelGlobals *kg,
+ccl_device_inline float kernel_ies_interp(ccl_global const KernelGlobals *kg,
                                           int slot,
                                           float h_angle,
                                           float v_angle)
@@ -98,9 +98,9 @@ ccl_device_inline float kernel_ies_interp(const KernelGlobals *kg,
   return max(cubic_interp(a, b, c, d, h_frac), 0.0f);
 }
 
-ccl_device_noinline void svm_node_ies(const KernelGlobals *kg,
-                                      ShaderData *sd,
-                                      float *stack,
+ccl_device_noinline void svm_node_ies(ccl_global const KernelGlobals *kg,
+                                      ccl_private ShaderData *sd,
+                                      ccl_private float *stack,
                                       uint4 node)
 {
   uint vector_offset, strength_offset, fac_offset, slot = node.z;
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index ce70109392b..2de80d5fc29 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -16,7 +16,8 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float4 svm_image_texture(const KernelGlobals *kg, int id, float x, float y, uint flags)
+ccl_device float4
+svm_image_texture(ccl_global const KernelGlobals *kg, int id, float x, float y, uint flags)
 {
   if (id == -1) {
     return make_float4(
@@ -44,8 +45,11 @@ ccl_device_inline float3 texco_remap_square(float3 co)
   return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f;
 }
 
-ccl_device_noinline int svm_node_tex_image(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_image(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
+                                           uint4 node,
+                                           int offset)
 {
   uint co_offset, out_offset, alpha_offset, flags;
 
@@ -117,9 +121,9 @@ ccl_device_noinline int svm_node_tex_image(
   return offset;
 }
 
-ccl_device_noinline void svm_node_tex_image_box(const KernelGlobals *kg,
-                                                ShaderData *sd,
-                                                float *stack,
+ccl_device_noinline void svm_node_tex_image_box(ccl_global const KernelGlobals *kg,
+                                                ccl_private ShaderData *sd,
+                                                ccl_private float *stack,
                                                 uint4 node)
 {
   /* get object space normal */
@@ -219,9 +223,9 @@ ccl_device_noinline void svm_node_tex_image_box(const KernelGlobals *kg,
     stack_store_float(stack, alpha_offset, f.w);
 }
 
-ccl_device_noinline void svm_node_tex_environment(const KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  float *stack,
+ccl_device_noinline void svm_node_tex_environment(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
                                                   uint4 node)
 {
   uint id = node.y;
diff --git a/intern/cycles/kernel/svm/svm_invert.h b/intern/cycles/kernel/svm/svm_invert.h
index 27cdaaff473..60668ec00f1 100644
--- a/intern/cycles/kernel/svm/svm_invert.h
+++ b/intern/cycles/kernel/svm/svm_invert.h
@@ -21,8 +21,11 @@ ccl_device float invert(float color, float factor)
   return factor * (1.0f - color) + (1.0f - factor) * color;
 }
 
-ccl_device_noinline void svm_node_invert(
-    ShaderData *sd, float *stack, uint in_fac, uint in_color, uint out_color)
+ccl_device_noinline void svm_node_invert(ccl_private ShaderData *sd,
+                                         ccl_private float *stack,
+                                         uint in_fac,
+                                         uint in_color,
+                                         uint out_color)
 {
   float factor = stack_load_float(stack, in_fac);
   float3 color = stack_load_float3(stack, in_color);
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index 49fabad1cc5..aaff8376c7c 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -19,8 +19,8 @@ CCL_NAMESPACE_BEGIN
 /* Light Path Node */
 
 ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS,
-                                             const ShaderData *sd,
-                                             float *stack,
+                                             ccl_private const ShaderData *sd,
+                                             ccl_private float *stack,
                                              uint type,
                                              uint out_offset,
                                              int path_flag)
@@ -106,7 +106,9 @@ ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS,
 
 /* Light Falloff Node */
 
-ccl_device_noinline void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node)
+ccl_device_noinline void svm_node_light_falloff(ccl_private ShaderData *sd,
+                                                ccl_private float *stack,
+                                                uint4 node)
 {
   uint strength_offset, out_offset, smooth_offset;
 
diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h
index 8784c760860..4c4f3bcf523 100644
--- a/intern/cycles/kernel/svm/svm_magic.h
+++ b/intern/cycles/kernel/svm/svm_magic.h
@@ -87,8 +87,11 @@ ccl_device_noinline_cpu float3 svm_magic(float3 p, int n, float distortion)
   return make_float3(0.5f - x, 0.5f - y, 0.5f - z);
 }
 
-ccl_device_noinline int svm_node_tex_magic(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_magic(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
+                                           uint4 node,
+                                           int offset)
 {
   uint depth;
   uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset;
diff --git a/intern/cycles/kernel/svm/svm_map_range.h b/intern/cycles/kernel/svm/svm_map_range.h
index c8684981e31..f4f7d3ca76f 100644
--- a/intern/cycles/kernel/svm/svm_map_range.h
+++ b/intern/cycles/kernel/svm/svm_map_range.h
@@ -24,9 +24,9 @@ ccl_device_inline float smootherstep(float edge0, float edge1, float x)
   return x * x * x * (x * (x * 6.0f - 15.0f) + 10.0f);
 }
 
-ccl_device_noinline int svm_node_map_range(const KernelGlobals *kg,
-                                           ShaderData *sd,
-                                           float *stack,
+ccl_device_noinline int svm_node_map_range(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
                                            uint value_stack_offset,
                                            uint parameters_stack_offsets,
                                            uint results_stack_offsets,
diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h
index fcc724405f5..8102afc637e 100644
--- a/intern/cycles/kernel/svm/svm_mapping.h
+++ b/intern/cycles/kernel/svm/svm_mapping.h
@@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Mapping Node */
 
-ccl_device_noinline void svm_node_mapping(const KernelGlobals *kg,
-                                          ShaderData *sd,
-                                          float *stack,
+ccl_device_noinline void svm_node_mapping(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
                                           uint type,
                                           uint inputs_stack_offsets,
                                           uint result_stack_offset)
@@ -43,9 +43,9 @@ ccl_device_noinline void svm_node_mapping(const KernelGlobals *kg,
 
 /* Texture Mapping */
 
-ccl_device_noinline int svm_node_texture_mapping(const KernelGlobals *kg,
-                                                 ShaderData *sd,
-                                                 float *stack,
+ccl_device_noinline int svm_node_texture_mapping(ccl_global const KernelGlobals *kg,
+                                                 ccl_private ShaderData *sd,
+                                                 ccl_private float *stack,
                                                  uint vec_offset,
                                                  uint out_offset,
                                                  int offset)
@@ -62,9 +62,9 @@ ccl_device_noinline int svm_node_texture_mapping(const KernelGlobals *kg,
   return offset;
 }
 
-ccl_device_noinline int svm_node_min_max(const KernelGlobals *kg,
-                                         ShaderData *sd,
-                                         float *stack,
+ccl_device_noinline int svm_node_min_max(ccl_global const KernelGlobals *kg,
+                                         ccl_private ShaderData *sd,
+                                         ccl_private float *stack,
                                          uint vec_offset,
                                          uint out_offset,
                                          int offset)
diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h
index 99e7a8f2bda..3897a453873 100644
--- a/intern/cycles/kernel/svm/svm_math.h
+++ b/intern/cycles/kernel/svm/svm_math.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_math(const KernelGlobals *kg,
-                                       ShaderData *sd,
-                                       float *stack,
+ccl_device_noinline void svm_node_math(ccl_global const KernelGlobals *kg,
+                                       ccl_private ShaderData *sd,
+                                       ccl_private float *stack,
                                        uint type,
                                        uint inputs_stack_offsets,
                                        uint result_stack_offset)
@@ -34,9 +34,9 @@ ccl_device_noinline void svm_node_math(const KernelGlobals *kg,
   stack_store_float(stack, result_stack_offset, result);
 }
 
-ccl_device_noinline int svm_node_vector_math(const KernelGlobals *kg,
-                                             ShaderData *sd,
-                                             float *stack,
+ccl_device_noinline int svm_node_vector_math(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             ccl_private float *stack,
                                              uint type,
                                              uint inputs_stack_offsets,
                                              uint outputs_stack_offsets,
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index 11b1e8f57f8..d3225b55ef0 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -16,8 +16,8 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void svm_vector_math(float *value,
-                                float3 *vector,
+ccl_device void svm_vector_math(ccl_private float *value,
+                                ccl_private float3 *vector,
                                 NodeVectorMathType type,
                                 float3 a,
                                 float3 b,
diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h
index 3e38080977f..0064c5e643c 100644
--- a/intern/cycles/kernel/svm/svm_mix.h
+++ b/intern/cycles/kernel/svm/svm_mix.h
@@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Node */
 
-ccl_device_noinline int svm_node_mix(const KernelGlobals *kg,
-                                     ShaderData *sd,
-                                     float *stack,
+ccl_device_noinline int svm_node_mix(ccl_global const KernelGlobals *kg,
+                                     ccl_private ShaderData *sd,
+                                     ccl_private float *stack,
                                      uint fac_offset,
                                      uint c1_offset,
                                      uint c2_offset,
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 03a8b68b3ef..8523f45b95f 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -700,9 +700,9 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_4d(
   return value;
 }
 
-ccl_device_noinline int svm_node_tex_musgrave(const KernelGlobals *kg,
-                                              ShaderData *sd,
-                                              float *stack,
+ccl_device_noinline int svm_node_tex_musgrave(ccl_global const KernelGlobals *kg,
+                                              ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
                                               uint offsets1,
                                               uint offsets2,
                                               uint offsets3,
diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h
index 29b262ac06e..61da8227efa 100644
--- a/intern/cycles/kernel/svm/svm_noisetex.h
+++ b/intern/cycles/kernel/svm/svm_noisetex.h
@@ -55,8 +55,8 @@ ccl_device void noise_texture_1d(float co,
                                  float roughness,
                                  float distortion,
                                  bool color_is_needed,
-                                 float *value,
-                                 float3 *color)
+                                 ccl_private float *value,
+                                 ccl_private float3 *color)
 {
   float p = co;
   if (distortion != 0.0f) {
@@ -76,8 +76,8 @@ ccl_device void noise_texture_2d(float2 co,
                                  float roughness,
                                  float distortion,
                                  bool color_is_needed,
-                                 float *value,
-                                 float3 *color)
+                                 ccl_private float *value,
+                                 ccl_private float3 *color)
 {
   float2 p = co;
   if (distortion != 0.0f) {
@@ -98,8 +98,8 @@ ccl_device void noise_texture_3d(float3 co,
                                  float roughness,
                                  float distortion,
                                  bool color_is_needed,
-                                 float *value,
-                                 float3 *color)
+                                 ccl_private float *value,
+                                 ccl_private float3 *color)
 {
   float3 p = co;
   if (distortion != 0.0f) {
@@ -121,8 +121,8 @@ ccl_device void noise_texture_4d(float4 co,
                                  float roughness,
                                  float distortion,
                                  bool color_is_needed,
-                                 float *value,
-                                 float3 *color)
+                                 ccl_private float *value,
+                                 ccl_private float3 *color)
 {
   float4 p = co;
   if (distortion != 0.0f) {
@@ -140,9 +140,9 @@ ccl_device void noise_texture_4d(float4 co,
   }
 }
 
-ccl_device_noinline int svm_node_tex_noise(const KernelGlobals *kg,
-                                           ShaderData *sd,
-                                           float *stack,
+ccl_device_noinline int svm_node_tex_noise(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
                                            uint dimensions,
                                            uint offsets1,
                                            uint offsets2,
diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h
index 724b5f281f9..0d1b4200d54 100644
--- a/intern/cycles/kernel/svm/svm_normal.h
+++ b/intern/cycles/kernel/svm/svm_normal.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline int svm_node_normal(const KernelGlobals *kg,
-                                        ShaderData *sd,
-                                        float *stack,
+ccl_device_noinline int svm_node_normal(ccl_global const KernelGlobals *kg,
+                                        ccl_private ShaderData *sd,
+                                        ccl_private float *stack,
                                         uint in_normal_offset,
                                         uint out_normal_offset,
                                         uint out_dot_offset,
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index 563e5bcb5e4..ef8b0d103c1 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -21,13 +21,13 @@ CCL_NAMESPACE_BEGIN
 
 /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
 
-ccl_device_inline float fetch_float(const KernelGlobals *kg, int offset)
+ccl_device_inline float fetch_float(ccl_global const KernelGlobals *kg, int offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, offset);
   return __uint_as_float(node.x);
 }
 
-ccl_device_inline float float_ramp_lookup(const KernelGlobals *kg,
+ccl_device_inline float float_ramp_lookup(ccl_global const KernelGlobals *kg,
                                           int offset,
                                           float f,
                                           bool interpolate,
@@ -63,7 +63,7 @@ ccl_device_inline float float_ramp_lookup(const KernelGlobals *kg,
   return a;
 }
 
-ccl_device_inline float4 rgb_ramp_lookup(const KernelGlobals *kg,
+ccl_device_inline float4 rgb_ramp_lookup(ccl_global const KernelGlobals *kg,
                                          int offset,
                                          float f,
                                          bool interpolate,
@@ -99,8 +99,11 @@ ccl_device_inline float4 rgb_ramp_lookup(const KernelGlobals *kg,
   return a;
 }
 
-ccl_device_noinline int svm_node_rgb_ramp(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_rgb_ramp(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
+                                          uint4 node,
+                                          int offset)
 {
   uint fac_offset, color_offset, alpha_offset;
   uint interpolate = node.z;
@@ -121,8 +124,11 @@ ccl_device_noinline int svm_node_rgb_ramp(
   return offset;
 }
 
-ccl_device_noinline int svm_node_curves(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_curves(ccl_global const KernelGlobals *kg,
+                                        ccl_private ShaderData *sd,
+                                        ccl_private float *stack,
+                                        uint4 node,
+                                        int offset)
 {
   uint fac_offset, color_offset, out_offset;
   svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &out_offset);
@@ -147,8 +153,11 @@ ccl_device_noinline int svm_node_curves(
   return offset;
 }
 
-ccl_device_noinline int svm_node_curve(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_curve(ccl_global const KernelGlobals *kg,
+                                       ccl_private ShaderData *sd,
+                                       ccl_private float *stack,
+                                       uint4 node,
+                                       int offset)
 {
   uint fac_offset, value_in_offset, out_offset;
   svm_unpack_node_uchar3(node.y, &fac_offset, &value_in_offset, &out_offset);
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
index 8d52845ea3d..3cd4ba87a55 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline int svm_node_combine_hsv(const KernelGlobals *kg,
-                                             ShaderData *sd,
-                                             float *stack,
+ccl_device_noinline int svm_node_combine_hsv(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             ccl_private float *stack,
                                              uint hue_in,
                                              uint saturation_in,
                                              uint value_in,
@@ -39,9 +39,9 @@ ccl_device_noinline int svm_node_combine_hsv(const KernelGlobals *kg,
   return offset;
 }
 
-ccl_device_noinline int svm_node_separate_hsv(const KernelGlobals *kg,
-                                              ShaderData *sd,
-                                              float *stack,
+ccl_device_noinline int svm_node_separate_hsv(ccl_global const KernelGlobals *kg,
+                                              ccl_private ShaderData *sd,
+                                              ccl_private float *stack,
                                               uint color_in,
                                               uint hue_out,
                                               uint saturation_out,
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_vector.h b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
index cbf77f1e640..11e440f2cbf 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_vector.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_vector.h
@@ -18,8 +18,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Vector combine / separate, used for the RGB and XYZ nodes */
 
-ccl_device void svm_node_combine_vector(
-    ShaderData *sd, float *stack, uint in_offset, uint vector_index, uint out_offset)
+ccl_device void svm_node_combine_vector(ccl_private ShaderData *sd,
+                                        ccl_private float *stack,
+                                        uint in_offset,
+                                        uint vector_index,
+                                        uint out_offset)
 {
   float vector = stack_load_float(stack, in_offset);
 
@@ -27,8 +30,11 @@ ccl_device void svm_node_combine_vector(
     stack_store_float(stack, out_offset + vector_index, vector);
 }
 
-ccl_device void svm_node_separate_vector(
-    ShaderData *sd, float *stack, uint ivector_offset, uint vector_index, uint out_offset)
+ccl_device void svm_node_separate_vector(ccl_private ShaderData *sd,
+                                         ccl_private float *stack,
+                                         uint ivector_offset,
+                                         uint vector_index,
+                                         uint out_offset)
 {
   float3 vector = stack_load_float3(stack, ivector_offset);
 
diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h
index b77c4311e72..04db8109170 100644
--- a/intern/cycles/kernel/svm/svm_sky.h
+++ b/intern/cycles/kernel/svm/svm_sky.h
@@ -28,7 +28,7 @@ ccl_device float sky_angle_between(float thetav, float phiv, float theta, float
  * "A Practical Analytic Model for Daylight"
  * A. J. Preetham, Peter Shirley, Brian Smits
  */
-ccl_device float sky_perez_function(float *lam, float theta, float gamma)
+ccl_device float sky_perez_function(ccl_private float *lam, float theta, float gamma)
 {
   float ctheta = cosf(theta);
   float cgamma = cosf(gamma);
@@ -37,16 +37,16 @@ ccl_device float sky_perez_function(float *lam, float theta, float gamma)
          (1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma);
 }
 
-ccl_device float3 sky_radiance_preetham(const KernelGlobals *kg,
+ccl_device float3 sky_radiance_preetham(ccl_global const KernelGlobals *kg,
                                         float3 dir,
                                         float sunphi,
                                         float suntheta,
                                         float radiance_x,
                                         float radiance_y,
                                         float radiance_z,
-                                        float *config_x,
-                                        float *config_y,
-                                        float *config_z)
+                                        ccl_private float *config_x,
+                                        ccl_private float *config_y,
+                                        ccl_private float *config_z)
 {
   /* convert vector to spherical coordinates */
   float2 spherical = direction_to_spherical(dir);
@@ -73,7 +73,7 @@ ccl_device float3 sky_radiance_preetham(const KernelGlobals *kg,
  * "An Analytic Model for Full Spectral Sky-Dome Radiance"
  * Lukas Hosek, Alexander Wilkie
  */
-ccl_device float sky_radiance_internal(float *configuration, float theta, float gamma)
+ccl_device float sky_radiance_internal(ccl_private float *configuration, float theta, float gamma)
 {
   float ctheta = cosf(theta);
   float cgamma = cosf(gamma);
@@ -90,16 +90,16 @@ ccl_device float sky_radiance_internal(float *configuration, float theta, float
           configuration[6] * mieM + configuration[7] * zenith);
 }
 
-ccl_device float3 sky_radiance_hosek(const KernelGlobals *kg,
+ccl_device float3 sky_radiance_hosek(ccl_global const KernelGlobals *kg,
                                      float3 dir,
                                      float sunphi,
                                      float suntheta,
                                      float radiance_x,
                                      float radiance_y,
                                      float radiance_z,
-                                     float *config_x,
-                                     float *config_y,
-                                     float *config_z)
+                                     ccl_private float *config_x,
+                                     ccl_private float *config_y,
+                                     ccl_private float *config_z)
 {
   /* convert vector to spherical coordinates */
   float2 spherical = direction_to_spherical(dir);
@@ -127,9 +127,9 @@ ccl_device float3 geographical_to_direction(float lat, float lon)
   return make_float3(cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat));
 }
 
-ccl_device float3 sky_radiance_nishita(const KernelGlobals *kg,
+ccl_device float3 sky_radiance_nishita(ccl_global const KernelGlobals *kg,
                                        float3 dir,
-                                       float *nishita_data,
+                                       ccl_private float *nishita_data,
                                        uint texture_id)
 {
   /* definitions */
@@ -209,8 +209,11 @@ ccl_device float3 sky_radiance_nishita(const KernelGlobals *kg,
   return xyz_to_rgb(kg, xyz);
 }
 
-ccl_device_noinline int svm_node_tex_sky(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_sky(ccl_global const KernelGlobals *kg,
+                                         ccl_private ShaderData *sd,
+                                         ccl_private float *stack,
+                                         uint4 node,
+                                         int offset)
 {
   /* Load data */
   uint dir_offset = node.y;
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index 8869001015b..295d5e9f65b 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -22,8 +22,12 @@ CCL_NAMESPACE_BEGIN
 
 /* Texture Coordinate Node */
 
-ccl_device_noinline int svm_node_tex_coord(
-    const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_coord(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           int path_flag,
+                                           ccl_private float *stack,
+                                           uint4 node,
+                                           int offset)
 {
   float3 data;
   uint type = node.y;
@@ -99,8 +103,12 @@ ccl_device_noinline int svm_node_tex_coord(
   return offset;
 }
 
-ccl_device_noinline int svm_node_tex_coord_bump_dx(
-    const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_coord_bump_dx(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   int path_flag,
+                                                   ccl_private float *stack,
+                                                   uint4 node,
+                                                   int offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
   float3 data;
@@ -180,8 +188,12 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(
 #endif
 }
 
-ccl_device_noinline int svm_node_tex_coord_bump_dy(
-    const KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_coord_bump_dy(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   int path_flag,
+                                                   ccl_private float *stack,
+                                                   uint4 node,
+                                                   int offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
   float3 data;
@@ -261,9 +273,9 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(
 #endif
 }
 
-ccl_device_noinline void svm_node_normal_map(const KernelGlobals *kg,
-                                             ShaderData *sd,
-                                             float *stack,
+ccl_device_noinline void svm_node_normal_map(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             ccl_private float *stack,
                                              uint4 node)
 {
   uint color_offset, strength_offset, normal_offset, space;
@@ -354,9 +366,9 @@ ccl_device_noinline void svm_node_normal_map(const KernelGlobals *kg,
   stack_store_float3(stack, normal_offset, N);
 }
 
-ccl_device_noinline void svm_node_tangent(const KernelGlobals *kg,
-                                          ShaderData *sd,
-                                          float *stack,
+ccl_device_noinline void svm_node_tangent(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
                                           uint4 node)
 {
   uint tangent_offset, direction_type, axis;
diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h
index d0478660094..d1038bc072d 100644
--- a/intern/cycles/kernel/svm/svm_value.h
+++ b/intern/cycles/kernel/svm/svm_value.h
@@ -18,14 +18,20 @@ CCL_NAMESPACE_BEGIN
 
 /* Value Nodes */
 
-ccl_device void svm_node_value_f(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset)
+ccl_device void svm_node_value_f(ccl_global const KernelGlobals *kg,
+                                 ccl_private ShaderData *sd,
+                                 ccl_private float *stack,
+                                 uint ivalue,
+                                 uint out_offset)
 {
   stack_store_float(stack, out_offset, __uint_as_float(ivalue));
 }
 
-ccl_device int svm_node_value_v(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int offset)
+ccl_device int svm_node_value_v(ccl_global const KernelGlobals *kg,
+                                ccl_private ShaderData *sd,
+                                ccl_private float *stack,
+                                uint out_offset,
+                                int offset)
 {
   /* read extra data */
   uint4 node1 = read_node(kg, &offset);
diff --git a/intern/cycles/kernel/svm/svm_vector_rotate.h b/intern/cycles/kernel/svm/svm_vector_rotate.h
index 55e1bce0158..c20f9b2556f 100644
--- a/intern/cycles/kernel/svm/svm_vector_rotate.h
+++ b/intern/cycles/kernel/svm/svm_vector_rotate.h
@@ -18,8 +18,8 @@ CCL_NAMESPACE_BEGIN
 
 /* Vector Rotate */
 
-ccl_device_noinline void svm_node_vector_rotate(ShaderData *sd,
-                                                float *stack,
+ccl_device_noinline void svm_node_vector_rotate(ccl_private ShaderData *sd,
+                                                ccl_private float *stack,
                                                 uint input_stack_offsets,
                                                 uint axis_stack_offsets,
                                                 uint result_stack_offset)
diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h
index 8aedb7e0f54..b6c898c3952 100644
--- a/intern/cycles/kernel/svm/svm_vector_transform.h
+++ b/intern/cycles/kernel/svm/svm_vector_transform.h
@@ -18,9 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Vector Transform */
 
-ccl_device_noinline void svm_node_vector_transform(const KernelGlobals *kg,
-                                                   ShaderData *sd,
-                                                   float *stack,
+ccl_device_noinline void svm_node_vector_transform(ccl_global const KernelGlobals *kg,
+                                                   ccl_private ShaderData *sd,
+                                                   ccl_private float *stack,
                                                    uint4 node)
 {
   uint itype, ifrom, ito;
diff --git a/intern/cycles/kernel/svm/svm_vertex_color.h b/intern/cycles/kernel/svm/svm_vertex_color.h
index 986ea244f3a..3641f05ca43 100644
--- a/intern/cycles/kernel/svm/svm_vertex_color.h
+++ b/intern/cycles/kernel/svm/svm_vertex_color.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_vertex_color(const KernelGlobals *kg,
-                                               ShaderData *sd,
-                                               float *stack,
+ccl_device_noinline void svm_node_vertex_color(ccl_global const KernelGlobals *kg,
+                                               ccl_private ShaderData *sd,
+                                               ccl_private float *stack,
                                                uint layer_id,
                                                uint color_offset,
                                                uint alpha_offset)
@@ -35,9 +35,9 @@ ccl_device_noinline void svm_node_vertex_color(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline void svm_node_vertex_color_bump_dx(const KernelGlobals *kg,
-                                                       ShaderData *sd,
-                                                       float *stack,
+ccl_device_noinline void svm_node_vertex_color_bump_dx(ccl_global const KernelGlobals *kg,
+                                                       ccl_private ShaderData *sd,
+                                                       ccl_private float *stack,
                                                        uint layer_id,
                                                        uint color_offset,
                                                        uint alpha_offset)
@@ -56,9 +56,9 @@ ccl_device_noinline void svm_node_vertex_color_bump_dx(const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline void svm_node_vertex_color_bump_dy(const KernelGlobals *kg,
-                                                       ShaderData *sd,
-                                                       float *stack,
+ccl_device_noinline void svm_node_vertex_color_bump_dy(ccl_global const KernelGlobals *kg,
+                                                       ccl_private ShaderData *sd,
+                                                       ccl_private float *stack,
                                                        uint layer_id,
                                                        uint color_offset,
                                                        uint alpha_offset)
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index b1d2eff7f37..e7112087e17 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -46,9 +46,9 @@ ccl_device void voronoi_f1_1d(float w,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float *outW)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float *outW)
 {
   float cellPosition = floorf(w);
   float localPosition = w - cellPosition;
@@ -76,9 +76,9 @@ ccl_device void voronoi_smooth_f1_1d(float w,
                                      float exponent,
                                      float randomness,
                                      NodeVoronoiDistanceMetric metric,
-                                     float *outDistance,
-                                     float3 *outColor,
-                                     float *outW)
+                                     ccl_private float *outDistance,
+                                     ccl_private float3 *outColor,
+                                     ccl_private float *outW)
 {
   float cellPosition = floorf(w);
   float localPosition = w - cellPosition;
@@ -108,9 +108,9 @@ ccl_device void voronoi_f2_1d(float w,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float *outW)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float *outW)
 {
   float cellPosition = floorf(w);
   float localPosition = w - cellPosition;
@@ -144,7 +144,9 @@ ccl_device void voronoi_f2_1d(float w,
   *outW = positionF2 + cellPosition;
 }
 
-ccl_device void voronoi_distance_to_edge_1d(float w, float randomness, float *outDistance)
+ccl_device void voronoi_distance_to_edge_1d(float w,
+                                            float randomness,
+                                            ccl_private float *outDistance)
 {
   float cellPosition = floorf(w);
   float localPosition = w - cellPosition;
@@ -158,7 +160,7 @@ ccl_device void voronoi_distance_to_edge_1d(float w, float randomness, float *ou
   *outDistance = min(distanceToMidLeft, distanceToMidRight);
 }
 
-ccl_device void voronoi_n_sphere_radius_1d(float w, float randomness, float *outRadius)
+ccl_device void voronoi_n_sphere_radius_1d(float w, float randomness, ccl_private float *outRadius)
 {
   float cellPosition = floorf(w);
   float localPosition = w - cellPosition;
@@ -223,9 +225,9 @@ ccl_device void voronoi_f1_2d(float2 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float2 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float2 *outPosition)
 {
   float2 cellPosition = floor(coord);
   float2 localPosition = coord - cellPosition;
@@ -256,9 +258,9 @@ ccl_device void voronoi_smooth_f1_2d(float2 coord,
                                      float exponent,
                                      float randomness,
                                      NodeVoronoiDistanceMetric metric,
-                                     float *outDistance,
-                                     float3 *outColor,
-                                     float2 *outPosition)
+                                     ccl_private float *outDistance,
+                                     ccl_private float3 *outColor,
+                                     ccl_private float2 *outPosition)
 {
   float2 cellPosition = floor(coord);
   float2 localPosition = coord - cellPosition;
@@ -291,9 +293,9 @@ ccl_device void voronoi_f2_2d(float2 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float2 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float2 *outPosition)
 {
   float2 cellPosition = floor(coord);
   float2 localPosition = coord - cellPosition;
@@ -330,7 +332,9 @@ ccl_device void voronoi_f2_2d(float2 coord,
   *outPosition = positionF2 + cellPosition;
 }
 
-ccl_device void voronoi_distance_to_edge_2d(float2 coord, float randomness, float *outDistance)
+ccl_device void voronoi_distance_to_edge_2d(float2 coord,
+                                            float randomness,
+                                            ccl_private float *outDistance)
 {
   float2 cellPosition = floor(coord);
   float2 localPosition = coord - cellPosition;
@@ -369,7 +373,9 @@ ccl_device void voronoi_distance_to_edge_2d(float2 coord, float randomness, floa
   *outDistance = minDistance;
 }
 
-ccl_device void voronoi_n_sphere_radius_2d(float2 coord, float randomness, float *outRadius)
+ccl_device void voronoi_n_sphere_radius_2d(float2 coord,
+                                           float randomness,
+                                           ccl_private float *outRadius)
 {
   float2 cellPosition = floor(coord);
   float2 localPosition = coord - cellPosition;
@@ -441,9 +447,9 @@ ccl_device void voronoi_f1_3d(float3 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float3 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float3 *outPosition)
 {
   float3 cellPosition = floor(coord);
   float3 localPosition = coord - cellPosition;
@@ -477,9 +483,9 @@ ccl_device void voronoi_smooth_f1_3d(float3 coord,
                                      float exponent,
                                      float randomness,
                                      NodeVoronoiDistanceMetric metric,
-                                     float *outDistance,
-                                     float3 *outColor,
-                                     float3 *outPosition)
+                                     ccl_private float *outDistance,
+                                     ccl_private float3 *outColor,
+                                     ccl_private float3 *outPosition)
 {
   float3 cellPosition = floor(coord);
   float3 localPosition = coord - cellPosition;
@@ -515,9 +521,9 @@ ccl_device void voronoi_f2_3d(float3 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float3 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float3 *outPosition)
 {
   float3 cellPosition = floor(coord);
   float3 localPosition = coord - cellPosition;
@@ -557,7 +563,9 @@ ccl_device void voronoi_f2_3d(float3 coord,
   *outPosition = positionF2 + cellPosition;
 }
 
-ccl_device void voronoi_distance_to_edge_3d(float3 coord, float randomness, float *outDistance)
+ccl_device void voronoi_distance_to_edge_3d(float3 coord,
+                                            float randomness,
+                                            ccl_private float *outDistance)
 {
   float3 cellPosition = floor(coord);
   float3 localPosition = coord - cellPosition;
@@ -600,7 +608,9 @@ ccl_device void voronoi_distance_to_edge_3d(float3 coord, float randomness, floa
   *outDistance = minDistance;
 }
 
-ccl_device void voronoi_n_sphere_radius_3d(float3 coord, float randomness, float *outRadius)
+ccl_device void voronoi_n_sphere_radius_3d(float3 coord,
+                                           float randomness,
+                                           ccl_private float *outRadius)
 {
   float3 cellPosition = floor(coord);
   float3 localPosition = coord - cellPosition;
@@ -676,9 +686,9 @@ ccl_device void voronoi_f1_4d(float4 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float4 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float4 *outPosition)
 {
   float4 cellPosition = floor(coord);
   float4 localPosition = coord - cellPosition;
@@ -715,9 +725,9 @@ ccl_device void voronoi_smooth_f1_4d(float4 coord,
                                      float exponent,
                                      float randomness,
                                      NodeVoronoiDistanceMetric metric,
-                                     float *outDistance,
-                                     float3 *outColor,
-                                     float4 *outPosition)
+                                     ccl_private float *outDistance,
+                                     ccl_private float3 *outColor,
+                                     ccl_private float4 *outPosition)
 {
   float4 cellPosition = floor(coord);
   float4 localPosition = coord - cellPosition;
@@ -756,9 +766,9 @@ ccl_device void voronoi_f2_4d(float4 coord,
                               float exponent,
                               float randomness,
                               NodeVoronoiDistanceMetric metric,
-                              float *outDistance,
-                              float3 *outColor,
-                              float4 *outPosition)
+                              ccl_private float *outDistance,
+                              ccl_private float3 *outColor,
+                              ccl_private float4 *outPosition)
 {
   float4 cellPosition = floor(coord);
   float4 localPosition = coord - cellPosition;
@@ -801,7 +811,9 @@ ccl_device void voronoi_f2_4d(float4 coord,
   *outPosition = positionF2 + cellPosition;
 }
 
-ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, float *outDistance)
+ccl_device void voronoi_distance_to_edge_4d(float4 coord,
+                                            float randomness,
+                                            ccl_private float *outDistance)
 {
   float4 cellPosition = floor(coord);
   float4 localPosition = coord - cellPosition;
@@ -850,7 +862,9 @@ ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, floa
   *outDistance = minDistance;
 }
 
-ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float *outRadius)
+ccl_device void voronoi_n_sphere_radius_4d(float4 coord,
+                                           float randomness,
+                                           ccl_private float *outRadius)
 {
   float4 cellPosition = floor(coord);
   float4 localPosition = coord - cellPosition;
@@ -903,9 +917,9 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float
 }
 
 template<uint node_feature_mask>
-ccl_device_noinline int svm_node_tex_voronoi(const KernelGlobals *kg,
-                                             ShaderData *sd,
-                                             float *stack,
+ccl_device_noinline int svm_node_tex_voronoi(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             ccl_private float *stack,
                                              uint dimensions,
                                              uint feature,
                                              uint metric,
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 78b75405356..764fb71ba72 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -19,8 +19,11 @@ CCL_NAMESPACE_BEGIN
 /* TODO(sergey): Think of making it more generic volume-type attribute
  * sampler.
  */
-ccl_device_noinline int svm_node_tex_voxel(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_voxel(ccl_global const KernelGlobals *kg,
+                                           ccl_private ShaderData *sd,
+                                           ccl_private float *stack,
+                                           uint4 node,
+                                           int offset)
 {
   uint co_offset, density_out_offset, color_out_offset, space;
   svm_unpack_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index 00f980c16df..1ac130e2006 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -82,8 +82,11 @@ ccl_device_noinline_cpu float svm_wave(NodeWaveType type,
   }
 }
 
-ccl_device_noinline int svm_node_tex_wave(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
+ccl_device_noinline int svm_node_tex_wave(ccl_global const KernelGlobals *kg,
+                                          ccl_private ShaderData *sd,
+                                          ccl_private float *stack,
+                                          uint4 node,
+                                          int offset)
 {
   uint4 node2 = read_node(kg, &offset);
   uint4 node3 = read_node(kg, &offset);
diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h
index aa291fd2741..e891744f276 100644
--- a/intern/cycles/kernel/svm/svm_wavelength.h
+++ b/intern/cycles/kernel/svm/svm_wavelength.h
@@ -34,8 +34,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Wavelength to RGB */
 
-ccl_device_noinline void svm_node_wavelength(
-    const KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out)
+ccl_device_noinline void svm_node_wavelength(ccl_global const KernelGlobals *kg,
+                                             ccl_private ShaderData *sd,
+                                             ccl_private float *stack,
+                                             uint wavelength,
+                                             uint color_out)
 {
   // CIE colour matching functions xBar, yBar, and zBar for
   //   wavelengths from 380 through 780 nanometers, every 5
diff --git a/intern/cycles/kernel/svm/svm_white_noise.h b/intern/cycles/kernel/svm/svm_white_noise.h
index 0306d2e7b9c..ccc49bf1a7c 100644
--- a/intern/cycles/kernel/svm/svm_white_noise.h
+++ b/intern/cycles/kernel/svm/svm_white_noise.h
@@ -16,9 +16,9 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_tex_white_noise(const KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  float *stack,
+ccl_device_noinline void svm_node_tex_white_noise(ccl_global const KernelGlobals *kg,
+                                                  ccl_private ShaderData *sd,
+                                                  ccl_private float *stack,
                                                   uint dimensions,
                                                   uint inputs_stack_offsets,
                                                   uint ouptuts_stack_offsets)
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 7ec913789d2..70d1211aa4a 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -34,8 +34,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Wireframe Node */
 
-ccl_device_inline float wireframe(
-    const KernelGlobals *kg, ShaderData *sd, float size, int pixel_size, float3 *P)
+ccl_device_inline float wireframe(ccl_global const KernelGlobals *kg,
+                                  ccl_private ShaderData *sd,
+                                  float size,
+                                  int pixel_size,
+                                  ccl_private float3 *P)
 {
 #ifdef __HAIR__
   if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE)
@@ -88,9 +91,9 @@ ccl_device_inline float wireframe(
   return 0.0f;
 }
 
-ccl_device_noinline void svm_node_wireframe(const KernelGlobals *kg,
-                                            ShaderData *sd,
-                                            float *stack,
+ccl_device_noinline void svm_node_wireframe(ccl_global const KernelGlobals *kg,
+                                            ccl_private ShaderData *sd,
+                                            ccl_private float *stack,
                                             uint4 node)
 {
   uint in_size = node.y;
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
index 7b67b90e44d..361c36d9061 100644
--- a/intern/cycles/util/util_color.h
+++ b/intern/cycles/util/util_color.h
@@ -277,7 +277,7 @@ ccl_device float4 color_srgb_to_linear_v4(float4 c)
 #endif
 }
 
-ccl_device float3 color_highlight_compress(float3 color, float3 *variance)
+ccl_device float3 color_highlight_compress(float3 color, ccl_private float3 *variance)
 {
   color += one_float3();
   if (variance) {
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index f36a492a1b0..81723abe1e2 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -61,7 +61,7 @@ struct half4 {
 
 #if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
 
-ccl_device_inline void float4_store_half(half *h, float4 f)
+ccl_device_inline void float4_store_half(ccl_private half *h, float4 f)
 {
   h[0] = __float2half(f.x);
   h[1] = __float2half(f.y);
@@ -71,7 +71,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f)
 
 #else
 
-ccl_device_inline void float4_store_half(half *h, float4 f)
+ccl_device_inline void float4_store_half(ccl_private half *h, float4 f)
 {
 
 #  ifndef __KERNEL_SSE2__
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index cb1e94c838c..f834011a032 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -338,7 +338,7 @@ ccl_device_inline int quick_floor_to_int(float x)
   return float_to_int(x) - ((x < 0) ? 1 : 0);
 }
 
-ccl_device_inline float floorfrac(float x, int *i)
+ccl_device_inline float floorfrac(float x, ccl_private int *i)
 {
   *i = quick_floor_to_int(x);
   return x - *i;
@@ -465,14 +465,18 @@ template<class A, class B> A lerp(const A &a, const A &b, const B &t)
 
 /* Triangle */
 
-ccl_device_inline float triangle_area(const float3 &v1, const float3 &v2, const float3 &v3)
+ccl_device_inline float triangle_area(ccl_private const float3 &v1,
+                                      ccl_private const float3 &v2,
+                                      ccl_private const float3 &v3)
 {
   return len(cross(v3 - v2, v1 - v2)) * 0.5f;
 }
 
 /* Orthonormal vectors */
 
-ccl_device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b)
+ccl_device_inline void make_orthonormals(const float3 N,
+                                         ccl_private float3 *a,
+                                         ccl_private float3 *b)
 {
 #if 0
   if (fabsf(N.y) >= 0.999f) {
diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h
index 38afa163db5..cc924f36a71 100644
--- a/intern/cycles/util/util_math_fast.h
+++ b/intern/cycles/util/util_math_fast.h
@@ -156,7 +156,7 @@ ccl_device float fast_cosf(float x)
   return u;
 }
 
-ccl_device void fast_sincosf(float x, float *sine, float *cosine)
+ccl_device void fast_sincosf(float x, ccl_private float *sine, ccl_private float *cosine)
 {
   /* Same argument reduction as fast_sin. */
   int q = fast_rint(x * M_1_PI_F);
diff --git a/intern/cycles/util/util_math_float2.h b/intern/cycles/util/util_math_float2.h
index 70b80c33544..25eda840214 100644
--- a/intern/cycles/util/util_math_float2.h
+++ b/intern/cycles/util/util_math_float2.h
@@ -207,7 +207,7 @@ ccl_device_inline float2 normalize(const float2 &a)
   return a / len(a);
 }
 
-ccl_device_inline float2 normalize_len(const float2 &a, float *t)
+ccl_device_inline float2 normalize_len(const float2 &a, ccl_private float *t)
 {
   *t = len(a);
   return a / (*t);
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index 30a1b4c3f77..c3230a8068c 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -411,7 +411,7 @@ ccl_device_inline float3 saturate3(float3 a)
   return make_float3(saturate(a.x), saturate(a.y), saturate(a.z));
 }
 
-ccl_device_inline float3 normalize_len(const float3 a, float *t)
+ccl_device_inline float3 normalize_len(const float3 a, ccl_private float *t)
 {
   *t = len(a);
   float x = 1.0f / *t;
@@ -424,7 +424,7 @@ ccl_device_inline float3 safe_normalize(const float3 a)
   return (t != 0.0f) ? a * (1.0f / t) : a;
 }
 
-ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
+ccl_device_inline float3 safe_normalize_len(const float3 a, ccl_private float *t)
 {
   *t = len(a);
   return (*t != 0.0f) ? a / (*t) : a;
diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h
index 19af5c8c638..f30a78cfc69 100644
--- a/intern/cycles/util/util_math_float4.h
+++ b/intern/cycles/util/util_math_float4.h
@@ -497,7 +497,7 @@ ccl_device_inline float4 reduce_max(const float4 &a)
 #  endif
 }
 
-ccl_device_inline float4 load_float4(const float *v)
+ccl_device_inline float4 load_float4(ccl_private const float *v)
 {
 #  ifdef __KERNEL_SSE__
   return float4(_mm_loadu_ps(v));
diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h
index fd0c9124345..0c431a36afb 100644
--- a/intern/cycles/util/util_math_intersect.h
+++ b/intern/cycles/util/util_math_intersect.h
@@ -26,8 +26,8 @@ ccl_device bool ray_sphere_intersect(float3 ray_P,
                                      float ray_t,
                                      float3 sphere_P,
                                      float sphere_radius,
-                                     float3 *isect_P,
-                                     float *isect_t)
+                                     ccl_private float3 *isect_P,
+                                     ccl_private float *isect_t)
 {
   const float3 d = sphere_P - ray_P;
   const float radiussq = sphere_radius * sphere_radius;
@@ -60,8 +60,8 @@ ccl_device bool ray_aligned_disk_intersect(float3 ray_P,
                                            float ray_t,
                                            float3 disk_P,
                                            float disk_radius,
-                                           float3 *isect_P,
-                                           float *isect_t)
+                                           ccl_private float3 *isect_P,
+                                           ccl_private float *isect_t)
 {
   /* Aligned disk normal. */
   float disk_t;
@@ -95,9 +95,9 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
                                                    const float3 tri_b,
                                                    const float3 tri_c,
 #endif
-                                                   float *isect_u,
-                                                   float *isect_v,
-                                                   float *isect_t)
+                                                   ccl_private float *isect_u,
+                                                   ccl_private float *isect_v,
+                                                   ccl_private float *isect_t)
 {
 #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
   typedef ssef float3;
@@ -207,10 +207,10 @@ ccl_device bool ray_quad_intersect(float3 ray_P,
                                    float3 quad_u,
                                    float3 quad_v,
                                    float3 quad_n,
-                                   float3 *isect_P,
-                                   float *isect_t,
-                                   float *isect_u,
-                                   float *isect_v,
+                                   ccl_private float3 *isect_P,
+                                   ccl_private float *isect_t,
+                                   ccl_private float *isect_u,
+                                   ccl_private float *isect_v,
                                    bool ellipse)
 {
   /* Perform intersection test. */
diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h
index 123736f75a6..bff7ddb4cee 100644
--- a/intern/cycles/util/util_math_matrix.h
+++ b/intern/cycles/util/util_math_matrix.h
@@ -35,14 +35,14 @@ CCL_NAMESPACE_BEGIN
 
 /* Zeroing helpers. */
 
-ccl_device_inline void math_vector_zero(float *v, int n)
+ccl_device_inline void math_vector_zero(ccl_private float *v, int n)
 {
   for (int i = 0; i < n; i++) {
     v[i] = 0.0f;
   }
 }
 
-ccl_device_inline void math_matrix_zero(float *A, int n)
+ccl_device_inline void math_matrix_zero(ccl_private float *A, int n)
 {
   for (int row = 0; row < n; row++) {
     for (int col = 0; col <= row; col++) {
@@ -53,14 +53,18 @@ ccl_device_inline void math_matrix_zero(float *A, int n)
 
 /* Elementary vector operations. */
 
-ccl_device_inline void math_vector_add(float *a, const float *ccl_restrict b, int n)
+ccl_device_inline void math_vector_add(ccl_private float *a,
+                                       ccl_private const float *ccl_restrict b,
+                                       int n)
 {
   for (int i = 0; i < n; i++) {
     a[i] += b[i];
   }
 }
 
-ccl_device_inline void math_vector_mul(float *a, const float *ccl_restrict b, int n)
+ccl_device_inline void math_vector_mul(ccl_private float *a,
+                                       ccl_private const float *ccl_restrict b,
+                                       int n)
 {
   for (int i = 0; i < n; i++) {
     a[i] *= b[i];
@@ -68,7 +72,7 @@ ccl_device_inline void math_vector_mul(float *a, const float *ccl_restrict b, in
 }
 
 ccl_device_inline void math_vector_mul_strided(ccl_global float *a,
-                                               const float *ccl_restrict b,
+                                               ccl_private const float *ccl_restrict b,
                                                int astride,
                                                int n)
 {
@@ -77,21 +81,23 @@ ccl_device_inline void math_vector_mul_strided(ccl_global float *a,
   }
 }
 
-ccl_device_inline void math_vector_scale(float *a, float b, int n)
+ccl_device_inline void math_vector_scale(ccl_private float *a, float b, int n)
 {
   for (int i = 0; i < n; i++) {
     a[i] *= b;
   }
 }
 
-ccl_device_inline void math_vector_max(float *a, const float *ccl_restrict b, int n)
+ccl_device_inline void math_vector_max(ccl_private float *a,
+                                       ccl_private const float *ccl_restrict b,
+                                       int n)
 {
   for (int i = 0; i < n; i++) {
     a[i] = max(a[i], b[i]);
   }
 }
 
-ccl_device_inline void math_vec3_add(float3 *v, int n, float *x, float3 w)
+ccl_device_inline void math_vec3_add(ccl_private float3 *v, int n, ccl_private float *x, float3 w)
 {
   for (int i = 0; i < n; i++) {
     v[i] += w * x[i];
@@ -99,7 +105,7 @@ ccl_device_inline void math_vec3_add(float3 *v, int n, float *x, float3 w)
 }
 
 ccl_device_inline void math_vec3_add_strided(
-    ccl_global float3 *v, int n, float *x, float3 w, int stride)
+    ccl_global float3 *v, int n, ccl_private float *x, float3 w, int stride)
 {
   for (int i = 0; i < n; i++) {
     ccl_global float *elem = (ccl_global float *)(v + i * stride);
@@ -125,9 +131,9 @@ ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A,
 
 /* Add Gramian matrix of v to A.
  * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */
-ccl_device_inline void math_matrix_add_gramian(float *A,
+ccl_device_inline void math_matrix_add_gramian(ccl_private float *A,
                                                int n,
-                                               const float *ccl_restrict v,
+                                               ccl_private const float *ccl_restrict v,
                                                float weight)
 {
   for (int row = 0; row < n; row++) {
@@ -140,7 +146,7 @@ ccl_device_inline void math_matrix_add_gramian(float *A,
 /* Add Gramian matrix of v to A.
  * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */
 ccl_device_inline void math_trimatrix_add_gramian_strided(
-    ccl_global float *A, int n, const float *ccl_restrict v, float weight, int stride)
+    ccl_global float *A, int n, ccl_private const float *ccl_restrict v, float weight, int stride)
 {
   for (int row = 0; row < n; row++) {
     for (int col = 0; col <= row; col++) {
@@ -151,7 +157,7 @@ ccl_device_inline void math_trimatrix_add_gramian_strided(
 
 ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A,
                                                   int n,
-                                                  const float *ccl_restrict v,
+                                                  ccl_private const float *ccl_restrict v,
                                                   float weight)
 {
   for (int row = 0; row < n; row++) {
@@ -244,7 +250,7 @@ ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A,
  * and V will contain the eigenvectors of the original A in its rows (!),
  * so that A = V^T*D*V. Therefore, the diagonal elements of D are the (sorted) eigenvalues of A.
  */
-ccl_device void math_matrix_jacobi_eigendecomposition(float *A,
+ccl_device void math_matrix_jacobi_eigendecomposition(ccl_private float *A,
                                                       ccl_global float *V,
                                                       int n,
                                                       int v_stride)
diff --git a/intern/cycles/util/util_projection.h b/intern/cycles/util/util_projection.h
index 9c7e0061c82..04b4574d75b 100644
--- a/intern/cycles/util/util_projection.h
+++ b/intern/cycles/util/util_projection.h
@@ -45,7 +45,8 @@ typedef struct PerspectiveMotionTransform {
 
 /* Functions */
 
-ccl_device_inline float3 transform_perspective(const ProjectionTransform *t, const float3 a)
+ccl_device_inline float3 transform_perspective(ccl_private const ProjectionTransform *t,
+                                               const float3 a)
 {
   float4 b = make_float4(a.x, a.y, a.z, 1.0f);
   float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b));
@@ -54,7 +55,7 @@ ccl_device_inline float3 transform_perspective(const ProjectionTransform *t, con
   return (w != 0.0f) ? c / w : zero_float3();
 }
 
-ccl_device_inline float3 transform_perspective_direction(const ProjectionTransform *t,
+ccl_device_inline float3 transform_perspective_direction(ccl_private const ProjectionTransform *t,
                                                          const float3 a)
 {
   float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z,
diff --git a/intern/cycles/util/util_rect.h b/intern/cycles/util/util_rect.h
index 36f02a01f7b..32df9327cbd 100644
--- a/intern/cycles/util/util_rect.h
+++ b/intern/cycles/util/util_rect.h
@@ -54,7 +54,10 @@ ccl_device_inline int coord_to_local_index(int4 rect, int x, int y)
 
 /* Finds the coordinates of a pixel given by its row-major index in the rect,
  * and returns whether the pixel is inside it. */
-ccl_device_inline bool local_index_to_coord(int4 rect, int idx, int *x, int *y)
+ccl_device_inline bool local_index_to_coord(int4 rect,
+                                            int idx,
+                                            ccl_private int *x,
+                                            ccl_private int *y)
 {
   int w = rect.z - rect.x;
   *x = (idx % w) + rect.x;
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index e9cd3b0b483..fc04f9aab46 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -53,7 +53,7 @@ typedef struct DecomposedTransform {
 
 /* Functions */
 
-ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
+ccl_device_inline float3 transform_point(ccl_private const Transform *t, const float3 a)
 {
   /* TODO(sergey): Disabled for now, causes crashes in certain cases. */
 #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
@@ -82,7 +82,7 @@ ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
 #endif
 }
 
-ccl_device_inline float3 transform_direction(const Transform *t, const float3 a)
+ccl_device_inline float3 transform_direction(ccl_private const Transform *t, const float3 a)
 {
 #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
   ssef x, y, z, w, aa;
@@ -108,7 +108,8 @@ ccl_device_inline float3 transform_direction(const Transform *t, const float3 a)
 #endif
 }
 
-ccl_device_inline float3 transform_direction_transposed(const Transform *t, const float3 a)
+ccl_device_inline float3 transform_direction_transposed(ccl_private const Transform *t,
+                                                        const float3 a)
 {
   float3 x = make_float3(t->x.x, t->y.x, t->z.x);
   float3 y = make_float3(t->x.y, t->y.y, t->z.y);
@@ -409,7 +410,8 @@ ccl_device_inline Transform transform_quick_inverse(Transform M)
   return R;
 }
 
-ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransform *decomp)
+ccl_device_inline void transform_compose(ccl_private Transform *tfm,
+                                         ccl_private const DecomposedTransform *decomp)
 {
   /* rotation */
   float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc;
@@ -449,7 +451,7 @@ ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransfo
 
 /* Interpolate from array of decomposed transforms. */
 ccl_device void transform_motion_array_interpolate(Transform *tfm,
-                                                   const ccl_global DecomposedTransform *motion,
+                                                   const DecomposedTransform *motion,
                                                    uint numsteps,
                                                    float time)
 {
@@ -458,8 +460,8 @@ ccl_device void transform_motion_array_interpolate(Transform *tfm,
   int step = min((int)(time * maxstep), maxstep - 1);
   float t = time * maxstep - step;
 
-  const ccl_global DecomposedTransform *a = motion + step;
-  const ccl_global DecomposedTransform *b = motion + step + 1;
+  const DecomposedTransform *a = motion + step;
+  const DecomposedTransform *b = motion + step + 1;
 
   /* Interpolate rotation, translation and scale. */
   DecomposedTransform decomp;
@@ -472,12 +474,12 @@ ccl_device void transform_motion_array_interpolate(Transform *tfm,
   transform_compose(tfm, &decomp);
 }
 
-ccl_device_inline bool transform_isfinite_safe(Transform *tfm)
+ccl_device_inline bool transform_isfinite_safe(ccl_private Transform *tfm)
 {
   return isfinite4_safe(tfm->x) && isfinite4_safe(tfm->y) && isfinite4_safe(tfm->z);
 }
 
-ccl_device_inline bool transform_decomposed_isfinite_safe(DecomposedTransform *decomp)
+ccl_device_inline bool transform_decomposed_isfinite_safe(ccl_private DecomposedTransform *decomp)
 {
   return isfinite4_safe(decomp->x) && isfinite4_safe(decomp->y) && isfinite4_safe(decomp->z) &&
          isfinite4_safe(decomp->w);