
git.blender.org/blender.git
author     Michael Jones <michael_p_jones@apple.com>  2021-10-14 15:53:40 +0300
committer  Michael Jones <michael_p_jones@apple.com>  2021-10-14 18:14:43 +0300
commit     a0f269f682dab848afc80cd322d04a0c4a815cae (patch)
tree       0978b1888273fbaa2d14550bde484c5247fa89ff  /intern/cycles/kernel/integrator
parent     47caeb8c26686e24ea7e694f94fabee44f3d2dca (diff)
Cycles: Kernel address space changes for MSL
This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation.

MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc.). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc.), therefore the first step of MSL-enablement is to apply these consistently. Line for line, this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness.

The vast majority of deltas in this patch fall into one of two cases:

- Ensuring ccl_private is specified for thread-local pointer types
- Ensuring ccl_global is specified for device-wide pointer types

Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs) or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant.

In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture.

The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation.

Ref T92212

Reviewed By: brecht

Maniphest Tasks: T92212

Differential Revision: https://developer.blender.org/D12864
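
For readers unfamiliar with Cycles' address space macros, the sketch below (not part of this patch) illustrates the convention the commit message describes: on a Metal-style build the macros would map to MSL's explicit address spaces, while on a CPU build they expand to nothing so the same kernel source still compiles. The __KERNEL_METAL__ guard, the exact macro expansions, and the sample function are assumptions for illustration only; the real definitions belong to Cycles' per-backend headers such as the new metal/compat.h.

/* Illustrative sketch only -- not the actual metal/compat.h added by this patch.
 * Assumed: a __KERNEL_METAL__ guard and the MSL "device"/"thread" keywords.
 * On CPU the macros expand to nothing, so the same source compiles unchanged. */
#ifdef __KERNEL_METAL__
#  define ccl_global device  /* device-wide memory (MSL "device" address space) */
#  define ccl_private thread /* thread-local memory (MSL "thread" address space) */
#else
#  define ccl_global
#  define ccl_private
#endif

/* Hypothetical function showing the ordering adopted by this patch:
 * the address space qualifier is placed before "const". */
float first_sample(ccl_global const float *render_buffer,
                   ccl_private const int *offset)
{
  return render_buffer[*offset];
}

With macros of this kind in place, a CPU compiler sees plain pointers while an MSL compiler sees fully qualified ones, which is why applying ccl_global and ccl_private consistently accounts for the bulk of the deltas below.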
Diffstat (limited to 'intern/cycles/kernel/integrator')
-rw-r--r--  intern/cycles/kernel/integrator/integrator_init_from_bake.h            2
-rw-r--r--  intern/cycles/kernel/integrator/integrator_init_from_camera.h          6
-rw-r--r--  intern/cycles/kernel/integrator/integrator_intersect_closest.h         2
-rw-r--r--  intern/cycles/kernel/integrator/integrator_intersect_shadow.h          4
-rw-r--r--  intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h    4
-rw-r--r--  intern/cycles/kernel/integrator/integrator_shade_background.h          4
-rw-r--r--  intern/cycles/kernel/integrator/integrator_shade_light.h               2
-rw-r--r--  intern/cycles/kernel/integrator/integrator_shade_shadow.h              7
-rw-r--r--  intern/cycles/kernel/integrator/integrator_shade_surface.h            39
-rw-r--r--  intern/cycles/kernel/integrator/integrator_shade_volume.h            108
-rw-r--r--  intern/cycles/kernel/integrator/integrator_state.h                    13
-rw-r--r--  intern/cycles/kernel/integrator/integrator_state_util.h               25
-rw-r--r--  intern/cycles/kernel/integrator/integrator_subsurface.h               13
-rw-r--r--  intern/cycles/kernel/integrator/integrator_subsurface_disk.h           4
-rw-r--r--  intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h   19
-rw-r--r--  intern/cycles/kernel/integrator/integrator_volume_stack.h              7
16 files changed, 137 insertions(+), 122 deletions(-)
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
index 6e4e1be55fa..c822823de9c 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
@@ -44,7 +44,7 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u, float max)
* Used by CPU implementation to not attempt to sample pixel for multiple samples once its known
* that the pixel did converge. */
ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
- const ccl_global KernelWorkTile *ccl_restrict tile,
+ ccl_global const KernelWorkTile *ccl_restrict tile,
ccl_global float *render_buffer,
const int x,
const int y,
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_camera.h b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
index 58e7bde4c94..291f0f106f0 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_camera.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
@@ -25,12 +25,12 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline void integrate_camera_sample(const KernelGlobals *ccl_restrict kg,
+ccl_device_inline void integrate_camera_sample(ccl_global const KernelGlobals *ccl_restrict kg,
const int sample,
const int x,
const int y,
const uint rng_hash,
- Ray *ray)
+ ccl_private Ray *ray)
{
/* Filter sampling. */
float filter_u, filter_v;
@@ -64,7 +64,7 @@ ccl_device_inline void integrate_camera_sample(const KernelGlobals *ccl_restrict
* Used by CPU implementation to not attempt to sample pixel for multiple samples once its known
* that the pixel did converge. */
ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
- const ccl_global KernelWorkTile *ccl_restrict tile,
+ ccl_global const KernelWorkTile *ccl_restrict tile,
ccl_global float *render_buffer,
const int x,
const int y,
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
index cd9af1c62fc..760c08159e3 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
@@ -86,7 +86,7 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS
template<uint32_t current_kernel>
ccl_device_forceinline void integrator_intersect_shader_next_kernel(
INTEGRATOR_STATE_ARGS,
- const Intersection *ccl_restrict isect,
+ ccl_private const Intersection *ccl_restrict isect,
const int shader,
const int shader_flags)
{
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
index 5bd9cfda4a4..00d44f0e5ed 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
@@ -32,7 +32,7 @@ ccl_device_forceinline uint integrate_intersect_shadow_visibility(INTEGRATOR_STA
}
ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS,
- const Ray *ray,
+ ccl_private const Ray *ray,
const uint visibility)
{
/* Mask which will pick only opaque visibility bits from the `visibility`.
@@ -62,7 +62,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(INTEGRATOR_STAT
#ifdef __TRANSPARENT_SHADOWS__
ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS,
- const Ray *ray,
+ ccl_private const Ray *ray,
const uint visibility)
{
Intersection isect[INTEGRATOR_SHADOW_ISECT_SIZE];
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
index 33a77d0fe29..192e9c6ab43 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
@@ -30,7 +30,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
ShaderDataTinyStorage stack_sd_storage;
- ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
+ ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
kernel_assert(kernel_data.integrator.use_volumes);
@@ -78,7 +78,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
ShaderDataTinyStorage stack_sd_storage;
- ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
+ ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
Ray volume_ray ccl_optional_struct_init;
integrator_state_read_ray(INTEGRATOR_STATE_PASS, &volume_ray);
diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h
index 234aa7cae63..a898f3fb2fc 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_background.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_background.h
@@ -49,7 +49,7 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
/* TODO: does aliasing like this break automatic SoA in CUDA?
* Should we instead store closures separate from ShaderData? */
ShaderDataTinyStorage emission_sd_storage;
- ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP);
shader_setup_from_background(kg,
@@ -155,7 +155,7 @@ ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS,
/* Evaluate light shader. */
/* TODO: does aliasing like this break automatic SoA in CUDA? */
ShaderDataTinyStorage emission_sd_storage;
- ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
float3 light_eval = light_sample_shader_eval(
INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
if (is_zero(light_eval)) {
diff --git a/intern/cycles/kernel/integrator/integrator_shade_light.h b/intern/cycles/kernel/integrator/integrator_shade_light.h
index 05b530f9665..d8f8da63023 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_light.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_light.h
@@ -72,7 +72,7 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
/* Evaluate light shader. */
/* TODO: does aliasing like this break automatic SoA in CUDA? */
ShaderDataTinyStorage emission_sd_storage;
- ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
float3 light_eval = light_sample_shader_eval(INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
if (is_zero(light_eval)) {
return;
diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
index fd3c3ae1653..3857b522b25 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
@@ -39,7 +39,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A
* TODO: is it better to declare this outside the loop or keep it local
* so the compiler can see there is no dependency between iterations? */
ShaderDataTinyStorage shadow_sd_storage;
- ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
+ ccl_private ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
/* Setup shader data at surface. */
Intersection isect ccl_optional_struct_init;
@@ -69,13 +69,14 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A
ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS,
const int hit,
const int num_recorded_hits,
- float3 *ccl_restrict throughput)
+ ccl_private float3 *ccl_restrict
+ throughput)
{
PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME);
/* TODO: deduplicate with surface, or does it not matter for memory usage? */
ShaderDataTinyStorage shadow_sd_storage;
- ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
+ ccl_private ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
/* Setup shader data. */
Ray ray ccl_optional_struct_init;
diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h
index 27338f824c0..0d739517592 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_surface.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h
@@ -29,7 +29,7 @@
CCL_NAMESPACE_BEGIN
ccl_device_forceinline void integrate_surface_shader_setup(INTEGRATOR_STATE_CONST_ARGS,
- ShaderData *sd)
+ ccl_private ShaderData *sd)
{
Intersection isect ccl_optional_struct_init;
integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
@@ -42,7 +42,7 @@ ccl_device_forceinline void integrate_surface_shader_setup(INTEGRATOR_STATE_CONS
#ifdef __HOLDOUT__
ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARGS,
- ShaderData *sd,
+ ccl_private ShaderData *sd,
ccl_global float *ccl_restrict render_buffer)
{
/* Write holdout transparency to render buffer and stop if fully holdout. */
@@ -67,7 +67,7 @@ ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARG
#ifdef __EMISSION__
ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_ARGS,
- const ShaderData *sd,
+ ccl_private const ShaderData *sd,
ccl_global float *ccl_restrict
render_buffer)
{
@@ -103,8 +103,8 @@ ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_AR
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS,
- ShaderData *sd,
- const RNGState *rng_state)
+ ccl_private ShaderData *sd,
+ ccl_private const RNGState *rng_state)
{
/* Test if there is a light or BSDF that needs direct light. */
if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) {
@@ -134,7 +134,7 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
* the light shader. This could also move to its own kernel, for
* non-constant light sources. */
ShaderDataTinyStorage emission_sd_storage;
- ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
const float3 light_eval = light_sample_shader_eval(
INTEGRATOR_STATE_PASS, emission_sd, &ls, sd->time);
if (is_zero(light_eval)) {
@@ -206,9 +206,8 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
#endif
/* Path tracing: bounce off or through surface with new direction. */
-ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(INTEGRATOR_STATE_ARGS,
- ShaderData *sd,
- const RNGState *rng_state)
+ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
+ INTEGRATOR_STATE_ARGS, ccl_private ShaderData *sd, ccl_private const RNGState *rng_state)
{
/* Sample BSDF or BSSRDF. */
if (!(sd->flag & (SD_BSDF | SD_BSSRDF))) {
@@ -217,7 +216,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(INTEGRATOR_STATE
float bsdf_u, bsdf_v;
path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
- const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u);
+ ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u);
#ifdef __SUBSURFACE__
/* BSSRDF closure, we schedule subsurface intersection kernel. */
@@ -281,7 +280,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(INTEGRATOR_STATE
#ifdef __VOLUME__
ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STATE_ARGS,
- ShaderData *sd)
+ ccl_private ShaderData *sd)
{
if (!path_state_volume_next(INTEGRATOR_STATE_PASS)) {
return LABEL_NONE;
@@ -304,19 +303,21 @@ ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STAT
#endif
#if defined(__AO__) && defined(__SHADER_RAYTRACE__)
-ccl_device_forceinline void integrate_surface_ao_pass(INTEGRATOR_STATE_CONST_ARGS,
- const ShaderData *ccl_restrict sd,
- const RNGState *ccl_restrict rng_state,
- ccl_global float *ccl_restrict render_buffer)
+ccl_device_forceinline void integrate_surface_ao_pass(
+ INTEGRATOR_STATE_CONST_ARGS,
+ ccl_private const ShaderData *ccl_restrict sd,
+ ccl_private const RNGState *ccl_restrict rng_state,
+ ccl_global float *ccl_restrict render_buffer)
{
# ifdef __KERNEL_OPTIX__
optixDirectCall<void>(2, INTEGRATOR_STATE_PASS, sd, rng_state, render_buffer);
}
-extern "C" __device__ void __direct_callable__ao_pass(INTEGRATOR_STATE_CONST_ARGS,
- const ShaderData *ccl_restrict sd,
- const RNGState *ccl_restrict rng_state,
- ccl_global float *ccl_restrict render_buffer)
+extern "C" __device__ void __direct_callable__ao_pass(
+ INTEGRATOR_STATE_CONST_ARGS,
+ ccl_private const ShaderData *ccl_restrict sd,
+ ccl_private const RNGState *ccl_restrict rng_state,
+ ccl_global float *ccl_restrict render_buffer)
{
# endif /* __KERNEL_OPTIX__ */
float bsdf_u, bsdf_v;
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index aa4c652c037..72c609751f7 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ -71,8 +71,8 @@ typedef struct VolumeShaderCoefficients {
/* Evaluate shader to get extinction coefficient at P. */
ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
- ShaderData *ccl_restrict sd,
- float3 *ccl_restrict extinction)
+ ccl_private ShaderData *ccl_restrict sd,
+ ccl_private float3 *ccl_restrict extinction)
{
shader_eval_volume<true>(INTEGRATOR_STATE_PASS, sd, PATH_RAY_SHADOW, [=](const int i) {
return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
@@ -89,8 +89,8 @@ ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
/* Evaluate shader to get absorption, scattering and emission at P. */
ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
- ShaderData *ccl_restrict sd,
- VolumeShaderCoefficients *coeff)
+ ccl_private ShaderData *ccl_restrict sd,
+ ccl_private VolumeShaderCoefficients *coeff)
{
const int path_flag = INTEGRATOR_STATE(path, flag);
shader_eval_volume<false>(INTEGRATOR_STATE_PASS, sd, path_flag, [=](const int i) {
@@ -107,7 +107,7 @@ ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
if (sd->flag & SD_SCATTER) {
for (int i = 0; i < sd->num_closure; i++) {
- const ShaderClosure *sc = &sd->closure[i];
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
if (CLOSURE_IS_VOLUME(sc->type)) {
coeff->sigma_s += sc->weight;
@@ -123,14 +123,14 @@ ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
return true;
}
-ccl_device_forceinline void volume_step_init(const KernelGlobals *kg,
- const RNGState *rng_state,
+ccl_device_forceinline void volume_step_init(ccl_global const KernelGlobals *kg,
+ ccl_private const RNGState *rng_state,
const float object_step_size,
float t,
- float *step_size,
- float *step_shade_offset,
- float *steps_offset,
- int *max_steps)
+ ccl_private float *step_size,
+ ccl_private float *step_shade_offset,
+ ccl_private float *steps_offset,
+ ccl_private int *max_steps)
{
if (object_step_size == FLT_MAX) {
/* Homogeneous volume. */
@@ -170,9 +170,9 @@ ccl_device_forceinline void volume_step_init(const KernelGlobals *kg,
/* homogeneous volume: assume shader evaluation at the starts gives
* the extinction coefficient for the entire line segment */
ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS,
- Ray *ccl_restrict ray,
- ShaderData *ccl_restrict sd,
- float3 *ccl_restrict throughput)
+ ccl_private Ray *ccl_restrict ray,
+ ccl_private ShaderData *ccl_restrict sd,
+ ccl_global float3 *ccl_restrict throughput)
{
float3 sigma_t = zero_float3();
@@ -185,9 +185,9 @@ ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS,
/* heterogeneous volume: integrate stepping through the volume until we
* reach the end, get absorbed entirely, or run out of iterations */
ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
- Ray *ccl_restrict ray,
- ShaderData *ccl_restrict sd,
- float3 *ccl_restrict throughput,
+ ccl_private Ray *ccl_restrict ray,
+ ccl_private ShaderData *ccl_restrict sd,
+ ccl_private float3 *ccl_restrict throughput,
const float object_step_size)
{
/* Load random number state. */
@@ -257,10 +257,10 @@ ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
/* Equi-angular sampling as in:
* "Importance Sampling Techniques for Path Tracing in Participating Media" */
-ccl_device float volume_equiangular_sample(const Ray *ccl_restrict ray,
+ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict ray,
const float3 light_P,
const float xi,
- float *pdf)
+ ccl_private float *pdf)
{
const float t = ray->t;
const float delta = dot((light_P - ray->P), ray->D);
@@ -281,7 +281,7 @@ ccl_device float volume_equiangular_sample(const Ray *ccl_restrict ray,
return min(t, delta + t_); /* min is only for float precision errors */
}
-ccl_device float volume_equiangular_pdf(const Ray *ccl_restrict ray,
+ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
const float3 light_P,
const float sample_t)
{
@@ -305,7 +305,7 @@ ccl_device float volume_equiangular_pdf(const Ray *ccl_restrict ray,
return pdf;
}
-ccl_device float volume_equiangular_cdf(const Ray *ccl_restrict ray,
+ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray,
const float3 light_P,
const float sample_t)
{
@@ -332,8 +332,12 @@ ccl_device float volume_equiangular_cdf(const Ray *ccl_restrict ray,
/* Distance sampling */
-ccl_device float volume_distance_sample(
- float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
+ccl_device float volume_distance_sample(float max_t,
+ float3 sigma_t,
+ int channel,
+ float xi,
+ ccl_private float3 *transmittance,
+ ccl_private float3 *pdf)
{
/* xi is [0, 1[ so log(0) should never happen, division by zero is
* avoided because sample_sigma_t > 0 when SD_SCATTER is set */
@@ -363,7 +367,7 @@ ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_
/* Emission */
-ccl_device float3 volume_emission_integrate(VolumeShaderCoefficients *coeff,
+ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff,
int closure_flag,
float3 transmittance,
float t)
@@ -410,13 +414,13 @@ typedef struct VolumeIntegrateState {
} VolumeIntegrateState;
ccl_device_forceinline void volume_integrate_step_scattering(
- const ShaderData *sd,
- const Ray *ray,
+ ccl_private const ShaderData *sd,
+ ccl_private const Ray *ray,
const float3 equiangular_light_P,
- const VolumeShaderCoefficients &ccl_restrict coeff,
+ ccl_private const VolumeShaderCoefficients &ccl_restrict coeff,
const float3 transmittance,
- VolumeIntegrateState &ccl_restrict vstate,
- VolumeIntegrateResult &ccl_restrict result)
+ ccl_private VolumeIntegrateState &ccl_restrict vstate,
+ ccl_private VolumeIntegrateResult &ccl_restrict result)
{
/* Pick random color channel, we use the Veach one-sample
* model with balance heuristic for the channels. */
@@ -507,14 +511,14 @@ ccl_device_forceinline void volume_integrate_step_scattering(
* for path tracing where we don't want to branch. */
ccl_device_forceinline void volume_integrate_heterogeneous(
INTEGRATOR_STATE_ARGS,
- Ray *ccl_restrict ray,
- ShaderData *ccl_restrict sd,
- const RNGState *rng_state,
+ ccl_private Ray *ccl_restrict ray,
+ ccl_private ShaderData *ccl_restrict sd,
+ ccl_private const RNGState *rng_state,
ccl_global float *ccl_restrict render_buffer,
const float object_step_size,
const VolumeSampleMethod direct_sample_method,
const float3 equiangular_light_P,
- VolumeIntegrateResult &result)
+ ccl_private VolumeIntegrateResult &result)
{
PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE);
@@ -666,10 +670,11 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
# ifdef __EMISSION__
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
-ccl_device_forceinline bool integrate_volume_sample_light(INTEGRATOR_STATE_ARGS,
- const ShaderData *ccl_restrict sd,
- const RNGState *ccl_restrict rng_state,
- LightSample *ccl_restrict ls)
+ccl_device_forceinline bool integrate_volume_sample_light(
+ INTEGRATOR_STATE_ARGS,
+ ccl_private const ShaderData *ccl_restrict sd,
+ ccl_private const RNGState *ccl_restrict rng_state,
+ ccl_private LightSample *ccl_restrict ls)
{
/* Test if there is a light or BSDF that needs direct light. */
if (!kernel_data.integrator.use_direct_light) {
@@ -694,14 +699,14 @@ ccl_device_forceinline bool integrate_volume_sample_light(INTEGRATOR_STATE_ARGS,
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
-ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
- const ShaderData *ccl_restrict sd,
- const RNGState *ccl_restrict rng_state,
- const float3 P,
- const ShaderVolumePhases *ccl_restrict
- phases,
- const float3 throughput,
- LightSample *ccl_restrict ls)
+ccl_device_forceinline void integrate_volume_direct_light(
+ INTEGRATOR_STATE_ARGS,
+ ccl_private const ShaderData *ccl_restrict sd,
+ ccl_private const RNGState *ccl_restrict rng_state,
+ const float3 P,
+ ccl_private const ShaderVolumePhases *ccl_restrict phases,
+ ccl_private const float3 throughput,
+ ccl_private LightSample *ccl_restrict ls)
{
PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT);
@@ -737,7 +742,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
* the light shader. This could also move to its own kernel, for
* non-constant light sources. */
ShaderDataTinyStorage emission_sd_storage;
- ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
const float3 light_eval = light_sample_shader_eval(
INTEGRATOR_STATE_PASS, emission_sd, ls, sd->time);
if (is_zero(light_eval)) {
@@ -801,10 +806,11 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
# endif
/* Path tracing: scatter in new direction using phase function */
-ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS,
- ShaderData *sd,
- const RNGState *rng_state,
- const ShaderVolumePhases *phases)
+ccl_device_forceinline bool integrate_volume_phase_scatter(
+ INTEGRATOR_STATE_ARGS,
+ ccl_private ShaderData *sd,
+ ccl_private const RNGState *rng_state,
+ ccl_private const ShaderVolumePhases *phases)
{
PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT);
@@ -865,7 +871,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS
* between the endpoints. distance sampling is used to decide if we will
* scatter or not. */
ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
- Ray *ccl_restrict ray,
+ ccl_private Ray *ccl_restrict ray,
ccl_global float *ccl_restrict render_buffer)
{
ShaderData sd;
diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h
index efc7576d95b..517e2891769 100644
--- a/intern/cycles/kernel/integrator/integrator_state.h
+++ b/intern/cycles/kernel/integrator/integrator_state.h
@@ -106,7 +106,7 @@ typedef struct IntegratorQueueCounter {
* GPU rendering path state with SoA layout. */
typedef struct IntegratorStateGPU {
#define KERNEL_STRUCT_BEGIN(name) struct {
-#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type *name;
+#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) ccl_global type *name;
#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
#define KERNEL_STRUCT_END(name) \
} \
@@ -124,13 +124,13 @@ typedef struct IntegratorStateGPU {
#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
/* Count number of queued kernels. */
- IntegratorQueueCounter *queue_counter;
+ ccl_global IntegratorQueueCounter *queue_counter;
/* Count number of kernels queued for specific shaders. */
- int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM];
+ ccl_global int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM];
/* Index of path which will be used by a next shadow catcher split. */
- int *next_shadow_catcher_path_index;
+ ccl_global int *next_shadow_catcher_path_index;
} IntegratorStateGPU;
/* Abstraction
@@ -173,9 +173,10 @@ typedef IntegratorStateCPU *ccl_restrict IntegratorState;
typedef int IntegratorState;
-# define INTEGRATOR_STATE_ARGS const KernelGlobals *ccl_restrict kg, const IntegratorState state
+# define INTEGRATOR_STATE_ARGS \
+ ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state
# define INTEGRATOR_STATE_CONST_ARGS \
- const KernelGlobals *ccl_restrict kg, const IntegratorState state
+ ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state
# define INTEGRATOR_STATE_PASS kg, state
# define INTEGRATOR_STATE_PASS_NULL kg, -1
diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h
index 037c7533943..fddd9eb5ac8 100644
--- a/intern/cycles/kernel/integrator/integrator_state_util.h
+++ b/intern/cycles/kernel/integrator/integrator_state_util.h
@@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN
/* Ray */
ccl_device_forceinline void integrator_state_write_ray(INTEGRATOR_STATE_ARGS,
- const Ray *ccl_restrict ray)
+ ccl_private const Ray *ccl_restrict ray)
{
INTEGRATOR_STATE_WRITE(ray, P) = ray->P;
INTEGRATOR_STATE_WRITE(ray, D) = ray->D;
@@ -35,7 +35,7 @@ ccl_device_forceinline void integrator_state_write_ray(INTEGRATOR_STATE_ARGS,
}
ccl_device_forceinline void integrator_state_read_ray(INTEGRATOR_STATE_CONST_ARGS,
- Ray *ccl_restrict ray)
+ ccl_private Ray *ccl_restrict ray)
{
ray->P = INTEGRATOR_STATE(ray, P);
ray->D = INTEGRATOR_STATE(ray, D);
@@ -47,8 +47,8 @@ ccl_device_forceinline void integrator_state_read_ray(INTEGRATOR_STATE_CONST_ARG
/* Shadow Ray */
-ccl_device_forceinline void integrator_state_write_shadow_ray(INTEGRATOR_STATE_ARGS,
- const Ray *ccl_restrict ray)
+ccl_device_forceinline void integrator_state_write_shadow_ray(
+ INTEGRATOR_STATE_ARGS, ccl_private const Ray *ccl_restrict ray)
{
INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray->P;
INTEGRATOR_STATE_WRITE(shadow_ray, D) = ray->D;
@@ -58,7 +58,7 @@ ccl_device_forceinline void integrator_state_write_shadow_ray(INTEGRATOR_STATE_A
}
ccl_device_forceinline void integrator_state_read_shadow_ray(INTEGRATOR_STATE_CONST_ARGS,
- Ray *ccl_restrict ray)
+ ccl_private Ray *ccl_restrict ray)
{
ray->P = INTEGRATOR_STATE(shadow_ray, P);
ray->D = INTEGRATOR_STATE(shadow_ray, D);
@@ -70,8 +70,8 @@ ccl_device_forceinline void integrator_state_read_shadow_ray(INTEGRATOR_STATE_CO
/* Intersection */
-ccl_device_forceinline void integrator_state_write_isect(INTEGRATOR_STATE_ARGS,
- const Intersection *ccl_restrict isect)
+ccl_device_forceinline void integrator_state_write_isect(
+ INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect)
{
INTEGRATOR_STATE_WRITE(isect, t) = isect->t;
INTEGRATOR_STATE_WRITE(isect, u) = isect->u;
@@ -84,8 +84,8 @@ ccl_device_forceinline void integrator_state_write_isect(INTEGRATOR_STATE_ARGS,
#endif
}
-ccl_device_forceinline void integrator_state_read_isect(INTEGRATOR_STATE_CONST_ARGS,
- Intersection *ccl_restrict isect)
+ccl_device_forceinline void integrator_state_read_isect(
+ INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect)
{
isect->prim = INTEGRATOR_STATE(isect, prim);
isect->object = INTEGRATOR_STATE(isect, object);
@@ -124,7 +124,7 @@ ccl_device_forceinline bool integrator_state_volume_stack_is_empty(INTEGRATOR_ST
/* Shadow Intersection */
ccl_device_forceinline void integrator_state_write_shadow_isect(
- INTEGRATOR_STATE_ARGS, const Intersection *ccl_restrict isect, const int index)
+ INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect, const int index)
{
INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, t) = isect->t;
INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, u) = isect->u;
@@ -137,9 +137,8 @@ ccl_device_forceinline void integrator_state_write_shadow_isect(
#endif
}
-ccl_device_forceinline void integrator_state_read_shadow_isect(INTEGRATOR_STATE_CONST_ARGS,
- Intersection *ccl_restrict isect,
- const int index)
+ccl_device_forceinline void integrator_state_read_shadow_isect(
+ INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect, const int index)
{
isect->prim = INTEGRATOR_STATE_ARRAY(shadow_isect, index, prim);
isect->object = INTEGRATOR_STATE_ARRAY(shadow_isect, index, object);
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h
index 2d15c82322a..153f9b79743 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface.h
@@ -36,14 +36,16 @@ CCL_NAMESPACE_BEGIN
#ifdef __SUBSURFACE__
-ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS, ShaderData *sd, const ShaderClosure *sc)
+ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS,
+ ccl_private ShaderData *sd,
+ ccl_private const ShaderClosure *sc)
{
/* We should never have two consecutive BSSRDF bounces, the second one should
* be converted to a diffuse BSDF to avoid this. */
kernel_assert(!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DIFFUSE_ANCESTOR));
/* Setup path state for intersect_subsurface kernel. */
- const Bssrdf *bssrdf = (const Bssrdf *)sc;
+ ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc;
/* Setup ray into surface. */
INTEGRATOR_STATE_WRITE(ray, P) = sd->P;
@@ -89,7 +91,7 @@ ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS, ShaderData *sd, const Sh
}
ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
- ShaderData *sd,
+ ccl_private ShaderData *sd,
const uint32_t path_flag)
{
/* Get bump mapped normal from shader evaluation at exit point. */
@@ -107,7 +109,7 @@ ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
# ifdef __PRINCIPLED__
if (path_flag & PATH_RAY_SUBSURFACE_USE_FRESNEL) {
- PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+ ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
sd, sizeof(PrincipledDiffuseBsdf), weight);
if (bsdf) {
@@ -119,7 +121,8 @@ ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
else
# endif /* __PRINCIPLED__ */
{
- DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+ ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(DiffuseBsdf), weight);
if (bsdf) {
bsdf->N = N;
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
index 3f685e3a2e9..788a5e9b929 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
@@ -33,8 +33,8 @@ ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r,
* nearby points on the same object. */
ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS,
RNGState rng_state,
- Ray &ray,
- LocalIntersection &ss_isect)
+ ccl_private Ray &ray,
+ ccl_private LocalIntersection &ss_isect)
{
float disk_u, disk_v;
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
index d4935b0ce4a..45a43ea67a9 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
@@ -31,8 +31,11 @@ CCL_NAMESPACE_BEGIN
* Magnus Wrenninge, Ryusuke Villemin, Christophe Hery.
* https://graphics.pixar.com/library/PathTracedSubsurface/ */
-ccl_device void subsurface_random_walk_remap(
- const float albedo, const float d, float g, float *sigma_t, float *alpha)
+ccl_device void subsurface_random_walk_remap(const float albedo,
+ const float d,
+ float g,
+ ccl_private float *sigma_t,
+ ccl_private float *alpha)
{
/* Compute attenuation and scattering coefficients from albedo. */
const float g2 = g * g;
@@ -78,9 +81,9 @@ ccl_device void subsurface_random_walk_remap(
ccl_device void subsurface_random_walk_coefficients(const float3 albedo,
const float3 radius,
const float anisotropy,
- float3 *sigma_t,
- float3 *alpha,
- float3 *throughput)
+ ccl_private float3 *sigma_t,
+ ccl_private float3 *alpha,
+ ccl_private float3 *throughput)
{
float sigma_t_x, sigma_t_y, sigma_t_z;
float alpha_x, alpha_y, alpha_z;
@@ -164,7 +167,7 @@ ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, f
ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t,
float t,
bool hit,
- float3 *transmittance)
+ ccl_private float3 *transmittance)
{
float3 T = volume_color_transmittance(sigma_t, t);
if (transmittance) {
@@ -179,8 +182,8 @@ ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t,
ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
RNGState rng_state,
- Ray &ray,
- LocalIntersection &ss_isect)
+ ccl_private Ray &ray,
+ ccl_private LocalIntersection &ss_isect)
{
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
diff --git a/intern/cycles/kernel/integrator/integrator_volume_stack.h b/intern/cycles/kernel/integrator/integrator_volume_stack.h
index 01ebf8376b1..0c4a723de6f 100644
--- a/intern/cycles/kernel/integrator/integrator_volume_stack.h
+++ b/intern/cycles/kernel/integrator/integrator_volume_stack.h
@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN
template<typename StackReadOp, typename StackWriteOp>
ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
- const ShaderData *sd,
+ ccl_private const ShaderData *sd,
StackReadOp stack_read,
StackWriteOp stack_write)
{
@@ -84,7 +84,7 @@ ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
}
}
-ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData *sd)
+ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, ccl_private const ShaderData *sd)
{
volume_stack_enter_exit(
INTEGRATOR_STATE_PASS,
@@ -95,7 +95,8 @@ ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData
});
}
-ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData *sd)
+ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
+ ccl_private const ShaderData *sd)
{
volume_stack_enter_exit(
INTEGRATOR_STATE_PASS,