1 files changed, 135 insertions, 336 deletions
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index ad022716207..bd3791594e0 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -19,10 +19,6 @@
 
 #include "kernel/svm/types.h"
 
-#ifndef __KERNEL_GPU__
-#  define __KERNEL_CPU__
-#endif
-
 CCL_NAMESPACE_BEGIN
 
 /* Constants */
@@ -51,57 +47,40 @@ CCL_NAMESPACE_BEGIN
 #define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024U
 #define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4U
 
-#ifdef __KERNEL_CPU__
-#  define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
-#else
+#ifdef __KERNEL_GPU__
 #  define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU
+#else
+#  define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
 #endif
 
 /* Kernel features */
-#define __SOBOL__
-#define __DPDU__
-#define __BACKGROUND__
+#define __AO__
 #define __CAUSTICS_TRICKS__
-#define __VISIBILITY_FLAG__
-#define __RAY_DIFFERENTIALS__
-#define __CAMERA_CLIPPING__
-#define __INTERSECTION_REFINE__
 #define __CLAMP_SAMPLE__
-#define __PATCH_EVAL__
-#define __SHADOW_CATCHER__
 #define __DENOISING_FEATURES__
-#define __SHADER_RAYTRACE__
-#define __AO__
-#define __PASSES__
+#define __DPDU__
 #define __HAIR__
+#define __OBJECT_MOTION__
+#define __PASSES__
+#define __PATCH_EVAL__
 #define __POINTCLOUD__
+#define __RAY_DIFFERENTIALS__
+#define __SHADER_RAYTRACE__
+#define __SHADOW_CATCHER__
+#define __SHADOW_RECORD_ALL__
+#define __SUBSURFACE__
 #define __SVM__
-#define __EMISSION__
-#define __HOLDOUT__
 #define __TRANSPARENT_SHADOWS__
-#define __BACKGROUND_MIS__
-#define __LAMP_MIS__
-#define __CAMERA_MOTION__
-#define __OBJECT_MOTION__
-#define __BAKING__
-#define __PRINCIPLED__
-#define __SUBSURFACE__
+#define __VISIBILITY_FLAG__
 #define __VOLUME__
-#define __CMJ__
-#define __SHADOW_RECORD_ALL__
-#define __BRANCHED_PATH__
 
 /* Device specific features */
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
 #  ifdef WITH_OSL
 #    define __OSL__
 #  endif
 #  define __VOLUME_RECORD_ALL__
-#endif /* __KERNEL_CPU__ */
-
-#ifdef __KERNEL_GPU_RAYTRACING__
-#  undef __BAKING__
-#endif /* __KERNEL_GPU_RAYTRACING__ */
+#endif /* !__KERNEL_GPU__ */
 
 /* MNEE currently causes "Compute function exceeds available temporary registers"
  * on Metal, disabled for now. */
@@ -111,9 +90,6 @@ CCL_NAMESPACE_BEGIN
 
 /* Scene-based selective features compilation. */
 #ifdef __KERNEL_FEATURES__
-#  if !(__KERNEL_FEATURES & KERNEL_FEATURE_CAMERA_MOTION)
-#    undef __CAMERA_MOTION__
-#  endif
 #  if !(__KERNEL_FEATURES & KERNEL_FEATURE_OBJECT_MOTION)
 #    undef __OBJECT_MOTION__
 #  endif
@@ -129,9 +105,6 @@ CCL_NAMESPACE_BEGIN
 #  if !(__KERNEL_FEATURES & KERNEL_FEATURE_SUBSURFACE)
 #    undef __SUBSURFACE__
 #  endif
-#  if !(__KERNEL_FEATURES & KERNEL_FEATURE_BAKING)
-#    undef __BAKING__
-#  endif
 #  if !(__KERNEL_FEATURES & KERNEL_FEATURE_PATCH_EVALUATION)
 #    undef __PATCH_EVAL__
 #  endif
@@ -141,9 +114,6 @@ CCL_NAMESPACE_BEGIN
 #  if !(__KERNEL_FEATURES & KERNEL_FEATURE_SHADOW_CATCHER)
 #    undef __SHADOW_CATCHER__
 #  endif
-#  if !(__KERNEL_FEATURES & KERNEL_FEATURE_PRINCIPLED)
-#    undef __PRINCIPLED__
-#  endif
 #  if !(__KERNEL_FEATURES & KERNEL_FEATURE_DENOISING)
 #    undef __DENOISING_FEATURES__
 #  endif
@@ -159,36 +129,48 @@ CCL_NAMESPACE_BEGIN
 #  define __BVH_LOCAL__
 #endif
 
-/* Path Tracing
- * note we need to keep the u/v pairs at even values */
+/* Sampling Patterns */
 
+/* Unique numbers for sampling patterns in each bounce. */
 enum PathTraceDimension {
-  PRNG_FILTER_U = 0,
-  PRNG_FILTER_V = 1,
-  PRNG_LENS_U = 2,
-  PRNG_LENS_V = 3,
-  PRNG_TIME = 4,
-  PRNG_UNUSED_0 = 5,
-  PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */
-  PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */
-  PRNG_BASE_NUM = 10,
-
-  PRNG_BSDF_U = 0,
-  PRNG_BSDF_V = 1,
-  PRNG_LIGHT_U = 2,
-  PRNG_LIGHT_V = 3,
-  PRNG_LIGHT_TERMINATE = 4,
-  PRNG_TERMINATE = 5,
-  PRNG_PHASE_CHANNEL = 6,
-  PRNG_SCATTER_DISTANCE = 7,
-  PRNG_BOUNCE_NUM = 8,
-
-  PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */
-  PRNG_BEVEL_V = 7,
+  /* Init bounce */
+  PRNG_FILTER = 0,
+  PRNG_LENS = 1,
+  PRNG_TIME = 2,
+
+  /* Shade bounce */
+  PRNG_TERMINATE = 0,
+  PRNG_LIGHT = 1,
+  PRNG_LIGHT_TERMINATE = 2,
+  /* Surface */
+  PRNG_SURFACE_BSDF = 3,
+  PRNG_SURFACE_AO = 4,
+  PRNG_SURFACE_BEVEL = 5,
+  /* Volume */
+  PRNG_VOLUME_PHASE = 3,
+  PRNG_VOLUME_PHASE_CHANNEL = 4,
+  PRNG_VOLUME_SCATTER_DISTANCE = 5,
+  PRNG_VOLUME_OFFSET = 6,
+  PRNG_VOLUME_SHADE_OFFSET = 7,
+
+  /* Subsurface random walk bounces */
+  PRNG_SUBSURFACE_BSDF = 0,
+  PRNG_SUBSURFACE_PHASE_CHANNEL = 1,
+  PRNG_SUBSURFACE_SCATTER_DISTANCE = 2,
+  PRNG_SUBSURFACE_GUIDE_STRATEGY = 3,
+  PRNG_SUBSURFACE_GUIDE_DIRECTION = 4,
+
+  /* Subsurface disk bounce */
+  PRNG_SUBSURFACE_DISK = 0,
+  PRNG_SUBSURFACE_DISK_RESAMPLE = 1,
+
+  /* High enough number so we don't need to change it when adding new dimensions,
+   * low enough so there is no uint16_t overflow with many bounces. */
+  PRNG_BOUNCE_NUM = 16,
 };
 
 enum SamplingPattern {
-  SAMPLING_PATTERN_SOBOL = 0,
+  SAMPLING_PATTERN_SOBOL_BURLEY = 0,
   SAMPLING_PATTERN_PMJ = 1,
 
   SAMPLING_NUM_PATTERNS,
@@ -425,9 +407,9 @@ typedef enum CryptomatteType {
 } CryptomatteType;
 
 typedef struct BsdfEval {
-  float3 diffuse;
-  float3 glossy;
-  float3 sum;
+  Spectrum diffuse;
+  Spectrum glossy;
+  Spectrum sum;
 } BsdfEval;
 
 /* Closure Filter */
@@ -535,7 +517,8 @@ typedef struct RaySelfPrimitives {
 typedef struct Ray {
   float3 P;   /* origin */
   float3 D;   /* direction */
-  float t;    /* length of the ray */
+  float tmin; /* start distance */
+  float tmax; /* end distance */
   float time; /* time (for motion blur) */
 
   RaySelfPrimitives self;
@@ -672,12 +655,11 @@ typedef struct AttributeDescriptor {
 
 /* For looking up attributes on objects and geometry. */
 typedef struct AttributeMap {
-  uint id;       /* Global unique identifier. */
-  uint element;  /* AttributeElement. */
-  int offset;    /* Offset into __attributes global arrays. */
-  uint8_t type;  /* NodeAttributeType. */
-  uint8_t flags; /* AttributeFlag. */
-  uint8_t pad[2];
+  uint64_t id;      /* Global unique identifier. */
+  int offset;       /* Offset into __attributes global arrays. */
+  uint16_t element; /* AttributeElement. */
+  uint8_t type;     /* NodeAttributeType. */
+  uint8_t flags;    /* AttributeFlag. */
 } AttributeMap;
 
 /* Closure data */
@@ -720,7 +702,7 @@ typedef struct AttributeMap {
  * padded to be 16 bytes, while it's only 12 bytes on the GPU. */
 
 #define SHADER_CLOSURE_BASE \
-  float3 weight; \
+  Spectrum weight; \
   ClosureType type; \
   float sample_weight; \
   float3 N
@@ -729,10 +711,9 @@ typedef struct ccl_align(16) ShaderClosure
 {
   SHADER_CLOSURE_BASE;
 
-#ifdef __KERNEL_CPU__
-  float pad[2];
-#endif
-  float data[10];
+  /* Extra space for closures to store data, somewhat arbitrary but closures
+   * assert that their size fits. */
+  char pad[sizeof(Spectrum) * 2 + sizeof(float) * 4];
 }
 ShaderClosure;
 
@@ -885,10 +866,10 @@ typedef struct ccl_align(16) ShaderData
   float ray_length;
 
 #ifdef __RAY_DIFFERENTIALS__
-  /* differential of P. these are orthogonal to Ng, not N */
-  differential3 dP;
-  /* differential of I */
-  differential3 dI;
+  /* Radius of differential of P. */
+  float dP;
+  /* Radius of differential of I. */
+  float dI;
   /* differential of u, v */
   differential du;
   differential dv;
@@ -923,12 +904,12 @@ typedef struct ccl_align(16) ShaderData
   /* Closure data, we store a fixed array of closures */
   int num_closure;
   int num_closure_left;
-  float3 svm_closure_weight;
+  Spectrum svm_closure_weight;
 
   /* Closure weights summed directly, so we can evaluate
    * emission and shadow transparency with MAX_CLOSURE 0. */
-  float3 closure_emission_background;
-  float3 closure_transparent_extinction;
+  Spectrum closure_emission_background;
+  Spectrum closure_transparent_extinction;
 
   /* At the end so we can adjust size in ShaderDataTinyStorage. */
   struct ShaderClosure closure[MAX_CLOSURE];
@@ -959,7 +940,7 @@ ShaderDataCausticsStorage;
  * Used for decoupled direct/indirect light closure storage. */
 
 typedef struct ShaderVolumeClosure {
-  float3 weight;
+  Spectrum weight;
   float sample_weight;
   float g;
 } ShaderVolumeClosure;
@@ -1072,94 +1053,6 @@ typedef struct KernelCamera {
 } KernelCamera;
 static_assert_align(KernelCamera, 16);
 
-typedef struct KernelFilm {
-  float exposure;
-  int pass_flag;
-
-  int light_pass_flag;
-  int pass_stride;
-
-  int pass_combined;
-  int pass_depth;
-  int pass_position;
-  int pass_normal;
-  int pass_roughness;
-  int pass_motion;
-
-  int pass_motion_weight;
-  int pass_uv;
-  int pass_object_id;
-  int pass_material_id;
-
-  int pass_diffuse_color;
-  int pass_glossy_color;
-  int pass_transmission_color;
-
-  int pass_diffuse_indirect;
-  int pass_glossy_indirect;
-  int pass_transmission_indirect;
-  int pass_volume_indirect;
-
-  int pass_diffuse_direct;
-  int pass_glossy_direct;
-  int pass_transmission_direct;
-  int pass_volume_direct;
-
-  int pass_emission;
-  int pass_background;
-  int pass_ao;
-  float pass_alpha_threshold;
-
-  int pass_shadow;
-  float pass_shadow_scale;
-
-  int pass_shadow_catcher;
-  int pass_shadow_catcher_sample_count;
-  int pass_shadow_catcher_matte;
-
-  int filter_table_offset;
-
-  int cryptomatte_passes;
-  int cryptomatte_depth;
-  int pass_cryptomatte;
-
-  int pass_adaptive_aux_buffer;
-  int pass_sample_count;
-
-  int pass_mist;
-  float mist_start;
-  float mist_inv_depth;
-  float mist_falloff;
-
-  int pass_denoising_normal;
-  int pass_denoising_albedo;
-  int pass_denoising_depth;
-
-  int pass_aov_color;
-  int pass_aov_value;
-  int pass_lightgroup;
-
-  /* XYZ to rendering color space transform. float4 instead of float3 to
-   * ensure consistent padding/alignment across devices. */
-  float4 xyz_to_r;
-  float4 xyz_to_g;
-  float4 xyz_to_b;
-  float4 rgb_to_y;
-  /* Rec709 to rendering color space. */
-  float4 rec709_to_r;
-  float4 rec709_to_g;
-  float4 rec709_to_b;
-  int is_rec709;
-
-  int pass_bake_primitive;
-  int pass_bake_differential;
-
-  int use_approximate_shadow_catcher;
-
-  int pad1;
-} KernelFilm;
-static_assert_align(KernelFilm, 16);
-
 typedef struct KernelFilmConvert {
   int pass_offset;
   int pass_stride;
@@ -1201,108 +1094,6 @@ typedef struct KernelFilmConvert {
 } KernelFilmConvert;
 static_assert_align(KernelFilmConvert, 16);
 
-typedef struct KernelBackground {
-  /* only shader index */
-  int surface_shader;
-  int volume_shader;
-  float volume_step_size;
-  int transparent;
-  float transparent_roughness_squared_threshold;
-
-  /* portal sampling */
-  float portal_weight;
-  int num_portals;
-  int portal_offset;
-
-  /* sun sampling */
-  float sun_weight;
-  /* xyz store direction, w the angle. float4 instead of float3 is used
-   * to ensure consistent padding/alignment across devices. */
-  float4 sun;
-
-  /* map sampling */
-  float map_weight;
-  int map_res_x;
-  int map_res_y;
-
-  int use_mis;
-
-  int lightgroup;
-
-  /* Padding */
-  int pad1, pad2;
-} KernelBackground;
-static_assert_align(KernelBackground, 16);
-
-typedef struct KernelIntegrator {
-  /* emission */
-  int use_direct_light;
-  int num_distribution;
-  int num_all_lights;
-  float pdf_triangles;
-  float pdf_lights;
-  float light_inv_rr_threshold;
-
-  /* bounces */
-  int min_bounce;
-  int max_bounce;
-
-  int max_diffuse_bounce;
-  int max_glossy_bounce;
-  int max_transmission_bounce;
-  int max_volume_bounce;
-
-  /* AO bounces */
-  int ao_bounces;
-  float ao_bounces_distance;
-  float ao_bounces_factor;
-  float ao_additive_factor;
-
-  /* transparent */
-  int transparent_min_bounce;
-  int transparent_max_bounce;
-  int transparent_shadows;
-
-  /* caustics */
-  int caustics_reflective;
-  int caustics_refractive;
-  float filter_glossy;
-
-  /* seed */
-  int seed;
-
-  /* clamp */
-  float sample_clamp_direct;
-  float sample_clamp_indirect;
-
-  /* mis */
-  int use_lamp_mis;
-
-  /* caustics */
-  int use_caustics;
-
-  /* sampler */
-  int sampling_pattern;
-
-  /* volume render */
-  int use_volumes;
-  int volume_max_steps;
-  float volume_step_rate;
-
-  int has_shadow_catcher;
-  float scrambling_distance;
-
-  /* Closure filter. */
-  int filter_closures;
-
-  /* MIS debugging. */
-  int direct_light_sampling_type;
-
-  /* padding */
-  int pad1;
-} KernelIntegrator;
-static_assert_align(KernelIntegrator, 16);
-
 typedef enum KernelBVHLayout {
   BVH_LAYOUT_NONE = 0,
 
@@ -1320,36 +1111,25 @@ typedef enum KernelBVHLayout {
   BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL,
 } KernelBVHLayout;
 
-typedef struct KernelBVH {
-  /* Own BVH */
-  int root;
-  int have_motion;
-  int have_curves;
-  int bvh_layout;
-  int use_bvh_steps;
-  int curve_subdivisions;
+/* Specialized struct that can become constants in dynamic compilation. */
+#define KERNEL_STRUCT_BEGIN(name, parent) struct name {
+#define KERNEL_STRUCT_END(name) \
+  } \
+  ; \
+  static_assert_align(name, 16);
 
-  /* Custom BVH */
-#ifdef __KERNEL_OPTIX__
-  OptixTraversableHandle scene;
-#elif defined __METALRT__
-  metalrt_as_type scene;
+#ifdef __KERNEL_USE_DATA_CONSTANTS__
+#  define KERNEL_STRUCT_MEMBER(parent, type, name) type __unused_##name;
 #else
-#  ifdef __EMBREE__
-  RTCScene scene;
-#    ifndef __KERNEL_64_BIT__
-  int pad2;
-#    endif
-#  else
-  int scene, pad2;
-#  endif
+#  define KERNEL_STRUCT_MEMBER(parent, type, name) type name;
 #endif
-} KernelBVH;
-static_assert_align(KernelBVH, 16);
+
+#include "kernel/data_template.h"
 
 typedef struct KernelTables {
   int beckmann_offset;
-  int pad1, pad2, pad3;
+  int filter_table_offset;
+  int pad1, pad2;
 } KernelTables;
 static_assert_align(KernelTables, 16);
 
@@ -1362,18 +1142,37 @@ typedef struct KernelBake {
 static_assert_align(KernelBake, 16);
 
 typedef struct KernelData {
+  /* Features and limits. */
   uint kernel_features;
   uint max_closures;
   uint max_shaders;
   uint volume_stack_size;
 
+  /* Always dynamic data members. */
   KernelCamera cam;
-  KernelFilm film;
-  KernelBackground background;
-  KernelIntegrator integrator;
-  KernelBVH bvh;
-  KernelTables tables;
   KernelBake bake;
+  KernelTables tables;
+
+  /* Potentially specialized data members. */
+#define KERNEL_STRUCT_BEGIN(name, parent) name parent;
+#include "kernel/data_template.h"
+
+  /* Device specific BVH. */
+#ifdef __KERNEL_OPTIX__
+  OptixTraversableHandle device_bvh;
+#elif defined __METALRT__
+  metalrt_as_type device_bvh;
+#else
+#  ifdef __EMBREE__
+  RTCScene device_bvh;
+#    ifndef __KERNEL_64_BIT__
+  int pad1;
+#    endif
+#  else
+  int device_bvh, pad1;
+#  endif
+#endif
+  int pad2, pad3;
 } KernelData;
 static_assert_align(KernelData, 16);
 
@@ -1557,10 +1356,14 @@ typedef struct KernelShaderEvalInput {
 } KernelShaderEvalInput;
 static_assert_align(KernelShaderEvalInput, 16);
 
-/* Pre-computed sample table sizes for PMJ02 sampler. */
+/* Pre-computed sample table sizes for PMJ02 sampler.
+ *
+ * NOTE: divisions *must* be a power of two, and patterns
+ * ideally should be as well.
+ */
 #define NUM_PMJ_DIVISIONS 32
 #define NUM_PMJ_SAMPLES ((NUM_PMJ_DIVISIONS) * (NUM_PMJ_DIVISIONS))
-#define NUM_PMJ_PATTERNS 1
+#define NUM_PMJ_PATTERNS 64
 
 /* Device kernels.
  *
@@ -1571,7 +1374,7 @@ static_assert_align(KernelShaderEvalInput, 16);
  * If the kernel uses shared CUDA memory, `CUDADeviceQueue::enqueue` is to be modified.
  * The path iteration kernels are handled in `PathTraceWorkGPU::enqueue_path_iteration`. */
 
-typedef enum DeviceKernel {
+typedef enum DeviceKernel : int {
   DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA = 0,
   DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE,
   DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
@@ -1667,42 +1470,38 @@ enum KernelFeatureFlag : uint32_t {
   KERNEL_FEATURE_HAIR = (1U << 12U),
   KERNEL_FEATURE_HAIR_THICK = (1U << 13U),
   KERNEL_FEATURE_OBJECT_MOTION = (1U << 14U),
-  KERNEL_FEATURE_CAMERA_MOTION = (1U << 15U),
 
   /* Denotes whether baking functionality is needed. */
-  KERNEL_FEATURE_BAKING = (1U << 16U),
+  KERNEL_FEATURE_BAKING = (1U << 15U),
 
   /* Use subsurface scattering materials. */
-  KERNEL_FEATURE_SUBSURFACE = (1U << 17U),
+  KERNEL_FEATURE_SUBSURFACE = (1U << 16U),
 
   /* Use volume materials. */
-  KERNEL_FEATURE_VOLUME = (1U << 18U),
+  KERNEL_FEATURE_VOLUME = (1U << 17U),
 
   /* Use OpenSubdiv patch evaluation */
-  KERNEL_FEATURE_PATCH_EVALUATION = (1U << 19U),
+  KERNEL_FEATURE_PATCH_EVALUATION = (1U << 18U),
 
   /* Use Transparent shadows */
-  KERNEL_FEATURE_TRANSPARENT = (1U << 20U),
+  KERNEL_FEATURE_TRANSPARENT = (1U << 19U),
 
   /* Use shadow catcher. */
-  KERNEL_FEATURE_SHADOW_CATCHER = (1U << 21U),
-
-  /* Per-uber shader usage flags. */
-  KERNEL_FEATURE_PRINCIPLED = (1U << 22U),
+  KERNEL_FEATURE_SHADOW_CATCHER = (1U << 20U),
 
   /* Light render passes. */
-  KERNEL_FEATURE_LIGHT_PASSES = (1U << 23U),
+  KERNEL_FEATURE_LIGHT_PASSES = (1U << 21U),
 
   /* Shadow render pass. */
-  KERNEL_FEATURE_SHADOW_PASS = (1U << 24U),
+  KERNEL_FEATURE_SHADOW_PASS = (1U << 22U),
 
   /* AO. */
-  KERNEL_FEATURE_AO_PASS = (1U << 25U),
-  KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U),
+  KERNEL_FEATURE_AO_PASS = (1U << 23U),
+  KERNEL_FEATURE_AO_ADDITIVE = (1U << 24U),
   KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE),
 
   /* MNEE. */
-  KERNEL_FEATURE_MNEE = (1U << 27U),
+  KERNEL_FEATURE_MNEE = (1U << 25U),
 };
 
 /* Shader node feature mask, to specialize shader evaluation for kernels. */
@@ -1729,15 +1528,15 @@ enum KernelFeatureFlag : uint32_t {
 /* Must be constexpr on the CPU to avoid compile errors because the state types
  * are different depending on the main, shadow or null path. For GPU we don't have
  * C++17 everywhere so can't use it. */
-#ifdef __KERNEL_CPU__
+#ifdef __KERNEL_GPU__
+#  define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
+#  define IF_KERNEL_NODES_FEATURE(feature) \
+    if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
+#else
 #  define IF_KERNEL_FEATURE(feature) \
     if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
 #  define IF_KERNEL_NODES_FEATURE(feature) \
     if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
-#else
-#  define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
-#  define IF_KERNEL_NODES_FEATURE(feature) \
-    if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
 #endif
 
 CCL_NAMESPACE_END