From 1df3b51988852fa8ee6b530a64aa23346db9acd4 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 17 Oct 2021 16:10:10 +0200 Subject: Cycles: replace integrator state argument macros * Rename struct KernelGlobals to struct KernelGlobalsCPU * Add KernelGlobals, IntegratorState and ConstIntegratorState typedefs that every device can define in its own way. * Remove INTEGRATOR_STATE_ARGS and INTEGRATOR_STATE_PASS macros and replace with these new typedefs. * Add explicit state argument to INTEGRATOR_STATE and similar macros In preparation for decoupling main and shadow paths. Differential Revision: https://developer.blender.org/D12888 --- intern/cycles/kernel/bvh/bvh.h | 10 +- intern/cycles/kernel/bvh/bvh_embree.h | 8 +- intern/cycles/kernel/bvh/bvh_local.h | 4 +- intern/cycles/kernel/bvh/bvh_nodes.h | 10 +- intern/cycles/kernel/bvh/bvh_shadow_all.h | 4 +- intern/cycles/kernel/bvh/bvh_traversal.h | 4 +- intern/cycles/kernel/bvh/bvh_util.h | 20 +-- intern/cycles/kernel/bvh/bvh_volume.h | 4 +- intern/cycles/kernel/bvh/bvh_volume_all.h | 4 +- intern/cycles/kernel/closure/bsdf.h | 8 +- .../cycles/kernel/closure/bsdf_hair_principled.h | 8 +- intern/cycles/kernel/closure/bsdf_microfacet.h | 8 +- .../cycles/kernel/closure/bsdf_microfacet_multi.h | 4 +- intern/cycles/kernel/device/cpu/globals.h | 6 +- intern/cycles/kernel/device/cpu/image.h | 4 +- intern/cycles/kernel/device/cpu/kernel.cpp | 4 +- intern/cycles/kernel/device/cpu/kernel.h | 14 +- intern/cycles/kernel/device/cpu/kernel_arch.h | 20 +-- intern/cycles/kernel/device/cpu/kernel_arch_impl.h | 20 +-- intern/cycles/kernel/device/cuda/globals.h | 3 +- intern/cycles/kernel/device/gpu/image.h | 4 +- intern/cycles/kernel/device/gpu/kernel.h | 24 +-- intern/cycles/kernel/device/optix/globals.h | 3 +- intern/cycles/kernel/geom/geom_attribute.h | 12 +- intern/cycles/kernel/geom/geom_curve.h | 17 +- intern/cycles/kernel/geom/geom_curve_intersect.h | 4 +- intern/cycles/kernel/geom/geom_motion_curve.h | 15 +- intern/cycles/kernel/geom/geom_motion_triangle.h | 15 +- .../kernel/geom/geom_motion_triangle_intersect.h | 8 +- .../kernel/geom/geom_motion_triangle_shader.h | 2 +- intern/cycles/kernel/geom/geom_object.h | 89 +++++----- intern/cycles/kernel/geom/geom_patch.h | 18 +- intern/cycles/kernel/geom/geom_primitive.h | 32 ++-- intern/cycles/kernel/geom/geom_shader_data.h | 13 +- intern/cycles/kernel/geom/geom_subd_triangle.h | 24 ++- intern/cycles/kernel/geom/geom_triangle.h | 29 ++- .../cycles/kernel/geom/geom_triangle_intersect.h | 8 +- intern/cycles/kernel/geom/geom_volume.h | 4 +- .../kernel/integrator/integrator_init_from_bake.h | 19 +- .../integrator/integrator_init_from_camera.h | 15 +- .../integrator/integrator_intersect_closest.h | 58 +++--- .../integrator/integrator_intersect_shadow.h | 42 ++--- .../integrator/integrator_intersect_subsurface.h | 4 +- .../integrator/integrator_intersect_volume_stack.h | 25 +-- .../kernel/integrator/integrator_megakernel.h | 31 ++-- .../integrator/integrator_shade_background.h | 74 ++++---- .../kernel/integrator/integrator_shade_light.h | 38 ++-- .../kernel/integrator/integrator_shade_shadow.h | 72 ++++---- .../kernel/integrator/integrator_shade_surface.h | 196 +++++++++++---------- .../kernel/integrator/integrator_shade_volume.h | 158 +++++++++-------- intern/cycles/kernel/integrator/integrator_state.h | 69 +++----- .../kernel/integrator/integrator_state_flow.h | 41 ++--- .../kernel/integrator/integrator_state_util.h | 170 +++++++++--------- .../kernel/integrator/integrator_subsurface.h | 62 +++---- .../kernel/integrator/integrator_subsurface_disk.h | 17 +- .../integrator/integrator_subsurface_random_walk.h | 25 +-- .../kernel/integrator/integrator_volume_stack.h | 37 ++-- intern/cycles/kernel/kernel_accumulate.h | 150 ++++++++-------- intern/cycles/kernel/kernel_adaptive_sampling.h | 11 +- intern/cycles/kernel/kernel_bake.h | 8 +- intern/cycles/kernel/kernel_camera.h | 16 +- intern/cycles/kernel/kernel_color.h | 4 +- intern/cycles/kernel/kernel_emission.h | 19 +- intern/cycles/kernel/kernel_id_passes.h | 2 +- intern/cycles/kernel/kernel_jitter.h | 7 +- intern/cycles/kernel/kernel_light.h | 46 +++-- intern/cycles/kernel/kernel_light_background.h | 31 ++-- intern/cycles/kernel/kernel_light_common.h | 5 +- intern/cycles/kernel/kernel_lookup_table.h | 7 +- intern/cycles/kernel/kernel_passes.h | 59 ++++--- intern/cycles/kernel/kernel_path_state.h | 157 +++++++++-------- intern/cycles/kernel/kernel_random.h | 8 +- intern/cycles/kernel/kernel_shader.h | 90 +++++----- intern/cycles/kernel/kernel_shadow_catcher.h | 30 ++-- intern/cycles/kernel/kernel_types.h | 56 +++--- intern/cycles/kernel/osl/osl_closures.cpp | 2 +- intern/cycles/kernel/osl/osl_services.cpp | 38 ++-- intern/cycles/kernel/osl/osl_services.h | 6 +- intern/cycles/kernel/osl/osl_shader.cpp | 16 +- intern/cycles/kernel/osl/osl_shader.h | 16 +- intern/cycles/kernel/svm/svm.h | 86 +++++---- intern/cycles/kernel/svm/svm_ao.h | 18 +- intern/cycles/kernel/svm/svm_aov.h | 18 +- intern/cycles/kernel/svm/svm_attribute.h | 11 +- intern/cycles/kernel/svm/svm_bevel.h | 26 +-- intern/cycles/kernel/svm/svm_blackbody.h | 2 +- intern/cycles/kernel/svm/svm_brick.h | 7 +- intern/cycles/kernel/svm/svm_bump.h | 4 +- intern/cycles/kernel/svm/svm_camera.h | 2 +- intern/cycles/kernel/svm/svm_checker.h | 2 +- intern/cycles/kernel/svm/svm_clamp.h | 2 +- intern/cycles/kernel/svm/svm_closure.h | 40 +++-- intern/cycles/kernel/svm/svm_convert.h | 2 +- intern/cycles/kernel/svm/svm_displace.h | 13 +- intern/cycles/kernel/svm/svm_geometry.h | 12 +- intern/cycles/kernel/svm/svm_hsv.h | 2 +- intern/cycles/kernel/svm/svm_ies.h | 9 +- intern/cycles/kernel/svm/svm_image.h | 14 +- intern/cycles/kernel/svm/svm_light_path.h | 25 ++- intern/cycles/kernel/svm/svm_magic.h | 7 +- intern/cycles/kernel/svm/svm_map_range.h | 2 +- intern/cycles/kernel/svm/svm_mapping.h | 6 +- intern/cycles/kernel/svm/svm_math.h | 4 +- intern/cycles/kernel/svm/svm_mix.h | 2 +- intern/cycles/kernel/svm/svm_musgrave.h | 2 +- intern/cycles/kernel/svm/svm_noisetex.h | 2 +- intern/cycles/kernel/svm/svm_normal.h | 2 +- intern/cycles/kernel/svm/svm_ramp.h | 39 ++-- intern/cycles/kernel/svm/svm_sepcomb_hsv.h | 4 +- intern/cycles/kernel/svm/svm_sky.h | 13 +- intern/cycles/kernel/svm/svm_tex_coord.h | 10 +- intern/cycles/kernel/svm/svm_value.h | 4 +- intern/cycles/kernel/svm/svm_vector_transform.h | 2 +- intern/cycles/kernel/svm/svm_vertex_color.h | 6 +- intern/cycles/kernel/svm/svm_voronoi.h | 11 +- intern/cycles/kernel/svm/svm_voxel.h | 7 +- intern/cycles/kernel/svm/svm_wave.h | 7 +- intern/cycles/kernel/svm/svm_wavelength.h | 2 +- intern/cycles/kernel/svm/svm_white_noise.h | 2 +- intern/cycles/kernel/svm/svm_wireframe.h | 4 +- 120 files changed, 1442 insertions(+), 1416 deletions(-) (limited to 'intern/cycles/kernel') diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index a501cbe7a4b..bdbd574bf0f 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -154,7 +154,7 @@ ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray) return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f; } -ccl_device_intersect bool scene_intersect(ccl_global const KernelGlobals *kg, +ccl_device_intersect bool scene_intersect(KernelGlobals kg, ccl_private const Ray *ray, const uint visibility, ccl_private Intersection *isect) @@ -248,7 +248,7 @@ ccl_device_intersect bool scene_intersect(ccl_global const KernelGlobals *kg, } #ifdef __BVH_LOCAL__ -ccl_device_intersect bool scene_intersect_local(ccl_global const KernelGlobals *kg, +ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, ccl_private const Ray *ray, ccl_private LocalIntersection *local_isect, int local_object, @@ -360,7 +360,7 @@ ccl_device_intersect bool scene_intersect_local(ccl_global const KernelGlobals * #endif #ifdef __SHADOW_RECORD_ALL__ -ccl_device_intersect bool scene_intersect_shadow_all(ccl_global const KernelGlobals *kg, +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect, uint visibility, @@ -448,7 +448,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(ccl_global const KernelGlob #endif /* __SHADOW_RECORD_ALL__ */ #ifdef __VOLUME__ -ccl_device_intersect bool scene_intersect_volume(ccl_global const KernelGlobals *kg, +ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect, const uint visibility) @@ -510,7 +510,7 @@ ccl_device_intersect bool scene_intersect_volume(ccl_global const KernelGlobals #endif /* __VOLUME__ */ #ifdef __VOLUME_RECORD_ALL__ -ccl_device_intersect uint scene_intersect_volume_all(ccl_global const KernelGlobals *kg, +ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect, const uint max_hits, diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h index d3db6295ea5..7fa0cfdc510 100644 --- a/intern/cycles/kernel/bvh/bvh_embree.h +++ b/intern/cycles/kernel/bvh/bvh_embree.h @@ -35,7 +35,7 @@ struct CCLIntersectContext { RAY_VOLUME_ALL = 4, } RayType; - const KernelGlobals *kg; + KernelGlobals kg; RayType type; /* for shadow rays */ @@ -50,7 +50,7 @@ struct CCLIntersectContext { int local_object_id; uint *lcg_state; - CCLIntersectContext(const KernelGlobals *kg_, RayType type_) + CCLIntersectContext(KernelGlobals kg_, RayType type_) { kg = kg_; type = type_; @@ -101,7 +101,7 @@ ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray, rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID; } -ccl_device_inline void kernel_embree_convert_hit(const KernelGlobals *kg, +ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect) @@ -137,7 +137,7 @@ ccl_device_inline void kernel_embree_convert_hit(const KernelGlobals *kg, } ccl_device_inline void kernel_embree_convert_sss_hit( - const KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object) + KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object) { isect->u = 1.0f - hit->v - hit->u; isect->v = hit->u; diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h index 78ad4a34da9..79cde69699e 100644 --- a/intern/cycles/kernel/bvh/bvh_local.h +++ b/intern/cycles/kernel/bvh/bvh_local.h @@ -36,7 +36,7 @@ ccl_device #else ccl_device_inline #endif - bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg, + bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, ccl_private const Ray *ray, ccl_private LocalIntersection *local_isect, int local_object, @@ -196,7 +196,7 @@ ccl_device_inline return false; } -ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg, +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, ccl_private const Ray *ray, ccl_private LocalIntersection *local_isect, int local_object, diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h index 49b37f39671..71122085f69 100644 --- a/intern/cycles/kernel/bvh/bvh_nodes.h +++ b/intern/cycles/kernel/bvh/bvh_nodes.h @@ -16,7 +16,7 @@ // TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and // 3-vector which might be faster. -ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(ccl_global const KernelGlobals *kg, +ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg, int node_addr, int child) { @@ -28,7 +28,7 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(ccl_global const return space; } -ccl_device_forceinline int bvh_aligned_node_intersect(ccl_global const KernelGlobals *kg, +ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, const float3 P, const float3 idir, const float t, @@ -76,7 +76,7 @@ ccl_device_forceinline int bvh_aligned_node_intersect(ccl_global const KernelGlo #endif } -ccl_device_forceinline bool bvh_unaligned_node_intersect_child(ccl_global const KernelGlobals *kg, +ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg, const float3 P, const float3 dir, const float t, @@ -102,7 +102,7 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(ccl_global const return tnear <= tfar; } -ccl_device_forceinline int bvh_unaligned_node_intersect(ccl_global const KernelGlobals *kg, +ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg, const float3 P, const float3 dir, const float3 idir, @@ -134,7 +134,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(ccl_global const KernelG return mask; } -ccl_device_forceinline int bvh_node_intersect(ccl_global const KernelGlobals *kg, +ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg, const float3 P, const float3 dir, const float3 idir, diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h index 7e2edd2684c..42ab9eda37e 100644 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h @@ -36,7 +36,7 @@ ccl_device #else ccl_device_inline #endif - bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg, + bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect_array, const uint visibility, @@ -298,7 +298,7 @@ ccl_device_inline return false; } -ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg, +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect_array, const uint visibility, diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h index 9f271a4730c..1c17ebf767f 100644 --- a/intern/cycles/kernel/bvh/bvh_traversal.h +++ b/intern/cycles/kernel/bvh/bvh_traversal.h @@ -31,7 +31,7 @@ * BVH_MOTION: motion blur rendering */ -ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg, +ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect, const uint visibility) @@ -228,7 +228,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlob return (isect->prim != PRIM_NONE); } -ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg, +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect, const uint visibility) diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h index 31aae389da0..d45eeec4815 100644 --- a/intern/cycles/kernel/bvh/bvh_util.h +++ b/intern/cycles/kernel/bvh/bvh_util.h @@ -139,8 +139,9 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection * /* Utility to quickly get flags from an intersection. */ -ccl_device_forceinline int intersection_get_shader_flags( - ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int type) +ccl_device_forceinline int intersection_get_shader_flags(KernelGlobals kg, + const int prim, + const int type) { int shader = 0; @@ -159,8 +160,9 @@ ccl_device_forceinline int intersection_get_shader_flags( return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; } -ccl_device_forceinline int intersection_get_shader_from_isect_prim( - ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type) +ccl_device_forceinline int intersection_get_shader_from_isect_prim(KernelGlobals kg, + const int prim, + const int isect_type) { int shader = 0; @@ -179,23 +181,21 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim( return shader & SHADER_MASK; } -ccl_device_forceinline int intersection_get_shader(ccl_global const KernelGlobals *ccl_restrict kg, - ccl_private const Intersection *ccl_restrict - isect) +ccl_device_forceinline int intersection_get_shader( + KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect) { return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type); } ccl_device_forceinline int intersection_get_object_flags( - ccl_global const KernelGlobals *ccl_restrict kg, - ccl_private const Intersection *ccl_restrict isect) + KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect) { return kernel_tex_fetch(__object_flag, isect->object); } /* TODO: find a better (faster) solution for this. Maybe store offset per object for * attributes needed in intersection? */ -ccl_device_inline int intersection_find_attribute(ccl_global const KernelGlobals *kg, +ccl_device_inline int intersection_find_attribute(KernelGlobals kg, const int object, const uint id) { diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h index d3bfce2d96b..fa56bd02bef 100644 --- a/intern/cycles/kernel/bvh/bvh_volume.h +++ b/intern/cycles/kernel/bvh/bvh_volume.h @@ -35,7 +35,7 @@ ccl_device #else ccl_device_inline #endif - bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg, + bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect, const uint visibility) @@ -221,7 +221,7 @@ ccl_device_inline return (isect->prim != PRIM_NONE); } -ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg, +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect, const uint visibility) diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h index f0fe95924cf..1d7d942e736 100644 --- a/intern/cycles/kernel/bvh/bvh_volume_all.h +++ b/intern/cycles/kernel/bvh/bvh_volume_all.h @@ -35,7 +35,7 @@ ccl_device #else ccl_device_inline #endif - uint BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg, + uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, ccl_private const Ray *ray, Intersection *isect_array, const uint max_hits, @@ -289,7 +289,7 @@ ccl_device_inline return num_hits; } -ccl_device_inline uint BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg, +ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals kg, ccl_private const Ray *ray, Intersection *isect_array, const uint max_hits, diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index e115bef3170..28c889f2841 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -111,7 +111,7 @@ ccl_device_inline float shift_cos_in(float cos_in, const float frequency_multipl return val; } -ccl_device_inline int bsdf_sample(ccl_global const KernelGlobals *kg, +ccl_device_inline int bsdf_sample(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private const ShaderClosure *sc, float randu, @@ -467,7 +467,7 @@ ccl_device ccl_device_inline #endif float3 - bsdf_eval(ccl_global const KernelGlobals *kg, + bsdf_eval(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private const ShaderClosure *sc, const float3 omega_in, @@ -652,9 +652,7 @@ ccl_device_inline return eval; } -ccl_device void bsdf_blur(ccl_global const KernelGlobals *kg, - ccl_private ShaderClosure *sc, - float roughness) +ccl_device void bsdf_blur(KernelGlobals kg, ccl_private ShaderClosure *sc, float roughness) { /* TODO: do we want to blur volume closures? */ #ifdef __SVM__ diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h index 17097b0739b..a474c5661b3 100644 --- a/intern/cycles/kernel/closure/bsdf_hair_principled.h +++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h @@ -180,7 +180,7 @@ ccl_device_inline float longitudinal_scattering( } /* Combine the three values using their luminances. */ -ccl_device_inline float4 combine_with_energy(ccl_global const KernelGlobals *kg, float3 c) +ccl_device_inline float4 combine_with_energy(KernelGlobals kg, float3 c) { return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c)); } @@ -229,7 +229,7 @@ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd, #endif /* __HAIR__ */ /* Given the Fresnel term and transmittance, generate the attenuation terms for each bounce. */ -ccl_device_inline void hair_attenuation(ccl_global const KernelGlobals *kg, +ccl_device_inline void hair_attenuation(KernelGlobals kg, float f, float3 T, ccl_private float4 *Ap) @@ -281,7 +281,7 @@ ccl_device_inline void hair_alpha_angles(float sin_theta_i, } /* Evaluation function for our shader. */ -ccl_device float3 bsdf_principled_hair_eval(ccl_global const KernelGlobals *kg, +ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg, ccl_private const ShaderData *sd, ccl_private const ShaderClosure *sc, const float3 omega_in, @@ -359,7 +359,7 @@ ccl_device float3 bsdf_principled_hair_eval(ccl_global const KernelGlobals *kg, } /* Sampling function for the hair shader. */ -ccl_device int bsdf_principled_hair_sample(ccl_global const KernelGlobals *kg, +ccl_device int bsdf_principled_hair_sample(KernelGlobals kg, ccl_private const ShaderClosure *sc, ccl_private ShaderData *sd, float randu, diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index 41c35867a6b..a4e1b7a491c 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -55,7 +55,7 @@ static_assert(sizeof(ShaderClosure) >= sizeof(MicrofacetBsdf), "MicrofacetBsdf i /* Beckmann and GGX microfacet importance sampling. */ -ccl_device_inline void microfacet_beckmann_sample_slopes(ccl_global const KernelGlobals *kg, +ccl_device_inline void microfacet_beckmann_sample_slopes(KernelGlobals kg, const float cos_theta_i, const float sin_theta_i, float randu, @@ -195,7 +195,7 @@ ccl_device_inline void microfacet_ggx_sample_slopes(const float cos_theta_i, *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x) * (*slope_x)); } -ccl_device_forceinline float3 microfacet_sample_stretched(ccl_global const KernelGlobals *kg, +ccl_device_forceinline float3 microfacet_sample_stretched(KernelGlobals kg, const float3 omega_i, const float alpha_x, const float alpha_y, @@ -549,7 +549,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(ccl_private const ShaderClos return make_float3(out, out, out); } -ccl_device int bsdf_microfacet_ggx_sample(ccl_global const KernelGlobals *kg, +ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg, ccl_private const ShaderClosure *sc, float3 Ng, float3 I, @@ -977,7 +977,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(ccl_private const Shade return make_float3(out, out, out); } -ccl_device int bsdf_microfacet_beckmann_sample(ccl_global const KernelGlobals *kg, +ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg, ccl_private const ShaderClosure *sc, float3 Ng, float3 I, diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h index 6ee1139ddbb..b7bd7faaa54 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h @@ -476,7 +476,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(ccl_private const Shade bsdf->extra->cspec0); } -ccl_device int bsdf_microfacet_multi_ggx_sample(ccl_global const KernelGlobals *kg, +ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg, ccl_private const ShaderClosure *sc, float3 Ng, float3 I, @@ -639,7 +639,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(ccl_private const bsdf->extra->cspec0); } -ccl_device int bsdf_microfacet_multi_ggx_glass_sample(ccl_global const KernelGlobals *kg, +ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg, ccl_private const ShaderClosure *sc, float3 Ng, float3 I, diff --git a/intern/cycles/kernel/device/cpu/globals.h b/intern/cycles/kernel/device/cpu/globals.h index 98b036e269d..fb9aae38cfc 100644 --- a/intern/cycles/kernel/device/cpu/globals.h +++ b/intern/cycles/kernel/device/cpu/globals.h @@ -34,7 +34,7 @@ struct OSLThreadData; struct OSLShadingSystem; #endif -typedef struct KernelGlobals { +typedef struct KernelGlobalsCPU { #define KERNEL_TEX(type, name) texture name; #include "kernel/kernel_textures.h" @@ -51,7 +51,9 @@ typedef struct KernelGlobals { /* **** Run-time data **** */ ProfilingState profiler; -} KernelGlobals; +} KernelGlobalsCPU; + +typedef const KernelGlobalsCPU *ccl_restrict KernelGlobals; /* Abstraction macros */ #define kernel_tex_fetch(tex, index) (kg->tex.fetch(index)) diff --git a/intern/cycles/kernel/device/cpu/image.h b/intern/cycles/kernel/device/cpu/image.h index 57e81ab186d..44c5d7ef065 100644 --- a/intern/cycles/kernel/device/cpu/image.h +++ b/intern/cycles/kernel/device/cpu/image.h @@ -583,7 +583,7 @@ template struct NanoVDBInterpolator { #undef SET_CUBIC_SPLINE_WEIGHTS -ccl_device float4 kernel_tex_image_interp(const KernelGlobals *kg, int id, float x, float y) +ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y) { const TextureInfo &info = kernel_tex_fetch(__texture_info, id); @@ -611,7 +611,7 @@ ccl_device float4 kernel_tex_image_interp(const KernelGlobals *kg, int id, float } } -ccl_device float4 kernel_tex_image_interp_3d(const KernelGlobals *kg, +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg, int id, float3 P, InterpolationType interp) diff --git a/intern/cycles/kernel/device/cpu/kernel.cpp b/intern/cycles/kernel/device/cpu/kernel.cpp index ac1cdf5fffe..8519b77aa08 100644 --- a/intern/cycles/kernel/device/cpu/kernel.cpp +++ b/intern/cycles/kernel/device/cpu/kernel.cpp @@ -64,7 +64,7 @@ CCL_NAMESPACE_BEGIN /* Memory Copy */ -void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t) +void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, size_t) { if (strcmp(name, "__data") == 0) { kg->__data = *(KernelData *)host; @@ -74,7 +74,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t) } } -void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size) +void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem, size_t size) { if (0) { } diff --git a/intern/cycles/kernel/device/cpu/kernel.h b/intern/cycles/kernel/device/cpu/kernel.h index ae2a841835a..28337a58898 100644 --- a/intern/cycles/kernel/device/cpu/kernel.h +++ b/intern/cycles/kernel/device/cpu/kernel.h @@ -29,17 +29,17 @@ CCL_NAMESPACE_BEGIN #define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name) struct IntegratorStateCPU; -struct KernelGlobals; +struct KernelGlobalsCPU; struct KernelData; -KernelGlobals *kernel_globals_create(); -void kernel_globals_free(KernelGlobals *kg); +KernelGlobalsCPU *kernel_globals_create(); +void kernel_globals_free(KernelGlobalsCPU *kg); -void *kernel_osl_memory(const KernelGlobals *kg); -bool kernel_osl_use(const KernelGlobals *kg); +void *kernel_osl_memory(const KernelGlobalsCPU *kg); +bool kernel_osl_use(const KernelGlobalsCPU *kg); -void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size); -void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size); +void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, size_t size); +void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem, size_t size); #define KERNEL_ARCH cpu #include "kernel/device/cpu/kernel_arch.h" diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h index 8b7b0ec0548..ae7fab65100 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch.h @@ -21,16 +21,16 @@ */ #define KERNEL_INTEGRATOR_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobals *ccl_restrict kg, \ + void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \ IntegratorStateCPU *state) #define KERNEL_INTEGRATOR_SHADE_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobals *ccl_restrict kg, \ + void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \ IntegratorStateCPU *state, \ ccl_global float *render_buffer) #define KERNEL_INTEGRATOR_INIT_FUNCTION(name) \ - bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobals *ccl_restrict kg, \ + bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \ IntegratorStateCPU *state, \ KernelWorkTile *tile, \ ccl_global float *render_buffer) @@ -56,11 +56,11 @@ KERNEL_INTEGRATOR_SHADE_FUNCTION(megakernel); * Shader evaluation. */ -void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobals *kg, +void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset); -void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg, +void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset); @@ -70,7 +70,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg, */ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)( - const KernelGlobals *kg, + const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int x, int y, @@ -79,14 +79,14 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)( int offset, int stride); -void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobals *kg, +void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int y, int start_x, int width, int offset, int stride); -void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobals *kg, +void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int x, int start_y, @@ -98,7 +98,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobals * * Cryptomatte. */ -void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobals *kg, +void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int pixel_index); @@ -108,6 +108,6 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobals *kg, /* TODO(sergey): Needs to be re-implemented. Or not? Brecht did it already :) */ void KERNEL_FUNCTION_FULL_NAME(bake)( - const KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride); + const KernelGlobalsCPU *kg, float *buffer, int sample, int x, int y, int offset, int stride); #undef KERNEL_ARCH diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index 23e371f165f..bf8667ac045 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -70,7 +70,7 @@ CCL_NAMESPACE_BEGIN #endif #define DEFINE_INTEGRATOR_KERNEL(name) \ - void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobals *kg, \ + void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \ IntegratorStateCPU *state) \ { \ KERNEL_INVOKE(name, kg, state); \ @@ -78,7 +78,7 @@ CCL_NAMESPACE_BEGIN #define DEFINE_INTEGRATOR_SHADE_KERNEL(name) \ void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \ - const KernelGlobals *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \ + const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \ { \ KERNEL_INVOKE(name, kg, state, render_buffer); \ } @@ -86,7 +86,7 @@ CCL_NAMESPACE_BEGIN /* TODO: Either use something like get_work_pixel(), or simplify tile which is passed here, so * that it does not contain unused fields. */ #define DEFINE_INTEGRATOR_INIT_KERNEL(name) \ - bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobals *kg, \ + bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \ IntegratorStateCPU *state, \ KernelWorkTile *tile, \ ccl_global float *render_buffer) \ @@ -112,7 +112,7 @@ DEFINE_INTEGRATOR_SHADE_KERNEL(megakernel) * Shader evaluation. */ -void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg, +void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset) @@ -124,7 +124,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg, #endif } -void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobals *kg, +void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset) @@ -141,7 +141,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobals *kg, */ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)( - const KernelGlobals *kg, + const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int x, int y, @@ -159,7 +159,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)( #endif } -void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobals *kg, +void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int y, int start_x, @@ -174,7 +174,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobals * #endif } -void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobals *kg, +void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int x, int start_y, @@ -193,7 +193,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobals * * Cryptomatte. */ -void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobals *kg, +void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int pixel_index) { @@ -210,7 +210,7 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobals *kg, /* TODO(sergey): Needs to be re-implemented. Or not? Brecht did it already :) */ void KERNEL_FUNCTION_FULL_NAME(bake)( - const KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride) + const KernelGlobalsCPU *kg, float *buffer, int sample, int x, int y, int offset, int stride) { #if 0 # ifdef KERNEL_STUB diff --git a/intern/cycles/kernel/device/cuda/globals.h b/intern/cycles/kernel/device/cuda/globals.h index 169047175f5..2c187cf8a23 100644 --- a/intern/cycles/kernel/device/cuda/globals.h +++ b/intern/cycles/kernel/device/cuda/globals.h @@ -27,9 +27,10 @@ CCL_NAMESPACE_BEGIN /* Not actually used, just a NULL pointer that gets passed everywhere, which we * hope gets optimized out by the compiler. */ -struct KernelGlobals { +struct KernelGlobalsGPU { int unused[1]; }; +typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals; /* Global scene data and textures */ __constant__ KernelData __data; diff --git a/intern/cycles/kernel/device/gpu/image.h b/intern/cycles/kernel/device/gpu/image.h index b015c78a8f5..95a37c693ae 100644 --- a/intern/cycles/kernel/device/gpu/image.h +++ b/intern/cycles/kernel/device/gpu/image.h @@ -189,7 +189,7 @@ ccl_device_noinline T kernel_tex_image_interp_nanovdb( } #endif -ccl_device float4 kernel_tex_image_interp(const KernelGlobals *kg, int id, float x, float y) +ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y) { const TextureInfo &info = kernel_tex_fetch(__texture_info, id); @@ -221,7 +221,7 @@ ccl_device float4 kernel_tex_image_interp(const KernelGlobals *kg, int id, float } } -ccl_device float4 kernel_tex_image_interp_3d(const KernelGlobals *kg, +ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg, int id, float3 P, InterpolationType interp) diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 21901215757..56beaf1fd91 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -51,8 +51,8 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) const int state = ccl_gpu_global_id_x(); if (state < num_states) { - INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0; - INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0; + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; } } @@ -244,7 +244,7 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B { gpu_parallel_active_index_array( num_states, indices, num_indices, [kernel](const int state) { - return (INTEGRATOR_STATE(path, queued_kernel) == kernel); + return (INTEGRATOR_STATE(state, path, queued_kernel) == kernel); }); } @@ -256,7 +256,7 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B { gpu_parallel_active_index_array( num_states, indices, num_indices, [kernel](const int state) { - return (INTEGRATOR_STATE(shadow_path, queued_kernel) == kernel); + return (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == kernel); }); } @@ -265,8 +265,8 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B { gpu_parallel_active_index_array( num_states, indices, num_indices, [](const int state) { - return (INTEGRATOR_STATE(path, queued_kernel) != 0) || - (INTEGRATOR_STATE(shadow_path, queued_kernel) != 0); + return (INTEGRATOR_STATE(state, path, queued_kernel) != 0) || + (INTEGRATOR_STATE(state, shadow_path, queued_kernel) != 0); }); } @@ -278,8 +278,8 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B { gpu_parallel_active_index_array( num_states, indices + indices_offset, num_indices, [](const int state) { - return (INTEGRATOR_STATE(path, queued_kernel) == 0) && - (INTEGRATOR_STATE(shadow_path, queued_kernel) == 0); + return (INTEGRATOR_STATE(state, path, queued_kernel) == 0) && + (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0); }); } @@ -289,8 +289,8 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_SORTED_INDEX_DEFAULT_B { gpu_parallel_sorted_index_array( num_states, indices, num_indices, key_prefix_sum, [kernel](const int state) { - return (INTEGRATOR_STATE(path, queued_kernel) == kernel) ? - INTEGRATOR_STATE(path, shader_sort_key) : + return (INTEGRATOR_STATE(state, path, queued_kernel) == kernel) ? + INTEGRATOR_STATE(state, path, shader_sort_key) : GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY; }); } @@ -304,8 +304,8 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B gpu_parallel_active_index_array( num_states, indices, num_indices, [num_active_paths](const int state) { return (state >= num_active_paths) && - ((INTEGRATOR_STATE(path, queued_kernel) != 0) || - (INTEGRATOR_STATE(shadow_path, queued_kernel) != 0)); + ((INTEGRATOR_STATE(state, path, queued_kernel) != 0) || + (INTEGRATOR_STATE(state, shadow_path, queued_kernel) != 0)); }); } diff --git a/intern/cycles/kernel/device/optix/globals.h b/intern/cycles/kernel/device/optix/globals.h index 7d898ed5d91..7b8ebfe50e6 100644 --- a/intern/cycles/kernel/device/optix/globals.h +++ b/intern/cycles/kernel/device/optix/globals.h @@ -27,9 +27,10 @@ CCL_NAMESPACE_BEGIN /* Not actually used, just a NULL pointer that gets passed everywhere, which we * hope gets optimized out by the compiler. */ -struct KernelGlobals { +struct KernelGlobalsGPU { int unused[1]; }; +typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals; /* Launch parameters */ struct KernelParamsOptiX { diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h index 850ac44e6e0..848e0430caa 100644 --- a/intern/cycles/kernel/geom/geom_attribute.h +++ b/intern/cycles/kernel/geom/geom_attribute.h @@ -27,11 +27,9 @@ CCL_NAMESPACE_BEGIN * Lookup of attributes is different between OSL and SVM, as OSL is ustring * based while for SVM we use integer ids. */ -ccl_device_inline uint subd_triangle_patch(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd); +ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd); -ccl_device_inline uint attribute_primitive_type(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, ccl_private const ShaderData *sd) { if ((sd->type & PRIMITIVE_ALL_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) { return ATTR_PRIM_SUBD; @@ -50,12 +48,12 @@ ccl_device_inline AttributeDescriptor attribute_not_found() /* Find attribute based on ID */ -ccl_device_inline uint object_attribute_map_offset(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline uint object_attribute_map_offset(KernelGlobals kg, int object) { return kernel_tex_fetch(__objects, object).attribute_map_offset; } -ccl_device_inline AttributeDescriptor find_attribute(ccl_global const KernelGlobals *kg, +ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg, ccl_private const ShaderData *sd, uint id) { @@ -102,7 +100,7 @@ ccl_device_inline AttributeDescriptor find_attribute(ccl_global const KernelGlob /* Transform matrix attribute on meshes */ -ccl_device Transform primitive_attribute_matrix(ccl_global const KernelGlobals *kg, +ccl_device Transform primitive_attribute_matrix(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc) { diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 07f218d781b..7271193eef8 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN /* Reading attributes on various curve elements */ -ccl_device float curve_attribute_float(ccl_global const KernelGlobals *kg, +ccl_device float curve_attribute_float(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float *dx, @@ -69,7 +69,7 @@ ccl_device float curve_attribute_float(ccl_global const KernelGlobals *kg, } } -ccl_device float2 curve_attribute_float2(ccl_global const KernelGlobals *kg, +ccl_device float2 curve_attribute_float2(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float2 *dx, @@ -115,7 +115,7 @@ ccl_device float2 curve_attribute_float2(ccl_global const KernelGlobals *kg, } } -ccl_device float3 curve_attribute_float3(ccl_global const KernelGlobals *kg, +ccl_device float3 curve_attribute_float3(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float3 *dx, @@ -157,7 +157,7 @@ ccl_device float3 curve_attribute_float3(ccl_global const KernelGlobals *kg, } } -ccl_device float4 curve_attribute_float4(ccl_global const KernelGlobals *kg, +ccl_device float4 curve_attribute_float4(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float4 *dx, @@ -201,8 +201,7 @@ ccl_device float4 curve_attribute_float4(ccl_global const KernelGlobals *kg, /* Curve thickness */ -ccl_device float curve_thickness(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device float curve_thickness(KernelGlobals kg, ccl_private const ShaderData *sd) { float r = 0.0f; @@ -230,8 +229,7 @@ ccl_device float curve_thickness(ccl_global const KernelGlobals *kg, /* Curve location for motion pass, linear interpolation between keys and * ignoring radius because we do the same for the motion keys */ -ccl_device float3 curve_motion_center_location(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device float3 curve_motion_center_location(KernelGlobals kg, ccl_private const ShaderData *sd) { KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); @@ -247,8 +245,7 @@ ccl_device float3 curve_motion_center_location(ccl_global const KernelGlobals *k /* Curve tangent normal */ -ccl_device float3 curve_tangent_normal(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device float3 curve_tangent_normal(KernelGlobals kg, ccl_private const ShaderData *sd) { float3 tgN = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h index 04af8ea1421..fb0b80b281f 100644 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ b/intern/cycles/kernel/geom/geom_curve_intersect.h @@ -625,7 +625,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org, return false; } -ccl_device_forceinline bool curve_intersect(ccl_global const KernelGlobals *kg, +ccl_device_forceinline bool curve_intersect(KernelGlobals kg, ccl_private Intersection *isect, const float3 P, const float3 dir, @@ -679,7 +679,7 @@ ccl_device_forceinline bool curve_intersect(ccl_global const KernelGlobals *kg, } } -ccl_device_inline void curve_shader_setup(ccl_global const KernelGlobals *kg, +ccl_device_inline void curve_shader_setup(KernelGlobals kg, ccl_private ShaderData *sd, float3 P, float3 D, diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h index 5754608a69b..2dd213d43f6 100644 --- a/intern/cycles/kernel/geom/geom_motion_curve.h +++ b/intern/cycles/kernel/geom/geom_motion_curve.h @@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN #ifdef __HAIR__ -ccl_device_inline void motion_curve_keys_for_step_linear(ccl_global const KernelGlobals *kg, +ccl_device_inline void motion_curve_keys_for_step_linear(KernelGlobals kg, int offset, int numkeys, int numsteps, @@ -54,13 +54,8 @@ ccl_device_inline void motion_curve_keys_for_step_linear(ccl_global const Kernel } /* return 2 curve key locations */ -ccl_device_inline void motion_curve_keys_linear(ccl_global const KernelGlobals *kg, - int object, - int prim, - float time, - int k0, - int k1, - float4 keys[2]) +ccl_device_inline void motion_curve_keys_linear( + KernelGlobals kg, int object, int prim, float time, int k0, int k1, float4 keys[2]) { /* get motion info */ int numsteps, numkeys; @@ -86,7 +81,7 @@ ccl_device_inline void motion_curve_keys_linear(ccl_global const KernelGlobals * keys[1] = (1.0f - t) * keys[1] + t * next_keys[1]; } -ccl_device_inline void motion_curve_keys_for_step(ccl_global const KernelGlobals *kg, +ccl_device_inline void motion_curve_keys_for_step(KernelGlobals kg, int offset, int numkeys, int numsteps, @@ -119,7 +114,7 @@ ccl_device_inline void motion_curve_keys_for_step(ccl_global const KernelGlobals } /* return 2 curve key locations */ -ccl_device_inline void motion_curve_keys(ccl_global const KernelGlobals *kg, +ccl_device_inline void motion_curve_keys(KernelGlobals kg, int object, int prim, float time, diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 547f03af47c..69d15f950ec 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -33,7 +33,7 @@ CCL_NAMESPACE_BEGIN /* Time interpolation of vertex positions and normals */ -ccl_device_inline void motion_triangle_verts_for_step(ccl_global const KernelGlobals *kg, +ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals kg, uint4 tri_vindex, int offset, int numverts, @@ -60,7 +60,7 @@ ccl_device_inline void motion_triangle_verts_for_step(ccl_global const KernelGlo } } -ccl_device_inline void motion_triangle_normals_for_step(ccl_global const KernelGlobals *kg, +ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals kg, uint4 tri_vindex, int offset, int numverts, @@ -88,7 +88,7 @@ ccl_device_inline void motion_triangle_normals_for_step(ccl_global const KernelG } ccl_device_inline void motion_triangle_vertices( - ccl_global const KernelGlobals *kg, int object, int prim, float time, float3 verts[3]) + KernelGlobals kg, int object, int prim, float time, float3 verts[3]) { /* get motion info */ int numsteps, numverts; @@ -116,13 +116,8 @@ ccl_device_inline void motion_triangle_vertices( verts[2] = (1.0f - t) * verts[2] + t * next_verts[2]; } -ccl_device_inline float3 motion_triangle_smooth_normal(ccl_global const KernelGlobals *kg, - float3 Ng, - int object, - int prim, - float u, - float v, - float time) +ccl_device_inline float3 motion_triangle_smooth_normal( + KernelGlobals kg, float3 Ng, int object, int prim, float u, float v, float time) { /* get motion info */ int numsteps, numverts; diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h index 94d00875f0a..256e7add21e 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h @@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN * a closer distance. */ -ccl_device_inline float3 motion_triangle_refine(ccl_global const KernelGlobals *kg, +ccl_device_inline float3 motion_triangle_refine(KernelGlobals kg, ccl_private ShaderData *sd, float3 P, float3 D, @@ -92,7 +92,7 @@ ccl_device_noinline ccl_device_inline # endif float3 - motion_triangle_refine_local(ccl_global const KernelGlobals *kg, + motion_triangle_refine_local(KernelGlobals kg, ccl_private ShaderData *sd, float3 P, float3 D, @@ -145,7 +145,7 @@ ccl_device_inline * time and do a ray intersection with the resulting triangle. */ -ccl_device_inline bool motion_triangle_intersect(ccl_global const KernelGlobals *kg, +ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg, ccl_private Intersection *isect, float3 P, float3 dir, @@ -202,7 +202,7 @@ ccl_device_inline bool motion_triangle_intersect(ccl_global const KernelGlobals * Returns whether traversal should be stopped. */ #ifdef __BVH_LOCAL__ -ccl_device_inline bool motion_triangle_intersect_local(ccl_global const KernelGlobals *kg, +ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg, ccl_private LocalIntersection *local_isect, float3 P, float3 dir, diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h index 25a68fa7781..fc7c181882e 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h @@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN * normals */ /* return 3 triangle vertex normals */ -ccl_device_noinline void motion_triangle_shader_setup(ccl_global const KernelGlobals *kg, +ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg, ccl_private ShaderData *sd, const float3 P, const float3 D, diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index 730c01d4709..34a9d639d9d 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -37,7 +37,7 @@ enum ObjectVectorTransform { OBJECT_PASS_MOTION_PRE = 0, OBJECT_PASS_MOTION_POST /* Object to world space transformation */ -ccl_device_inline Transform object_fetch_transform(ccl_global const KernelGlobals *kg, +ccl_device_inline Transform object_fetch_transform(KernelGlobals kg, int object, enum ObjectTransform type) { @@ -51,9 +51,7 @@ ccl_device_inline Transform object_fetch_transform(ccl_global const KernelGlobal /* Lamp to world space transformation */ -ccl_device_inline Transform lamp_fetch_transform(ccl_global const KernelGlobals *kg, - int lamp, - bool inverse) +ccl_device_inline Transform lamp_fetch_transform(KernelGlobals kg, int lamp, bool inverse) { if (inverse) { return kernel_tex_fetch(__lights, lamp).itfm; @@ -65,7 +63,7 @@ ccl_device_inline Transform lamp_fetch_transform(ccl_global const KernelGlobals /* Object to world space transformation for motion vectors */ -ccl_device_inline Transform object_fetch_motion_pass_transform(ccl_global const KernelGlobals *kg, +ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals kg, int object, enum ObjectVectorTransform type) { @@ -76,9 +74,7 @@ ccl_device_inline Transform object_fetch_motion_pass_transform(ccl_global const /* Motion blurred object transformations */ #ifdef __OBJECT_MOTION__ -ccl_device_inline Transform object_fetch_transform_motion(ccl_global const KernelGlobals *kg, - int object, - float time) +ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals kg, int object, float time) { const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset; ccl_global const DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset); @@ -90,7 +86,7 @@ ccl_device_inline Transform object_fetch_transform_motion(ccl_global const Kerne return tfm; } -ccl_device_inline Transform object_fetch_transform_motion_test(ccl_global const KernelGlobals *kg, +ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals kg, int object, float time, ccl_private Transform *itfm) @@ -117,7 +113,7 @@ ccl_device_inline Transform object_fetch_transform_motion_test(ccl_global const /* Get transform matrix for shading point. */ -ccl_device_inline Transform object_get_transform(ccl_global const KernelGlobals *kg, +ccl_device_inline Transform object_get_transform(KernelGlobals kg, ccl_private const ShaderData *sd) { #ifdef __OBJECT_MOTION__ @@ -129,7 +125,7 @@ ccl_device_inline Transform object_get_transform(ccl_global const KernelGlobals #endif } -ccl_device_inline Transform object_get_inverse_transform(ccl_global const KernelGlobals *kg, +ccl_device_inline Transform object_get_inverse_transform(KernelGlobals kg, ccl_private const ShaderData *sd) { #ifdef __OBJECT_MOTION__ @@ -142,7 +138,7 @@ ccl_device_inline Transform object_get_inverse_transform(ccl_global const Kernel } /* Transform position from object to world space */ -ccl_device_inline void object_position_transform(ccl_global const KernelGlobals *kg, +ccl_device_inline void object_position_transform(KernelGlobals kg, ccl_private const ShaderData *sd, ccl_private float3 *P) { @@ -159,7 +155,7 @@ ccl_device_inline void object_position_transform(ccl_global const KernelGlobals /* Transform position from world to object space */ -ccl_device_inline void object_inverse_position_transform(ccl_global const KernelGlobals *kg, +ccl_device_inline void object_inverse_position_transform(KernelGlobals kg, ccl_private const ShaderData *sd, ccl_private float3 *P) { @@ -176,7 +172,7 @@ ccl_device_inline void object_inverse_position_transform(ccl_global const Kernel /* Transform normal from world to object space */ -ccl_device_inline void object_inverse_normal_transform(ccl_global const KernelGlobals *kg, +ccl_device_inline void object_inverse_normal_transform(KernelGlobals kg, ccl_private const ShaderData *sd, ccl_private float3 *N) { @@ -201,7 +197,7 @@ ccl_device_inline void object_inverse_normal_transform(ccl_global const KernelGl /* Transform normal from object to world space */ -ccl_device_inline void object_normal_transform(ccl_global const KernelGlobals *kg, +ccl_device_inline void object_normal_transform(KernelGlobals kg, ccl_private const ShaderData *sd, ccl_private float3 *N) { @@ -218,7 +214,7 @@ ccl_device_inline void object_normal_transform(ccl_global const KernelGlobals *k /* Transform direction vector from object to world space */ -ccl_device_inline void object_dir_transform(ccl_global const KernelGlobals *kg, +ccl_device_inline void object_dir_transform(KernelGlobals kg, ccl_private const ShaderData *sd, ccl_private float3 *D) { @@ -235,7 +231,7 @@ ccl_device_inline void object_dir_transform(ccl_global const KernelGlobals *kg, /* Transform direction vector from world to object space */ -ccl_device_inline void object_inverse_dir_transform(ccl_global const KernelGlobals *kg, +ccl_device_inline void object_inverse_dir_transform(KernelGlobals kg, ccl_private const ShaderData *sd, ccl_private float3 *D) { @@ -252,8 +248,7 @@ ccl_device_inline void object_inverse_dir_transform(ccl_global const KernelGloba /* Object center position */ -ccl_device_inline float3 object_location(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device_inline float3 object_location(KernelGlobals kg, ccl_private const ShaderData *sd) { if (sd->object == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); @@ -270,7 +265,7 @@ ccl_device_inline float3 object_location(ccl_global const KernelGlobals *kg, /* Color of the object */ -ccl_device_inline float3 object_color(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline float3 object_color(KernelGlobals kg, int object) { if (object == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); @@ -281,7 +276,7 @@ ccl_device_inline float3 object_color(ccl_global const KernelGlobals *kg, int ob /* Pass ID number of object */ -ccl_device_inline float object_pass_id(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline float object_pass_id(KernelGlobals kg, int object) { if (object == OBJECT_NONE) return 0.0f; @@ -291,7 +286,7 @@ ccl_device_inline float object_pass_id(ccl_global const KernelGlobals *kg, int o /* Per lamp random number for shader variation */ -ccl_device_inline float lamp_random_number(ccl_global const KernelGlobals *kg, int lamp) +ccl_device_inline float lamp_random_number(KernelGlobals kg, int lamp) { if (lamp == LAMP_NONE) return 0.0f; @@ -301,7 +296,7 @@ ccl_device_inline float lamp_random_number(ccl_global const KernelGlobals *kg, i /* Per object random number for shader variation */ -ccl_device_inline float object_random_number(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline float object_random_number(KernelGlobals kg, int object) { if (object == OBJECT_NONE) return 0.0f; @@ -311,7 +306,7 @@ ccl_device_inline float object_random_number(ccl_global const KernelGlobals *kg, /* Particle ID from which this object was generated */ -ccl_device_inline int object_particle_id(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline int object_particle_id(KernelGlobals kg, int object) { if (object == OBJECT_NONE) return 0; @@ -321,7 +316,7 @@ ccl_device_inline int object_particle_id(ccl_global const KernelGlobals *kg, int /* Generated texture coordinate on surface from where object was instanced */ -ccl_device_inline float3 object_dupli_generated(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline float3 object_dupli_generated(KernelGlobals kg, int object) { if (object == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); @@ -333,7 +328,7 @@ ccl_device_inline float3 object_dupli_generated(ccl_global const KernelGlobals * /* UV texture coordinate on surface from where object was instanced */ -ccl_device_inline float3 object_dupli_uv(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline float3 object_dupli_uv(KernelGlobals kg, int object) { if (object == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); @@ -344,7 +339,7 @@ ccl_device_inline float3 object_dupli_uv(ccl_global const KernelGlobals *kg, int /* Information about mesh for motion blurred triangles and curves */ -ccl_device_inline void object_motion_info(ccl_global const KernelGlobals *kg, +ccl_device_inline void object_motion_info(KernelGlobals kg, int object, ccl_private int *numsteps, ccl_private int *numverts, @@ -362,7 +357,7 @@ ccl_device_inline void object_motion_info(ccl_global const KernelGlobals *kg, /* Offset to an objects patch map */ -ccl_device_inline uint object_patch_map_offset(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline uint object_patch_map_offset(KernelGlobals kg, int object) { if (object == OBJECT_NONE) return 0; @@ -372,7 +367,7 @@ ccl_device_inline uint object_patch_map_offset(ccl_global const KernelGlobals *k /* Volume step size */ -ccl_device_inline float object_volume_density(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline float object_volume_density(KernelGlobals kg, int object) { if (object == OBJECT_NONE) { return 1.0f; @@ -381,7 +376,7 @@ ccl_device_inline float object_volume_density(ccl_global const KernelGlobals *kg return kernel_tex_fetch(__objects, object).volume_density; } -ccl_device_inline float object_volume_step_size(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline float object_volume_step_size(KernelGlobals kg, int object) { if (object == OBJECT_NONE) { return kernel_data.background.volume_step_size; @@ -392,14 +387,14 @@ ccl_device_inline float object_volume_step_size(ccl_global const KernelGlobals * /* Pass ID for shader */ -ccl_device int shader_pass_id(ccl_global const KernelGlobals *kg, ccl_private const ShaderData *sd) +ccl_device int shader_pass_id(KernelGlobals kg, ccl_private const ShaderData *sd) { return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id; } /* Cryptomatte ID */ -ccl_device_inline float object_cryptomatte_id(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline float object_cryptomatte_id(KernelGlobals kg, int object) { if (object == OBJECT_NONE) return 0.0f; @@ -407,7 +402,7 @@ ccl_device_inline float object_cryptomatte_id(ccl_global const KernelGlobals *kg return kernel_tex_fetch(__objects, object).cryptomatte_object; } -ccl_device_inline float object_cryptomatte_asset_id(ccl_global const KernelGlobals *kg, int object) +ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals kg, int object) { if (object == OBJECT_NONE) return 0; @@ -417,42 +412,42 @@ ccl_device_inline float object_cryptomatte_asset_id(ccl_global const KernelGloba /* Particle data from which object was instanced */ -ccl_device_inline uint particle_index(ccl_global const KernelGlobals *kg, int particle) +ccl_device_inline uint particle_index(KernelGlobals kg, int particle) { return kernel_tex_fetch(__particles, particle).index; } -ccl_device float particle_age(ccl_global const KernelGlobals *kg, int particle) +ccl_device float particle_age(KernelGlobals kg, int particle) { return kernel_tex_fetch(__particles, particle).age; } -ccl_device float particle_lifetime(ccl_global const KernelGlobals *kg, int particle) +ccl_device float particle_lifetime(KernelGlobals kg, int particle) { return kernel_tex_fetch(__particles, particle).lifetime; } -ccl_device float particle_size(ccl_global const KernelGlobals *kg, int particle) +ccl_device float particle_size(KernelGlobals kg, int particle) { return kernel_tex_fetch(__particles, particle).size; } -ccl_device float4 particle_rotation(ccl_global const KernelGlobals *kg, int particle) +ccl_device float4 particle_rotation(KernelGlobals kg, int particle) { return kernel_tex_fetch(__particles, particle).rotation; } -ccl_device float3 particle_location(ccl_global const KernelGlobals *kg, int particle) +ccl_device float3 particle_location(KernelGlobals kg, int particle) { return float4_to_float3(kernel_tex_fetch(__particles, particle).location); } -ccl_device float3 particle_velocity(ccl_global const KernelGlobals *kg, int particle) +ccl_device float3 particle_velocity(KernelGlobals kg, int particle) { return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity); } -ccl_device float3 particle_angular_velocity(ccl_global const KernelGlobals *kg, int particle) +ccl_device float3 particle_angular_velocity(KernelGlobals kg, int particle) { return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity); } @@ -474,7 +469,7 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir) /* Transform ray into object space to enter static object in BVH */ -ccl_device_inline float bvh_instance_push(ccl_global const KernelGlobals *kg, +ccl_device_inline float bvh_instance_push(KernelGlobals kg, int object, ccl_private const Ray *ray, ccl_private float3 *P, @@ -494,7 +489,7 @@ ccl_device_inline float bvh_instance_push(ccl_global const KernelGlobals *kg, /* Transform ray to exit static object in BVH. */ -ccl_device_inline float bvh_instance_pop(ccl_global const KernelGlobals *kg, +ccl_device_inline float bvh_instance_pop(KernelGlobals kg, int object, ccl_private const Ray *ray, ccl_private float3 *P, @@ -516,7 +511,7 @@ ccl_device_inline float bvh_instance_pop(ccl_global const KernelGlobals *kg, /* Same as above, but returns scale factor to apply to multiple intersection distances */ -ccl_device_inline void bvh_instance_pop_factor(ccl_global const KernelGlobals *kg, +ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg, int object, ccl_private const Ray *ray, ccl_private float3 *P, @@ -535,7 +530,7 @@ ccl_device_inline void bvh_instance_pop_factor(ccl_global const KernelGlobals *k #ifdef __OBJECT_MOTION__ /* Transform ray into object space to enter motion blurred object in BVH */ -ccl_device_inline float bvh_instance_motion_push(ccl_global const KernelGlobals *kg, +ccl_device_inline float bvh_instance_motion_push(KernelGlobals kg, int object, ccl_private const Ray *ray, ccl_private float3 *P, @@ -556,7 +551,7 @@ ccl_device_inline float bvh_instance_motion_push(ccl_global const KernelGlobals /* Transform ray to exit motion blurred object in BVH. */ -ccl_device_inline float bvh_instance_motion_pop(ccl_global const KernelGlobals *kg, +ccl_device_inline float bvh_instance_motion_pop(KernelGlobals kg, int object, ccl_private const Ray *ray, ccl_private float3 *P, @@ -578,7 +573,7 @@ ccl_device_inline float bvh_instance_motion_pop(ccl_global const KernelGlobals * /* Same as above, but returns scale factor to apply to multiple intersection distances */ -ccl_device_inline void bvh_instance_motion_pop_factor(ccl_global const KernelGlobals *kg, +ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals kg, int object, ccl_private const Ray *ray, ccl_private float3 *P, diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h index b54eafd6220..bd797ef52ab 100644 --- a/intern/cycles/kernel/geom/geom_patch.h +++ b/intern/cycles/kernel/geom/geom_patch.h @@ -64,7 +64,7 @@ ccl_device_inline int patch_map_resolve_quadrant(float median, /* retrieve PatchHandle from patch coords */ ccl_device_inline PatchHandle -patch_map_find_patch(ccl_global const KernelGlobals *kg, int object, int patch, float u, float v) +patch_map_find_patch(KernelGlobals kg, int object, int patch, float u, float v) { PatchHandle handle; @@ -201,7 +201,7 @@ ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, /* retrieve patch control indices */ -ccl_device_inline int patch_eval_indices(ccl_global const KernelGlobals *kg, +ccl_device_inline int patch_eval_indices(KernelGlobals kg, ccl_private const PatchHandle *handle, int channel, int indices[PATCH_MAX_CONTROL_VERTS]) @@ -218,7 +218,7 @@ ccl_device_inline int patch_eval_indices(ccl_global const KernelGlobals *kg, /* evaluate patch basis functions */ -ccl_device_inline void patch_eval_basis(ccl_global const KernelGlobals *kg, +ccl_device_inline void patch_eval_basis(KernelGlobals kg, ccl_private const PatchHandle *handle, float u, float v, @@ -257,7 +257,7 @@ ccl_device_inline void patch_eval_basis(ccl_global const KernelGlobals *kg, /* generic function for evaluating indices and weights from patch coords */ -ccl_device_inline int patch_eval_control_verts(ccl_global const KernelGlobals *kg, +ccl_device_inline int patch_eval_control_verts(KernelGlobals kg, int object, int patch, float u, @@ -279,7 +279,7 @@ ccl_device_inline int patch_eval_control_verts(ccl_global const KernelGlobals *k /* functions for evaluating attributes on patches */ -ccl_device float patch_eval_float(ccl_global const KernelGlobals *kg, +ccl_device float patch_eval_float(KernelGlobals kg, ccl_private const ShaderData *sd, int offset, int patch, @@ -316,7 +316,7 @@ ccl_device float patch_eval_float(ccl_global const KernelGlobals *kg, return val; } -ccl_device float2 patch_eval_float2(ccl_global const KernelGlobals *kg, +ccl_device float2 patch_eval_float2(KernelGlobals kg, ccl_private const ShaderData *sd, int offset, int patch, @@ -353,7 +353,7 @@ ccl_device float2 patch_eval_float2(ccl_global const KernelGlobals *kg, return val; } -ccl_device float3 patch_eval_float3(ccl_global const KernelGlobals *kg, +ccl_device float3 patch_eval_float3(KernelGlobals kg, ccl_private const ShaderData *sd, int offset, int patch, @@ -390,7 +390,7 @@ ccl_device float3 patch_eval_float3(ccl_global const KernelGlobals *kg, return val; } -ccl_device float4 patch_eval_float4(ccl_global const KernelGlobals *kg, +ccl_device float4 patch_eval_float4(KernelGlobals kg, ccl_private const ShaderData *sd, int offset, int patch, @@ -427,7 +427,7 @@ ccl_device float4 patch_eval_float4(ccl_global const KernelGlobals *kg, return val; } -ccl_device float4 patch_eval_uchar4(ccl_global const KernelGlobals *kg, +ccl_device float4 patch_eval_uchar4(KernelGlobals kg, ccl_private const ShaderData *sd, int offset, int patch, diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index 869b911f76f..91b29c7f990 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -31,7 +31,7 @@ CCL_NAMESPACE_BEGIN * attributes for performance, mainly for GPU performance to avoid bringing in * heavy volume interpolation code. */ -ccl_device_inline float primitive_surface_attribute_float(ccl_global const KernelGlobals *kg, +ccl_device_inline float primitive_surface_attribute_float(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float *dx, @@ -57,7 +57,7 @@ ccl_device_inline float primitive_surface_attribute_float(ccl_global const Kerne } } -ccl_device_inline float2 primitive_surface_attribute_float2(ccl_global const KernelGlobals *kg, +ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float2 *dx, @@ -83,7 +83,7 @@ ccl_device_inline float2 primitive_surface_attribute_float2(ccl_global const Ker } } -ccl_device_inline float3 primitive_surface_attribute_float3(ccl_global const KernelGlobals *kg, +ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float3 *dx, @@ -109,12 +109,11 @@ ccl_device_inline float3 primitive_surface_attribute_float3(ccl_global const Ker } } -ccl_device_forceinline float4 -primitive_surface_attribute_float4(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float4 *dx, - ccl_private float4 *dy) +ccl_device_forceinline float4 primitive_surface_attribute_float4(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float4 *dx, + ccl_private float4 *dy) { if (sd->type & PRIMITIVE_ALL_TRIANGLE) { if (subd_triangle_patch(kg, sd) == ~0) @@ -149,7 +148,7 @@ ccl_device_inline bool primitive_is_volume_attribute(ccl_private const ShaderDat return sd->type == PRIMITIVE_VOLUME; } -ccl_device_inline float primitive_volume_attribute_float(ccl_global const KernelGlobals *kg, +ccl_device_inline float primitive_volume_attribute_float(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc) { @@ -161,7 +160,7 @@ ccl_device_inline float primitive_volume_attribute_float(ccl_global const Kernel } } -ccl_device_inline float3 primitive_volume_attribute_float3(ccl_global const KernelGlobals *kg, +ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc) { @@ -173,7 +172,7 @@ ccl_device_inline float3 primitive_volume_attribute_float3(ccl_global const Kern } } -ccl_device_inline float4 primitive_volume_attribute_float4(ccl_global const KernelGlobals *kg, +ccl_device_inline float4 primitive_volume_attribute_float4(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc) { @@ -188,8 +187,7 @@ ccl_device_inline float4 primitive_volume_attribute_float4(ccl_global const Kern /* Default UV coordinate */ -ccl_device_inline float3 primitive_uv(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device_inline float3 primitive_uv(KernelGlobals kg, ccl_private const ShaderData *sd) { const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV); @@ -202,7 +200,7 @@ ccl_device_inline float3 primitive_uv(ccl_global const KernelGlobals *kg, /* Ptex coordinates */ -ccl_device bool primitive_ptex(ccl_global const KernelGlobals *kg, +ccl_device bool primitive_ptex(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float2 *uv, ccl_private int *face_id) @@ -225,7 +223,7 @@ ccl_device bool primitive_ptex(ccl_global const KernelGlobals *kg, /* Surface tangent */ -ccl_device float3 primitive_tangent(ccl_global const KernelGlobals *kg, ccl_private ShaderData *sd) +ccl_device float3 primitive_tangent(KernelGlobals kg, ccl_private ShaderData *sd) { #ifdef __HAIR__ if (sd->type & PRIMITIVE_ALL_CURVE) @@ -257,7 +255,7 @@ ccl_device float3 primitive_tangent(ccl_global const KernelGlobals *kg, ccl_priv /* Motion vector for motion pass */ -ccl_device_inline float4 primitive_motion_vector(ccl_global const KernelGlobals *kg, +ccl_device_inline float4 primitive_motion_vector(KernelGlobals kg, ccl_private const ShaderData *sd) { /* center position */ diff --git a/intern/cycles/kernel/geom/geom_shader_data.h b/intern/cycles/kernel/geom/geom_shader_data.h index 2cf60e263c3..e6a5b8f7923 100644 --- a/intern/cycles/kernel/geom/geom_shader_data.h +++ b/intern/cycles/kernel/geom/geom_shader_data.h @@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN /* ShaderData setup from incoming ray */ #ifdef __OBJECT_MOTION__ -ccl_device void shader_setup_object_transforms(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device void shader_setup_object_transforms(KernelGlobals kg, ccl_private ShaderData *ccl_restrict sd, float time) { @@ -38,7 +38,7 @@ ccl_device void shader_setup_object_transforms(ccl_global const KernelGlobals *c /* TODO: break this up if it helps reduce register pressure to load data from * global memory as we write it to shader-data. */ -ccl_device_inline void shader_setup_from_ray(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device_inline void shader_setup_from_ray(KernelGlobals kg, ccl_private ShaderData *ccl_restrict sd, ccl_private const Ray *ccl_restrict ray, ccl_private const Intersection *ccl_restrict isect) @@ -135,7 +135,7 @@ ccl_device_inline void shader_setup_from_ray(ccl_global const KernelGlobals *ccl /* ShaderData setup from position sampled on mesh */ -ccl_device_inline void shader_setup_from_sample(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device_inline void shader_setup_from_sample(KernelGlobals kg, ccl_private ShaderData *ccl_restrict sd, const float3 P, const float3 Ng, @@ -247,7 +247,7 @@ ccl_device_inline void shader_setup_from_sample(ccl_global const KernelGlobals * /* ShaderData setup for displacement */ -ccl_device void shader_setup_from_displace(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device void shader_setup_from_displace(KernelGlobals kg, ccl_private ShaderData *ccl_restrict sd, int object, int prim, @@ -281,8 +281,7 @@ ccl_device void shader_setup_from_displace(ccl_global const KernelGlobals *ccl_r /* ShaderData setup from ray into background */ -ccl_device_inline void shader_setup_from_background(ccl_global const KernelGlobals *ccl_restrict - kg, +ccl_device_inline void shader_setup_from_background(KernelGlobals kg, ccl_private ShaderData *ccl_restrict sd, const float3 ray_P, const float3 ray_D, @@ -326,7 +325,7 @@ ccl_device_inline void shader_setup_from_background(ccl_global const KernelGloba /* ShaderData setup from point inside volume */ #ifdef __VOLUME__ -ccl_device_inline void shader_setup_from_volume(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device_inline void shader_setup_from_volume(KernelGlobals kg, ccl_private ShaderData *ccl_restrict sd, ccl_private const Ray *ccl_restrict ray) { diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h index 927d630fe91..8a9a3f71231 100644 --- a/intern/cycles/kernel/geom/geom_subd_triangle.h +++ b/intern/cycles/kernel/geom/geom_subd_triangle.h @@ -22,15 +22,14 @@ CCL_NAMESPACE_BEGIN /* Patch index for triangle, -1 if not subdivision triangle */ -ccl_device_inline uint subd_triangle_patch(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd) { return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0; } /* UV coords of triangle within patch */ -ccl_device_inline void subd_triangle_patch_uv(ccl_global const KernelGlobals *kg, +ccl_device_inline void subd_triangle_patch_uv(KernelGlobals kg, ccl_private const ShaderData *sd, float2 uv[3]) { @@ -43,7 +42,7 @@ ccl_device_inline void subd_triangle_patch_uv(ccl_global const KernelGlobals *kg /* Vertex indices of patch */ -ccl_device_inline uint4 subd_triangle_patch_indices(ccl_global const KernelGlobals *kg, int patch) +ccl_device_inline uint4 subd_triangle_patch_indices(KernelGlobals kg, int patch) { uint4 indices; @@ -57,24 +56,21 @@ ccl_device_inline uint4 subd_triangle_patch_indices(ccl_global const KernelGloba /* Originating face for patch */ -ccl_device_inline uint subd_triangle_patch_face(ccl_global const KernelGlobals *kg, int patch) +ccl_device_inline uint subd_triangle_patch_face(KernelGlobals kg, int patch) { return kernel_tex_fetch(__patches, patch + 4); } /* Number of corners on originating face */ -ccl_device_inline uint subd_triangle_patch_num_corners(ccl_global const KernelGlobals *kg, - int patch) +ccl_device_inline uint subd_triangle_patch_num_corners(KernelGlobals kg, int patch) { return kernel_tex_fetch(__patches, patch + 5) & 0xffff; } /* Indices of the four corners that are used by the patch */ -ccl_device_inline void subd_triangle_patch_corners(ccl_global const KernelGlobals *kg, - int patch, - int corners[4]) +ccl_device_inline void subd_triangle_patch_corners(KernelGlobals kg, int patch, int corners[4]) { uint4 data; @@ -105,7 +101,7 @@ ccl_device_inline void subd_triangle_patch_corners(ccl_global const KernelGlobal /* Reading attributes on various subdivision triangle elements */ -ccl_device_noinline float subd_triangle_attribute_float(ccl_global const KernelGlobals *kg, +ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float *dx, @@ -244,7 +240,7 @@ ccl_device_noinline float subd_triangle_attribute_float(ccl_global const KernelG } } -ccl_device_noinline float2 subd_triangle_attribute_float2(ccl_global const KernelGlobals *kg, +ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float2 *dx, @@ -387,7 +383,7 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(ccl_global const Kerne } } -ccl_device_noinline float3 subd_triangle_attribute_float3(ccl_global const KernelGlobals *kg, +ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float3 *dx, @@ -529,7 +525,7 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(ccl_global const Kerne } } -ccl_device_noinline float4 subd_triangle_attribute_float4(ccl_global const KernelGlobals *kg, +ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float4 *dx, diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 17f87b7c570..233e901c7ca 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -25,8 +25,7 @@ CCL_NAMESPACE_BEGIN /* Normal on triangle. */ -ccl_device_inline float3 triangle_normal(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd) +ccl_device_inline float3 triangle_normal(KernelGlobals kg, ccl_private ShaderData *sd) { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); @@ -44,7 +43,7 @@ ccl_device_inline float3 triangle_normal(ccl_global const KernelGlobals *kg, } /* Point and normal on triangle. */ -ccl_device_inline void triangle_point_normal(ccl_global const KernelGlobals *kg, +ccl_device_inline void triangle_point_normal(KernelGlobals kg, int object, int prim, float u, @@ -76,7 +75,7 @@ ccl_device_inline void triangle_point_normal(ccl_global const KernelGlobals *kg, /* Triangle vertex locations */ -ccl_device_inline void triangle_vertices(ccl_global const KernelGlobals *kg, int prim, float3 P[3]) +ccl_device_inline void triangle_vertices(KernelGlobals kg, int prim, float3 P[3]) { const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); @@ -86,7 +85,7 @@ ccl_device_inline void triangle_vertices(ccl_global const KernelGlobals *kg, int /* Triangle vertex locations and vertex normals */ -ccl_device_inline void triangle_vertices_and_normals(ccl_global const KernelGlobals *kg, +ccl_device_inline void triangle_vertices_and_normals(KernelGlobals kg, int prim, float3 P[3], float3 N[3]) @@ -103,7 +102,7 @@ ccl_device_inline void triangle_vertices_and_normals(ccl_global const KernelGlob /* Interpolate smooth vertex normal from vertices */ ccl_device_inline float3 -triangle_smooth_normal(ccl_global const KernelGlobals *kg, float3 Ng, int prim, float u, float v) +triangle_smooth_normal(KernelGlobals kg, float3 Ng, int prim, float u, float v) { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); @@ -116,12 +115,8 @@ triangle_smooth_normal(ccl_global const KernelGlobals *kg, float3 Ng, int prim, return is_zero(N) ? Ng : N; } -ccl_device_inline float3 triangle_smooth_normal_unnormalized(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd, - float3 Ng, - int prim, - float u, - float v) +ccl_device_inline float3 triangle_smooth_normal_unnormalized( + KernelGlobals kg, ccl_private const ShaderData *sd, float3 Ng, int prim, float u, float v) { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); @@ -143,7 +138,7 @@ ccl_device_inline float3 triangle_smooth_normal_unnormalized(ccl_global const Ke /* Ray differentials on triangle */ -ccl_device_inline void triangle_dPdudv(ccl_global const KernelGlobals *kg, +ccl_device_inline void triangle_dPdudv(KernelGlobals kg, int prim, ccl_private float3 *dPdu, ccl_private float3 *dPdv) @@ -161,7 +156,7 @@ ccl_device_inline void triangle_dPdudv(ccl_global const KernelGlobals *kg, /* Reading attributes on various triangle elements */ -ccl_device float triangle_attribute_float(ccl_global const KernelGlobals *kg, +ccl_device float triangle_attribute_float(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float *dx, @@ -211,7 +206,7 @@ ccl_device float triangle_attribute_float(ccl_global const KernelGlobals *kg, } } -ccl_device float2 triangle_attribute_float2(ccl_global const KernelGlobals *kg, +ccl_device float2 triangle_attribute_float2(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float2 *dx, @@ -261,7 +256,7 @@ ccl_device float2 triangle_attribute_float2(ccl_global const KernelGlobals *kg, } } -ccl_device float3 triangle_attribute_float3(ccl_global const KernelGlobals *kg, +ccl_device float3 triangle_attribute_float3(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float3 *dx, @@ -311,7 +306,7 @@ ccl_device float3 triangle_attribute_float3(ccl_global const KernelGlobals *kg, } } -ccl_device float4 triangle_attribute_float4(ccl_global const KernelGlobals *kg, +ccl_device float4 triangle_attribute_float4(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc, ccl_private float4 *dx, diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index f637206da19..fee629cc75a 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -26,7 +26,7 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline bool triangle_intersect(ccl_global const KernelGlobals *kg, +ccl_device_inline bool triangle_intersect(KernelGlobals kg, ccl_private Intersection *isect, float3 P, float3 dir, @@ -85,7 +85,7 @@ ccl_device_inline bool triangle_intersect(ccl_global const KernelGlobals *kg, */ #ifdef __BVH_LOCAL__ -ccl_device_inline bool triangle_intersect_local(ccl_global const KernelGlobals *kg, +ccl_device_inline bool triangle_intersect_local(KernelGlobals kg, ccl_private LocalIntersection *local_isect, float3 P, float3 dir, @@ -200,7 +200,7 @@ ccl_device_inline bool triangle_intersect_local(ccl_global const KernelGlobals * * http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf */ -ccl_device_inline float3 triangle_refine(ccl_global const KernelGlobals *kg, +ccl_device_inline float3 triangle_refine(KernelGlobals kg, ccl_private ShaderData *sd, float3 P, float3 D, @@ -256,7 +256,7 @@ ccl_device_inline float3 triangle_refine(ccl_global const KernelGlobals *kg, /* Same as above, except that t is assumed to be in object space for * instancing. */ -ccl_device_inline float3 triangle_refine_local(ccl_global const KernelGlobals *kg, +ccl_device_inline float3 triangle_refine_local(KernelGlobals kg, ccl_private ShaderData *sd, float3 P, float3 D, diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index c466c3fb07a..4e83ad6acb3 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -31,7 +31,7 @@ CCL_NAMESPACE_BEGIN /* Return position normalized to 0..1 in mesh bounds */ -ccl_device_inline float3 volume_normalized_position(ccl_global const KernelGlobals *kg, +ccl_device_inline float3 volume_normalized_position(KernelGlobals kg, ccl_private const ShaderData *sd, float3 P) { @@ -70,7 +70,7 @@ ccl_device float3 volume_attribute_value_to_float3(const float4 value) } } -ccl_device float4 volume_attribute_float4(ccl_global const KernelGlobals *kg, +ccl_device float4 volume_attribute_float4(KernelGlobals kg, ccl_private const ShaderData *sd, const AttributeDescriptor desc) { diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h index c822823de9c..df3c2103c5b 100644 --- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h +++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h @@ -43,7 +43,8 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u, float max) /* Return false to indicate that this pixel is finished. * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known * that the pixel did converge. */ -ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS, +ccl_device bool integrator_init_from_bake(KernelGlobals kg, + IntegratorState state, ccl_global const KernelWorkTile *ccl_restrict tile, ccl_global float *render_buffer, const int x, @@ -53,18 +54,18 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS, PROFILING_INIT(kg, PROFILING_RAY_SETUP); /* Initialize path state to give basic buffer access and allow early outputs. */ - path_state_init(INTEGRATOR_STATE_PASS, tile, x, y); + path_state_init(state, tile, x, y); /* Check whether the pixel has converged and should not be sampled anymore. */ - if (!kernel_need_sample_pixel(INTEGRATOR_STATE_PASS, render_buffer)) { + if (!kernel_need_sample_pixel(kg, state, render_buffer)) { return false; } /* Always count the sample, even if the camera sample will reject the ray. */ - const int sample = kernel_accum_sample(INTEGRATOR_STATE_PASS, render_buffer, scheduled_sample); + const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample); /* Setup render buffers. */ - const int index = INTEGRATOR_STATE(path, render_pixel_index); + const int index = INTEGRATOR_STATE(state, path, render_pixel_index); const int pass_stride = kernel_data.film.pass_stride; render_buffer += index * pass_stride; @@ -91,7 +92,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS, } /* Initialize path state for path integration. */ - path_state_init_integrator(INTEGRATOR_STATE_PASS, sample, rng_hash); + path_state_init_integrator(kg, state, sample, rng_hash); /* Barycentric UV with sub-pixel offset. */ float u = primitive[2]; @@ -131,7 +132,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS, ray.time = 0.5f; ray.dP = differential_zero_compact(); ray.dD = differential_zero_compact(); - integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_write_ray(kg, state, &ray); /* Setup next kernel to execute. */ INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); @@ -169,7 +170,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS, ray.dD = differential_zero_compact(); /* Write ray. */ - integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_write_ray(kg, state, &ray); /* Setup and write intersection. */ Intersection isect ccl_optional_struct_init; @@ -182,7 +183,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS, #ifdef __EMBREE__ isect.Ng = Ng; #endif - integrator_state_write_isect(INTEGRATOR_STATE_PASS, &isect); + integrator_state_write_isect(kg, state, &isect); /* Setup next kernel to execute. */ const int shader_index = shader & SHADER_MASK; diff --git a/intern/cycles/kernel/integrator/integrator_init_from_camera.h b/intern/cycles/kernel/integrator/integrator_init_from_camera.h index 291f0f106f0..5bab6b2e2fd 100644 --- a/intern/cycles/kernel/integrator/integrator_init_from_camera.h +++ b/intern/cycles/kernel/integrator/integrator_init_from_camera.h @@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline void integrate_camera_sample(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device_inline void integrate_camera_sample(KernelGlobals kg, const int sample, const int x, const int y, @@ -63,7 +63,8 @@ ccl_device_inline void integrate_camera_sample(ccl_global const KernelGlobals *c /* Return false to indicate that this pixel is finished. * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known * that the pixel did converge. */ -ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS, +ccl_device bool integrator_init_from_camera(KernelGlobals kg, + IntegratorState state, ccl_global const KernelWorkTile *ccl_restrict tile, ccl_global float *render_buffer, const int x, @@ -73,10 +74,10 @@ ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS, PROFILING_INIT(kg, PROFILING_RAY_SETUP); /* Initialize path state to give basic buffer access and allow early outputs. */ - path_state_init(INTEGRATOR_STATE_PASS, tile, x, y); + path_state_init(state, tile, x, y); /* Check whether the pixel has converged and should not be sampled anymore. */ - if (!kernel_need_sample_pixel(INTEGRATOR_STATE_PASS, render_buffer)) { + if (!kernel_need_sample_pixel(kg, state, render_buffer)) { return false; } @@ -85,7 +86,7 @@ ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS, * This logic allows to both count actual number of samples per pixel, and to add samples to this * pixel after it was converged and samples were added somewhere else (in which case the * `scheduled_sample` will be different from actual number of samples in this pixel). */ - const int sample = kernel_accum_sample(INTEGRATOR_STATE_PASS, render_buffer, scheduled_sample); + const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample); /* Initialize random number seed for path. */ const uint rng_hash = path_rng_hash_init(kg, sample, x, y); @@ -99,11 +100,11 @@ ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS, } /* Write camera ray to state. */ - integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_write_ray(kg, state, &ray); } /* Initialize path state for path integration. */ - path_state_init_integrator(INTEGRATOR_STATE_PASS, sample, rng_hash); + path_state_init_integrator(kg, state, sample, rng_hash); /* Continue with intersect_closest kernel, optionally initializing volume * stack before that if the camera may be inside a volume. */ diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h index 760c08159e3..e915d984e1d 100644 --- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h +++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h @@ -29,7 +29,8 @@ CCL_NAMESPACE_BEGIN template -ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS, +ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg, + IntegratorState state, const int shader_flags) { @@ -37,12 +38,12 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS * We continue evaluating emissive/transparent surfaces and volumes, similar * to direct lighting. Only if we know there are none can we terminate the * path immediately. */ - if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) { + if (path_state_ao_bounce(kg, state)) { if (shader_flags & (SD_HAS_TRANSPARENT_SHADOW | SD_HAS_EMISSION)) { - INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; } - else if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) { - INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_AFTER_VOLUME; + else if (!integrator_state_volume_stack_is_empty(kg, state)) { + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_AFTER_VOLUME; } else { return true; @@ -51,14 +52,14 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS /* Load random number state. */ RNGState rng_state; - path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + path_state_rng_load(state, &rng_state); /* We perform path termination in this kernel to avoid launching shade_surface * and evaluating the shader when not needed. Only for emission and transparent * surfaces in front of emission do we need to evaluate the shader, since we * perform MIS as part of indirect rays. */ - const int path_flag = INTEGRATOR_STATE(path, flag); - const float probability = path_state_continuation_probability(INTEGRATOR_STATE_PASS, path_flag); + const int path_flag = INTEGRATOR_STATE(state, path, flag); + const float probability = path_state_continuation_probability(kg, state, path_flag); if (probability != 1.0f) { const float terminate = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE); @@ -66,11 +67,11 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS if (probability == 0.0f || terminate >= probability) { if (shader_flags & SD_HAS_EMISSION) { /* Mark path to be terminated right after shader evaluation on the surface. */ - INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE; + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE; } - else if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) { + else if (!integrator_state_volume_stack_is_empty(kg, state)) { /* TODO: only do this for emissive volumes. */ - INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_IN_NEXT_VOLUME; + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_IN_NEXT_VOLUME; } else { return true; @@ -85,7 +86,8 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS * leads to poor performance with CUDA atomics. */ template ccl_device_forceinline void integrator_intersect_shader_next_kernel( - INTEGRATOR_STATE_ARGS, + KernelGlobals kg, + IntegratorState state, ccl_private const Intersection *ccl_restrict isect, const int shader, const int shader_flags) @@ -122,9 +124,9 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel( #ifdef __SHADOW_CATCHER__ const int object_flags = intersection_get_object_flags(kg, isect); - if (kernel_shadow_catcher_split(INTEGRATOR_STATE_PASS, object_flags)) { + if (kernel_shadow_catcher_split(kg, state, object_flags)) { if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) { - INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND; + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND; INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } @@ -137,7 +139,7 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel( /* If the split happened after bounce through a transparent object it's possible to have shadow * patch. Make sure it is properly re-scheduled on the split path. */ - const int shadow_kernel = INTEGRATOR_STATE(shadow_path, queued_kernel); + const int shadow_kernel = INTEGRATOR_STATE(state, shadow_path, queued_kernel); if (shadow_kernel != 0) { INTEGRATOR_SHADOW_PATH_INIT(shadow_kernel); } @@ -145,21 +147,21 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel( #endif } -ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS) +ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state) { PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST); /* Read ray from integrator state into local memory. */ Ray ray ccl_optional_struct_init; - integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_read_ray(kg, state, &ray); kernel_assert(ray.t != 0.0f); - const uint visibility = path_state_ray_visibility(INTEGRATOR_STATE_PASS); - const int last_isect_prim = INTEGRATOR_STATE(isect, prim); - const int last_isect_object = INTEGRATOR_STATE(isect, object); + const uint visibility = path_state_ray_visibility(state); + const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim); + const int last_isect_object = INTEGRATOR_STATE(state, isect, object); /* Trick to use short AO rays to approximate indirect light at the end of the path. */ - if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) { + if (path_state_ao_bounce(kg, state)) { ray.t = kernel_data.integrator.ao_bounces_distance; const float object_ao_distance = kernel_tex_fetch(__objects, last_isect_object).ao_distance; @@ -181,8 +183,8 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS) if (kernel_data.integrator.use_lamp_mis) { /* NOTE: if we make lights visible to camera rays, we'll need to initialize * these in the path_state_init. */ - const int last_type = INTEGRATOR_STATE(isect, type); - const int path_flag = INTEGRATOR_STATE(path, flag); + const int last_type = INTEGRATOR_STATE(state, isect, type); + const int path_flag = INTEGRATOR_STATE(state, path, flag); hit = lights_intersect( kg, &ray, &isect, last_isect_prim, last_isect_object, last_type, path_flag) || @@ -190,16 +192,16 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS) } /* Write intersection result into global integrator state memory. */ - integrator_state_write_isect(INTEGRATOR_STATE_PASS, &isect); + integrator_state_write_isect(kg, state, &isect); #ifdef __VOLUME__ - if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) { + if (!integrator_state_volume_stack_is_empty(kg, state)) { const bool hit_surface = hit && !(isect.type & PRIMITIVE_LAMP); const int shader = (hit_surface) ? intersection_get_shader(kg, &isect) : SHADER_NONE; const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0; if (!integrator_intersect_terminate( - INTEGRATOR_STATE_PASS, flags)) { + kg, state, flags)) { /* Continue with volume kernel if we are inside a volume, regardless * if we hit anything. */ INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, @@ -225,9 +227,9 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS) const int flags = kernel_tex_fetch(__shaders, shader).flags; if (!integrator_intersect_terminate( - INTEGRATOR_STATE_PASS, flags)) { + kg, state, flags)) { integrator_intersect_shader_next_kernel( - INTEGRATOR_STATE_PASS, &isect, shader, flags); + kg, state, &isect, shader, flags); return; } else { diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h index 3ebd21e4651..06f58f88bc8 100644 --- a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h +++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h @@ -19,19 +19,21 @@ CCL_NAMESPACE_BEGIN /* Visibility for the shadow ray. */ -ccl_device_forceinline uint integrate_intersect_shadow_visibility(INTEGRATOR_STATE_CONST_ARGS) +ccl_device_forceinline uint integrate_intersect_shadow_visibility(KernelGlobals kg, + ConstIntegratorState state) { uint visibility = PATH_RAY_SHADOW; #ifdef __SHADOW_CATCHER__ - const uint32_t path_flag = INTEGRATOR_STATE(shadow_path, flag); + const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag); visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility); #endif return visibility; } -ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS, +ccl_device bool integrate_intersect_shadow_opaque(KernelGlobals kg, + IntegratorState state, ccl_private const Ray *ray, const uint visibility) { @@ -46,22 +48,24 @@ ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS, const bool opaque_hit = scene_intersect(kg, ray, visibility & opaque_mask, &isect); if (!opaque_hit) { - INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0; + INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0; } return opaque_hit; } -ccl_device_forceinline int integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_CONST_ARGS) +ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals kg, + ConstIntegratorState state) { const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; - const int transparent_bounce = INTEGRATOR_STATE(shadow_path, transparent_bounce); + const int transparent_bounce = INTEGRATOR_STATE(state, shadow_path, transparent_bounce); return max(transparent_max_bounce - transparent_bounce - 1, 0); } #ifdef __TRANSPARENT_SHADOWS__ -ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS, +ccl_device bool integrate_intersect_shadow_transparent(KernelGlobals kg, + IntegratorState state, ccl_private const Ray *ray, const uint visibility) { @@ -69,7 +73,7 @@ ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS, /* Limit the number hits to the max transparent bounces allowed and the size that we * have available in the integrator state. */ - const uint max_transparent_hits = integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_PASS); + const uint max_transparent_hits = integrate_shadow_max_transparent_hits(kg, state); const uint max_hits = min(max_transparent_hits, (uint)INTEGRATOR_SHADOW_ISECT_SIZE); uint num_hits = 0; bool opaque_hit = scene_intersect_shadow_all(kg, ray, isect, visibility, max_hits, &num_hits); @@ -88,41 +92,39 @@ ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS, /* Write intersection result into global integrator state memory. * More efficient may be to do this directly from the intersection kernel. */ for (int hit = 0; hit < num_recorded_hits; hit++) { - integrator_state_write_shadow_isect(INTEGRATOR_STATE_PASS, &isect[hit], hit); + integrator_state_write_shadow_isect(state, &isect[hit], hit); } } - INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = num_hits; + INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = num_hits; } else { - INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0; + INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0; } return opaque_hit; } #endif -ccl_device void integrator_intersect_shadow(INTEGRATOR_STATE_ARGS) +ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorState state) { PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW); /* Read ray from integrator state into local memory. */ Ray ray ccl_optional_struct_init; - integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_read_shadow_ray(kg, state, &ray); /* Compute visibility. */ - const uint visibility = integrate_intersect_shadow_visibility(INTEGRATOR_STATE_PASS); + const uint visibility = integrate_intersect_shadow_visibility(kg, state); #ifdef __TRANSPARENT_SHADOWS__ /* TODO: compile different kernels depending on this? Especially for OptiX * conditional trace calls are bad. */ - const bool opaque_hit = - (kernel_data.integrator.transparent_shadows) ? - integrate_intersect_shadow_transparent(INTEGRATOR_STATE_PASS, &ray, visibility) : - integrate_intersect_shadow_opaque(INTEGRATOR_STATE_PASS, &ray, visibility); + const bool opaque_hit = (kernel_data.integrator.transparent_shadows) ? + integrate_intersect_shadow_transparent(kg, state, &ray, visibility) : + integrate_intersect_shadow_opaque(kg, state, &ray, visibility); #else - const bool opaque_hit = integrate_intersect_shadow_opaque( - INTEGRATOR_STATE_PASS, &ray, visibility); + const bool opaque_hit = integrate_intersect_shadow_opaque(kg, state, &ray, visibility); #endif if (opaque_hit) { diff --git a/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h b/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h index 7c090952dc7..b575e7fd1e6 100644 --- a/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h +++ b/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h @@ -20,12 +20,12 @@ CCL_NAMESPACE_BEGIN -ccl_device void integrator_intersect_subsurface(INTEGRATOR_STATE_ARGS) +ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorState state) { PROFILING_INIT(kg, PROFILING_INTERSECT_SUBSURFACE); #ifdef __SUBSURFACE__ - if (subsurface_scatter(INTEGRATOR_STATE_PASS)) { + if (subsurface_scatter(kg, state)) { return; } #endif diff --git a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h index 192e9c6ab43..7def3e2f3f3 100644 --- a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h +++ b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h @@ -23,7 +23,8 @@ CCL_NAMESPACE_BEGIN -ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_ARGS, +ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, + IntegratorState state, const float3 from_P, const float3 to_P) { @@ -52,7 +53,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A for (uint hit = 0; hit < num_hits; ++hit, ++isect) { shader_setup_from_ray(kg, stack_sd, &volume_ray, isect); - volume_stack_enter_exit(INTEGRATOR_STATE_PASS, stack_sd); + volume_stack_enter_exit(kg, state, stack_sd); } } #else @@ -61,7 +62,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A while (step < 2 * volume_stack_size && scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) { shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect); - volume_stack_enter_exit(INTEGRATOR_STATE_PASS, stack_sd); + volume_stack_enter_exit(kg, state, stack_sd); /* Move ray forward. */ volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); @@ -73,7 +74,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A #endif } -ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS) +ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorState state) { PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK); @@ -81,16 +82,16 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS) ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage); Ray volume_ray ccl_optional_struct_init; - integrator_state_read_ray(INTEGRATOR_STATE_PASS, &volume_ray); + integrator_state_read_ray(kg, state, &volume_ray); volume_ray.t = FLT_MAX; - const uint visibility = (INTEGRATOR_STATE(path, flag) & PATH_RAY_ALL_VISIBILITY); + const uint visibility = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_ALL_VISIBILITY); int stack_index = 0, enclosed_index = 0; /* Write background shader. */ if (kernel_data.background.volume_shader != SHADER_NONE) { const VolumeStack new_entry = {OBJECT_NONE, kernel_data.background.volume_shader}; - integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry); + integrator_state_write_volume_stack(state, stack_index, new_entry); stack_index++; } @@ -121,7 +122,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS) } for (int i = 0; i < stack_index && need_add; ++i) { /* Don't add intersections twice. */ - VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); + VolumeStack entry = integrator_state_read_volume_stack(state, i); if (entry.object == stack_sd->object) { need_add = false; break; @@ -129,7 +130,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS) } if (need_add && stack_index < volume_stack_size - 1) { const VolumeStack new_entry = {stack_sd->object, stack_sd->shader}; - integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry); + integrator_state_write_volume_stack(state, stack_index, new_entry); ++stack_index; } } @@ -169,7 +170,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS) } for (int i = 0; i < stack_index && need_add; ++i) { /* Don't add intersections twice. */ - VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); + VolumeStack entry = integrator_state_read_volume_stack(state, i); if (entry.object == stack_sd->object) { need_add = false; break; @@ -177,7 +178,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS) } if (need_add) { const VolumeStack new_entry = {stack_sd->object, stack_sd->shader}; - integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry); + integrator_state_write_volume_stack(state, stack_index, new_entry); ++stack_index; } } @@ -196,7 +197,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS) /* Write terminator. */ const VolumeStack new_entry = {OBJECT_NONE, SHADER_NONE}; - integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry); + integrator_state_write_volume_stack(state, stack_index, new_entry); INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); diff --git a/intern/cycles/kernel/integrator/integrator_megakernel.h b/intern/cycles/kernel/integrator/integrator_megakernel.h index 91363ea1c7f..a3b2b1f9e90 100644 --- a/intern/cycles/kernel/integrator/integrator_megakernel.h +++ b/intern/cycles/kernel/integrator/integrator_megakernel.h @@ -29,7 +29,8 @@ CCL_NAMESPACE_BEGIN -ccl_device void integrator_megakernel(INTEGRATOR_STATE_ARGS, +ccl_device void integrator_megakernel(KernelGlobals kg, + IntegratorState state, ccl_global float *ccl_restrict render_buffer) { /* Each kernel indicates the next kernel to execute, so here we simply @@ -38,46 +39,46 @@ ccl_device void integrator_megakernel(INTEGRATOR_STATE_ARGS, * TODO: investigate if we can use device side enqueue for GPUs to avoid * having to compile this big kernel. */ while (true) { - if (INTEGRATOR_STATE(shadow_path, queued_kernel)) { + if (INTEGRATOR_STATE(state, shadow_path, queued_kernel)) { /* First handle any shadow paths before we potentially create more shadow paths. */ - switch (INTEGRATOR_STATE(shadow_path, queued_kernel)) { + switch (INTEGRATOR_STATE(state, shadow_path, queued_kernel)) { case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: - integrator_intersect_shadow(INTEGRATOR_STATE_PASS); + integrator_intersect_shadow(kg, state); break; case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: - integrator_shade_shadow(INTEGRATOR_STATE_PASS, render_buffer); + integrator_shade_shadow(kg, state, render_buffer); break; default: kernel_assert(0); break; } } - else if (INTEGRATOR_STATE(path, queued_kernel)) { + else if (INTEGRATOR_STATE(state, path, queued_kernel)) { /* Then handle regular path kernels. */ - switch (INTEGRATOR_STATE(path, queued_kernel)) { + switch (INTEGRATOR_STATE(state, path, queued_kernel)) { case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: - integrator_intersect_closest(INTEGRATOR_STATE_PASS); + integrator_intersect_closest(kg, state); break; case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND: - integrator_shade_background(INTEGRATOR_STATE_PASS, render_buffer); + integrator_shade_background(kg, state, render_buffer); break; case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: - integrator_shade_surface(INTEGRATOR_STATE_PASS, render_buffer); + integrator_shade_surface(kg, state, render_buffer); break; case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: - integrator_shade_volume(INTEGRATOR_STATE_PASS, render_buffer); + integrator_shade_volume(kg, state, render_buffer); break; case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: - integrator_shade_surface_raytrace(INTEGRATOR_STATE_PASS, render_buffer); + integrator_shade_surface_raytrace(kg, state, render_buffer); break; case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: - integrator_shade_light(INTEGRATOR_STATE_PASS, render_buffer); + integrator_shade_light(kg, state, render_buffer); break; case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: - integrator_intersect_subsurface(INTEGRATOR_STATE_PASS); + integrator_intersect_subsurface(kg, state); break; case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: - integrator_intersect_volume_stack(INTEGRATOR_STATE_PASS); + integrator_intersect_volume_stack(kg, state); break; default: kernel_assert(0); diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h index a898f3fb2fc..d98e53e6bbf 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_background.h +++ b/intern/cycles/kernel/integrator/integrator_shade_background.h @@ -23,12 +23,13 @@ CCL_NAMESPACE_BEGIN -ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS, +ccl_device float3 integrator_eval_background_shader(KernelGlobals kg, + IntegratorState state, ccl_global float *ccl_restrict render_buffer) { #ifdef __BACKGROUND__ const int shader = kernel_data.background.surface_shader; - const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); /* Use visibility flag to skip lights. */ if (shader & SHADER_EXCLUDE_ANY) { @@ -54,14 +55,14 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS, PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); shader_setup_from_background(kg, emission_sd, - INTEGRATOR_STATE(ray, P), - INTEGRATOR_STATE(ray, D), - INTEGRATOR_STATE(ray, time)); + INTEGRATOR_STATE(state, ray, P), + INTEGRATOR_STATE(state, ray, D), + INTEGRATOR_STATE(state, ray, time)); PROFILING_SHADER(emission_sd->object, emission_sd->shader); PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); shader_eval_surface( - INTEGRATOR_STATE_PASS, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); + kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); L = shader_background_eval(emission_sd); } @@ -69,11 +70,12 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS, /* Background MIS weights. */ # ifdef __BACKGROUND_MIS__ /* Check if background light exists or if we should skip pdf. */ - if (!(INTEGRATOR_STATE(path, flag) & PATH_RAY_MIS_SKIP) && kernel_data.background.use_mis) { - const float3 ray_P = INTEGRATOR_STATE(ray, P); - const float3 ray_D = INTEGRATOR_STATE(ray, D); - const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf); - const float mis_ray_t = INTEGRATOR_STATE(path, mis_ray_t); + if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) && + kernel_data.background.use_mis) { + const float3 ray_P = INTEGRATOR_STATE(state, ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); + const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); /* multiple importance sampling, get background light pdf for ray * direction, and compute weight with respect to BSDF pdf */ @@ -90,7 +92,8 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS, #endif } -ccl_device_inline void integrate_background(INTEGRATOR_STATE_ARGS, +ccl_device_inline void integrate_background(KernelGlobals kg, + IntegratorState state, ccl_global float *ccl_restrict render_buffer) { /* Accumulate transparency for transparent background. We can skip background @@ -99,11 +102,11 @@ ccl_device_inline void integrate_background(INTEGRATOR_STATE_ARGS, float transparent = 0.0f; const bool is_transparent_background_ray = kernel_data.background.transparent && - (INTEGRATOR_STATE(path, flag) & + (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TRANSPARENT_BACKGROUND); if (is_transparent_background_ray) { - transparent = average(INTEGRATOR_STATE(path, throughput)); + transparent = average(INTEGRATOR_STATE(state, path, throughput)); #ifdef __PASSES__ eval_background = (kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND)); @@ -113,32 +116,31 @@ ccl_device_inline void integrate_background(INTEGRATOR_STATE_ARGS, } /* Evaluate background shader. */ - float3 L = (eval_background) ? - integrator_eval_background_shader(INTEGRATOR_STATE_PASS, render_buffer) : - zero_float3(); + float3 L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) : + zero_float3(); /* When using the ao bounces approximation, adjust background * shader intensity with ao factor. */ - if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) { + if (path_state_ao_bounce(kg, state)) { L *= kernel_data.integrator.ao_bounces_factor; } /* Write to render buffer. */ - kernel_accum_background( - INTEGRATOR_STATE_PASS, L, transparent, is_transparent_background_ray, render_buffer); + kernel_accum_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer); } -ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS, +ccl_device_inline void integrate_distant_lights(KernelGlobals kg, + IntegratorState state, ccl_global float *ccl_restrict render_buffer) { - const float3 ray_D = INTEGRATOR_STATE(ray, D); - const float ray_time = INTEGRATOR_STATE(ray, time); + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float ray_time = INTEGRATOR_STATE(state, ray, time); LightSample ls ccl_optional_struct_init; for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) { if (light_sample_from_distant_ray(kg, ray_D, lamp, &ls)) { /* Use visibility flag to skip lights. */ #ifdef __PASSES__ - const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); if (ls.shader & SHADER_EXCLUDE_ANY) { if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || @@ -156,8 +158,7 @@ ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS, /* TODO: does aliasing like this break automatic SoA in CUDA? */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - float3 light_eval = light_sample_shader_eval( - INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time); + float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); if (is_zero(light_eval)) { return; } @@ -166,33 +167,34 @@ ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS, if (!(path_flag & PATH_RAY_MIS_SKIP)) { /* multiple importance sampling, get regular light pdf, * and compute weight with respect to BSDF pdf */ - const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf); + const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf); light_eval *= mis_weight; } /* Write to render buffer. */ - const float3 throughput = INTEGRATOR_STATE(path, throughput); - kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, light_eval, render_buffer); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_accum_emission(kg, state, throughput, light_eval, render_buffer); } } } -ccl_device void integrator_shade_background(INTEGRATOR_STATE_ARGS, +ccl_device void integrator_shade_background(KernelGlobals kg, + IntegratorState state, ccl_global float *ccl_restrict render_buffer) { PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP); /* TODO: unify these in a single loop to only have a single shader evaluation call. */ - integrate_distant_lights(INTEGRATOR_STATE_PASS, render_buffer); - integrate_background(INTEGRATOR_STATE_PASS, render_buffer); + integrate_distant_lights(kg, state, render_buffer); + integrate_background(kg, state, render_buffer); #ifdef __SHADOW_CATCHER__ - if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { - INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND; + if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND; - const int isect_prim = INTEGRATOR_STATE(isect, prim); - const int isect_type = INTEGRATOR_STATE(isect, type); + const int isect_prim = INTEGRATOR_STATE(state, isect, prim); + const int isect_type = INTEGRATOR_STATE(state, isect, type); const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim, isect_type); const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; diff --git a/intern/cycles/kernel/integrator/integrator_shade_light.h b/intern/cycles/kernel/integrator/integrator_shade_light.h index d8f8da63023..4f0f5a39756 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_light.h +++ b/intern/cycles/kernel/integrator/integrator_shade_light.h @@ -23,29 +23,30 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS, +ccl_device_inline void integrate_light(KernelGlobals kg, + IntegratorState state, ccl_global float *ccl_restrict render_buffer) { /* Setup light sample. */ Intersection isect ccl_optional_struct_init; - integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect); + integrator_state_read_isect(kg, state, &isect); - float3 ray_P = INTEGRATOR_STATE(ray, P); - const float3 ray_D = INTEGRATOR_STATE(ray, D); - const float ray_time = INTEGRATOR_STATE(ray, time); + float3 ray_P = INTEGRATOR_STATE(state, ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float ray_time = INTEGRATOR_STATE(state, ray, time); /* Advance ray beyond light. */ /* TODO: can we make this more numerically robust to avoid reintersecting the * same light in some cases? */ const float3 new_ray_P = ray_offset(ray_P + ray_D * isect.t, ray_D); - INTEGRATOR_STATE_WRITE(ray, P) = new_ray_P; - INTEGRATOR_STATE_WRITE(ray, t) -= isect.t; + INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P; + INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t; /* Set position to where the BSDF was sampled, for correct MIS PDF. */ - const float mis_ray_t = INTEGRATOR_STATE(path, mis_ray_t); + const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); ray_P -= ray_D * mis_ray_t; isect.t += mis_ray_t; - INTEGRATOR_STATE_WRITE(path, mis_ray_t) = mis_ray_t + isect.t; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = mis_ray_t + isect.t; LightSample ls ccl_optional_struct_init; const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls); @@ -56,7 +57,7 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS, /* Use visibility flag to skip lights. */ #ifdef __PASSES__ - const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); if (ls.shader & SHADER_EXCLUDE_ANY) { if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || @@ -73,7 +74,7 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS, /* TODO: does aliasing like this break automatic SoA in CUDA? */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - float3 light_eval = light_sample_shader_eval(INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time); + float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); if (is_zero(light_eval)) { return; } @@ -82,22 +83,23 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS, if (!(path_flag & PATH_RAY_MIS_SKIP)) { /* multiple importance sampling, get regular light pdf, * and compute weight with respect to BSDF pdf */ - const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf); + const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf); light_eval *= mis_weight; } /* Write to render buffer. */ - const float3 throughput = INTEGRATOR_STATE(path, throughput); - kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, light_eval, render_buffer); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_accum_emission(kg, state, throughput, light_eval, render_buffer); } -ccl_device void integrator_shade_light(INTEGRATOR_STATE_ARGS, +ccl_device void integrator_shade_light(KernelGlobals kg, + IntegratorState state, ccl_global float *ccl_restrict render_buffer) { PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP); - integrate_light(INTEGRATOR_STATE_PASS, render_buffer); + integrate_light(kg, state, render_buffer); /* TODO: we could get stuck in an infinite loop if there are precision issues * and the same light is hit again. @@ -105,8 +107,8 @@ ccl_device void integrator_shade_light(INTEGRATOR_STATE_ARGS, * As a workaround count this as a transparent bounce. It makes some sense * to interpret lights as transparent surfaces (and support making them opaque), * but this needs to be revisited. */ - uint32_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce) + 1; - INTEGRATOR_STATE_WRITE(path, transparent_bounce) = transparent_bounce; + uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h index 3857b522b25..cdbe85f6b8c 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h +++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h @@ -29,7 +29,9 @@ ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits) } #ifdef __TRANSPARENT_SHADOWS__ -ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_ARGS, const int hit) +ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, + IntegratorState state, + const int hit) { PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE); @@ -43,22 +45,22 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A /* Setup shader data at surface. */ Intersection isect ccl_optional_struct_init; - integrator_state_read_shadow_isect(INTEGRATOR_STATE_PASS, &isect, hit); + integrator_state_read_shadow_isect(state, &isect, hit); Ray ray ccl_optional_struct_init; - integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_read_shadow_ray(kg, state, &ray); shader_setup_from_ray(kg, shadow_sd, &ray, &isect); /* Evaluate shader. */ if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { shader_eval_surface( - INTEGRATOR_STATE_PASS, shadow_sd, NULL, PATH_RAY_SHADOW); + kg, state, shadow_sd, NULL, PATH_RAY_SHADOW); } # ifdef __VOLUME__ /* Exit/enter volume. */ - shadow_volume_stack_enter_exit(INTEGRATOR_STATE_PASS, shadow_sd); + shadow_volume_stack_enter_exit(kg, state, shadow_sd); # endif /* Compute transparency from closures. */ @@ -66,7 +68,8 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A } # ifdef __VOLUME__ -ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS, +ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, + IntegratorState state, const int hit, const int num_recorded_hits, ccl_private float3 *ccl_restrict @@ -80,26 +83,29 @@ ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS /* Setup shader data. */ Ray ray ccl_optional_struct_init; - integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_read_shadow_ray(kg, state, &ray); /* Modify ray position and length to match current segment. */ - const float start_t = (hit == 0) ? 0.0f : INTEGRATOR_STATE_ARRAY(shadow_isect, hit - 1, t); - const float end_t = (hit < num_recorded_hits) ? INTEGRATOR_STATE_ARRAY(shadow_isect, hit, t) : - ray.t; + const float start_t = (hit == 0) ? 0.0f : + INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t); + const float end_t = (hit < num_recorded_hits) ? + INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) : + ray.t; ray.P += start_t * ray.D; ray.t = end_t - start_t; shader_setup_from_volume(kg, shadow_sd, &ray); - const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) { - return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i); - }); + const float step_size = volume_stack_step_size( + kg, state, [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); }); - volume_shadow_heterogeneous(INTEGRATOR_STATE_PASS, &ray, shadow_sd, throughput, step_size); + volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size); } # endif -ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const int num_hits) +ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, + IntegratorState state, + const int num_hits) { /* Accumulate shadow for transparent surfaces. */ const int num_recorded_hits = min(num_hits, INTEGRATOR_SHADOW_ISECT_SIZE); @@ -108,29 +114,28 @@ ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const /* Volume shaders. */ if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) { # ifdef __VOLUME__ - if (!integrator_state_shadow_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) { - float3 throughput = INTEGRATOR_STATE(shadow_path, throughput); - integrate_transparent_volume_shadow( - INTEGRATOR_STATE_PASS, hit, num_recorded_hits, &throughput); + if (!integrator_state_shadow_volume_stack_is_empty(kg, state)) { + float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput); + integrate_transparent_volume_shadow(kg, state, hit, num_recorded_hits, &throughput); if (is_zero(throughput)) { return true; } - INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput; + INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput; } # endif } /* Surface shaders. */ if (hit < num_recorded_hits) { - const float3 shadow = integrate_transparent_surface_shadow(INTEGRATOR_STATE_PASS, hit); - const float3 throughput = INTEGRATOR_STATE(shadow_path, throughput) * shadow; + const float3 shadow = integrate_transparent_surface_shadow(kg, state, hit); + const float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow; if (is_zero(throughput)) { return true; } - INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput; - INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) += 1; + INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput; + INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) += 1; } /* Note we do not need to check max_transparent_bounce here, the number @@ -141,26 +146,27 @@ ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const if (shadow_intersections_has_remaining(num_hits)) { /* There are more hits that we could not recorded due to memory usage, * adjust ray to intersect again from the last hit. */ - const float last_hit_t = INTEGRATOR_STATE_ARRAY(shadow_isect, num_recorded_hits - 1, t); - const float3 ray_P = INTEGRATOR_STATE(shadow_ray, P); - const float3 ray_D = INTEGRATOR_STATE(shadow_ray, D); - INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D); - INTEGRATOR_STATE_WRITE(shadow_ray, t) -= last_hit_t; + const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t); + const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D); + INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D); + INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t; } return false; } #endif /* __TRANSPARENT_SHADOWS__ */ -ccl_device void integrator_shade_shadow(INTEGRATOR_STATE_ARGS, +ccl_device void integrator_shade_shadow(KernelGlobals kg, + IntegratorState state, ccl_global float *ccl_restrict render_buffer) { PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SETUP); - const int num_hits = INTEGRATOR_STATE(shadow_path, num_hits); + const int num_hits = INTEGRATOR_STATE(state, shadow_path, num_hits); #ifdef __TRANSPARENT_SHADOWS__ /* Evaluate transparent shadows. */ - const bool opaque = integrate_transparent_shadow(INTEGRATOR_STATE_PASS, num_hits); + const bool opaque = integrate_transparent_shadow(kg, state, num_hits); if (opaque) { INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; @@ -174,7 +180,7 @@ ccl_device void integrator_shade_shadow(INTEGRATOR_STATE_ARGS, return; } else { - kernel_accum_light(INTEGRATOR_STATE_PASS, render_buffer); + kernel_accum_light(kg, state, render_buffer); INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h index 0d739517592..bc97fde0e4a 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_surface.h +++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h @@ -28,33 +28,35 @@ CCL_NAMESPACE_BEGIN -ccl_device_forceinline void integrate_surface_shader_setup(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg, + ConstIntegratorState state, ccl_private ShaderData *sd) { Intersection isect ccl_optional_struct_init; - integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect); + integrator_state_read_isect(kg, state, &isect); Ray ray ccl_optional_struct_init; - integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_read_ray(kg, state, &ray); shader_setup_from_ray(kg, sd, &ray, &isect); } #ifdef __HOLDOUT__ -ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg, + ConstIntegratorState state, ccl_private ShaderData *sd, ccl_global float *ccl_restrict render_buffer) { /* Write holdout transparency to render buffer and stop if fully holdout. */ - const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { const float3 holdout_weight = shader_holdout_apply(kg, sd); if (kernel_data.background.transparent) { - const float3 throughput = INTEGRATOR_STATE(path, throughput); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); const float transparent = average(holdout_weight * throughput); - kernel_accum_transparent(INTEGRATOR_STATE_PASS, transparent, render_buffer); + kernel_accum_transparent(kg, state, transparent, render_buffer); } if (isequal_float3(holdout_weight, one_float3())) { return false; @@ -66,12 +68,13 @@ ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARG #endif /* __HOLDOUT__ */ #ifdef __EMISSION__ -ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, + ConstIntegratorState state, ccl_private const ShaderData *sd, ccl_global float *ccl_restrict render_buffer) { - const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); /* Evaluate emissive closure. */ float3 L = shader_emissive_eval(sd); @@ -83,8 +86,8 @@ ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_AR if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) # endif { - const float bsdf_pdf = INTEGRATOR_STATE(path, mis_ray_pdf); - const float t = sd->ray_length + INTEGRATOR_STATE(path, mis_ray_t); + const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); + const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t); /* Multiple importance sampling, get triangle light pdf, * and compute weight with respect to BSDF pdf. */ @@ -94,15 +97,16 @@ ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_AR L *= mis_weight; } - const float3 throughput = INTEGRATOR_STATE(path, throughput); - kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, L, render_buffer); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_accum_emission(kg, state, throughput, L, render_buffer); } #endif /* __EMISSION__ */ #ifdef __EMISSION__ /* Path tracing: sample point on light and evaluate light shader, then * queue shadow ray to be traced. */ -ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS, +ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, + IntegratorState state, ccl_private ShaderData *sd, ccl_private const RNGState *rng_state) { @@ -114,8 +118,8 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS /* Sample position on a light. */ LightSample ls ccl_optional_struct_init; { - const int path_flag = INTEGRATOR_STATE(path, flag); - const uint bounce = INTEGRATOR_STATE(path, bounce); + const int path_flag = INTEGRATOR_STATE(state, path, flag); + const uint bounce = INTEGRATOR_STATE(state, path, bounce); float light_u, light_v; path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); @@ -135,8 +139,7 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS * non-constant light sources. */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - const float3 light_eval = light_sample_shader_eval( - INTEGRATOR_STATE_PASS, emission_sd, &ls, sd->time); + const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time); if (is_zero(light_eval)) { return; } @@ -165,39 +168,39 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS const bool is_light = light_sample_is_light(&ls); /* Copy volume stack and enter/exit volume. */ - integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_PASS); + integrator_state_copy_volume_stack_to_shadow(kg, state); if (is_transmission) { # ifdef __VOLUME__ - shadow_volume_stack_enter_exit(INTEGRATOR_STATE_PASS, sd); + shadow_volume_stack_enter_exit(kg, state, sd); # endif } /* Write shadow ray and associated state to global memory. */ - integrator_state_write_shadow_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_write_shadow_ray(kg, state, &ray); /* Copy state from main path to shadow path. */ - const uint16_t bounce = INTEGRATOR_STATE(path, bounce); - const uint16_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce); - uint32_t shadow_flag = INTEGRATOR_STATE(path, flag); + const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); + const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); + uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; shadow_flag |= (is_transmission) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS; - const float3 throughput = INTEGRATOR_STATE(path, throughput) * bsdf_eval_sum(&bsdf_eval); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval); if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { const float3 diffuse_glossy_ratio = (bounce == 0) ? bsdf_eval_diffuse_glossy_ratio(&bsdf_eval) : - INTEGRATOR_STATE(path, diffuse_glossy_ratio); - INTEGRATOR_STATE_WRITE(shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; + INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); + INTEGRATOR_STATE_WRITE(state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; } - INTEGRATOR_STATE_WRITE(shadow_path, flag) = shadow_flag; - INTEGRATOR_STATE_WRITE(shadow_path, bounce) = bounce; - INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) = transparent_bounce; - INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput; + INTEGRATOR_STATE_WRITE(state, shadow_path, flag) = shadow_flag; + INTEGRATOR_STATE_WRITE(state, shadow_path, bounce) = bounce; + INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) = transparent_bounce; + INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput; if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) { - INTEGRATOR_STATE_WRITE(shadow_path, unshadowed_throughput) = throughput; + INTEGRATOR_STATE_WRITE(state, shadow_path, unshadowed_throughput) = throughput; } /* Branch off shadow kernel. */ @@ -207,7 +210,10 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS /* Path tracing: bounce off or through surface with new direction. */ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( - INTEGRATOR_STATE_ARGS, ccl_private ShaderData *sd, ccl_private const RNGState *rng_state) + KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + ccl_private const RNGState *rng_state) { /* Sample BSDF or BSSRDF. */ if (!(sd->flag & (SD_BSDF | SD_BSSRDF))) { @@ -221,7 +227,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( #ifdef __SUBSURFACE__ /* BSSRDF closure, we schedule subsurface intersection kernel. */ if (CLOSURE_IS_BSSRDF(sc->type)) { - return subsurface_bounce(INTEGRATOR_STATE_PASS, sd, sc); + return subsurface_bounce(kg, state, sd, sc); } #endif @@ -240,63 +246,64 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( } /* Setup ray. Note that clipping works through transparent bounces. */ - INTEGRATOR_STATE_WRITE(ray, P) = ray_offset(sd->P, (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng); - INTEGRATOR_STATE_WRITE(ray, D) = normalize(bsdf_omega_in); - INTEGRATOR_STATE_WRITE(ray, t) = (label & LABEL_TRANSPARENT) ? - INTEGRATOR_STATE(ray, t) - sd->ray_length : - FLT_MAX; + INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P, + (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng); + INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in); + INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ? + INTEGRATOR_STATE(state, ray, t) - sd->ray_length : + FLT_MAX; #ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(ray, dD) = differential_make_compact(bsdf_domega_in); + INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); + INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in); #endif /* Update throughput. */ - float3 throughput = INTEGRATOR_STATE(path, throughput); + float3 throughput = INTEGRATOR_STATE(state, path, throughput); throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf; - INTEGRATOR_STATE_WRITE(path, throughput) = throughput; + INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - if (INTEGRATOR_STATE(path, bounce) == 0) { - INTEGRATOR_STATE_WRITE(path, - diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio(&bsdf_eval); + if (INTEGRATOR_STATE(state, path, bounce) == 0) { + INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio( + &bsdf_eval); } } /* Update path state */ if (label & LABEL_TRANSPARENT) { - INTEGRATOR_STATE_WRITE(path, mis_ray_t) += sd->ray_length; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; } else { - INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = bsdf_pdf; - INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f; - INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = fminf(bsdf_pdf, - INTEGRATOR_STATE(path, min_ray_pdf)); + INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; + INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( + bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); } - path_state_next(INTEGRATOR_STATE_PASS, label); + path_state_next(kg, state, label); return label; } #ifdef __VOLUME__ -ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STATE_ARGS, +ccl_device_forceinline bool integrate_surface_volume_only_bounce(IntegratorState state, ccl_private ShaderData *sd) { - if (!path_state_volume_next(INTEGRATOR_STATE_PASS)) { + if (!path_state_volume_next(state)) { return LABEL_NONE; } /* Setup ray position, direction stays unchanged. */ - INTEGRATOR_STATE_WRITE(ray, P) = ray_offset(sd->P, -sd->Ng); + INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P, -sd->Ng); /* Clipping works through transparent. */ - INTEGRATOR_STATE_WRITE(ray, t) -= sd->ray_length; + INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length; # ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP); + INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); # endif - INTEGRATOR_STATE_WRITE(path, mis_ray_t) += sd->ray_length; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; return LABEL_TRANSMIT | LABEL_TRANSPARENT; } @@ -304,17 +311,19 @@ ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STAT #if defined(__AO__) && defined(__SHADER_RAYTRACE__) ccl_device_forceinline void integrate_surface_ao_pass( - INTEGRATOR_STATE_CONST_ARGS, + KernelGlobals kg, + ConstIntegratorState state, ccl_private const ShaderData *ccl_restrict sd, ccl_private const RNGState *ccl_restrict rng_state, ccl_global float *ccl_restrict render_buffer) { # ifdef __KERNEL_OPTIX__ - optixDirectCall(2, INTEGRATOR_STATE_PASS, sd, rng_state, render_buffer); + optixDirectCall(2, kg, state, sd, rng_state, render_buffer); } extern "C" __device__ void __direct_callable__ao_pass( - INTEGRATOR_STATE_CONST_ARGS, + KernelGlobals kg, + ConstIntegratorState state, ccl_private const ShaderData *ccl_restrict sd, ccl_private const RNGState *ccl_restrict rng_state, ccl_global float *ccl_restrict render_buffer) @@ -339,9 +348,8 @@ extern "C" __device__ void __direct_callable__ao_pass( Intersection isect ccl_optional_struct_init; if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) { - ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS, - render_buffer); - const float3 throughput = INTEGRATOR_STATE(path, throughput); + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, throughput); } } @@ -349,7 +357,8 @@ extern "C" __device__ void __direct_callable__ao_pass( #endif /* defined(__AO__) && defined(__SHADER_RAYTRACE__) */ template -ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS, +ccl_device bool integrate_surface(KernelGlobals kg, + IntegratorState state, ccl_global float *ccl_restrict render_buffer) { @@ -357,7 +366,7 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS, /* Setup shader data. */ ShaderData sd; - integrate_surface_shader_setup(INTEGRATOR_STATE_PASS, &sd); + integrate_surface_shader_setup(kg, state, &sd); PROFILING_SHADER(sd.object, sd.shader); int continue_path_label = 0; @@ -366,7 +375,7 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS, #ifdef __VOLUME__ if (!(sd.flag & SD_HAS_ONLY_VOLUME)) { #endif - const int path_flag = INTEGRATOR_STATE(path, flag); + const int path_flag = INTEGRATOR_STATE(state, path, flag); #ifdef __SUBSURFACE__ /* Can skip shader evaluation for BSSRDF exit point without bump mapping. */ @@ -375,23 +384,23 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS, { /* Evaluate shader. */ PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL); - shader_eval_surface(INTEGRATOR_STATE_PASS, &sd, render_buffer, path_flag); + shader_eval_surface(kg, state, &sd, render_buffer, path_flag); } #ifdef __SUBSURFACE__ if (path_flag & PATH_RAY_SUBSURFACE) { /* When coming from inside subsurface scattering, setup a diffuse * closure to perform lighting at the exit point. */ - subsurface_shader_data_setup(INTEGRATOR_STATE_PASS, &sd, path_flag); - INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SUBSURFACE; + subsurface_shader_data_setup(kg, state, &sd, path_flag); + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SUBSURFACE; } #endif - shader_prepare_surface_closures(INTEGRATOR_STATE_PASS, &sd); + shader_prepare_surface_closures(kg, state, &sd); #ifdef __HOLDOUT__ /* Evaluate holdout. */ - if (!integrate_surface_holdout(INTEGRATOR_STATE_PASS, &sd, render_buffer)) { + if (!integrate_surface_holdout(kg, state, &sd, render_buffer)) { return false; } #endif @@ -399,19 +408,19 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS, #ifdef __EMISSION__ /* Write emission. */ if (sd.flag & SD_EMISSION) { - integrate_surface_emission(INTEGRATOR_STATE_PASS, &sd, render_buffer); + integrate_surface_emission(kg, state, &sd, render_buffer); } #endif #ifdef __PASSES__ /* Write render passes. */ PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES); - kernel_write_data_passes(INTEGRATOR_STATE_PASS, &sd, render_buffer); + kernel_write_data_passes(kg, state, &sd, render_buffer); #endif /* Load random number state. */ RNGState rng_state; - path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + path_state_rng_load(state, &rng_state); /* Perform path termination. Most paths have already been terminated in * the intersect_closest kernel, this is just for emission and for dividing @@ -421,52 +430,50 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS, if (!(path_flag & PATH_RAY_SUBSURFACE)) { const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ? 0.0f : - path_state_continuation_probability(INTEGRATOR_STATE_PASS, - path_flag); + path_state_continuation_probability(kg, state, path_flag); if (probability == 0.0f) { return false; } else if (probability != 1.0f) { - INTEGRATOR_STATE_WRITE(path, throughput) /= probability; + INTEGRATOR_STATE_WRITE(state, path, throughput) /= probability; } } #ifdef __DENOISING_FEATURES__ - kernel_write_denoising_features_surface(INTEGRATOR_STATE_PASS, &sd, render_buffer); + kernel_write_denoising_features_surface(kg, state, &sd, render_buffer); #endif #ifdef __SHADOW_CATCHER__ - kernel_write_shadow_catcher_bounce_data(INTEGRATOR_STATE_PASS, &sd, render_buffer); + kernel_write_shadow_catcher_bounce_data(kg, state, &sd, render_buffer); #endif /* Direct light. */ PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT); - integrate_surface_direct_light(INTEGRATOR_STATE_PASS, &sd, &rng_state); + integrate_surface_direct_light(kg, state, &sd, &rng_state); #if defined(__AO__) && defined(__SHADER_RAYTRACE__) /* Ambient occlusion pass. */ if (node_feature_mask & KERNEL_FEATURE_NODE_RAYTRACE) { if ((kernel_data.film.pass_ao != PASS_UNUSED) && - (INTEGRATOR_STATE(path, flag) & PATH_RAY_CAMERA)) { + (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_CAMERA)) { PROFILING_EVENT(PROFILING_SHADE_SURFACE_AO); - integrate_surface_ao_pass(INTEGRATOR_STATE_PASS, &sd, &rng_state, render_buffer); + integrate_surface_ao_pass(kg, state, &sd, &rng_state, render_buffer); } } #endif PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT); - continue_path_label = integrate_surface_bsdf_bssrdf_bounce( - INTEGRATOR_STATE_PASS, &sd, &rng_state); + continue_path_label = integrate_surface_bsdf_bssrdf_bounce(kg, state, &sd, &rng_state); #ifdef __VOLUME__ } else { PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT); - continue_path_label = integrate_surface_volume_only_bounce(INTEGRATOR_STATE_PASS, &sd); + continue_path_label = integrate_surface_volume_only_bounce(state, &sd); } if (continue_path_label & LABEL_TRANSMIT) { /* Enter/Exit volume. */ - volume_stack_enter_exit(INTEGRATOR_STATE_PASS, &sd); + volume_stack_enter_exit(kg, state, &sd); } #endif @@ -475,15 +482,16 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS, template -ccl_device_forceinline void integrator_shade_surface(INTEGRATOR_STATE_ARGS, +ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg, + IntegratorState state, ccl_global float *ccl_restrict render_buffer) { - if (integrate_surface(INTEGRATOR_STATE_PASS, render_buffer)) { - if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SUBSURFACE) { + if (integrate_surface(kg, state, render_buffer)) { + if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) { INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); } else { - kernel_assert(INTEGRATOR_STATE(ray, t) != 0.0f); + kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f); INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } } @@ -493,11 +501,11 @@ ccl_device_forceinline void integrator_shade_surface(INTEGRATOR_STATE_ARGS, } ccl_device_forceinline void integrator_shade_surface_raytrace( - INTEGRATOR_STATE_ARGS, ccl_global float *ccl_restrict render_buffer) + KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) { integrator_shade_surface(INTEGRATOR_STATE_PASS, - render_buffer); + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE>( + kg, state, render_buffer); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h index 72c609751f7..e465a993041 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_volume.h +++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h @@ -70,12 +70,13 @@ typedef struct VolumeShaderCoefficients { } VolumeShaderCoefficients; /* Evaluate shader to get extinction coefficient at P. */ -ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS, +ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg, + IntegratorState state, ccl_private ShaderData *ccl_restrict sd, ccl_private float3 *ccl_restrict extinction) { - shader_eval_volume(INTEGRATOR_STATE_PASS, sd, PATH_RAY_SHADOW, [=](const int i) { - return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i); + shader_eval_volume(kg, state, sd, PATH_RAY_SHADOW, [=](const int i) { + return integrator_state_read_shadow_volume_stack(state, i); }); if (!(sd->flag & SD_EXTINCTION)) { @@ -88,13 +89,14 @@ ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS, } /* Evaluate shader to get absorption, scattering and emission at P. */ -ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS, +ccl_device_inline bool volume_shader_sample(KernelGlobals kg, + IntegratorState state, ccl_private ShaderData *ccl_restrict sd, ccl_private VolumeShaderCoefficients *coeff) { - const int path_flag = INTEGRATOR_STATE(path, flag); - shader_eval_volume(INTEGRATOR_STATE_PASS, sd, path_flag, [=](const int i) { - return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); + const int path_flag = INTEGRATOR_STATE(state, path, flag); + shader_eval_volume(kg, state, sd, path_flag, [=](const int i) { + return integrator_state_read_volume_stack(state, i); }); if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) { @@ -123,7 +125,7 @@ ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS, return true; } -ccl_device_forceinline void volume_step_init(ccl_global const KernelGlobals *kg, +ccl_device_forceinline void volume_step_init(KernelGlobals kg, ccl_private const RNGState *rng_state, const float object_step_size, float t, @@ -169,14 +171,14 @@ ccl_device_forceinline void volume_step_init(ccl_global const KernelGlobals *kg, # if 0 /* homogeneous volume: assume shader evaluation at the starts gives * the extinction coefficient for the entire line segment */ -ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS, +ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState state, ccl_private Ray *ccl_restrict ray, ccl_private ShaderData *ccl_restrict sd, ccl_global float3 *ccl_restrict throughput) { float3 sigma_t = zero_float3(); - if (shadow_volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &sigma_t)) { + if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) { *throughput *= volume_color_transmittance(sigma_t, ray->t); } } @@ -184,7 +186,8 @@ ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS, /* heterogeneous volume: integrate stepping through the volume until we * reach the end, get absorbed entirely, or run out of iterations */ -ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS, +ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, + IntegratorState state, ccl_private Ray *ccl_restrict ray, ccl_private ShaderData *ccl_restrict sd, ccl_private float3 *ccl_restrict throughput, @@ -192,7 +195,7 @@ ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS, { /* Load random number state. */ RNGState rng_state; - shadow_path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + shadow_path_state_rng_load(state, &rng_state); float3 tp = *throughput; @@ -227,7 +230,7 @@ ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS, /* compute attenuation over segment */ sd->P = new_P; - if (shadow_volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &sigma_t)) { + if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) { /* Compute `expf()` only for every Nth step, to save some calculations * because `exp(a)*exp(b) = exp(a+b)`, also do a quick #VOLUME_THROUGHPUT_EPSILON * check then. */ @@ -510,7 +513,8 @@ ccl_device_forceinline void volume_integrate_step_scattering( * iterations. this does probabilistically scatter or get transmitted through * for path tracing where we don't want to branch. */ ccl_device_forceinline void volume_integrate_heterogeneous( - INTEGRATOR_STATE_ARGS, + KernelGlobals kg, + IntegratorState state, ccl_private Ray *ccl_restrict ray, ccl_private ShaderData *ccl_restrict sd, ccl_private const RNGState *rng_state, @@ -560,7 +564,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( vstate.distance_pdf = 1.0f; /* Initialize volume integration result. */ - const float3 throughput = INTEGRATOR_STATE(path, throughput); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); result.direct_throughput = throughput; result.indirect_throughput = throughput; @@ -571,7 +575,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( } # ifdef __DENOISING_FEATURES__ - const bool write_denoising_features = (INTEGRATOR_STATE(path, flag) & + const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES); float3 accum_albedo = zero_float3(); # endif @@ -585,7 +589,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* compute segment */ VolumeShaderCoefficients coeff ccl_optional_struct_init; - if (volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &coeff)) { + if (volume_shader_sample(kg, state, sd, &coeff)) { const int closure_flag = sd->flag; /* Evaluate transmittance over segment. */ @@ -654,15 +658,14 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Write accumulated emission. */ if (!is_zero(accum_emission)) { - kernel_accum_emission( - INTEGRATOR_STATE_PASS, result.indirect_throughput, accum_emission, render_buffer); + kernel_accum_emission(kg, state, result.indirect_throughput, accum_emission, render_buffer); } # ifdef __DENOISING_FEATURES__ /* Write denoising features. */ if (write_denoising_features) { kernel_write_denoising_features_volume( - INTEGRATOR_STATE_PASS, accum_albedo, result.indirect_scatter, render_buffer); + kg, state, accum_albedo, result.indirect_scatter, render_buffer); } # endif /* __DENOISING_FEATURES__ */ } @@ -671,7 +674,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Path tracing: sample point on light and evaluate light shader, then * queue shadow ray to be traced. */ ccl_device_forceinline bool integrate_volume_sample_light( - INTEGRATOR_STATE_ARGS, + KernelGlobals kg, + IntegratorState state, ccl_private const ShaderData *ccl_restrict sd, ccl_private const RNGState *ccl_restrict rng_state, ccl_private LightSample *ccl_restrict ls) @@ -682,8 +686,8 @@ ccl_device_forceinline bool integrate_volume_sample_light( } /* Sample position on a light. */ - const int path_flag = INTEGRATOR_STATE(path, flag); - const uint bounce = INTEGRATOR_STATE(path, bounce); + const int path_flag = INTEGRATOR_STATE(state, path, flag); + const uint bounce = INTEGRATOR_STATE(state, path, bounce); float light_u, light_v; path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); @@ -700,7 +704,8 @@ ccl_device_forceinline bool integrate_volume_sample_light( /* Path tracing: sample point on light and evaluate light shader, then * queue shadow ray to be traced. */ ccl_device_forceinline void integrate_volume_direct_light( - INTEGRATOR_STATE_ARGS, + KernelGlobals kg, + IntegratorState state, ccl_private const ShaderData *ccl_restrict sd, ccl_private const RNGState *ccl_restrict rng_state, const float3 P, @@ -720,8 +725,8 @@ ccl_device_forceinline void integrate_volume_direct_light( * TODO: decorrelate random numbers and use light_sample_new_position to * avoid resampling the CDF. */ { - const int path_flag = INTEGRATOR_STATE(path, flag); - const uint bounce = INTEGRATOR_STATE(path, bounce); + const int path_flag = INTEGRATOR_STATE(state, path, flag); + const uint bounce = INTEGRATOR_STATE(state, path, bounce); float light_u, light_v; path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); @@ -743,8 +748,7 @@ ccl_device_forceinline void integrate_volume_direct_light( * non-constant light sources. */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - const float3 light_eval = light_sample_shader_eval( - INTEGRATOR_STATE_PASS, emission_sd, ls, sd->time); + const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time); if (is_zero(light_eval)) { return; } @@ -772,12 +776,12 @@ ccl_device_forceinline void integrate_volume_direct_light( const bool is_light = light_sample_is_light(ls); /* Write shadow ray and associated state to global memory. */ - integrator_state_write_shadow_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_write_shadow_ray(kg, state, &ray); /* Copy state from main path to shadow path. */ - const uint16_t bounce = INTEGRATOR_STATE(path, bounce); - const uint16_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce); - uint32_t shadow_flag = INTEGRATOR_STATE(path, flag); + const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); + const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); + uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; shadow_flag |= PATH_RAY_VOLUME_PASS; const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval); @@ -785,20 +789,20 @@ ccl_device_forceinline void integrate_volume_direct_light( if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { const float3 diffuse_glossy_ratio = (bounce == 0) ? one_float3() : - INTEGRATOR_STATE(path, diffuse_glossy_ratio); - INTEGRATOR_STATE_WRITE(shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; + INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); + INTEGRATOR_STATE_WRITE(state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; } - INTEGRATOR_STATE_WRITE(shadow_path, flag) = shadow_flag; - INTEGRATOR_STATE_WRITE(shadow_path, bounce) = bounce; - INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) = transparent_bounce; - INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput_phase; + INTEGRATOR_STATE_WRITE(state, shadow_path, flag) = shadow_flag; + INTEGRATOR_STATE_WRITE(state, shadow_path, bounce) = bounce; + INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) = transparent_bounce; + INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput_phase; if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) { - INTEGRATOR_STATE_WRITE(shadow_path, unshadowed_throughput) = throughput; + INTEGRATOR_STATE_WRITE(state, shadow_path, unshadowed_throughput) = throughput; } - integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_PASS); + integrator_state_copy_volume_stack_to_shadow(kg, state); /* Branch off shadow kernel. */ INTEGRATOR_SHADOW_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); @@ -807,7 +811,8 @@ ccl_device_forceinline void integrate_volume_direct_light( /* Path tracing: scatter in new direction using phase function */ ccl_device_forceinline bool integrate_volume_phase_scatter( - INTEGRATOR_STATE_ARGS, + KernelGlobals kg, + IntegratorState state, ccl_private ShaderData *sd, ccl_private const RNGState *rng_state, ccl_private const ShaderVolumePhases *phases) @@ -838,31 +843,31 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( } /* Setup ray. */ - INTEGRATOR_STATE_WRITE(ray, P) = sd->P; - INTEGRATOR_STATE_WRITE(ray, D) = normalize(phase_omega_in); - INTEGRATOR_STATE_WRITE(ray, t) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; + INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in); + INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; # ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(ray, dD) = differential_make_compact(phase_domega_in); + INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); + INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in); # endif /* Update throughput. */ - const float3 throughput = INTEGRATOR_STATE(path, throughput); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf; - INTEGRATOR_STATE_WRITE(path, throughput) = throughput_phase; + INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase; if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - INTEGRATOR_STATE_WRITE(path, diffuse_glossy_ratio) = one_float3(); + INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3(); } /* Update path state */ - INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = phase_pdf; - INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f; - INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = fminf(phase_pdf, - INTEGRATOR_STATE(path, min_ray_pdf)); + INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; + INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( + phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); - path_state_next(INTEGRATOR_STATE_PASS, label); + path_state_next(kg, state, label); return true; } @@ -870,7 +875,8 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( * ray, with the assumption that there are no surfaces blocking light * between the endpoints. distance sampling is used to decide if we will * scatter or not. */ -ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS, +ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, + IntegratorState state, ccl_private Ray *ccl_restrict ray, ccl_global float *ccl_restrict render_buffer) { @@ -879,29 +885,29 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS, /* Load random number state. */ RNGState rng_state; - path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + path_state_rng_load(state, &rng_state); /* Sample light ahead of volume stepping, for equiangular sampling. */ /* TODO: distant lights are ignored now, but could instead use even distribution. */ LightSample ls ccl_optional_struct_init; - const bool need_light_sample = !(INTEGRATOR_STATE(path, flag) & PATH_RAY_TERMINATE); + const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE); const bool have_equiangular_sample = need_light_sample && integrate_volume_sample_light( - INTEGRATOR_STATE_PASS, &sd, &rng_state, &ls) && + kg, state, &sd, &rng_state, &ls) && (ls.t != FLT_MAX); VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ? - volume_stack_sample_method(INTEGRATOR_STATE_PASS) : + volume_stack_sample_method(kg, state) : VOLUME_SAMPLE_DISTANCE; /* Step through volume. */ - const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) { - return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); - }); + const float step_size = volume_stack_step_size( + kg, state, [=](const int i) { return integrator_state_read_volume_stack(state, i); }); /* TODO: expensive to zero closures? */ VolumeIntegrateResult result = {}; - volume_integrate_heterogeneous(INTEGRATOR_STATE_PASS, + volume_integrate_heterogeneous(kg, + state, ray, &sd, &rng_state, @@ -914,11 +920,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS, /* Perform path termination. The intersect_closest will have already marked this path * to be terminated. That will shading evaluating to leave out any scattering closures, * but emission and absorption are still handled for multiple importance sampling. */ - const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const float probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ? 0.0f : - path_state_continuation_probability(INTEGRATOR_STATE_PASS, - path_flag); + path_state_continuation_probability(kg, state, path_flag); if (probability == 0.0f) { return VOLUME_PATH_MISSED; } @@ -927,7 +932,8 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS, if (result.direct_scatter) { const float3 direct_P = ray->P + result.direct_t * ray->D; result.direct_throughput /= probability; - integrate_volume_direct_light(INTEGRATOR_STATE_PASS, + integrate_volume_direct_light(kg, + state, &sd, &rng_state, direct_P, @@ -943,13 +949,12 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS, if (result.indirect_scatter) { result.indirect_throughput /= probability; } - INTEGRATOR_STATE_WRITE(path, throughput) = result.indirect_throughput; + INTEGRATOR_STATE_WRITE(state, path, throughput) = result.indirect_throughput; if (result.indirect_scatter) { sd.P = ray->P + result.indirect_t * ray->D; - if (integrate_volume_phase_scatter( - INTEGRATOR_STATE_PASS, &sd, &rng_state, &result.indirect_phases)) { + if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) { return VOLUME_PATH_SCATTERED; } else { @@ -963,7 +968,8 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS, #endif -ccl_device void integrator_shade_volume(INTEGRATOR_STATE_ARGS, +ccl_device void integrator_shade_volume(KernelGlobals kg, + IntegratorState state, ccl_global float *ccl_restrict render_buffer) { PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_SETUP); @@ -971,20 +977,20 @@ ccl_device void integrator_shade_volume(INTEGRATOR_STATE_ARGS, #ifdef __VOLUME__ /* Setup shader data. */ Ray ray ccl_optional_struct_init; - integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_read_ray(kg, state, &ray); Intersection isect ccl_optional_struct_init; - integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect); + integrator_state_read_isect(kg, state, &isect); /* Set ray length to current segment. */ ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX; /* Clean volume stack for background rays. */ if (isect.prim == PRIM_NONE) { - volume_stack_clean(INTEGRATOR_STATE_PASS); + volume_stack_clean(kg, state); } - VolumeIntegrateEvent event = volume_integrate(INTEGRATOR_STATE_PASS, &ray, render_buffer); + VolumeIntegrateEvent event = volume_integrate(kg, state, &ray, render_buffer); if (event == VOLUME_PATH_SCATTERED) { /* Queue intersect_closest kernel. */ @@ -1015,7 +1021,7 @@ ccl_device void integrator_shade_volume(INTEGRATOR_STATE_ARGS, const int flags = kernel_tex_fetch(__shaders, shader).flags; integrator_intersect_shader_next_kernel( - INTEGRATOR_STATE_PASS, &isect, shader, flags); + kg, state, &isect, shader, flags); return; } } diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h index 517e2891769..3aab456a021 100644 --- a/intern/cycles/kernel/integrator/integrator_state.h +++ b/intern/cycles/kernel/integrator/integrator_state.h @@ -27,24 +27,17 @@ * to every kernel, or the pointer may exist at program scope or in constant memory. To abstract * these differences between devices and experiment with different layouts, macros are used. * - * INTEGRATOR_STATE_ARGS: prepend to argument definitions for every function that accesses - * path state. - * INTEGRATOR_STATE_CONST_ARGS: same as INTEGRATOR_STATE_ARGS, when state is read-only - * INTEGRATOR_STATE_PASS: use to pass along state to other functions access it. + * Use IntegratorState to pass a reference to the integrator state for the current path. These are + * defined differently on the CPU and GPU. Use ConstIntegratorState instead of const + * IntegratorState for passing state as read-only, to avoid oddities in typedef behavior. * - * INTEGRATOR_STATE(x, y): read nested struct member x.y of IntegratorState - * INTEGRATOR_STATE_WRITE(x, y): write to nested struct member x.y of IntegratorState + * INTEGRATOR_STATE(state, x, y): read nested struct member x.y of IntegratorState + * INTEGRATOR_STATE_WRITE(state, x, y): write to nested struct member x.y of IntegratorState * - * INTEGRATOR_STATE_ARRAY(x, index, y): read x[index].y - * INTEGRATOR_STATE_ARRAY_WRITE(x, index, y): write x[index].y + * INTEGRATOR_STATE_ARRAY(state, x, index, y): read x[index].y + * INTEGRATOR_STATE_ARRAY_WRITE(state, x, index, y): write x[index].y * - * INTEGRATOR_STATE_COPY(to_x, from_x): copy contents of one nested struct to another - * - * INTEGRATOR_STATE_IS_NULL: test if any integrator state is available, for shader evaluation - * INTEGRATOR_STATE_PASS_NULL: use to pass empty state to other functions. - * - * NOTE: if we end up with a device that passes no arguments, the leading comma will be a problem. - * Can solve it with more macros if we encounter it, but rather ugly so postpone for now. + * INTEGRATOR_STATE_NULL: use to pass empty state to other functions. */ #include "kernel/kernel_types.h" @@ -146,50 +139,36 @@ typedef struct IntegratorStateGPU { /* Scalar access on CPU. */ typedef IntegratorStateCPU *ccl_restrict IntegratorState; +typedef const IntegratorStateCPU *ccl_restrict ConstIntegratorState; -# define INTEGRATOR_STATE_ARGS \ - ccl_attr_maybe_unused const KernelGlobals *ccl_restrict kg, \ - IntegratorStateCPU *ccl_restrict state -# define INTEGRATOR_STATE_CONST_ARGS \ - ccl_attr_maybe_unused const KernelGlobals *ccl_restrict kg, \ - const IntegratorStateCPU *ccl_restrict state -# define INTEGRATOR_STATE_PASS kg, state - -# define INTEGRATOR_STATE_PASS_NULL kg, NULL -# define INTEGRATOR_STATE_IS_NULL (state == NULL) +# define INTEGRATOR_STATE_NULL nullptr -# define INTEGRATOR_STATE(nested_struct, member) \ - (((const IntegratorStateCPU *)state)->nested_struct.member) -# define INTEGRATOR_STATE_WRITE(nested_struct, member) (state->nested_struct.member) +# define INTEGRATOR_STATE(state, nested_struct, member) ((state)->nested_struct.member) +# define INTEGRATOR_STATE_WRITE(state, nested_struct, member) ((state)->nested_struct.member) -# define INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) \ - (((const IntegratorStateCPU *)state)->nested_struct[array_index].member) -# define INTEGRATOR_STATE_ARRAY_WRITE(nested_struct, array_index, member) \ +# define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \ + ((state)->nested_struct[array_index].member) +# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ ((state)->nested_struct[array_index].member) #else /* __KERNEL_CPU__ */ /* Array access on GPU with Structure-of-Arrays. */ -typedef int IntegratorState; - -# define INTEGRATOR_STATE_ARGS \ - ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state -# define INTEGRATOR_STATE_CONST_ARGS \ - ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state -# define INTEGRATOR_STATE_PASS kg, state +typedef const int IntegratorState; +typedef const int ConstIntegratorState; -# define INTEGRATOR_STATE_PASS_NULL kg, -1 -# define INTEGRATOR_STATE_IS_NULL (state == -1) +# define INTEGRATOR_STATE_NULL -1 -# define INTEGRATOR_STATE(nested_struct, member) \ +# define INTEGRATOR_STATE(state, nested_struct, member) \ kernel_integrator_state.nested_struct.member[state] -# define INTEGRATOR_STATE_WRITE(nested_struct, member) INTEGRATOR_STATE(nested_struct, member) +# define INTEGRATOR_STATE_WRITE(state, nested_struct, member) \ + INTEGRATOR_STATE(state, nested_struct, member) -# define INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) \ +# define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \ kernel_integrator_state.nested_struct[array_index].member[state] -# define INTEGRATOR_STATE_ARRAY_WRITE(nested_struct, array_index, member) \ - INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) +# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ + INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) #endif /* __KERNEL_CPU__ */ diff --git a/intern/cycles/kernel/integrator/integrator_state_flow.h b/intern/cycles/kernel/integrator/integrator_state_flow.h index 8477efd7b66..9829da875eb 100644 --- a/intern/cycles/kernel/integrator/integrator_state_flow.h +++ b/intern/cycles/kernel/integrator/integrator_state_flow.h @@ -42,48 +42,49 @@ CCL_NAMESPACE_BEGIN * one of them, and only once. */ -#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(path, queued_kernel) == 0) -#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED (INTEGRATOR_STATE(shadow_path, queued_kernel) == 0) +#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0) +#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \ + (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0) #ifdef __KERNEL_GPU__ # define INTEGRATOR_PATH_INIT(next_kernel) \ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ 1); \ - INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; # define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ atomic_fetch_and_sub_uint32( \ &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ 1); \ - INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; # define INTEGRATOR_PATH_TERMINATE(current_kernel) \ atomic_fetch_and_sub_uint32( \ &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0; + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; # define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ 1); \ - INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; # define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ atomic_fetch_and_sub_uint32( \ &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ 1); \ - INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; # define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ atomic_fetch_and_sub_uint32( \ &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0; + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; # define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ { \ const int key_ = key; \ atomic_fetch_and_add_uint32( \ &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(path, shader_sort_key) = key_; \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ 1); \ } @@ -94,8 +95,8 @@ CCL_NAMESPACE_BEGIN &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ atomic_fetch_and_add_uint32( \ &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(path, shader_sort_key) = key_; \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ 1); \ } @@ -103,39 +104,39 @@ CCL_NAMESPACE_BEGIN #else # define INTEGRATOR_PATH_INIT(next_kernel) \ - INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; # define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ { \ - INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ (void)key; \ } # define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ { \ - INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ (void)current_kernel; \ } # define INTEGRATOR_PATH_TERMINATE(current_kernel) \ { \ - INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0; \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \ (void)current_kernel; \ } # define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ { \ - INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ (void)key; \ (void)current_kernel; \ } # define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \ - INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; # define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ { \ - INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel; \ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \ (void)current_kernel; \ } # define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ { \ - INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0; \ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \ (void)current_kernel; \ } diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h index fddd9eb5ac8..fee59e451d9 100644 --- a/intern/cycles/kernel/integrator/integrator_state_util.h +++ b/intern/cycles/kernel/integrator/integrator_state_util.h @@ -23,145 +23,150 @@ CCL_NAMESPACE_BEGIN /* Ray */ -ccl_device_forceinline void integrator_state_write_ray(INTEGRATOR_STATE_ARGS, +ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg, + IntegratorState state, ccl_private const Ray *ccl_restrict ray) { - INTEGRATOR_STATE_WRITE(ray, P) = ray->P; - INTEGRATOR_STATE_WRITE(ray, D) = ray->D; - INTEGRATOR_STATE_WRITE(ray, t) = ray->t; - INTEGRATOR_STATE_WRITE(ray, time) = ray->time; - INTEGRATOR_STATE_WRITE(ray, dP) = ray->dP; - INTEGRATOR_STATE_WRITE(ray, dD) = ray->dD; + INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P; + INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D; + INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time; + INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP; + INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD; } -ccl_device_forceinline void integrator_state_read_ray(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg, + ConstIntegratorState state, ccl_private Ray *ccl_restrict ray) { - ray->P = INTEGRATOR_STATE(ray, P); - ray->D = INTEGRATOR_STATE(ray, D); - ray->t = INTEGRATOR_STATE(ray, t); - ray->time = INTEGRATOR_STATE(ray, time); - ray->dP = INTEGRATOR_STATE(ray, dP); - ray->dD = INTEGRATOR_STATE(ray, dD); + ray->P = INTEGRATOR_STATE(state, ray, P); + ray->D = INTEGRATOR_STATE(state, ray, D); + ray->t = INTEGRATOR_STATE(state, ray, t); + ray->time = INTEGRATOR_STATE(state, ray, time); + ray->dP = INTEGRATOR_STATE(state, ray, dP); + ray->dD = INTEGRATOR_STATE(state, ray, dD); } /* Shadow Ray */ ccl_device_forceinline void integrator_state_write_shadow_ray( - INTEGRATOR_STATE_ARGS, ccl_private const Ray *ccl_restrict ray) + KernelGlobals kg, IntegratorState state, ccl_private const Ray *ccl_restrict ray) { - INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray->P; - INTEGRATOR_STATE_WRITE(shadow_ray, D) = ray->D; - INTEGRATOR_STATE_WRITE(shadow_ray, t) = ray->t; - INTEGRATOR_STATE_WRITE(shadow_ray, time) = ray->time; - INTEGRATOR_STATE_WRITE(shadow_ray, dP) = ray->dP; + INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P; + INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D; + INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time; + INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP; } -ccl_device_forceinline void integrator_state_read_shadow_ray(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg, + ConstIntegratorState state, ccl_private Ray *ccl_restrict ray) { - ray->P = INTEGRATOR_STATE(shadow_ray, P); - ray->D = INTEGRATOR_STATE(shadow_ray, D); - ray->t = INTEGRATOR_STATE(shadow_ray, t); - ray->time = INTEGRATOR_STATE(shadow_ray, time); - ray->dP = INTEGRATOR_STATE(shadow_ray, dP); + ray->P = INTEGRATOR_STATE(state, shadow_ray, P); + ray->D = INTEGRATOR_STATE(state, shadow_ray, D); + ray->t = INTEGRATOR_STATE(state, shadow_ray, t); + ray->time = INTEGRATOR_STATE(state, shadow_ray, time); + ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP); ray->dD = differential_zero_compact(); } /* Intersection */ ccl_device_forceinline void integrator_state_write_isect( - INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect) + KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect) { - INTEGRATOR_STATE_WRITE(isect, t) = isect->t; - INTEGRATOR_STATE_WRITE(isect, u) = isect->u; - INTEGRATOR_STATE_WRITE(isect, v) = isect->v; - INTEGRATOR_STATE_WRITE(isect, object) = isect->object; - INTEGRATOR_STATE_WRITE(isect, prim) = isect->prim; - INTEGRATOR_STATE_WRITE(isect, type) = isect->type; + INTEGRATOR_STATE_WRITE(state, isect, t) = isect->t; + INTEGRATOR_STATE_WRITE(state, isect, u) = isect->u; + INTEGRATOR_STATE_WRITE(state, isect, v) = isect->v; + INTEGRATOR_STATE_WRITE(state, isect, object) = isect->object; + INTEGRATOR_STATE_WRITE(state, isect, prim) = isect->prim; + INTEGRATOR_STATE_WRITE(state, isect, type) = isect->type; #ifdef __EMBREE__ - INTEGRATOR_STATE_WRITE(isect, Ng) = isect->Ng; + INTEGRATOR_STATE_WRITE(state, isect, Ng) = isect->Ng; #endif } ccl_device_forceinline void integrator_state_read_isect( - INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect) + KernelGlobals kg, ConstIntegratorState state, ccl_private Intersection *ccl_restrict isect) { - isect->prim = INTEGRATOR_STATE(isect, prim); - isect->object = INTEGRATOR_STATE(isect, object); - isect->type = INTEGRATOR_STATE(isect, type); - isect->u = INTEGRATOR_STATE(isect, u); - isect->v = INTEGRATOR_STATE(isect, v); - isect->t = INTEGRATOR_STATE(isect, t); + isect->prim = INTEGRATOR_STATE(state, isect, prim); + isect->object = INTEGRATOR_STATE(state, isect, object); + isect->type = INTEGRATOR_STATE(state, isect, type); + isect->u = INTEGRATOR_STATE(state, isect, u); + isect->v = INTEGRATOR_STATE(state, isect, v); + isect->t = INTEGRATOR_STATE(state, isect, t); #ifdef __EMBREE__ - isect->Ng = INTEGRATOR_STATE(isect, Ng); + isect->Ng = INTEGRATOR_STATE(state, isect, Ng); #endif } -ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(ConstIntegratorState state, int i) { - VolumeStack entry = {INTEGRATOR_STATE_ARRAY(volume_stack, i, object), - INTEGRATOR_STATE_ARRAY(volume_stack, i, shader)}; + VolumeStack entry = {INTEGRATOR_STATE_ARRAY(state, volume_stack, i, object), + INTEGRATOR_STATE_ARRAY(state, volume_stack, i, shader)}; return entry; } -ccl_device_forceinline void integrator_state_write_volume_stack(INTEGRATOR_STATE_ARGS, +ccl_device_forceinline void integrator_state_write_volume_stack(IntegratorState state, int i, VolumeStack entry) { - INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, i, object) = entry.object; - INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, i, shader) = entry.shader; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, i, object) = entry.object; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, i, shader) = entry.shader; } -ccl_device_forceinline bool integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_CONST_ARGS) +ccl_device_forceinline bool integrator_state_volume_stack_is_empty(KernelGlobals kg, + ConstIntegratorState state) { return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ? - INTEGRATOR_STATE_ARRAY(volume_stack, 0, shader) == SHADER_NONE : + INTEGRATOR_STATE_ARRAY(state, volume_stack, 0, shader) == SHADER_NONE : true; } /* Shadow Intersection */ ccl_device_forceinline void integrator_state_write_shadow_isect( - INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect, const int index) + IntegratorState state, ccl_private const Intersection *ccl_restrict isect, const int index) { - INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, t) = isect->t; - INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, u) = isect->u; - INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, v) = isect->v; - INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, object) = isect->object; - INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, prim) = isect->prim; - INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, type) = isect->type; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, t) = isect->t; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, u) = isect->u; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, v) = isect->v; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, object) = isect->object; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, prim) = isect->prim; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, type) = isect->type; #ifdef __EMBREE__ - INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, Ng) = isect->Ng; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, Ng) = isect->Ng; #endif } ccl_device_forceinline void integrator_state_read_shadow_isect( - INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect, const int index) + ConstIntegratorState state, ccl_private Intersection *ccl_restrict isect, const int index) { - isect->prim = INTEGRATOR_STATE_ARRAY(shadow_isect, index, prim); - isect->object = INTEGRATOR_STATE_ARRAY(shadow_isect, index, object); - isect->type = INTEGRATOR_STATE_ARRAY(shadow_isect, index, type); - isect->u = INTEGRATOR_STATE_ARRAY(shadow_isect, index, u); - isect->v = INTEGRATOR_STATE_ARRAY(shadow_isect, index, v); - isect->t = INTEGRATOR_STATE_ARRAY(shadow_isect, index, t); + isect->prim = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, prim); + isect->object = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, object); + isect->type = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, type); + isect->u = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, u); + isect->v = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, v); + isect->t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, t); #ifdef __EMBREE__ - isect->Ng = INTEGRATOR_STATE_ARRAY(shadow_isect, index, Ng); + isect->Ng = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, Ng); #endif } -ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_ARGS) +ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(KernelGlobals kg, + IntegratorState state) { if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { int index = 0; int shader; do { - shader = INTEGRATOR_STATE_ARRAY(volume_stack, index, shader); + shader = INTEGRATOR_STATE_ARRAY(state, volume_stack, index, shader); - INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, index, object) = INTEGRATOR_STATE_ARRAY( - volume_stack, index, object); - INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, index, shader) = shader; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, index, object) = + INTEGRATOR_STATE_ARRAY(state, volume_stack, index, object); + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, index, shader) = shader; ++index; } while (shader != OBJECT_NONE); @@ -169,27 +174,27 @@ ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(INTEGRA } ccl_device_forceinline VolumeStack -integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_CONST_ARGS, int i) +integrator_state_read_shadow_volume_stack(ConstIntegratorState state, int i) { - VolumeStack entry = {INTEGRATOR_STATE_ARRAY(shadow_volume_stack, i, object), - INTEGRATOR_STATE_ARRAY(shadow_volume_stack, i, shader)}; + VolumeStack entry = {INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, object), + INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, shader)}; return entry; } ccl_device_forceinline bool integrator_state_shadow_volume_stack_is_empty( - INTEGRATOR_STATE_CONST_ARGS) + KernelGlobals kg, ConstIntegratorState state) { return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ? - INTEGRATOR_STATE_ARRAY(shadow_volume_stack, 0, shader) == SHADER_NONE : + INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, 0, shader) == SHADER_NONE : true; } -ccl_device_forceinline void integrator_state_write_shadow_volume_stack(INTEGRATOR_STATE_ARGS, +ccl_device_forceinline void integrator_state_write_shadow_volume_stack(IntegratorState state, int i, VolumeStack entry) { - INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, object) = entry.object; - INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, shader) = entry.shader; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, i, object) = entry.object; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, i, shader) = entry.shader; } #if defined(__KERNEL_GPU__) @@ -244,15 +249,16 @@ ccl_device_inline void integrator_state_move(const IntegratorState to_state, { integrator_state_copy_only(to_state, state); - INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0; - INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0; + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; } #endif /* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths * after this function. */ -ccl_device_inline void integrator_state_shadow_catcher_split(INTEGRATOR_STATE_ARGS) +ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg, + IntegratorState state) { #if defined(__KERNEL_GPU__) const IntegratorState to_state = atomic_fetch_and_add_uint32( diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h index 153f9b79743..448c99765e3 100644 --- a/intern/cycles/kernel/integrator/integrator_subsurface.h +++ b/intern/cycles/kernel/integrator/integrator_subsurface.h @@ -36,29 +36,30 @@ CCL_NAMESPACE_BEGIN #ifdef __SUBSURFACE__ -ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS, +ccl_device int subsurface_bounce(KernelGlobals kg, + IntegratorState state, ccl_private ShaderData *sd, ccl_private const ShaderClosure *sc) { /* We should never have two consecutive BSSRDF bounces, the second one should * be converted to a diffuse BSDF to avoid this. */ - kernel_assert(!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DIFFUSE_ANCESTOR)); + kernel_assert(!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DIFFUSE_ANCESTOR)); /* Setup path state for intersect_subsurface kernel. */ ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; /* Setup ray into surface. */ - INTEGRATOR_STATE_WRITE(ray, P) = sd->P; - INTEGRATOR_STATE_WRITE(ray, D) = bssrdf->N; - INTEGRATOR_STATE_WRITE(ray, t) = FLT_MAX; - INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(ray, dD) = differential_zero_compact(); + INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; + INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N; + INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); + INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact(); /* Pass along object info, reusing isect to save memory. */ - INTEGRATOR_STATE_WRITE(isect, Ng) = sd->Ng; - INTEGRATOR_STATE_WRITE(isect, object) = sd->object; + INTEGRATOR_STATE_WRITE(state, isect, Ng) = sd->Ng; + INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object; - uint32_t path_flag = (INTEGRATOR_STATE(path, flag) & ~PATH_RAY_CAMERA) | + uint32_t path_flag = (INTEGRATOR_STATE(state, path, flag) & ~PATH_RAY_CAMERA) | ((sc->type == CLOSURE_BSSRDF_BURLEY_ID) ? PATH_RAY_SUBSURFACE_DISK : PATH_RAY_SUBSURFACE_RANDOM_WALK); @@ -70,27 +71,28 @@ ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS, } # endif - INTEGRATOR_STATE_WRITE(path, throughput) *= weight; - INTEGRATOR_STATE_WRITE(path, flag) = path_flag; + INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight; + INTEGRATOR_STATE_WRITE(state, path, flag) = path_flag; /* Advance random number offset for bounce. */ - INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM; + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - if (INTEGRATOR_STATE(path, bounce) == 0) { - INTEGRATOR_STATE_WRITE(path, diffuse_glossy_ratio) = one_float3(); + if (INTEGRATOR_STATE(state, path, bounce) == 0) { + INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3(); } } /* Pass BSSRDF parameters. */ - INTEGRATOR_STATE_WRITE(subsurface, albedo) = bssrdf->albedo; - INTEGRATOR_STATE_WRITE(subsurface, radius) = bssrdf->radius; - INTEGRATOR_STATE_WRITE(subsurface, anisotropy) = bssrdf->anisotropy; + INTEGRATOR_STATE_WRITE(state, subsurface, albedo) = bssrdf->albedo; + INTEGRATOR_STATE_WRITE(state, subsurface, radius) = bssrdf->radius; + INTEGRATOR_STATE_WRITE(state, subsurface, anisotropy) = bssrdf->anisotropy; return LABEL_SUBSURFACE_SCATTER; } -ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS, +ccl_device void subsurface_shader_data_setup(KernelGlobals kg, + IntegratorState state, ccl_private ShaderData *sd, const uint32_t path_flag) { @@ -131,21 +133,21 @@ ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS, } } -ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS) +ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState state) { RNGState rng_state; - path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + path_state_rng_load(state, &rng_state); Ray ray ccl_optional_struct_init; LocalIntersection ss_isect ccl_optional_struct_init; - if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SUBSURFACE_RANDOM_WALK) { - if (!subsurface_random_walk(INTEGRATOR_STATE_PASS, rng_state, ray, ss_isect)) { + if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE_RANDOM_WALK) { + if (!subsurface_random_walk(kg, state, rng_state, ray, ss_isect)) { return false; } } else { - if (!subsurface_disk(INTEGRATOR_STATE_PASS, rng_state, ray, ss_isect)) { + if (!subsurface_disk(kg, state, rng_state, ray, ss_isect)) { return false; } } @@ -157,11 +159,11 @@ ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS) const int object_flag = kernel_tex_fetch(__object_flag, object); if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) { - float3 P = INTEGRATOR_STATE(ray, P); - const float3 Ng = INTEGRATOR_STATE(isect, Ng); + float3 P = INTEGRATOR_STATE(state, ray, P); + const float3 Ng = INTEGRATOR_STATE(state, isect, Ng); const float3 offset_P = ray_offset(P, -Ng); - integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_PASS, offset_P, ray.P); + integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P); } } # endif /* __VOLUME__ */ @@ -172,11 +174,11 @@ ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS) ray.P += ray.D * ray.t * 2.0f; ray.D = -ray.D; - integrator_state_write_isect(INTEGRATOR_STATE_PASS, &ss_isect.hits[0]); - integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray); + integrator_state_write_isect(kg, state, &ss_isect.hits[0]); + integrator_state_write_ray(kg, state, &ray); /* Advance random number offset for bounce. */ - INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM; + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; const int shader = intersection_get_shader(kg, &ss_isect.hits[0]); const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h index 788a5e9b929..1de05ea2696 100644 --- a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h +++ b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h @@ -31,7 +31,8 @@ ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r, /* Subsurface scattering step, from a point on the surface to other * nearby points on the same object. */ -ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS, +ccl_device_inline bool subsurface_disk(KernelGlobals kg, + IntegratorState state, RNGState rng_state, ccl_private Ray &ray, ccl_private LocalIntersection &ss_isect) @@ -41,14 +42,14 @@ ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS, path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &disk_u, &disk_v); /* Read shading point info from integrator state. */ - const float3 P = INTEGRATOR_STATE(ray, P); - const float ray_dP = INTEGRATOR_STATE(ray, dP); - const float time = INTEGRATOR_STATE(ray, time); - const float3 Ng = INTEGRATOR_STATE(isect, Ng); - const int object = INTEGRATOR_STATE(isect, object); + const float3 P = INTEGRATOR_STATE(state, ray, P); + const float ray_dP = INTEGRATOR_STATE(state, ray, dP); + const float time = INTEGRATOR_STATE(state, ray, time); + const float3 Ng = INTEGRATOR_STATE(state, isect, Ng); + const int object = INTEGRATOR_STATE(state, isect, object); /* Read subsurface scattering parameters. */ - const float3 radius = INTEGRATOR_STATE(subsurface, radius); + const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); /* Pick random axis in local frame and point on disk. */ float3 disk_N, disk_T, disk_B; @@ -175,7 +176,7 @@ ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS, if (r < next_sum) { /* Return exit point. */ - INTEGRATOR_STATE_WRITE(path, throughput) *= weight * sum_weights / sample_weight; + INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight * sum_weights / sample_weight; ss_isect.hits[0] = ss_isect.hits[hit]; ss_isect.Ng[0] = ss_isect.Ng[hit]; diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h index 45a43ea67a9..5365093decf 100644 --- a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h +++ b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h @@ -180,7 +180,8 @@ ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t, * and the value represents the cutoff level */ #define SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL 9 -ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS, +ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, + IntegratorState state, RNGState rng_state, ccl_private Ray &ray, ccl_private LocalIntersection &ss_isect) @@ -188,12 +189,12 @@ ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS, float bssrdf_u, bssrdf_v; path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - const float3 P = INTEGRATOR_STATE(ray, P); - const float3 N = INTEGRATOR_STATE(ray, D); - const float ray_dP = INTEGRATOR_STATE(ray, dP); - const float time = INTEGRATOR_STATE(ray, time); - const float3 Ng = INTEGRATOR_STATE(isect, Ng); - const int object = INTEGRATOR_STATE(isect, object); + const float3 P = INTEGRATOR_STATE(state, ray, P); + const float3 N = INTEGRATOR_STATE(state, ray, D); + const float ray_dP = INTEGRATOR_STATE(state, ray, dP); + const float time = INTEGRATOR_STATE(state, ray, time); + const float3 Ng = INTEGRATOR_STATE(state, isect, Ng); + const int object = INTEGRATOR_STATE(state, isect, object); /* Sample diffuse surface scatter into the object. */ float3 D; @@ -219,12 +220,12 @@ ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS, /* Convert subsurface to volume coefficients. * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */ - const float3 albedo = INTEGRATOR_STATE(subsurface, albedo); - const float3 radius = INTEGRATOR_STATE(subsurface, radius); - const float anisotropy = INTEGRATOR_STATE(subsurface, anisotropy); + const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo); + const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); + const float anisotropy = INTEGRATOR_STATE(state, subsurface, anisotropy); float3 sigma_t, alpha; - float3 throughput = INTEGRATOR_STATE_WRITE(path, throughput); + float3 throughput = INTEGRATOR_STATE_WRITE(state, path, throughput); subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput); float3 sigma_s = sigma_t * alpha; @@ -459,7 +460,7 @@ ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS, if (hit) { kernel_assert(isfinite3_safe(throughput)); - INTEGRATOR_STATE_WRITE(path, throughput) = throughput; + INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; } return hit; diff --git a/intern/cycles/kernel/integrator/integrator_volume_stack.h b/intern/cycles/kernel/integrator/integrator_volume_stack.h index 0c4a723de6f..e3a4546508f 100644 --- a/intern/cycles/kernel/integrator/integrator_volume_stack.h +++ b/intern/cycles/kernel/integrator/integrator_volume_stack.h @@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN * is inside of. */ template -ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, +ccl_device void volume_stack_enter_exit(KernelGlobals kg, ccl_private const ShaderData *sd, StackReadOp stack_read, StackWriteOp stack_write) @@ -84,28 +84,29 @@ ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, } } -ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, ccl_private const ShaderData *sd) +ccl_device void volume_stack_enter_exit(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd) { volume_stack_enter_exit( - INTEGRATOR_STATE_PASS, + kg, sd, - [=](const int i) { return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); }, + [=](const int i) { return integrator_state_read_volume_stack(state, i); }, [=](const int i, const VolumeStack entry) { - integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, i, entry); + integrator_state_write_volume_stack(state, i, entry); }); } -ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, +ccl_device void shadow_volume_stack_enter_exit(KernelGlobals kg, + IntegratorState state, ccl_private const ShaderData *sd) { volume_stack_enter_exit( - INTEGRATOR_STATE_PASS, + kg, sd, - [=](const int i) { - return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i); - }, + [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); }, [=](const int i, const VolumeStack entry) { - integrator_state_write_shadow_volume_stack(INTEGRATOR_STATE_PASS, i, entry); + integrator_state_write_shadow_volume_stack(state, i, entry); }); } @@ -123,19 +124,21 @@ ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, * Use this function after the last bounce to get rid of all volumes apart from * the world's one after the last bounce to avoid render artifacts. */ -ccl_device_inline void volume_stack_clean(INTEGRATOR_STATE_ARGS) +ccl_device_inline void volume_stack_clean(KernelGlobals kg, IntegratorState state) { if (kernel_data.background.volume_shader != SHADER_NONE) { /* Keep the world's volume in stack. */ - INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 1, shader) = SHADER_NONE; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, shader) = SHADER_NONE; } else { - INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 0, shader) = SHADER_NONE; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, shader) = SHADER_NONE; } } template -ccl_device float volume_stack_step_size(INTEGRATOR_STATE_ARGS, StackReadOp stack_read) +ccl_device float volume_stack_step_size(KernelGlobals kg, + IntegratorState state, + StackReadOp stack_read) { float step_size = FLT_MAX; @@ -182,12 +185,12 @@ typedef enum VolumeSampleMethod { VOLUME_SAMPLE_MIS = (VOLUME_SAMPLE_DISTANCE | VOLUME_SAMPLE_EQUIANGULAR), } VolumeSampleMethod; -ccl_device VolumeSampleMethod volume_stack_sample_method(INTEGRATOR_STATE_ARGS) +ccl_device VolumeSampleMethod volume_stack_sample_method(KernelGlobals kg, IntegratorState state) { VolumeSampleMethod method = VOLUME_SAMPLE_NONE; for (int i = 0;; i++) { - VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); + VolumeStack entry = integrator_state_read_volume_stack(state, i); if (entry.shader == SHADER_NONE) { break; } diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index dc0aa9356f7..bc45bbd5b07 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -98,9 +98,7 @@ ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEv * to render buffers instead of using per-thread memory, and to avoid the * impact of clamping on other contributions. */ -ccl_device_forceinline void kernel_accum_clamp(ccl_global const KernelGlobals *kg, - ccl_private float3 *L, - int bounce) +ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, ccl_private float3 *L, int bounce) { #ifdef __KERNEL_DEBUG_NAN__ if (!isfinite3_safe(*L)) { @@ -128,9 +126,9 @@ ccl_device_forceinline void kernel_accum_clamp(ccl_global const KernelGlobals *k /* Get pointer to pixel in render buffer. */ ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer( - INTEGRATOR_STATE_CONST_ARGS, ccl_global float *ccl_restrict render_buffer) + KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) { - const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index); + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kernel_data.film.pass_stride; return render_buffer + render_buffer_offset; @@ -140,7 +138,8 @@ ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer( * Adaptive sampling. */ -ccl_device_inline int kernel_accum_sample(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline int kernel_accum_sample(KernelGlobals kg, + ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer, int sample) { @@ -148,13 +147,13 @@ ccl_device_inline int kernel_accum_sample(INTEGRATOR_STATE_CONST_ARGS, return sample; } - ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, - render_buffer); + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1); } -ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS, +ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg, + ConstIntegratorState state, const float3 contribution, ccl_global float *ccl_restrict buffer) { @@ -167,7 +166,7 @@ ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS, return; } - const int sample = INTEGRATOR_STATE(path, sample); + const int sample = INTEGRATOR_STATE(state, path, sample); if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) { kernel_write_pass_float4( buffer + kernel_data.film.pass_adaptive_aux_buffer, @@ -186,7 +185,8 @@ ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS, * Returns truth if the contribution is fully handled here and is not to be added to the other * passes (like combined, adaptive sampling). */ -ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS, +ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg, + ConstIntegratorState state, const float3 contribution, ccl_global float *ccl_restrict buffer) { @@ -198,7 +198,7 @@ ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS, kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); /* Matte pass. */ - if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) { + if (kernel_shadow_catcher_is_matte_path(kg, state)) { kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution); /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive * sampling is based on how noisy the combined pass is as if there were no catchers in the @@ -206,7 +206,7 @@ ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS, } /* Shadow catcher pass. */ - if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) { + if (kernel_shadow_catcher_is_object_pass(kg, state)) { kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); return true; } @@ -214,7 +214,8 @@ ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS, return false; } -ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_ARGS, +ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg, + ConstIntegratorState state, const float3 contribution, const float transparent, ccl_global float *ccl_restrict buffer) @@ -226,12 +227,12 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_A kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED); kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { + if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { return true; } /* Matte pass. */ - if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) { + if (kernel_shadow_catcher_is_matte_path(kg, state)) { kernel_write_pass_float4( buffer + kernel_data.film.pass_shadow_catcher_matte, make_float4(contribution.x, contribution.y, contribution.z, transparent)); @@ -241,7 +242,7 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_A } /* Shadow catcher pass. */ - if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) { + if (kernel_shadow_catcher_is_object_pass(kg, state)) { /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the * calculation and the alpha channel of the pass contains numbers of samples contributed to a * pixel of the pass. */ @@ -252,7 +253,8 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_A return false; } -ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CONST_ARGS, +ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg, + ConstIntegratorState state, const float transparent, ccl_global float *ccl_restrict buffer) { @@ -263,7 +265,7 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CO kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); /* Matte pass. */ - if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) { + if (kernel_shadow_catcher_is_matte_path(kg, state)) { kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent); } } @@ -275,12 +277,13 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CO */ /* Write combined pass. */ -ccl_device_inline void kernel_accum_combined_pass(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_combined_pass(KernelGlobals kg, + ConstIntegratorState state, const float3 contribution, ccl_global float *ccl_restrict buffer) { #ifdef __SHADOW_CATCHER__ - if (kernel_accum_shadow_catcher(INTEGRATOR_STATE_PASS, contribution, buffer)) { + if (kernel_accum_shadow_catcher(kg, state, contribution, buffer)) { return; } #endif @@ -289,19 +292,19 @@ ccl_device_inline void kernel_accum_combined_pass(INTEGRATOR_STATE_CONST_ARGS, kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution); } - kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer); + kernel_accum_adaptive_buffer(kg, state, contribution, buffer); } /* Write combined pass with transparency. */ -ccl_device_inline void kernel_accum_combined_transparent_pass(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_combined_transparent_pass(KernelGlobals kg, + ConstIntegratorState state, const float3 contribution, const float transparent, ccl_global float *ccl_restrict buffer) { #ifdef __SHADOW_CATCHER__ - if (kernel_accum_shadow_catcher_transparent( - INTEGRATOR_STATE_PASS, contribution, transparent, buffer)) { + if (kernel_accum_shadow_catcher_transparent(kg, state, contribution, transparent, buffer)) { return; } #endif @@ -312,11 +315,12 @@ ccl_device_inline void kernel_accum_combined_transparent_pass(INTEGRATOR_STATE_C make_float4(contribution.x, contribution.y, contribution.z, transparent)); } - kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer); + kernel_accum_adaptive_buffer(kg, state, contribution, buffer); } /* Write background or emission to appropriate pass. */ -ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg, + ConstIntegratorState state, float3 contribution, ccl_global float *ccl_restrict buffer, @@ -327,15 +331,15 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE } #ifdef __PASSES__ - const int path_flag = INTEGRATOR_STATE(path, flag); + const int path_flag = INTEGRATOR_STATE(state, path, flag); int pass_offset = PASS_UNUSED; /* Denoising albedo. */ # ifdef __DENOISING_FEATURES__ if (path_flag & PATH_RAY_DENOISING_FEATURES) { if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { - const float3 denoising_feature_throughput = INTEGRATOR_STATE(path, - denoising_feature_throughput); + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); const float3 denoising_albedo = denoising_feature_throughput * contribution; kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); } @@ -349,32 +353,34 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { /* Indirectly visible through reflection. */ const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? - ((INTEGRATOR_STATE(path, bounce) == 1) ? + ((INTEGRATOR_STATE(state, path, bounce) == 1) ? kernel_data.film.pass_glossy_direct : kernel_data.film.pass_glossy_indirect) : - ((INTEGRATOR_STATE(path, bounce) == 1) ? + ((INTEGRATOR_STATE(state, path, bounce) == 1) ? kernel_data.film.pass_transmission_direct : kernel_data.film.pass_transmission_indirect); if (glossy_pass_offset != PASS_UNUSED) { /* Glossy is a subset of the throughput, reconstruct it here using the * diffuse-glossy ratio. */ - const float3 ratio = INTEGRATOR_STATE(path, diffuse_glossy_ratio); + const float3 ratio = INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); const float3 glossy_contribution = (one_float3() - ratio) * contribution; kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); } /* Reconstruct diffuse subset of throughput. */ - pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_diffuse_direct : - kernel_data.film.pass_diffuse_indirect; + pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_diffuse_direct : + kernel_data.film.pass_diffuse_indirect; if (pass_offset != PASS_UNUSED) { - contribution *= INTEGRATOR_STATE(path, diffuse_glossy_ratio); + contribution *= INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); } } else if (path_flag & PATH_RAY_VOLUME_PASS) { /* Indirectly visible through volume. */ - pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_volume_direct : - kernel_data.film.pass_volume_indirect; + pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_volume_direct : + kernel_data.film.pass_volume_indirect; } /* Single write call for GPU coherence. */ @@ -385,52 +391,52 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE } /* Write light contribution to render buffer. */ -ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_light(KernelGlobals kg, + ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) { /* The throughput for shadow paths already contains the light shader evaluation. */ - float3 contribution = INTEGRATOR_STATE(shadow_path, throughput); - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(shadow_path, bounce)); + float3 contribution = INTEGRATOR_STATE(state, shadow_path, throughput); + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce)); - ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, - render_buffer); + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); - kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer); + kernel_accum_combined_pass(kg, state, contribution, buffer); #ifdef __PASSES__ if (kernel_data.film.light_pass_flag & PASS_ANY) { - const int path_flag = INTEGRATOR_STATE(shadow_path, flag); + const int path_flag = INTEGRATOR_STATE(state, shadow_path, flag); int pass_offset = PASS_UNUSED; if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { /* Indirectly visible through reflection. */ const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? - ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ? + ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? kernel_data.film.pass_glossy_direct : kernel_data.film.pass_glossy_indirect) : - ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ? + ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? kernel_data.film.pass_transmission_direct : kernel_data.film.pass_transmission_indirect); if (glossy_pass_offset != PASS_UNUSED) { /* Glossy is a subset of the throughput, reconstruct it here using the * diffuse-glossy ratio. */ - const float3 ratio = INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio); + const float3 ratio = INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio); const float3 glossy_contribution = (one_float3() - ratio) * contribution; kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); } /* Reconstruct diffuse subset of throughput. */ - pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ? + pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? kernel_data.film.pass_diffuse_direct : kernel_data.film.pass_diffuse_indirect; if (pass_offset != PASS_UNUSED) { - contribution *= INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio); + contribution *= INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio); } } else if (path_flag & PATH_RAY_VOLUME_PASS) { /* Indirectly visible through volume. */ - pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ? + pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? kernel_data.film.pass_volume_direct : kernel_data.film.pass_volume_indirect; } @@ -443,8 +449,9 @@ ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS, /* Write shadow pass. */ if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) && (path_flag & PATH_RAY_CAMERA)) { - const float3 unshadowed_throughput = INTEGRATOR_STATE(shadow_path, unshadowed_throughput); - const float3 shadowed_throughput = INTEGRATOR_STATE(shadow_path, throughput); + const float3 unshadowed_throughput = INTEGRATOR_STATE( + state, shadow_path, unshadowed_throughput); + const float3 shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput); const float3 shadow = safe_divide_float3_float3(shadowed_throughput, unshadowed_throughput) * kernel_data.film.pass_shadow_scale; kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow); @@ -458,61 +465,60 @@ ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS, * Note that we accumulate transparency = 1 - alpha in the render buffer. * Otherwise we'd have to write alpha on path termination, which happens * in many places. */ -ccl_device_inline void kernel_accum_transparent(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_transparent(KernelGlobals kg, + ConstIntegratorState state, const float transparent, ccl_global float *ccl_restrict render_buffer) { - ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, - render_buffer); + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent); } - kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_PASS, transparent, buffer); + kernel_accum_shadow_catcher_transparent_only(kg, state, transparent, buffer); } /* Write background contribution to render buffer. * * Includes transparency, matching kernel_accum_transparent. */ -ccl_device_inline void kernel_accum_background(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_background(KernelGlobals kg, + ConstIntegratorState state, const float3 L, const float transparent, const bool is_transparent_background_ray, ccl_global float *ccl_restrict render_buffer) { - float3 contribution = INTEGRATOR_STATE(path, throughput) * L; - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1); + float3 contribution = INTEGRATOR_STATE(state, path, throughput) * L; + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); - ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, - render_buffer); + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); if (is_transparent_background_ray) { - kernel_accum_transparent(INTEGRATOR_STATE_PASS, transparent, render_buffer); + kernel_accum_transparent(kg, state, transparent, render_buffer); } else { - kernel_accum_combined_transparent_pass( - INTEGRATOR_STATE_PASS, contribution, transparent, buffer); + kernel_accum_combined_transparent_pass(kg, state, contribution, transparent, buffer); } kernel_accum_emission_or_background_pass( - INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_background); + kg, state, contribution, buffer, kernel_data.film.pass_background); } /* Write emission to render buffer. */ -ccl_device_inline void kernel_accum_emission(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_emission(KernelGlobals kg, + ConstIntegratorState state, const float3 throughput, const float3 L, ccl_global float *ccl_restrict render_buffer) { float3 contribution = throughput * L; - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1); + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); - ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, - render_buffer); + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); - kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer); + kernel_accum_combined_pass(kg, state, contribution, buffer); kernel_accum_emission_or_background_pass( - INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_emission); + kg, state, contribution, buffer, kernel_data.film.pass_emission); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h index cdf2601f6c3..b80853fcc51 100644 --- a/intern/cycles/kernel/kernel_adaptive_sampling.h +++ b/intern/cycles/kernel/kernel_adaptive_sampling.h @@ -22,14 +22,15 @@ CCL_NAMESPACE_BEGIN /* Check whether the pixel has converged and should not be sampled anymore. */ -ccl_device_forceinline bool kernel_need_sample_pixel(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg, + ConstIntegratorState state, ccl_global float *render_buffer) { if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) { return true; } - const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index); + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kernel_data.film.pass_stride; ccl_global float *buffer = render_buffer + render_buffer_offset; @@ -40,7 +41,7 @@ ccl_device_forceinline bool kernel_need_sample_pixel(INTEGRATOR_STATE_CONST_ARGS /* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */ -ccl_device bool kernel_adaptive_sampling_convergence_check(ccl_global const KernelGlobals *kg, +ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg, ccl_global float *render_buffer, int x, int y, @@ -90,7 +91,7 @@ ccl_device bool kernel_adaptive_sampling_convergence_check(ccl_global const Kern /* This is a simple box filter in two passes. * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */ -ccl_device void kernel_adaptive_sampling_filter_x(ccl_global const KernelGlobals *kg, +ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg, ccl_global float *render_buffer, int y, int start_x, @@ -123,7 +124,7 @@ ccl_device void kernel_adaptive_sampling_filter_x(ccl_global const KernelGlobals } } -ccl_device void kernel_adaptive_sampling_filter_y(ccl_global const KernelGlobals *kg, +ccl_device void kernel_adaptive_sampling_filter_y(KernelGlobals kg, ccl_global float *render_buffer, int x, int start_y, diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 6cbb8dcc291..933ee0082c2 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN -ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg, +ccl_device void kernel_displace_evaluate(KernelGlobals kg, ccl_global const KernelShaderEvalInput *input, ccl_global float *output, const int offset) @@ -37,7 +37,7 @@ ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg, /* Evaluate displacement shader. */ const float3 P = sd.P; - shader_eval_displacement(INTEGRATOR_STATE_PASS_NULL, &sd); + shader_eval_displacement(kg, INTEGRATOR_STATE_NULL, &sd); float3 D = sd.P - P; object_inverse_dir_transform(kg, &sd, &D); @@ -58,7 +58,7 @@ ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg, output[offset * 3 + 2] += D.z; } -ccl_device void kernel_background_evaluate(ccl_global const KernelGlobals *kg, +ccl_device void kernel_background_evaluate(KernelGlobals kg, ccl_global const KernelShaderEvalInput *input, ccl_global float *output, const int offset) @@ -77,7 +77,7 @@ ccl_device void kernel_background_evaluate(ccl_global const KernelGlobals *kg, * This is being evaluated for all BSDFs, so path flag does not contain a specific type. */ const int path_flag = PATH_RAY_EMISSION; shader_eval_surface( - INTEGRATOR_STATE_PASS_NULL, &sd, NULL, path_flag); + kg, INTEGRATOR_STATE_NULL, &sd, NULL, path_flag); float3 color = shader_background_eval(&sd); #ifdef __KERNEL_DEBUG_NAN__ diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index 73683a15c5d..58a34668f45 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -46,7 +46,7 @@ ccl_device float2 camera_sample_aperture(ccl_constant KernelCamera *cam, float u return bokeh; } -ccl_device void camera_sample_perspective(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device void camera_sample_perspective(KernelGlobals kg, float raster_x, float raster_y, float lens_u, @@ -185,7 +185,7 @@ ccl_device void camera_sample_perspective(ccl_global const KernelGlobals *ccl_re } /* Orthographic Camera */ -ccl_device void camera_sample_orthographic(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device void camera_sample_orthographic(KernelGlobals kg, float raster_x, float raster_y, float lens_u, @@ -370,7 +370,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam, /* Common */ -ccl_device_inline void camera_sample(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device_inline void camera_sample(KernelGlobals kg, int x, int y, float filter_u, @@ -444,13 +444,13 @@ ccl_device_inline void camera_sample(ccl_global const KernelGlobals *ccl_restric /* Utilities */ -ccl_device_inline float3 camera_position(ccl_global const KernelGlobals *kg) +ccl_device_inline float3 camera_position(KernelGlobals kg) { Transform cameratoworld = kernel_data.cam.cameratoworld; return make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w); } -ccl_device_inline float camera_distance(ccl_global const KernelGlobals *kg, float3 P) +ccl_device_inline float camera_distance(KernelGlobals kg, float3 P) { Transform cameratoworld = kernel_data.cam.cameratoworld; float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w); @@ -464,7 +464,7 @@ ccl_device_inline float camera_distance(ccl_global const KernelGlobals *kg, floa } } -ccl_device_inline float camera_z_depth(ccl_global const KernelGlobals *kg, float3 P) +ccl_device_inline float camera_z_depth(KernelGlobals kg, float3 P) { if (kernel_data.cam.type != CAMERA_PANORAMA) { Transform worldtocamera = kernel_data.cam.worldtocamera; @@ -477,7 +477,7 @@ ccl_device_inline float camera_z_depth(ccl_global const KernelGlobals *kg, float } } -ccl_device_inline float3 camera_direction_from_point(ccl_global const KernelGlobals *kg, float3 P) +ccl_device_inline float3 camera_direction_from_point(KernelGlobals kg, float3 P) { Transform cameratoworld = kernel_data.cam.cameratoworld; @@ -491,7 +491,7 @@ ccl_device_inline float3 camera_direction_from_point(ccl_global const KernelGlob } } -ccl_device_inline float3 camera_world_to_ndc(ccl_global const KernelGlobals *kg, +ccl_device_inline float3 camera_world_to_ndc(KernelGlobals kg, ccl_private ShaderData *sd, float3 P) { diff --git a/intern/cycles/kernel/kernel_color.h b/intern/cycles/kernel/kernel_color.h index 9e8e0e68b8f..0d7bfecd5f3 100644 --- a/intern/cycles/kernel/kernel_color.h +++ b/intern/cycles/kernel/kernel_color.h @@ -20,14 +20,14 @@ CCL_NAMESPACE_BEGIN -ccl_device float3 xyz_to_rgb(ccl_global const KernelGlobals *kg, float3 xyz) +ccl_device float3 xyz_to_rgb(KernelGlobals kg, float3 xyz) { return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz), dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz), dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz)); } -ccl_device float linear_rgb_to_gray(ccl_global const KernelGlobals *kg, float3 c) +ccl_device float linear_rgb_to_gray(KernelGlobals kg, float3 c) { return dot(c, float4_to_float3(kernel_data.film.rgb_to_y)); } diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index 015587ccbbd..8d329b8dac3 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -25,7 +25,8 @@ CCL_NAMESPACE_BEGIN /* Evaluate shader on light. */ ccl_device_noinline_cpu float3 -light_sample_shader_eval(INTEGRATOR_STATE_ARGS, +light_sample_shader_eval(KernelGlobals kg, + IntegratorState state, ccl_private ShaderData *ccl_restrict emission_sd, ccl_private LightSample *ccl_restrict ls, float time) @@ -73,7 +74,7 @@ light_sample_shader_eval(INTEGRATOR_STATE_ARGS, /* No proper path flag, we're evaluating this for all closures. that's * weak but we'd have to do multiple evaluations otherwise. */ shader_eval_surface( - INTEGRATOR_STATE_PASS, emission_sd, NULL, PATH_RAY_EMISSION); + kg, state, emission_sd, NULL, PATH_RAY_EMISSION); /* Evaluate closures. */ #ifdef __BACKGROUND_MIS__ @@ -105,7 +106,7 @@ ccl_device_inline bool light_sample_is_light(ccl_private const LightSample *ccl_ } /* Early path termination of shadow rays. */ -ccl_device_inline bool light_sample_terminate(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device_inline bool light_sample_terminate(KernelGlobals kg, ccl_private const LightSample *ccl_restrict ls, ccl_private BsdfEval *ccl_restrict eval, const float rand_terminate) @@ -133,10 +134,8 @@ ccl_device_inline bool light_sample_terminate(ccl_global const KernelGlobals *cc * of a triangle. Surface is lifted by amount h along normal n in the incident * point. */ -ccl_device_inline float3 -shadow_ray_smooth_surface_offset(ccl_global const KernelGlobals *ccl_restrict kg, - ccl_private const ShaderData *ccl_restrict sd, - float3 Ng) +ccl_device_inline float3 shadow_ray_smooth_surface_offset( + KernelGlobals kg, ccl_private const ShaderData *ccl_restrict sd, float3 Ng) { float3 V[3], N[3]; triangle_vertices_and_normals(kg, sd->prim, V, N); @@ -180,7 +179,7 @@ shadow_ray_smooth_surface_offset(ccl_global const KernelGlobals *ccl_restrict kg /* Ray offset to avoid shadow terminator artifact. */ -ccl_device_inline float3 shadow_ray_offset(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device_inline float3 shadow_ray_offset(KernelGlobals kg, ccl_private const ShaderData *ccl_restrict sd, float3 L) { @@ -247,7 +246,7 @@ ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restri /* Create shadow ray towards light sample. */ ccl_device_inline void light_sample_to_surface_shadow_ray( - ccl_global const KernelGlobals *ccl_restrict kg, + KernelGlobals kg, ccl_private const ShaderData *ccl_restrict sd, ccl_private const LightSample *ccl_restrict ls, ccl_private Ray *ray) @@ -258,7 +257,7 @@ ccl_device_inline void light_sample_to_surface_shadow_ray( /* Create shadow ray towards light sample. */ ccl_device_inline void light_sample_to_volume_shadow_ray( - ccl_global const KernelGlobals *ccl_restrict kg, + KernelGlobals kg, ccl_private const ShaderData *ccl_restrict sd, ccl_private const LightSample *ccl_restrict ls, const float3 P, diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h index 07b96d0e1a8..d5b8c90a828 100644 --- a/intern/cycles/kernel/kernel_id_passes.h +++ b/intern/cycles/kernel/kernel_id_passes.h @@ -92,7 +92,7 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl } /* post-sorting for Cryptomatte */ -ccl_device_inline void kernel_cryptomatte_post(ccl_global const KernelGlobals *kg, +ccl_device_inline void kernel_cryptomatte_post(KernelGlobals kg, ccl_global float *render_buffer, int pixel_index) { diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h index 1f745ab1da9..b62ec7fda42 100644 --- a/intern/cycles/kernel/kernel_jitter.h +++ b/intern/cycles/kernel/kernel_jitter.h @@ -72,10 +72,7 @@ ccl_device_inline float cmj_randfloat_simple(uint i, uint p) return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF); } -ccl_device float pmj_sample_1D(ccl_global const KernelGlobals *kg, - uint sample, - uint rng_hash, - uint dimension) +ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension) { /* Perform Owen shuffle of the sample number to reorder the samples. */ #ifdef _SIMPLE_HASH_ @@ -118,7 +115,7 @@ ccl_device float pmj_sample_1D(ccl_global const KernelGlobals *kg, return fx; } -ccl_device void pmj_sample_2D(ccl_global const KernelGlobals *kg, +ccl_device void pmj_sample_2D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension, diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index 33d0c09a32a..a7a95918b4e 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -45,7 +45,7 @@ typedef struct LightSample { /* Regular Light */ template -ccl_device_inline bool light_sample(ccl_global const KernelGlobals *kg, +ccl_device_inline bool light_sample(KernelGlobals kg, const int lamp, const float randu, const float randv, @@ -209,7 +209,7 @@ ccl_device_inline bool light_sample(ccl_global const KernelGlobals *kg, return (ls->pdf > 0.0f); } -ccl_device bool lights_intersect(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device bool lights_intersect(KernelGlobals kg, ccl_private const Ray *ccl_restrict ray, ccl_private Intersection *ccl_restrict isect, const int last_prim, @@ -298,7 +298,7 @@ ccl_device bool lights_intersect(ccl_global const KernelGlobals *ccl_restrict kg return isect->prim != PRIM_NONE; } -ccl_device bool light_sample_from_distant_ray(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device bool light_sample_from_distant_ray(KernelGlobals kg, const float3 ray_D, const int lamp, ccl_private LightSample *ccl_restrict ls) @@ -362,7 +362,7 @@ ccl_device bool light_sample_from_distant_ray(ccl_global const KernelGlobals *cc return true; } -ccl_device bool light_sample_from_intersection(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device bool light_sample_from_intersection(KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect, const float3 ray_P, const float3 ray_D, @@ -464,7 +464,7 @@ ccl_device bool light_sample_from_intersection(ccl_global const KernelGlobals *c /* returns true if the triangle is has motion blur or an instancing transform applied */ ccl_device_inline bool triangle_world_space_vertices( - ccl_global const KernelGlobals *kg, int object, int prim, float time, float3 V[3]) + KernelGlobals kg, int object, int prim, float time, float3 V[3]) { bool has_motion = false; const int object_flag = kernel_tex_fetch(__object_flag, object); @@ -492,7 +492,7 @@ ccl_device_inline bool triangle_world_space_vertices( return has_motion; } -ccl_device_inline float triangle_light_pdf_area(ccl_global const KernelGlobals *kg, +ccl_device_inline float triangle_light_pdf_area(KernelGlobals kg, const float3 Ng, const float3 I, float t) @@ -506,7 +506,7 @@ ccl_device_inline float triangle_light_pdf_area(ccl_global const KernelGlobals * return t * t * pdf / cos_pi; } -ccl_device_forceinline float triangle_light_pdf(ccl_global const KernelGlobals *kg, +ccl_device_forceinline float triangle_light_pdf(KernelGlobals kg, ccl_private const ShaderData *sd, float t) { @@ -578,7 +578,7 @@ ccl_device_forceinline float triangle_light_pdf(ccl_global const KernelGlobals * } template -ccl_device_forceinline void triangle_light_sample(ccl_global const KernelGlobals *kg, +ccl_device_forceinline void triangle_light_sample(KernelGlobals kg, int prim, int object, float randu, @@ -747,8 +747,7 @@ ccl_device_forceinline void triangle_light_sample(ccl_global const KernelGlobals /* Light Distribution */ -ccl_device int light_distribution_sample(ccl_global const KernelGlobals *kg, - ccl_private float *randu) +ccl_device int light_distribution_sample(KernelGlobals kg, ccl_private float *randu) { /* This is basically std::upper_bound as used by PBRT, to find a point light or * triangle to emit from, proportional to area. a good improvement would be to @@ -786,15 +785,13 @@ ccl_device int light_distribution_sample(ccl_global const KernelGlobals *kg, /* Generic Light */ -ccl_device_inline bool light_select_reached_max_bounces(ccl_global const KernelGlobals *kg, - int index, - int bounce) +ccl_device_inline bool light_select_reached_max_bounces(KernelGlobals kg, int index, int bounce) { return (bounce > kernel_tex_fetch(__lights, index).max_bounces); } template -ccl_device_noinline bool light_distribution_sample(ccl_global const KernelGlobals *kg, +ccl_device_noinline bool light_distribution_sample(KernelGlobals kg, float randu, const float randv, const float time, @@ -834,20 +831,19 @@ ccl_device_noinline bool light_distribution_sample(ccl_global const KernelGlobal return light_sample(kg, lamp, randu, randv, P, path_flag, ls); } -ccl_device_inline bool light_distribution_sample_from_volume_segment( - ccl_global const KernelGlobals *kg, - float randu, - const float randv, - const float time, - const float3 P, - const int bounce, - const int path_flag, - ccl_private LightSample *ls) +ccl_device_inline bool light_distribution_sample_from_volume_segment(KernelGlobals kg, + float randu, + const float randv, + const float time, + const float3 P, + const int bounce, + const int path_flag, + ccl_private LightSample *ls) { return light_distribution_sample(kg, randu, randv, time, P, bounce, path_flag, ls); } -ccl_device_inline bool light_distribution_sample_from_position(ccl_global const KernelGlobals *kg, +ccl_device_inline bool light_distribution_sample_from_position(KernelGlobals kg, float randu, const float randv, const float time, @@ -859,7 +855,7 @@ ccl_device_inline bool light_distribution_sample_from_position(ccl_global const return light_distribution_sample(kg, randu, randv, time, P, bounce, path_flag, ls); } -ccl_device_inline bool light_distribution_sample_new_position(ccl_global const KernelGlobals *kg, +ccl_device_inline bool light_distribution_sample_new_position(KernelGlobals kg, const float randu, const float randv, const float time, diff --git a/intern/cycles/kernel/kernel_light_background.h b/intern/cycles/kernel/kernel_light_background.h index 3669ff50455..2e828b8b765 100644 --- a/intern/cycles/kernel/kernel_light_background.h +++ b/intern/cycles/kernel/kernel_light_background.h @@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN #ifdef __BACKGROUND_MIS__ -ccl_device float3 background_map_sample(ccl_global const KernelGlobals *kg, +ccl_device float3 background_map_sample(KernelGlobals kg, float randu, float randv, ccl_private float *pdf) @@ -109,7 +109,7 @@ ccl_device float3 background_map_sample(ccl_global const KernelGlobals *kg, /* TODO(sergey): Same as above, after the release we should consider using * 'noinline' for all devices. */ -ccl_device float background_map_pdf(ccl_global const KernelGlobals *kg, float3 direction) +ccl_device float background_map_pdf(KernelGlobals kg, float3 direction) { float2 uv = direction_to_equirectangular(direction); int res_x = kernel_data.background.map_res_x; @@ -143,11 +143,7 @@ ccl_device float background_map_pdf(ccl_global const KernelGlobals *kg, float3 d } ccl_device_inline bool background_portal_data_fetch_and_check_side( - ccl_global const KernelGlobals *kg, - float3 P, - int index, - ccl_private float3 *lightpos, - ccl_private float3 *dir) + KernelGlobals kg, float3 P, int index, ccl_private float3 *lightpos, ccl_private float3 *dir) { int portal = kernel_data.background.portal_offset + index; const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); @@ -162,11 +158,8 @@ ccl_device_inline bool background_portal_data_fetch_and_check_side( return false; } -ccl_device_inline float background_portal_pdf(ccl_global const KernelGlobals *kg, - float3 P, - float3 direction, - int ignore_portal, - ccl_private bool *is_possible) +ccl_device_inline float background_portal_pdf( + KernelGlobals kg, float3 P, float3 direction, int ignore_portal, ccl_private bool *is_possible) { float portal_pdf = 0.0f; @@ -226,7 +219,7 @@ ccl_device_inline float background_portal_pdf(ccl_global const KernelGlobals *kg return (num_possible > 0) ? portal_pdf / num_possible : 0.0f; } -ccl_device int background_num_possible_portals(ccl_global const KernelGlobals *kg, float3 P) +ccl_device int background_num_possible_portals(KernelGlobals kg, float3 P) { int num_possible_portals = 0; for (int p = 0; p < kernel_data.background.num_portals; p++) { @@ -237,7 +230,7 @@ ccl_device int background_num_possible_portals(ccl_global const KernelGlobals *k return num_possible_portals; } -ccl_device float3 background_portal_sample(ccl_global const KernelGlobals *kg, +ccl_device float3 background_portal_sample(KernelGlobals kg, float3 P, float randu, float randv, @@ -292,7 +285,7 @@ ccl_device float3 background_portal_sample(ccl_global const KernelGlobals *kg, return zero_float3(); } -ccl_device_inline float3 background_sun_sample(ccl_global const KernelGlobals *kg, +ccl_device_inline float3 background_sun_sample(KernelGlobals kg, float randu, float randv, ccl_private float *pdf) @@ -304,7 +297,7 @@ ccl_device_inline float3 background_sun_sample(ccl_global const KernelGlobals *k return D; } -ccl_device_inline float background_sun_pdf(ccl_global const KernelGlobals *kg, float3 D) +ccl_device_inline float background_sun_pdf(KernelGlobals kg, float3 D) { const float3 N = float4_to_float3(kernel_data.background.sun); const float angle = kernel_data.background.sun.w; @@ -312,7 +305,7 @@ ccl_device_inline float background_sun_pdf(ccl_global const KernelGlobals *kg, f } ccl_device_inline float3 background_light_sample( - ccl_global const KernelGlobals *kg, float3 P, float randu, float randv, ccl_private float *pdf) + KernelGlobals kg, float3 P, float randu, float randv, ccl_private float *pdf) { float portal_method_pdf = kernel_data.background.portal_weight; float sun_method_pdf = kernel_data.background.sun_weight; @@ -412,9 +405,7 @@ ccl_device_inline float3 background_light_sample( return D; } -ccl_device float background_light_pdf(ccl_global const KernelGlobals *kg, - float3 P, - float3 direction) +ccl_device float background_light_pdf(KernelGlobals kg, float3 P, float3 direction) { float portal_method_pdf = kernel_data.background.portal_weight; float sun_method_pdf = kernel_data.background.sun_weight; diff --git a/intern/cycles/kernel/kernel_light_common.h b/intern/cycles/kernel/kernel_light_common.h index 9421ac462e2..9e2b738f376 100644 --- a/intern/cycles/kernel/kernel_light_common.h +++ b/intern/cycles/kernel/kernel_light_common.h @@ -214,10 +214,7 @@ ccl_device bool light_spread_clamp_area_light(const float3 P, return true; } -ccl_device float lamp_light_pdf(ccl_global const KernelGlobals *kg, - const float3 Ng, - const float3 I, - float t) +ccl_device float lamp_light_pdf(KernelGlobals kg, const float3 Ng, const float3 I, float t) { float cos_pi = dot(Ng, I); diff --git a/intern/cycles/kernel/kernel_lookup_table.h b/intern/cycles/kernel/kernel_lookup_table.h index 3c8577af417..2c26e668d7b 100644 --- a/intern/cycles/kernel/kernel_lookup_table.h +++ b/intern/cycles/kernel/kernel_lookup_table.h @@ -20,10 +20,7 @@ CCL_NAMESPACE_BEGIN /* Interpolated lookup table access */ -ccl_device float lookup_table_read(ccl_global const KernelGlobals *kg, - float x, - int offset, - int size) +ccl_device float lookup_table_read(KernelGlobals kg, float x, int offset, int size) { x = saturate(x) * (size - 1); @@ -40,7 +37,7 @@ ccl_device float lookup_table_read(ccl_global const KernelGlobals *kg, } ccl_device float lookup_table_read_2D( - ccl_global const KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize) + KernelGlobals kg, float x, float y, int offset, int xsize, int ysize) { y = saturate(y) * (ysize - 1); diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index b981e750dda..4d05b63bfbd 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -25,9 +25,9 @@ CCL_NAMESPACE_BEGIN /* Get pointer to pixel in render buffer. */ ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer( - INTEGRATOR_STATE_CONST_ARGS, ccl_global float *ccl_restrict render_buffer) + KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) { - const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index); + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kernel_data.film.pass_stride; return render_buffer + render_buffer_offset; @@ -36,11 +36,12 @@ ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer( #ifdef __DENOISING_FEATURES__ ccl_device_forceinline void kernel_write_denoising_features_surface( - INTEGRATOR_STATE_ARGS, + KernelGlobals kg, + IntegratorState state, ccl_private const ShaderData *sd, ccl_global float *ccl_restrict render_buffer) { - if (!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DENOISING_FEATURES)) { + if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES)) { return; } @@ -49,7 +50,7 @@ ccl_device_forceinline void kernel_write_denoising_features_surface( return; } - ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS, render_buffer); + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); float3 normal = zero_float3(); float3 diffuse_albedo = zero_float3(); @@ -109,32 +110,34 @@ ccl_device_forceinline void kernel_write_denoising_features_surface( } if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { - const float3 denoising_feature_throughput = INTEGRATOR_STATE(path, - denoising_feature_throughput); + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput * diffuse_albedo); kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); } - INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_DENOISING_FEATURES; + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; } else { - INTEGRATOR_STATE_WRITE(path, denoising_feature_throughput) *= specular_albedo; + INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) *= specular_albedo; } } -ccl_device_forceinline void kernel_write_denoising_features_volume(INTEGRATOR_STATE_ARGS, +ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals kg, + IntegratorState state, const float3 albedo, const bool scatter, ccl_global float *ccl_restrict render_buffer) { - ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS, render_buffer); - const float3 denoising_feature_throughput = INTEGRATOR_STATE(path, denoising_feature_throughput); + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); if (scatter && kernel_data.film.pass_denoising_normal != PASS_UNUSED) { /* Assume scatter is sufficiently diffuse to stop writing denoising features. */ - INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_DENOISING_FEATURES; + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; /* Write view direction as normal. */ const float3 denoising_normal = make_float3(0.0f, 0.0f, -1.0f); @@ -153,7 +156,8 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(INTEGRATOR_ST /* Write shadow catcher passes on a bounce from the shadow catcher object. */ ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data( - INTEGRATOR_STATE_ARGS, + KernelGlobals kg, + IntegratorState state, ccl_private const ShaderData *sd, ccl_global float *ccl_restrict render_buffer) { @@ -164,18 +168,18 @@ ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data( kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - if (!kernel_shadow_catcher_is_path_split_bounce(INTEGRATOR_STATE_PASS, sd->object_flag)) { + if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) { return; } - ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS, render_buffer); + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); /* Count sample for the shadow catcher object. */ kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); /* Since the split is done, the sample does not contribute to the matte, so accumulate it as * transparency to the matte. */ - const float3 throughput = INTEGRATOR_STATE(path, throughput); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, average(throughput)); } @@ -191,12 +195,13 @@ ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buf return depth * 4; } -ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS, +ccl_device_inline void kernel_write_data_passes(KernelGlobals kg, + IntegratorState state, ccl_private const ShaderData *sd, ccl_global float *ccl_restrict render_buffer) { #ifdef __PASSES__ - const int path_flag = INTEGRATOR_STATE(path, flag); + const int path_flag = INTEGRATOR_STATE(state, path, flag); if (!(path_flag & PATH_RAY_CAMERA)) { return; @@ -208,12 +213,12 @@ ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS, return; } - ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS, render_buffer); + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { - if (INTEGRATOR_STATE(path, sample) == 0) { + if (INTEGRATOR_STATE(state, path, sample) == 0) { if (flag & PASSMASK(DEPTH)) { const float depth = camera_z_depth(kg, sd->P); kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth); @@ -250,12 +255,12 @@ ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS, kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); } - INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SINGLE_PASS_DONE; + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SINGLE_PASS_DONE; } } if (kernel_data.film.cryptomatte_passes) { - const float3 throughput = INTEGRATOR_STATE(path, throughput); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); const float matte_weight = average(throughput) * (1.0f - average(shader_bsdf_transparency(kg, sd))); if (matte_weight > 0.0f) { @@ -279,17 +284,17 @@ ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS, } if (flag & PASSMASK(DIFFUSE_COLOR)) { - const float3 throughput = INTEGRATOR_STATE(path, throughput); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, shader_bsdf_diffuse(kg, sd) * throughput); } if (flag & PASSMASK(GLOSSY_COLOR)) { - const float3 throughput = INTEGRATOR_STATE(path, throughput); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, shader_bsdf_glossy(kg, sd) * throughput); } if (flag & PASSMASK(TRANSMISSION_COLOR)) { - const float3 throughput = INTEGRATOR_STATE(path, throughput); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, shader_bsdf_transmission(kg, sd) * throughput); } @@ -314,7 +319,7 @@ ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS, mist = powf(mist, mist_falloff); /* Modulate by transparency */ - const float3 throughput = INTEGRATOR_STATE(path, throughput); + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); const float3 alpha = shader_bsdf_alpha(kg, sd); const float mist_output = (1.0f - mist) * average(throughput * alpha); diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h index e04ed5b1cc1..66eb468fdca 100644 --- a/intern/cycles/kernel/kernel_path_state.h +++ b/intern/cycles/kernel/kernel_path_state.h @@ -23,71 +23,73 @@ CCL_NAMESPACE_BEGIN /* Initialize queues, so that the this path is considered terminated. * Used for early outputs in the camera ray initialization, as well as initialization of split * states for shadow catcher. */ -ccl_device_inline void path_state_init_queues(INTEGRATOR_STATE_ARGS) +ccl_device_inline void path_state_init_queues(IntegratorState state) { - INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0; - INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0; + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; } /* Minimalistic initialization of the path state, which is needed for early outputs in the * integrator initialization to work. */ -ccl_device_inline void path_state_init(INTEGRATOR_STATE_ARGS, +ccl_device_inline void path_state_init(IntegratorState state, ccl_global const KernelWorkTile *ccl_restrict tile, const int x, const int y) { const uint render_pixel_index = (uint)tile->offset + x + y * tile->stride; - INTEGRATOR_STATE_WRITE(path, render_pixel_index) = render_pixel_index; + INTEGRATOR_STATE_WRITE(state, path, render_pixel_index) = render_pixel_index; - path_state_init_queues(INTEGRATOR_STATE_PASS); + path_state_init_queues(state); } /* Initialize the rest of the path state needed to continue the path integration. */ -ccl_device_inline void path_state_init_integrator(INTEGRATOR_STATE_ARGS, +ccl_device_inline void path_state_init_integrator(KernelGlobals kg, + IntegratorState state, const int sample, const uint rng_hash) { - INTEGRATOR_STATE_WRITE(path, sample) = sample; - INTEGRATOR_STATE_WRITE(path, bounce) = 0; - INTEGRATOR_STATE_WRITE(path, diffuse_bounce) = 0; - INTEGRATOR_STATE_WRITE(path, glossy_bounce) = 0; - INTEGRATOR_STATE_WRITE(path, transmission_bounce) = 0; - INTEGRATOR_STATE_WRITE(path, transparent_bounce) = 0; - INTEGRATOR_STATE_WRITE(path, volume_bounce) = 0; - INTEGRATOR_STATE_WRITE(path, volume_bounds_bounce) = 0; - INTEGRATOR_STATE_WRITE(path, rng_hash) = rng_hash; - INTEGRATOR_STATE_WRITE(path, rng_offset) = PRNG_BASE_NUM; - INTEGRATOR_STATE_WRITE(path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | - PATH_RAY_TRANSPARENT_BACKGROUND; - INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = 0.0f; - INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f; - INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = FLT_MAX; - INTEGRATOR_STATE_WRITE(path, throughput) = make_float3(1.0f, 1.0f, 1.0f); + INTEGRATOR_STATE_WRITE(state, path, sample) = sample; + INTEGRATOR_STATE_WRITE(state, path, bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, glossy_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, rng_hash) = rng_hash; + INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BASE_NUM; + INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | + PATH_RAY_TRANSPARENT_BACKGROUND; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; + INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f); if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { - INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 0, object) = OBJECT_NONE; - INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 0, shader) = kernel_data.background.volume_shader; - INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 1, object) = OBJECT_NONE; - INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 1, shader) = SHADER_NONE; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, object) = OBJECT_NONE; + INTEGRATOR_STATE_ARRAY_WRITE( + state, volume_stack, 0, shader) = kernel_data.background.volume_shader; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, object) = OBJECT_NONE; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, shader) = SHADER_NONE; } #ifdef __DENOISING_FEATURES__ if (kernel_data.kernel_features & KERNEL_FEATURE_DENOISING) { - INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_DENOISING_FEATURES; - INTEGRATOR_STATE_WRITE(path, denoising_feature_throughput) = one_float3(); + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_DENOISING_FEATURES; + INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_float3(); } #endif } -ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label) +ccl_device_inline void path_state_next(KernelGlobals kg, IntegratorState state, int label) { - uint32_t flag = INTEGRATOR_STATE(path, flag); + uint32_t flag = INTEGRATOR_STATE(state, path, flag); /* ray through transparent keeps same flags from previous ray and is * not counted as a regular bounce, transparent has separate max */ if (label & LABEL_TRANSPARENT) { - uint32_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce) + 1; + uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce) + 1; flag |= PATH_RAY_TRANSPARENT; if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { @@ -97,14 +99,14 @@ ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label) if (!kernel_data.integrator.transparent_shadows) flag |= PATH_RAY_MIS_SKIP; - INTEGRATOR_STATE_WRITE(path, flag) = flag; - INTEGRATOR_STATE_WRITE(path, transparent_bounce) = transparent_bounce; + INTEGRATOR_STATE_WRITE(state, path, flag) = flag; + INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; /* Random number generator next bounce. */ - INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM; + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; return; } - uint32_t bounce = INTEGRATOR_STATE(path, bounce) + 1; + uint32_t bounce = INTEGRATOR_STATE(state, path, bounce) + 1; if (bounce >= kernel_data.integrator.max_bounce) { flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; } @@ -120,8 +122,8 @@ ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label) flag |= PATH_RAY_VOLUME_PASS; } - const int volume_bounce = INTEGRATOR_STATE(path, volume_bounce) + 1; - INTEGRATOR_STATE_WRITE(path, volume_bounce) = volume_bounce; + const int volume_bounce = INTEGRATOR_STATE(state, path, volume_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = volume_bounce; if (volume_bounce >= kernel_data.integrator.max_volume_bounce) { flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; } @@ -135,15 +137,15 @@ ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label) flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; if (label & LABEL_DIFFUSE) { - const int diffuse_bounce = INTEGRATOR_STATE(path, diffuse_bounce) + 1; - INTEGRATOR_STATE_WRITE(path, diffuse_bounce) = diffuse_bounce; + const int diffuse_bounce = INTEGRATOR_STATE(state, path, diffuse_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = diffuse_bounce; if (diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) { flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; } } else { - const int glossy_bounce = INTEGRATOR_STATE(path, glossy_bounce) + 1; - INTEGRATOR_STATE_WRITE(path, glossy_bounce) = glossy_bounce; + const int glossy_bounce = INTEGRATOR_STATE(state, path, glossy_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, glossy_bounce) = glossy_bounce; if (glossy_bounce >= kernel_data.integrator.max_glossy_bounce) { flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; } @@ -158,8 +160,8 @@ ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label) flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; } - const int transmission_bounce = INTEGRATOR_STATE(path, transmission_bounce) + 1; - INTEGRATOR_STATE_WRITE(path, transmission_bounce) = transmission_bounce; + const int transmission_bounce = INTEGRATOR_STATE(state, path, transmission_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = transmission_bounce; if (transmission_bounce >= kernel_data.integrator.max_transmission_bounce) { flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; } @@ -183,36 +185,36 @@ ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label) } } - INTEGRATOR_STATE_WRITE(path, flag) = flag; - INTEGRATOR_STATE_WRITE(path, bounce) = bounce; + INTEGRATOR_STATE_WRITE(state, path, flag) = flag; + INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce; /* Random number generator next bounce. */ - INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM; + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; } #ifdef __VOLUME__ -ccl_device_inline bool path_state_volume_next(INTEGRATOR_STATE_ARGS) +ccl_device_inline bool path_state_volume_next(IntegratorState state) { /* For volume bounding meshes we pass through without counting transparent * bounces, only sanity check in case self intersection gets us stuck. */ - uint32_t volume_bounds_bounce = INTEGRATOR_STATE(path, volume_bounds_bounce) + 1; - INTEGRATOR_STATE_WRITE(path, volume_bounds_bounce) = volume_bounds_bounce; + uint32_t volume_bounds_bounce = INTEGRATOR_STATE(state, path, volume_bounds_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = volume_bounds_bounce; if (volume_bounds_bounce > VOLUME_BOUNDS_MAX) { return false; } /* Random number generator next bounce. */ if (volume_bounds_bounce > 1) { - INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM; + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; } return true; } #endif -ccl_device_inline uint path_state_ray_visibility(INTEGRATOR_STATE_CONST_ARGS) +ccl_device_inline uint path_state_ray_visibility(ConstIntegratorState state) { - const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); uint32_t visibility = path_flag & PATH_RAY_ALL_VISIBILITY; @@ -231,18 +233,19 @@ ccl_device_inline uint path_state_ray_visibility(INTEGRATOR_STATE_CONST_ARGS) return visibility; } -ccl_device_inline float path_state_continuation_probability(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline float path_state_continuation_probability(KernelGlobals kg, + ConstIntegratorState state, const uint32_t path_flag) { if (path_flag & PATH_RAY_TRANSPARENT) { - const uint32_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce); + const uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); /* Do at least specified number of bounces without RR. */ if (transparent_bounce <= kernel_data.integrator.transparent_min_bounce) { return 1.0f; } } else { - const uint32_t bounce = INTEGRATOR_STATE(path, bounce); + const uint32_t bounce = INTEGRATOR_STATE(state, path, bounce); /* Do at least specified number of bounces without RR. */ if (bounce <= kernel_data.integrator.min_bounce) { return 1.0f; @@ -251,17 +254,18 @@ ccl_device_inline float path_state_continuation_probability(INTEGRATOR_STATE_CON /* Probabilistic termination: use sqrt() to roughly match typical view * transform and do path termination a bit later on average. */ - return min(sqrtf(max3(fabs(INTEGRATOR_STATE(path, throughput)))), 1.0f); + return min(sqrtf(max3(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f); } -ccl_device_inline bool path_state_ao_bounce(INTEGRATOR_STATE_CONST_ARGS) +ccl_device_inline bool path_state_ao_bounce(KernelGlobals kg, ConstIntegratorState state) { if (!kernel_data.integrator.ao_bounces) { return false; } - const int bounce = INTEGRATOR_STATE(path, bounce) - INTEGRATOR_STATE(path, transmission_bounce) - - (INTEGRATOR_STATE(path, glossy_bounce) > 0) + 1; + const int bounce = INTEGRATOR_STATE(state, path, bounce) - + INTEGRATOR_STATE(state, path, transmission_bounce) - + (INTEGRATOR_STATE(state, path, glossy_bounce) > 0) + 1; return (bounce > kernel_data.integrator.ao_bounces); } @@ -281,26 +285,27 @@ typedef struct RNGState { int sample; } RNGState; -ccl_device_inline void path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void path_state_rng_load(ConstIntegratorState state, ccl_private RNGState *rng_state) { - rng_state->rng_hash = INTEGRATOR_STATE(path, rng_hash); - rng_state->rng_offset = INTEGRATOR_STATE(path, rng_offset); - rng_state->sample = INTEGRATOR_STATE(path, sample); + rng_state->rng_hash = INTEGRATOR_STATE(state, path, rng_hash); + rng_state->rng_offset = INTEGRATOR_STATE(state, path, rng_offset); + rng_state->sample = INTEGRATOR_STATE(state, path, sample); } -ccl_device_inline void shadow_path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorState state, ccl_private RNGState *rng_state) { - const uint shadow_bounces = INTEGRATOR_STATE(shadow_path, transparent_bounce) - - INTEGRATOR_STATE(path, transparent_bounce); + const uint shadow_bounces = INTEGRATOR_STATE(state, shadow_path, transparent_bounce) - + INTEGRATOR_STATE(state, path, transparent_bounce); - rng_state->rng_hash = INTEGRATOR_STATE(path, rng_hash); - rng_state->rng_offset = INTEGRATOR_STATE(path, rng_offset) + PRNG_BOUNCE_NUM * shadow_bounces; - rng_state->sample = INTEGRATOR_STATE(path, sample); + rng_state->rng_hash = INTEGRATOR_STATE(state, path, rng_hash); + rng_state->rng_offset = INTEGRATOR_STATE(state, path, rng_offset) + + PRNG_BOUNCE_NUM * shadow_bounces; + rng_state->sample = INTEGRATOR_STATE(state, path, sample); } -ccl_device_inline float path_state_rng_1D(ccl_global const KernelGlobals *kg, +ccl_device_inline float path_state_rng_1D(KernelGlobals kg, ccl_private const RNGState *rng_state, int dimension) { @@ -308,7 +313,7 @@ ccl_device_inline float path_state_rng_1D(ccl_global const KernelGlobals *kg, kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension); } -ccl_device_inline void path_state_rng_2D(ccl_global const KernelGlobals *kg, +ccl_device_inline void path_state_rng_2D(KernelGlobals kg, ccl_private const RNGState *rng_state, int dimension, ccl_private float *fx, @@ -318,7 +323,7 @@ ccl_device_inline void path_state_rng_2D(ccl_global const KernelGlobals *kg, kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy); } -ccl_device_inline float path_state_rng_1D_hash(ccl_global const KernelGlobals *kg, +ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg, ccl_private const RNGState *rng_state, uint hash) { @@ -329,7 +334,7 @@ ccl_device_inline float path_state_rng_1D_hash(ccl_global const KernelGlobals *k kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset); } -ccl_device_inline float path_branched_rng_1D(ccl_global const KernelGlobals *kg, +ccl_device_inline float path_branched_rng_1D(KernelGlobals kg, ccl_private const RNGState *rng_state, int branch, int num_branches, @@ -341,7 +346,7 @@ ccl_device_inline float path_branched_rng_1D(ccl_global const KernelGlobals *kg, rng_state->rng_offset + dimension); } -ccl_device_inline void path_branched_rng_2D(ccl_global const KernelGlobals *kg, +ccl_device_inline void path_branched_rng_2D(KernelGlobals kg, ccl_private const RNGState *rng_state, int branch, int num_branches, @@ -360,7 +365,7 @@ ccl_device_inline void path_branched_rng_2D(ccl_global const KernelGlobals *kg, /* Utility functions to get light termination value, * since it might not be needed in many cases. */ -ccl_device_inline float path_state_rng_light_termination(ccl_global const KernelGlobals *kg, +ccl_device_inline float path_state_rng_light_termination(KernelGlobals kg, ccl_private const RNGState *state) { if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) { diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index 7db4289acec..e5e87453611 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -38,7 +38,7 @@ CCL_NAMESPACE_BEGIN */ # define SOBOL_SKIP 64 -ccl_device uint sobol_dimension(ccl_global const KernelGlobals *kg, int index, int dimension) +ccl_device uint sobol_dimension(KernelGlobals kg, int index, int dimension) { uint result = 0; uint i = index + SOBOL_SKIP; @@ -51,7 +51,7 @@ ccl_device uint sobol_dimension(ccl_global const KernelGlobals *kg, int index, i #endif /* __SOBOL__ */ -ccl_device_forceinline float path_rng_1D(ccl_global const KernelGlobals *kg, +ccl_device_forceinline float path_rng_1D(KernelGlobals kg, uint rng_hash, int sample, int dimension) @@ -85,7 +85,7 @@ ccl_device_forceinline float path_rng_1D(ccl_global const KernelGlobals *kg, #endif } -ccl_device_forceinline void path_rng_2D(ccl_global const KernelGlobals *kg, +ccl_device_forceinline void path_rng_2D(KernelGlobals kg, uint rng_hash, int sample, int dimension, @@ -141,7 +141,7 @@ ccl_device_inline uint hash_iqnt2d(const uint x, const uint y) return n; } -ccl_device_inline uint path_rng_hash_init(ccl_global const KernelGlobals *ccl_restrict kg, +ccl_device_inline uint path_rng_hash_init(KernelGlobals kg, const int sample, const int x, const int y) diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index b5a52ff866d..4a57d22775a 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -104,7 +104,8 @@ ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases } #endif /* __VOLUME__ */ -ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg, + ConstIntegratorState state, ccl_private ShaderData *sd) { /* Defensive sampling. @@ -112,7 +113,8 @@ ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_AR * We can likely also do defensive sampling at deeper bounces, particularly * for cases like a perfect mirror but possibly also others. This will need * a good heuristic. */ - if (INTEGRATOR_STATE(path, bounce) + INTEGRATOR_STATE(path, transparent_bounce) == 0 && + if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) == + 0 && sd->num_closure > 1) { float sum = 0.0f; @@ -136,7 +138,8 @@ ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_AR * Blurring of bsdf after bounces, for rays that have a small likelihood * of following this particular path (diffuse, rough glossy) */ if (kernel_data.integrator.filter_glossy != FLT_MAX) { - float blur_pdf = kernel_data.integrator.filter_glossy * INTEGRATOR_STATE(path, min_ray_pdf); + float blur_pdf = kernel_data.integrator.filter_glossy * + INTEGRATOR_STATE(state, path, min_ray_pdf); if (blur_pdf < 1.0f) { float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; @@ -182,7 +185,7 @@ ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_sh return false; } -ccl_device_inline float _shader_bsdf_multi_eval(ccl_global const KernelGlobals *kg, +ccl_device_inline float _shader_bsdf_multi_eval(KernelGlobals kg, ccl_private ShaderData *sd, const float3 omega_in, const bool is_transmission, @@ -226,7 +229,7 @@ ccl_device ccl_device_inline #endif float - shader_bsdf_eval(ccl_global const KernelGlobals *kg, + shader_bsdf_eval(KernelGlobals kg, ccl_private ShaderData *sd, const float3 omega_in, const bool is_transmission, @@ -306,7 +309,7 @@ shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd, /* Sample direction for picked BSDF, and return evaluation and pdf for all * BSDFs combined using MIS. */ -ccl_device int shader_bsdf_sample_closure(ccl_global const KernelGlobals *kg, +ccl_device int shader_bsdf_sample_closure(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private const ShaderClosure *sc, float randu, @@ -360,8 +363,7 @@ ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd) return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f; } -ccl_device float3 shader_bsdf_transparency(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device float3 shader_bsdf_transparency(KernelGlobals kg, ccl_private const ShaderData *sd) { if (sd->flag & SD_HAS_ONLY_VOLUME) { return one_float3(); @@ -374,8 +376,7 @@ ccl_device float3 shader_bsdf_transparency(ccl_global const KernelGlobals *kg, } } -ccl_device void shader_bsdf_disable_transparency(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd) +ccl_device void shader_bsdf_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd) { if (sd->flag & SD_TRANSPARENT) { for (int i = 0; i < sd->num_closure; i++) { @@ -391,8 +392,7 @@ ccl_device void shader_bsdf_disable_transparency(ccl_global const KernelGlobals } } -ccl_device float3 shader_bsdf_alpha(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device float3 shader_bsdf_alpha(KernelGlobals kg, ccl_private const ShaderData *sd) { float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd); @@ -402,8 +402,7 @@ ccl_device float3 shader_bsdf_alpha(ccl_global const KernelGlobals *kg, return alpha; } -ccl_device float3 shader_bsdf_diffuse(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device float3 shader_bsdf_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd) { float3 eval = zero_float3(); @@ -417,8 +416,7 @@ ccl_device float3 shader_bsdf_diffuse(ccl_global const KernelGlobals *kg, return eval; } -ccl_device float3 shader_bsdf_glossy(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device float3 shader_bsdf_glossy(KernelGlobals kg, ccl_private const ShaderData *sd) { float3 eval = zero_float3(); @@ -432,8 +430,7 @@ ccl_device float3 shader_bsdf_glossy(ccl_global const KernelGlobals *kg, return eval; } -ccl_device float3 shader_bsdf_transmission(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device float3 shader_bsdf_transmission(KernelGlobals kg, ccl_private const ShaderData *sd) { float3 eval = zero_float3(); @@ -447,8 +444,7 @@ ccl_device float3 shader_bsdf_transmission(ccl_global const KernelGlobals *kg, return eval; } -ccl_device float3 shader_bsdf_average_normal(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device float3 shader_bsdf_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd) { float3 N = zero_float3(); @@ -461,8 +457,7 @@ ccl_device float3 shader_bsdf_average_normal(ccl_global const KernelGlobals *kg, return (is_zero(N)) ? sd->N : normalize(N); } -ccl_device float3 shader_bsdf_ao_normal(ccl_global const KernelGlobals *kg, - ccl_private const ShaderData *sd) +ccl_device float3 shader_bsdf_ao_normal(KernelGlobals kg, ccl_private const ShaderData *sd) { float3 N = zero_float3(); @@ -499,7 +494,7 @@ ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd) /* Constant emission optimization */ -ccl_device bool shader_constant_emission_eval(ccl_global const KernelGlobals *kg, +ccl_device bool shader_constant_emission_eval(KernelGlobals kg, int shader, ccl_private float3 *eval) { @@ -543,8 +538,7 @@ ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd) /* Holdout */ -ccl_device float3 shader_holdout_apply(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd) +ccl_device float3 shader_holdout_apply(KernelGlobals kg, ccl_private ShaderData *sd) { float3 weight = zero_float3(); @@ -582,7 +576,8 @@ ccl_device float3 shader_holdout_apply(ccl_global const KernelGlobals *kg, /* Surface Evaluation */ template -ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS, +ccl_device void shader_eval_surface(KernelGlobals kg, + ConstIntegratorState state, ccl_private ShaderData *ccl_restrict sd, ccl_global float *ccl_restrict buffer, int path_flag) @@ -604,18 +599,17 @@ ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS, #ifdef __OSL__ if (kg->osl) { if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { - OSLShader::eval_background(INTEGRATOR_STATE_PASS, sd, path_flag); + OSLShader::eval_background(kg, state, sd, path_flag); } else { - OSLShader::eval_surface(INTEGRATOR_STATE_PASS, sd, path_flag); + OSLShader::eval_surface(kg, state, sd, path_flag); } } else #endif { #ifdef __SVM__ - svm_eval_nodes( - INTEGRATOR_STATE_PASS, sd, buffer, path_flag); + svm_eval_nodes(kg, state, sd, buffer, path_flag); #else if (sd->object == OBJECT_NONE) { sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f); @@ -632,11 +626,14 @@ ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS, #endif } - if (KERNEL_NODES_FEATURE(BSDF) && (sd->flag & SD_BSDF_NEEDS_LCG)) { - sd->lcg_state = lcg_state_init(INTEGRATOR_STATE(path, rng_hash), - INTEGRATOR_STATE(path, rng_offset), - INTEGRATOR_STATE(path, sample), - 0xb4bc3953); + IF_KERNEL_NODES_FEATURE(BSDF) + { + if (sd->flag & SD_BSDF_NEEDS_LCG) { + sd->lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_hash), + INTEGRATOR_STATE(state, path, rng_offset), + INTEGRATOR_STATE(state, path, sample), + 0xb4bc3953); + } } } @@ -672,7 +669,7 @@ ccl_device_inline float _shader_volume_phase_multi_eval( return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; } -ccl_device float shader_volume_phase_eval(ccl_global const KernelGlobals *kg, +ccl_device float shader_volume_phase_eval(KernelGlobals kg, ccl_private const ShaderData *sd, ccl_private const ShaderVolumePhases *phases, const float3 omega_in, @@ -683,7 +680,7 @@ ccl_device float shader_volume_phase_eval(ccl_global const KernelGlobals *kg, return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f); } -ccl_device int shader_volume_phase_sample(ccl_global const KernelGlobals *kg, +ccl_device int shader_volume_phase_sample(KernelGlobals kg, ccl_private const ShaderData *sd, ccl_private const ShaderVolumePhases *phases, float randu, @@ -742,7 +739,7 @@ ccl_device int shader_volume_phase_sample(ccl_global const KernelGlobals *kg, return label; } -ccl_device int shader_phase_sample_closure(ccl_global const KernelGlobals *kg, +ccl_device int shader_phase_sample_closure(KernelGlobals kg, ccl_private const ShaderData *sd, ccl_private const ShaderVolumeClosure *sc, float randu, @@ -767,7 +764,8 @@ ccl_device int shader_phase_sample_closure(ccl_global const KernelGlobals *kg, /* Volume Evaluation */ template -ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void shader_eval_volume(KernelGlobals kg, + ConstIntegratorState state, ccl_private ShaderData *ccl_restrict sd, const int path_flag, StackReadOp stack_read) @@ -820,13 +818,13 @@ ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS, # ifdef __SVM__ # ifdef __OSL__ if (kg->osl) { - OSLShader::eval_volume(INTEGRATOR_STATE_PASS, sd, path_flag); + OSLShader::eval_volume(kg, state, sd, path_flag); } else # endif { svm_eval_nodes( - INTEGRATOR_STATE_PASS, sd, NULL, path_flag); + kg, state, sd, NULL, path_flag); } # endif @@ -843,7 +841,9 @@ ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS, /* Displacement Evaluation */ -ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ccl_private ShaderData *sd) +ccl_device void shader_eval_displacement(KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd) { sd->num_closure = 0; sd->num_closure_left = 0; @@ -852,19 +852,19 @@ ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ccl_privat #ifdef __SVM__ # ifdef __OSL__ if (kg->osl) - OSLShader::eval_displacement(INTEGRATOR_STATE_PASS, sd); + OSLShader::eval_displacement(kg, state, sd); else # endif { svm_eval_nodes( - INTEGRATOR_STATE_PASS, sd, NULL, 0); + kg, state, sd, NULL, 0); } #endif } /* Cryptomatte */ -ccl_device float shader_cryptomatte_id(ccl_global const KernelGlobals *kg, int shader) +ccl_device float shader_cryptomatte_id(KernelGlobals kg, int shader) { return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id; } diff --git a/intern/cycles/kernel/kernel_shadow_catcher.h b/intern/cycles/kernel/kernel_shadow_catcher.h index 824749818a4..8dc7a568b33 100644 --- a/intern/cycles/kernel/kernel_shadow_catcher.h +++ b/intern/cycles/kernel/kernel_shadow_catcher.h @@ -22,7 +22,8 @@ CCL_NAMESPACE_BEGIN /* Check whether current surface bounce is where path is to be split for the shadow catcher. */ -ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(INTEGRATOR_STATE_ARGS, +ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals kg, + IntegratorState state, const int object_flag) { #ifdef __SHADOW_CATCHER__ @@ -38,7 +39,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(INTEGRATOR_STA return false; } - const int path_flag = INTEGRATOR_STATE(path, flag); + const int path_flag = INTEGRATOR_STATE(state, path, flag); if ((path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) == 0) { /* Split only on primary rays, secondary bounces are to treat shadow catcher as a regular @@ -58,13 +59,14 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(INTEGRATOR_STA } /* Check whether the current path can still split. */ -ccl_device_inline bool kernel_shadow_catcher_path_can_split(INTEGRATOR_STATE_CONST_ARGS) +ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg, + ConstIntegratorState state) { if (INTEGRATOR_PATH_IS_TERMINATED && INTEGRATOR_SHADOW_PATH_IS_TERMINATED) { return false; } - const int path_flag = INTEGRATOR_STATE(path, flag); + const int path_flag = INTEGRATOR_STATE(state, path, flag); if (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) { /* Shadow catcher was already hit and the state was split. No further split is allowed. */ @@ -76,21 +78,23 @@ ccl_device_inline bool kernel_shadow_catcher_path_can_split(INTEGRATOR_STATE_CON /* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths * after this function. */ -ccl_device_inline bool kernel_shadow_catcher_split(INTEGRATOR_STATE_ARGS, const int object_flags) +ccl_device_inline bool kernel_shadow_catcher_split(KernelGlobals kg, + IntegratorState state, + const int object_flags) { #ifdef __SHADOW_CATCHER__ - if (!kernel_shadow_catcher_is_path_split_bounce(INTEGRATOR_STATE_PASS, object_flags)) { + if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, object_flags)) { return false; } /* The split is to be done. Mark the current state as such, so that it stops contributing to the * shadow catcher matte pass, but keeps contributing to the combined pass. */ - INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT; + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT; /* Split new state from the current one. This new state will only track contribution of shadow * catcher objects ignoring non-catcher objects. */ - integrator_state_shadow_catcher_split(INTEGRATOR_STATE_PASS); + integrator_state_shadow_catcher_split(kg, state); return true; #else @@ -101,14 +105,16 @@ ccl_device_inline bool kernel_shadow_catcher_split(INTEGRATOR_STATE_ARGS, const #ifdef __SHADOW_CATCHER__ -ccl_device_forceinline bool kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_CONST_ARGS) +ccl_device_forceinline bool kernel_shadow_catcher_is_matte_path(KernelGlobals kg, + ConstIntegratorState state) { - return (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_HIT) == 0; + return (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_HIT) == 0; } -ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_CONST_ARGS) +ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(KernelGlobals kg, + ConstIntegratorState state) { - return INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_PASS; + return INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_PASS; } #endif /* __SHADOW_CATCHER__ */ diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 3a5a11d2c10..5625c0e4d19 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -813,7 +813,7 @@ typedef struct ccl_align(16) ShaderData float ray_dP; #ifdef __OSL__ - const struct KernelGlobals *osl_globals; + const struct KernelGlobalsCPU *osl_globals; const struct IntegratorStateCPU *osl_path_state; #endif @@ -1505,63 +1505,77 @@ enum KernelFeatureFlag : unsigned int { KERNEL_FEATURE_NODE_BUMP_STATE = (1U << 5U), KERNEL_FEATURE_NODE_VORONOI_EXTRA = (1U << 6U), KERNEL_FEATURE_NODE_RAYTRACE = (1U << 7U), + KERNEL_FEATURE_NODE_AOV = (1U << 8U), + KERNEL_FEATURE_NODE_LIGHT_PATH = (1U << 9U), /* Use denoising kernels and output denoising passes. */ - KERNEL_FEATURE_DENOISING = (1U << 8U), + KERNEL_FEATURE_DENOISING = (1U << 10U), /* Use path tracing kernels. */ - KERNEL_FEATURE_PATH_TRACING = (1U << 9U), + KERNEL_FEATURE_PATH_TRACING = (1U << 11U), /* BVH/sampling kernel features. */ - KERNEL_FEATURE_HAIR = (1U << 10U), - KERNEL_FEATURE_HAIR_THICK = (1U << 11U), - KERNEL_FEATURE_OBJECT_MOTION = (1U << 12U), - KERNEL_FEATURE_CAMERA_MOTION = (1U << 13U), + KERNEL_FEATURE_HAIR = (1U << 12U), + KERNEL_FEATURE_HAIR_THICK = (1U << 13U), + KERNEL_FEATURE_OBJECT_MOTION = (1U << 14U), + KERNEL_FEATURE_CAMERA_MOTION = (1U << 15U), /* Denotes whether baking functionality is needed. */ - KERNEL_FEATURE_BAKING = (1U << 14U), + KERNEL_FEATURE_BAKING = (1U << 16U), /* Use subsurface scattering materials. */ - KERNEL_FEATURE_SUBSURFACE = (1U << 15U), + KERNEL_FEATURE_SUBSURFACE = (1U << 17U), /* Use volume materials. */ - KERNEL_FEATURE_VOLUME = (1U << 16U), + KERNEL_FEATURE_VOLUME = (1U << 18U), /* Use OpenSubdiv patch evaluation */ - KERNEL_FEATURE_PATCH_EVALUATION = (1U << 17U), + KERNEL_FEATURE_PATCH_EVALUATION = (1U << 19U), /* Use Transparent shadows */ - KERNEL_FEATURE_TRANSPARENT = (1U << 18U), + KERNEL_FEATURE_TRANSPARENT = (1U << 20U), /* Use shadow catcher. */ - KERNEL_FEATURE_SHADOW_CATCHER = (1U << 19U), + KERNEL_FEATURE_SHADOW_CATCHER = (1U << 21U), /* Per-uber shader usage flags. */ - KERNEL_FEATURE_PRINCIPLED = (1U << 20U), + KERNEL_FEATURE_PRINCIPLED = (1U << 22U), /* Light render passes. */ - KERNEL_FEATURE_LIGHT_PASSES = (1U << 21U), + KERNEL_FEATURE_LIGHT_PASSES = (1U << 23U), /* Shadow render pass. */ - KERNEL_FEATURE_SHADOW_PASS = (1U << 22U), + KERNEL_FEATURE_SHADOW_PASS = (1U << 24U), }; /* Shader node feature mask, to specialize shader evaluation for kernels. */ #define KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT \ - (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VORONOI_EXTRA) + (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VORONOI_EXTRA | \ + KERNEL_FEATURE_NODE_LIGHT_PATH) #define KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW \ (KERNEL_FEATURE_NODE_BSDF | KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | \ KERNEL_FEATURE_NODE_HAIR | KERNEL_FEATURE_NODE_BUMP | KERNEL_FEATURE_NODE_BUMP_STATE | \ - KERNEL_FEATURE_NODE_VORONOI_EXTRA) + KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_LIGHT_PATH) #define KERNEL_FEATURE_NODE_MASK_SURFACE \ - (KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW | KERNEL_FEATURE_NODE_RAYTRACE) + (KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW | KERNEL_FEATURE_NODE_RAYTRACE | \ + KERNEL_FEATURE_NODE_AOV | KERNEL_FEATURE_NODE_LIGHT_PATH) #define KERNEL_FEATURE_NODE_MASK_VOLUME \ - (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | KERNEL_FEATURE_NODE_VORONOI_EXTRA) + (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | \ + KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_LIGHT_PATH) #define KERNEL_FEATURE_NODE_MASK_DISPLACEMENT \ (KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_BUMP | KERNEL_FEATURE_NODE_BUMP_STATE) #define KERNEL_FEATURE_NODE_MASK_BUMP KERNEL_FEATURE_NODE_MASK_DISPLACEMENT -#define KERNEL_NODES_FEATURE(feature) ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) +/* Must be constexpr on the CPU to avoid compile errors because the state types + * are different depending on the main, shadow or null path. For GPU we don't have + * C++17 everywhere so can't use it. */ +#ifdef __KERNEL_CPU__ +# define IF_KERNEL_NODES_FEATURE(feature) \ + if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) +#else +# define IF_KERNEL_NODES_FEATURE(feature) \ + if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) +#endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index e814fcca246..94712a4dd13 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -500,7 +500,7 @@ bool CBSDFClosure::skip(const ShaderData *sd, int path_flag, int scattering) { /* caustic options */ if ((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; if ((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) { diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 2c7f5eb4948..bb7655fbe9a 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -149,7 +149,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, * a concept of shader space, so we just use object space for both. */ if (xform) { const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; int object = sd->object; if (object != OBJECT_NONE) { @@ -187,7 +187,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, * a concept of shader space, so we just use object space for both. */ if (xform) { const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; int object = sd->object; if (object != OBJECT_NONE) { @@ -222,7 +222,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, float time) { ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; if (from == u_ndc) { copy_matrix(result, kernel_data.cam.ndctoworld); @@ -254,7 +254,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, float time) { ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; if (to == u_ndc) { copy_matrix(result, kernel_data.cam.worldtondc); @@ -288,7 +288,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, * a concept of shader space, so we just use object space for both. */ if (xform) { const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; int object = sd->object; if (object != OBJECT_NONE) { @@ -316,7 +316,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, * a concept of shader space, so we just use object space for both. */ if (xform) { const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; int object = sd->object; if (object != OBJECT_NONE) { @@ -339,7 +339,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) { ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; if (from == u_ndc) { copy_matrix(result, kernel_data.cam.ndctoworld); @@ -366,7 +366,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, ustring to) { ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; if (to == u_ndc) { copy_matrix(result, kernel_data.cam.worldtondc); @@ -745,7 +745,7 @@ static bool set_attribute_matrix(const Transform &tfm, TypeDesc type, void *val) return false; } -static bool get_primitive_attribute(const KernelGlobals *kg, +static bool get_primitive_attribute(const KernelGlobalsCPU *kg, const ShaderData *sd, const OSLGlobals::Attribute &attr, const TypeDesc &type, @@ -806,7 +806,7 @@ static bool get_primitive_attribute(const KernelGlobals *kg, } } -static bool get_mesh_attribute(const KernelGlobals *kg, +static bool get_mesh_attribute(const KernelGlobalsCPU *kg, const ShaderData *sd, const OSLGlobals::Attribute &attr, const TypeDesc &type, @@ -855,7 +855,7 @@ static bool get_object_attribute(const OSLGlobals::Attribute &attr, } } -bool OSLRenderServices::get_object_standard_attribute(const KernelGlobals *kg, +bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg, ShaderData *sd, ustring name, TypeDesc type, @@ -1000,7 +1000,7 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobals *kg, } } -bool OSLRenderServices::get_background_attribute(const KernelGlobals *kg, +bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg, ShaderData *sd, ustring name, TypeDesc type, @@ -1091,7 +1091,7 @@ bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool OSLRenderServices::get_attribute( ShaderData *sd, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val) { - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; int prim_type = 0; int object; @@ -1220,7 +1220,7 @@ bool OSLRenderServices::texture(ustring filename, OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; OSLTextureHandle::Type texture_type = (handle) ? handle->type : OSLTextureHandle::OIIO; ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobals *kernel_globals = sd->osl_globals; + KernelGlobals kernel_globals = sd->osl_globals; bool status = false; switch (texture_type) { @@ -1367,7 +1367,7 @@ bool OSLRenderServices::texture3d(ustring filename, case OSLTextureHandle::SVM: { /* Packed texture. */ ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobals *kernel_globals = sd->osl_globals; + KernelGlobals kernel_globals = sd->osl_globals; int slot = handle->svm_slot; float3 P_float3 = make_float3(P.x, P.y, P.z); float4 rgba = kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE); @@ -1389,7 +1389,7 @@ bool OSLRenderServices::texture3d(ustring filename, if (handle && handle->oiio_handle) { if (texture_thread_info == NULL) { ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobals *kernel_globals = sd->osl_globals; + KernelGlobals kernel_globals = sd->osl_globals; OSLThreadData *tdata = kernel_globals->osl_tdata; texture_thread_info = tdata->oiio_thread_info; } @@ -1474,7 +1474,7 @@ bool OSLRenderServices::environment(ustring filename, if (handle && handle->oiio_handle) { if (thread_info == NULL) { ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobals *kernel_globals = sd->osl_globals; + KernelGlobals kernel_globals = sd->osl_globals; OSLThreadData *tdata = kernel_globals->osl_tdata; thread_info = tdata->oiio_thread_info; } @@ -1629,7 +1629,7 @@ bool OSLRenderServices::trace(TraceOpt &options, tracedata->hit = false; tracedata->sd.osl_globals = sd->osl_globals; - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; /* Can't raytrace from shaders like displacement, before BVH exists. */ if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { @@ -1662,7 +1662,7 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, } else { ShaderData *sd = &tracedata->sd; - const KernelGlobals *kg = sd->osl_globals; + const KernelGlobalsCPU *kg = sd->osl_globals; if (!tracedata->setup) { /* lazy shader data setup */ diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h index a9671485eda..d9f57c642ad 100644 --- a/intern/cycles/kernel/osl/osl_services.h +++ b/intern/cycles/kernel/osl/osl_services.h @@ -40,7 +40,7 @@ class Scene; class Shader; struct ShaderData; struct float3; -struct KernelGlobals; +struct KernelGlobalsCPU; /* OSL Texture Handle * @@ -250,13 +250,13 @@ class OSLRenderServices : public OSL::RendererServices { void *data) override; #endif - static bool get_background_attribute(const KernelGlobals *kg, + static bool get_background_attribute(const KernelGlobalsCPU *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val); - static bool get_object_standard_attribute(const KernelGlobals *kg, + static bool get_object_standard_attribute(const KernelGlobalsCPU *kg, ShaderData *sd, ustring name, TypeDesc type, diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index 880ef635c76..a1df63ca8ff 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -41,7 +41,7 @@ CCL_NAMESPACE_BEGIN /* Threads */ -void OSLShader::thread_init(KernelGlobals *kg, OSLGlobals *osl_globals) +void OSLShader::thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals) { /* no osl used? */ if (!osl_globals->use) { @@ -67,7 +67,7 @@ void OSLShader::thread_init(KernelGlobals *kg, OSLGlobals *osl_globals) kg->osl_tdata = tdata; } -void OSLShader::thread_free(KernelGlobals *kg) +void OSLShader::thread_free(KernelGlobalsCPU *kg) { if (!kg->osl) return; @@ -87,7 +87,7 @@ void OSLShader::thread_free(KernelGlobals *kg) /* Globals */ -static void shaderdata_to_shaderglobals(const KernelGlobals *kg, +static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg, ShaderData *sd, const IntegratorStateCPU *state, int path_flag, @@ -174,7 +174,7 @@ static void flatten_surface_closure_tree(ShaderData *sd, } } -void OSLShader::eval_surface(const KernelGlobals *kg, +void OSLShader::eval_surface(const KernelGlobalsCPU *kg, const IntegratorStateCPU *state, ShaderData *sd, int path_flag) @@ -282,7 +282,7 @@ static void flatten_background_closure_tree(ShaderData *sd, } } -void OSLShader::eval_background(const KernelGlobals *kg, +void OSLShader::eval_background(const KernelGlobalsCPU *kg, const IntegratorStateCPU *state, ShaderData *sd, int path_flag) @@ -340,7 +340,7 @@ static void flatten_volume_closure_tree(ShaderData *sd, } } -void OSLShader::eval_volume(const KernelGlobals *kg, +void OSLShader::eval_volume(const KernelGlobalsCPU *kg, const IntegratorStateCPU *state, ShaderData *sd, int path_flag) @@ -366,7 +366,7 @@ void OSLShader::eval_volume(const KernelGlobals *kg, /* Displacement */ -void OSLShader::eval_displacement(const KernelGlobals *kg, +void OSLShader::eval_displacement(const KernelGlobalsCPU *kg, const IntegratorStateCPU *state, ShaderData *sd) { @@ -391,7 +391,7 @@ void OSLShader::eval_displacement(const KernelGlobals *kg, /* Attributes */ -int OSLShader::find_attribute(const KernelGlobals *kg, +int OSLShader::find_attribute(const KernelGlobalsCPU *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc) diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h index f1f17b141eb..686a1e1374a 100644 --- a/intern/cycles/kernel/osl/osl_shader.h +++ b/intern/cycles/kernel/osl/osl_shader.h @@ -39,7 +39,7 @@ struct ShaderClosure; struct ShaderData; struct IntegratorStateCPU; struct differential3; -struct KernelGlobals; +struct KernelGlobalsCPU; struct OSLGlobals; struct OSLShadingSystem; @@ -50,28 +50,28 @@ class OSLShader { static void register_closures(OSLShadingSystem *ss); /* per thread data */ - static void thread_init(KernelGlobals *kg, OSLGlobals *osl_globals); - static void thread_free(KernelGlobals *kg); + static void thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals); + static void thread_free(KernelGlobalsCPU *kg); /* eval */ - static void eval_surface(const KernelGlobals *kg, + static void eval_surface(const KernelGlobalsCPU *kg, const IntegratorStateCPU *state, ShaderData *sd, int path_flag); - static void eval_background(const KernelGlobals *kg, + static void eval_background(const KernelGlobalsCPU *kg, const IntegratorStateCPU *state, ShaderData *sd, int path_flag); - static void eval_volume(const KernelGlobals *kg, + static void eval_volume(const KernelGlobalsCPU *kg, const IntegratorStateCPU *state, ShaderData *sd, int path_flag); - static void eval_displacement(const KernelGlobals *kg, + static void eval_displacement(const KernelGlobalsCPU *kg, const IntegratorStateCPU *state, ShaderData *sd); /* attributes */ - static int find_attribute(const KernelGlobals *kg, + static int find_attribute(const KernelGlobalsCPU *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc); diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 871e370123e..9692308c496 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -107,15 +107,14 @@ ccl_device_inline bool stack_valid(uint a) /* Reading Nodes */ -ccl_device_inline uint4 read_node(ccl_global const KernelGlobals *kg, ccl_private int *offset) +ccl_device_inline uint4 read_node(KernelGlobals kg, ccl_private int *offset) { uint4 node = kernel_tex_fetch(__svm_nodes, *offset); (*offset)++; return node; } -ccl_device_inline float4 read_node_float(ccl_global const KernelGlobals *kg, - ccl_private int *offset) +ccl_device_inline float4 read_node_float(KernelGlobals kg, ccl_private int *offset) { uint4 node = kernel_tex_fetch(__svm_nodes, *offset); float4 f = make_float4(__uint_as_float(node.x), @@ -126,7 +125,7 @@ ccl_device_inline float4 read_node_float(ccl_global const KernelGlobals *kg, return f; } -ccl_device_inline float4 fetch_node_float(ccl_global const KernelGlobals *kg, int offset) +ccl_device_inline float4 fetch_node_float(KernelGlobals kg, int offset) { uint4 node = kernel_tex_fetch(__svm_nodes, offset); return make_float4(__uint_as_float(node.x), @@ -227,7 +226,8 @@ CCL_NAMESPACE_BEGIN /* Main Interpreter Loop */ template -ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS, +ccl_device void svm_eval_nodes(KernelGlobals kg, + ConstIntegratorState state, ShaderData *sd, ccl_global float *render_buffer, int path_flag) @@ -257,12 +257,14 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS, kg, sd, stack, node, path_flag, offset); break; case NODE_CLOSURE_EMISSION: - if (KERNEL_NODES_FEATURE(EMISSION)) { + IF_KERNEL_NODES_FEATURE(EMISSION) + { svm_node_closure_emission(sd, stack, node); } break; case NODE_CLOSURE_BACKGROUND: - if (KERNEL_NODES_FEATURE(EMISSION)) { + IF_KERNEL_NODES_FEATURE(EMISSION) + { svm_node_closure_background(sd, stack, node); } break; @@ -273,7 +275,8 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS, svm_node_closure_weight(sd, stack, node.y); break; case NODE_EMISSION_WEIGHT: - if (KERNEL_NODES_FEATURE(EMISSION)) { + IF_KERNEL_NODES_FEATURE(EMISSION) + { svm_node_emission_weight(kg, sd, stack, node); } break; @@ -310,27 +313,32 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS, svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w); break; case NODE_GEOMETRY_BUMP_DX: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z); } break; case NODE_GEOMETRY_BUMP_DY: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z); } break; case NODE_SET_DISPLACEMENT: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { svm_node_set_displacement(kg, sd, stack, node.y); } break; case NODE_DISPLACEMENT: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { svm_node_displacement(kg, sd, stack, node); } break; case NODE_VECTOR_DISPLACEMENT: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { offset = svm_node_vector_displacement(kg, sd, stack, node, offset); } break; @@ -344,52 +352,62 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS, offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset); break; case NODE_SET_BUMP: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { svm_node_set_bump(kg, sd, stack, node); } break; case NODE_ATTR_BUMP_DX: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { svm_node_attr_bump_dx(kg, sd, stack, node); } break; case NODE_ATTR_BUMP_DY: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { svm_node_attr_bump_dy(kg, sd, stack, node); } break; case NODE_VERTEX_COLOR_BUMP_DX: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w); } break; case NODE_VERTEX_COLOR_BUMP_DY: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w); } break; case NODE_TEX_COORD_BUMP_DX: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset); } break; case NODE_TEX_COORD_BUMP_DY: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset); } break; case NODE_CLOSURE_SET_NORMAL: - if (KERNEL_NODES_FEATURE(BUMP)) { + IF_KERNEL_NODES_FEATURE(BUMP) + { svm_node_set_normal(kg, sd, stack, node.y, node.z); } break; case NODE_ENTER_BUMP_EVAL: - if (KERNEL_NODES_FEATURE(BUMP_STATE)) { + IF_KERNEL_NODES_FEATURE(BUMP_STATE) + { svm_node_enter_bump_eval(kg, sd, stack, node.y); } break; case NODE_LEAVE_BUMP_EVAL: - if (KERNEL_NODES_FEATURE(BUMP_STATE)) { + IF_KERNEL_NODES_FEATURE(BUMP_STATE) + { svm_node_leave_bump_eval(kg, sd, stack, node.y); } break; @@ -407,12 +425,14 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS, svm_node_layer_weight(sd, stack, node); break; case NODE_CLOSURE_VOLUME: - if (KERNEL_NODES_FEATURE(VOLUME)) { + IF_KERNEL_NODES_FEATURE(VOLUME) + { svm_node_closure_volume(kg, sd, stack, node); } break; case NODE_PRINCIPLED_VOLUME: - if (KERNEL_NODES_FEATURE(VOLUME)) { + IF_KERNEL_NODES_FEATURE(VOLUME) + { offset = svm_node_principled_volume(kg, sd, stack, node, path_flag, offset); } break; @@ -432,7 +452,7 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS, svm_node_brightness(sd, stack, node.y, node.z, node.w); break; case NODE_LIGHT_PATH: - svm_node_light_path(INTEGRATOR_STATE_PASS, sd, stack, node.y, node.z, path_flag); + svm_node_light_path(kg, state, sd, stack, node.y, node.z, path_flag); break; case NODE_OBJECT_INFO: svm_node_object_info(kg, sd, stack, node.y, node.z); @@ -442,7 +462,8 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS, break; #if defined(__HAIR__) case NODE_HAIR_INFO: - if (KERNEL_NODES_FEATURE(HAIR)) { + IF_KERNEL_NODES_FEATURE(HAIR) + { svm_node_hair_info(kg, sd, stack, node.y, node.z); } break; @@ -554,15 +575,16 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS, break; #ifdef __SHADER_RAYTRACE__ case NODE_BEVEL: - svm_node_bevel(INTEGRATOR_STATE_PASS, sd, stack, node); + svm_node_bevel(kg, state, sd, stack, node); break; case NODE_AMBIENT_OCCLUSION: - svm_node_ao(INTEGRATOR_STATE_PASS, sd, stack, node); + svm_node_ao(kg, state, sd, stack, node); break; #endif case NODE_TEX_VOXEL: - if (KERNEL_NODES_FEATURE(VOLUME)) { + IF_KERNEL_NODES_FEATURE(VOLUME) + { offset = svm_node_tex_voxel(kg, sd, stack, node, offset); } break; @@ -572,10 +594,10 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS, } break; case NODE_AOV_COLOR: - svm_node_aov_color(INTEGRATOR_STATE_PASS, sd, stack, node, render_buffer); + svm_node_aov_color(kg, state, sd, stack, node, render_buffer); break; case NODE_AOV_VALUE: - svm_node_aov_value(INTEGRATOR_STATE_PASS, sd, stack, node, render_buffer); + svm_node_aov_value(kg, state, sd, stack, node, render_buffer); break; default: kernel_assert(!"Unknown node type was passed to the SVM machine"); diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h index 092f3817fd8..18d60c43b12 100644 --- a/intern/cycles/kernel/svm/svm_ao.h +++ b/intern/cycles/kernel/svm/svm_ao.h @@ -21,9 +21,11 @@ CCL_NAMESPACE_BEGIN #ifdef __SHADER_RAYTRACE__ # ifdef __KERNEL_OPTIX__ -extern "C" __device__ float __direct_callable__svm_node_ao(INTEGRATOR_STATE_CONST_ARGS, +extern "C" __device__ float __direct_callable__svm_node_ao(KernelGlobals kg, + ConstIntegratorState state, # else -ccl_device float svm_ao(INTEGRATOR_STATE_CONST_ARGS, +ccl_device float svm_ao(KernelGlobals kg, + ConstIntegratorState state, # endif ccl_private ShaderData *sd, float3 N, @@ -54,7 +56,7 @@ ccl_device float svm_ao(INTEGRATOR_STATE_CONST_ARGS, /* TODO: support ray-tracing in shadow shader evaluation? */ RNGState rng_state; - path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + path_state_rng_load(state, &rng_state); int unoccluded = 0; for (int sample = 0; sample < num_samples; sample++) { @@ -96,7 +98,8 @@ ccl_device_inline ccl_device_noinline # endif void - svm_node_ao(INTEGRATOR_STATE_CONST_ARGS, + svm_node_ao(KernelGlobals kg, + ConstIntegratorState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) @@ -112,11 +115,12 @@ ccl_device_noinline float ao = 1.0f; - if (KERNEL_NODES_FEATURE(RAYTRACE)) { + IF_KERNEL_NODES_FEATURE(RAYTRACE) + { # ifdef __KERNEL_OPTIX__ - ao = optixDirectCall(0, INTEGRATOR_STATE_PASS, sd, normal, dist, samples, flags); + ao = optixDirectCall(0, kg, state, sd, normal, dist, samples, flags); # else - ao = svm_ao(INTEGRATOR_STATE_PASS, sd, normal, dist, samples, flags); + ao = svm_ao(kg, state, sd, normal, dist, samples, flags); # endif } diff --git a/intern/cycles/kernel/svm/svm_aov.h b/intern/cycles/kernel/svm/svm_aov.h index 640bec87ac9..d09eaa61cc0 100644 --- a/intern/cycles/kernel/svm/svm_aov.h +++ b/intern/cycles/kernel/svm/svm_aov.h @@ -25,7 +25,9 @@ ccl_device_inline bool svm_node_aov_check(const int path_flag, ccl_global float return ((render_buffer != NULL) && is_primary); } -ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS, +template +ccl_device void svm_node_aov_color(KernelGlobals kg, + ConstIntegratorState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, @@ -33,8 +35,9 @@ ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS, { float3 val = stack_load_float3(stack, node.y); - if (render_buffer && !INTEGRATOR_STATE_IS_NULL) { - const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index); + IF_KERNEL_NODES_FEATURE(AOV) + { + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kernel_data.film.pass_stride; ccl_global float *buffer = render_buffer + render_buffer_offset + @@ -43,7 +46,9 @@ ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS, } } -ccl_device void svm_node_aov_value(INTEGRATOR_STATE_CONST_ARGS, +template +ccl_device void svm_node_aov_value(KernelGlobals kg, + ConstIntegratorState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, @@ -51,8 +56,9 @@ ccl_device void svm_node_aov_value(INTEGRATOR_STATE_CONST_ARGS, { float val = stack_load_float(stack, node.y); - if (render_buffer && !INTEGRATOR_STATE_IS_NULL) { - const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index); + IF_KERNEL_NODES_FEATURE(AOV) + { + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kernel_data.film.pass_stride; ccl_global float *buffer = render_buffer + render_buffer_offset + diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index 9fd401ba1c3..b3c66d29f5c 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Attribute Node */ -ccl_device AttributeDescriptor svm_node_attr_init(ccl_global const KernelGlobals *kg, +ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals kg, ccl_private ShaderData *sd, uint4 node, ccl_private NodeAttributeOutputType *type, @@ -48,7 +48,7 @@ ccl_device AttributeDescriptor svm_node_attr_init(ccl_global const KernelGlobals } template -ccl_device_noinline void svm_node_attr(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_attr(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) @@ -58,7 +58,8 @@ ccl_device_noinline void svm_node_attr(ccl_global const KernelGlobals *kg, AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); #ifdef __VOLUME__ - if (KERNEL_NODES_FEATURE(VOLUME)) { + IF_KERNEL_NODES_FEATURE(VOLUME) + { /* Volumes * NOTE: moving this into its own node type might help improve performance. */ if (primitive_is_volume_attribute(sd, desc)) { @@ -148,7 +149,7 @@ ccl_device_noinline void svm_node_attr(ccl_global const KernelGlobals *kg, } } -ccl_device_noinline void svm_node_attr_bump_dx(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) @@ -244,7 +245,7 @@ ccl_device_noinline void svm_node_attr_bump_dx(ccl_global const KernelGlobals *k } } -ccl_device_noinline void svm_node_attr_bump_dy(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_attr_bump_dy(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h index a76584e6bc8..197562434f9 100644 --- a/intern/cycles/kernel/svm/svm_bevel.h +++ b/intern/cycles/kernel/svm/svm_bevel.h @@ -99,9 +99,11 @@ ccl_device void svm_bevel_cubic_sample(const float radius, */ # ifdef __KERNEL_OPTIX__ -extern "C" __device__ float3 __direct_callable__svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS, +extern "C" __device__ float3 __direct_callable__svm_node_bevel(KernelGlobals kg, + ConstIntegratorState state, # else -ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS, +ccl_device float3 svm_bevel(KernelGlobals kg, + ConstIntegratorState state, # endif ccl_private ShaderData *sd, float radius, @@ -118,15 +120,15 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS, } /* Don't bevel for blurry indirect rays. */ - if (INTEGRATOR_STATE(path, min_ray_pdf) < 8.0f) { + if (INTEGRATOR_STATE(state, path, min_ray_pdf) < 8.0f) { return sd->N; } /* Setup for multi intersection. */ LocalIntersection isect; - uint lcg_state = lcg_state_init(INTEGRATOR_STATE(path, rng_hash), - INTEGRATOR_STATE(path, rng_offset), - INTEGRATOR_STATE(path, sample), + uint lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_hash), + INTEGRATOR_STATE(state, path, rng_offset), + INTEGRATOR_STATE(state, path, sample), 0x64c6a40e); /* Sample normals from surrounding points on surface. */ @@ -134,7 +136,7 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS, /* TODO: support ray-tracing in shadow shader evaluation? */ RNGState rng_state; - path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + path_state_rng_load(state, &rng_state); for (int sample = 0; sample < num_samples; sample++) { float disk_u, disk_v; @@ -287,7 +289,8 @@ ccl_device_inline ccl_device_noinline # endif void - svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS, + svm_node_bevel(KernelGlobals kg, + ConstIntegratorState state, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) @@ -299,11 +302,12 @@ ccl_device_noinline float3 bevel_N = sd->N; - if (KERNEL_NODES_FEATURE(RAYTRACE)) { + IF_KERNEL_NODES_FEATURE(RAYTRACE) + { # ifdef __KERNEL_OPTIX__ - bevel_N = optixDirectCall(1, INTEGRATOR_STATE_PASS, sd, radius, num_samples); + bevel_N = optixDirectCall(1, kg, state, sd, radius, num_samples); # else - bevel_N = svm_bevel(INTEGRATOR_STATE_PASS, sd, radius, num_samples); + bevel_N = svm_bevel(kg, state, sd, radius, num_samples); # endif if (stack_valid(normal_offset)) { diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h index 521afb42adc..f1adb0e76af 100644 --- a/intern/cycles/kernel/svm/svm_blackbody.h +++ b/intern/cycles/kernel/svm/svm_blackbody.h @@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN /* Blackbody Node */ -ccl_device_noinline void svm_node_blackbody(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_blackbody(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint temperature_offset, diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h index 29a8350f1c1..9dc31ef37ec 100644 --- a/intern/cycles/kernel/svm/svm_brick.h +++ b/intern/cycles/kernel/svm/svm_brick.h @@ -72,11 +72,8 @@ ccl_device_noinline_cpu float2 svm_brick(float3 p, return make_float2(tint, mortar); } -ccl_device_noinline int svm_node_tex_brick(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - int offset) +ccl_device_noinline int svm_node_tex_brick( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) { uint4 node2 = read_node(kg, &offset); uint4 node3 = read_node(kg, &offset); diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h index 70935c730f4..66e5b665532 100644 --- a/intern/cycles/kernel/svm/svm_bump.h +++ b/intern/cycles/kernel/svm/svm_bump.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Bump Eval Nodes */ -ccl_device_noinline void svm_node_enter_bump_eval(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint offset) @@ -45,7 +45,7 @@ ccl_device_noinline void svm_node_enter_bump_eval(ccl_global const KernelGlobals } } -ccl_device_noinline void svm_node_leave_bump_eval(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_leave_bump_eval(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint offset) diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h index 2b786757af8..787f11f38b5 100644 --- a/intern/cycles/kernel/svm/svm_camera.h +++ b/intern/cycles/kernel/svm/svm_camera.h @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -ccl_device_noinline void svm_node_camera(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_camera(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint out_vector, diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h index e22367f4f59..9251d90c0e1 100644 --- a/intern/cycles/kernel/svm/svm_checker.h +++ b/intern/cycles/kernel/svm/svm_checker.h @@ -32,7 +32,7 @@ ccl_device float svm_checker(float3 p) return ((xi % 2 == yi % 2) == (zi % 2)) ? 1.0f : 0.0f; } -ccl_device_noinline void svm_node_tex_checker(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_tex_checker(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) diff --git a/intern/cycles/kernel/svm/svm_clamp.h b/intern/cycles/kernel/svm/svm_clamp.h index cb5224aebb2..5b5ea784f4a 100644 --- a/intern/cycles/kernel/svm/svm_clamp.h +++ b/intern/cycles/kernel/svm/svm_clamp.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Clamp Node */ -ccl_device_noinline int svm_node_clamp(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_clamp(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint value_stack_offset, diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index 87be73bb2cc..fb10288da72 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -61,8 +61,21 @@ ccl_device void svm_node_glass_setup(ccl_private ShaderData *sd, } } +ccl_device_inline int svm_node_closure_bsdf_skip(KernelGlobals kg, int offset, uint type) +{ + if (type == CLOSURE_BSDF_PRINCIPLED_ID) { + /* Read all principled BSDF extra data to get the right offset. */ + read_node(kg, &offset); + read_node(kg, &offset); + read_node(kg, &offset); + read_node(kg, &offset); + } + + return offset; +} + template -ccl_device_noinline int svm_node_closure_bsdf(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, @@ -80,16 +93,15 @@ ccl_device_noinline int svm_node_closure_bsdf(ccl_global const KernelGlobals *kg uint4 data_node = read_node(kg, &offset); /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */ - if ((!KERNEL_NODES_FEATURE(BSDF) || shader_type != SHADER_TYPE_SURFACE) || mix_weight == 0.0f) { - if (type == CLOSURE_BSDF_PRINCIPLED_ID) { - /* Read all principled BSDF extra data to get the right offset. */ - read_node(kg, &offset); - read_node(kg, &offset); - read_node(kg, &offset); - read_node(kg, &offset); + IF_KERNEL_NODES_FEATURE(BSDF) + { + if ((shader_type != SHADER_TYPE_SURFACE) || mix_weight == 0.0f) { + return svm_node_closure_bsdf_skip(kg, offset, type); } - - return offset; + } + else + { + return svm_node_closure_bsdf_skip(kg, offset, type); } float3 N = stack_valid(data_node.x) ? stack_load_float3(stack, data_node.x) : sd->N; @@ -944,7 +956,7 @@ ccl_device_noinline int svm_node_closure_bsdf(ccl_global const KernelGlobals *kg } template -ccl_device_noinline void svm_node_closure_volume(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_closure_volume(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) @@ -999,7 +1011,7 @@ ccl_device_noinline void svm_node_closure_volume(ccl_global const KernelGlobals } template -ccl_device_noinline int svm_node_principled_volume(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, @@ -1194,7 +1206,7 @@ ccl_device void svm_node_closure_weight(ccl_private ShaderData *sd, svm_node_closure_store_weight(sd, weight); } -ccl_device_noinline void svm_node_emission_weight(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_emission_weight(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) @@ -1232,7 +1244,7 @@ ccl_device_noinline void svm_node_mix_closure(ccl_private ShaderData *sd, /* (Bump) normal */ -ccl_device void svm_node_set_normal(ccl_global const KernelGlobals *kg, +ccl_device void svm_node_set_normal(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint in_direction, diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h index 0d53779a5c8..ec5745dc78a 100644 --- a/intern/cycles/kernel/svm/svm_convert.h +++ b/intern/cycles/kernel/svm/svm_convert.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Conversion Nodes */ -ccl_device_noinline void svm_node_convert(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_convert(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint type, diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h index 7a3c8a6d36d..f2446c3b3ef 100644 --- a/intern/cycles/kernel/svm/svm_displace.h +++ b/intern/cycles/kernel/svm/svm_displace.h @@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN /* Bump Node */ -ccl_device_noinline void svm_node_set_bump(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_set_bump(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) @@ -88,7 +88,7 @@ ccl_device_noinline void svm_node_set_bump(ccl_global const KernelGlobals *kg, /* Displacement Node */ -ccl_device void svm_node_set_displacement(ccl_global const KernelGlobals *kg, +ccl_device void svm_node_set_displacement(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint fac_offset) @@ -97,7 +97,7 @@ ccl_device void svm_node_set_displacement(ccl_global const KernelGlobals *kg, sd->P += dP; } -ccl_device_noinline void svm_node_displacement(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_displacement(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) @@ -127,11 +127,8 @@ ccl_device_noinline void svm_node_displacement(ccl_global const KernelGlobals *k stack_store_float3(stack, node.z, dP); } -ccl_device_noinline int svm_node_vector_displacement(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - int offset) +ccl_device_noinline int svm_node_vector_displacement( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) { uint4 data_node = read_node(kg, &offset); uint space = data_node.x; diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h index a94464d3a52..b29bfdbed07 100644 --- a/intern/cycles/kernel/svm/svm_geometry.h +++ b/intern/cycles/kernel/svm/svm_geometry.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Geometry Node */ -ccl_device_noinline void svm_node_geometry(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_geometry(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint type, @@ -54,7 +54,7 @@ ccl_device_noinline void svm_node_geometry(ccl_global const KernelGlobals *kg, stack_store_float3(stack, out_offset, data); } -ccl_device_noinline void svm_node_geometry_bump_dx(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_geometry_bump_dx(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint type, @@ -81,7 +81,7 @@ ccl_device_noinline void svm_node_geometry_bump_dx(ccl_global const KernelGlobal #endif } -ccl_device_noinline void svm_node_geometry_bump_dy(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_geometry_bump_dy(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint type, @@ -110,7 +110,7 @@ ccl_device_noinline void svm_node_geometry_bump_dy(ccl_global const KernelGlobal /* Object Info */ -ccl_device_noinline void svm_node_object_info(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_object_info(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint type, @@ -152,7 +152,7 @@ ccl_device_noinline void svm_node_object_info(ccl_global const KernelGlobals *kg /* Particle Info */ -ccl_device_noinline void svm_node_particle_info(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_particle_info(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint type, @@ -214,7 +214,7 @@ ccl_device_noinline void svm_node_particle_info(ccl_global const KernelGlobals * /* Hair Info */ -ccl_device_noinline void svm_node_hair_info(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_hair_info(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint type, diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h index feb85eda122..978c4c2d781 100644 --- a/intern/cycles/kernel/svm/svm_hsv.h +++ b/intern/cycles/kernel/svm/svm_hsv.h @@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN -ccl_device_noinline void svm_node_hsv(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_hsv(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h index 7d41205c9ef..0215670d062 100644 --- a/intern/cycles/kernel/svm/svm_ies.h +++ b/intern/cycles/kernel/svm/svm_ies.h @@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN /* IES Light */ ccl_device_inline float interpolate_ies_vertical( - ccl_global const KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h) + KernelGlobals kg, int ofs, int v, int v_num, float v_frac, int h) { /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end * of v (corresponding to the north pole) would result in artifacts. The proper way of dealing @@ -39,10 +39,7 @@ ccl_device_inline float interpolate_ies_vertical( return cubic_interp(a, b, c, d, v_frac); } -ccl_device_inline float kernel_ies_interp(ccl_global const KernelGlobals *kg, - int slot, - float h_angle, - float v_angle) +ccl_device_inline float kernel_ies_interp(KernelGlobals kg, int slot, float h_angle, float v_angle) { /* Find offset of the IES data in the table. */ int ofs = __float_as_int(kernel_tex_fetch(__ies, slot)); @@ -98,7 +95,7 @@ ccl_device_inline float kernel_ies_interp(ccl_global const KernelGlobals *kg, return max(cubic_interp(a, b, c, d, h_frac), 0.0f); } -ccl_device_noinline void svm_node_ies(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_ies(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 2de80d5fc29..68374fcfb0d 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -16,8 +16,7 @@ CCL_NAMESPACE_BEGIN -ccl_device float4 -svm_image_texture(ccl_global const KernelGlobals *kg, int id, float x, float y, uint flags) +ccl_device float4 svm_image_texture(KernelGlobals kg, int id, float x, float y, uint flags) { if (id == -1) { return make_float4( @@ -45,11 +44,8 @@ ccl_device_inline float3 texco_remap_square(float3 co) return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f; } -ccl_device_noinline int svm_node_tex_image(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - int offset) +ccl_device_noinline int svm_node_tex_image( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) { uint co_offset, out_offset, alpha_offset, flags; @@ -121,7 +117,7 @@ ccl_device_noinline int svm_node_tex_image(ccl_global const KernelGlobals *kg, return offset; } -ccl_device_noinline void svm_node_tex_image_box(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_tex_image_box(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) @@ -223,7 +219,7 @@ ccl_device_noinline void svm_node_tex_image_box(ccl_global const KernelGlobals * stack_store_float(stack, alpha_offset, f.w); } -ccl_device_noinline void svm_node_tex_environment(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_tex_environment(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h index aaff8376c7c..955a1f23379 100644 --- a/intern/cycles/kernel/svm/svm_light_path.h +++ b/intern/cycles/kernel/svm/svm_light_path.h @@ -18,7 +18,9 @@ CCL_NAMESPACE_BEGIN /* Light Path Node */ -ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS, +template +ccl_device_noinline void svm_node_light_path(KernelGlobals kg, + ConstIntegratorState state, ccl_private const ShaderData *sd, ccl_private float *stack, uint type, @@ -62,9 +64,12 @@ ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS, /* Read bounce from difference location depending if this is a shadow * path. It's a bit dubious to have integrate state details leak into * this function but hard to avoid currently. */ - int bounce = (INTEGRATOR_STATE_IS_NULL) ? 0 : - (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(shadow_path, bounce) : - INTEGRATOR_STATE(path, bounce); + int bounce = 0; + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + bounce = (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, bounce) : + INTEGRATOR_STATE(state, path, bounce); + } /* For background, light emission and shadow evaluation we from a * surface or volume we are effective one bounce further. */ @@ -77,11 +82,13 @@ ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS, } /* TODO */ case NODE_LP_ray_transparent: { - const int bounce = (INTEGRATOR_STATE_IS_NULL) ? - 0 : - (path_flag & PATH_RAY_SHADOW) ? - INTEGRATOR_STATE(shadow_path, transparent_bounce) : - INTEGRATOR_STATE(path, transparent_bounce); + int bounce = 0; + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + bounce = (path_flag & PATH_RAY_SHADOW) ? + INTEGRATOR_STATE(state, shadow_path, transparent_bounce) : + INTEGRATOR_STATE(state, path, transparent_bounce); + } info = (float)bounce; break; diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h index 4c4f3bcf523..d3a429fec56 100644 --- a/intern/cycles/kernel/svm/svm_magic.h +++ b/intern/cycles/kernel/svm/svm_magic.h @@ -87,11 +87,8 @@ ccl_device_noinline_cpu float3 svm_magic(float3 p, int n, float distortion) return make_float3(0.5f - x, 0.5f - y, 0.5f - z); } -ccl_device_noinline int svm_node_tex_magic(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - int offset) +ccl_device_noinline int svm_node_tex_magic( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) { uint depth; uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset; diff --git a/intern/cycles/kernel/svm/svm_map_range.h b/intern/cycles/kernel/svm/svm_map_range.h index f4f7d3ca76f..5e89947c6c7 100644 --- a/intern/cycles/kernel/svm/svm_map_range.h +++ b/intern/cycles/kernel/svm/svm_map_range.h @@ -24,7 +24,7 @@ ccl_device_inline float smootherstep(float edge0, float edge1, float x) return x * x * x * (x * (x * 6.0f - 15.0f) + 10.0f); } -ccl_device_noinline int svm_node_map_range(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_map_range(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint value_stack_offset, diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h index 8102afc637e..ed420e5bc3d 100644 --- a/intern/cycles/kernel/svm/svm_mapping.h +++ b/intern/cycles/kernel/svm/svm_mapping.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Mapping Node */ -ccl_device_noinline void svm_node_mapping(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_mapping(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint type, @@ -43,7 +43,7 @@ ccl_device_noinline void svm_node_mapping(ccl_global const KernelGlobals *kg, /* Texture Mapping */ -ccl_device_noinline int svm_node_texture_mapping(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_texture_mapping(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint vec_offset, @@ -62,7 +62,7 @@ ccl_device_noinline int svm_node_texture_mapping(ccl_global const KernelGlobals return offset; } -ccl_device_noinline int svm_node_min_max(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_min_max(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint vec_offset, diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h index 3897a453873..97f7d486c09 100644 --- a/intern/cycles/kernel/svm/svm_math.h +++ b/intern/cycles/kernel/svm/svm_math.h @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -ccl_device_noinline void svm_node_math(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_math(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint type, @@ -34,7 +34,7 @@ ccl_device_noinline void svm_node_math(ccl_global const KernelGlobals *kg, stack_store_float(stack, result_stack_offset, result); } -ccl_device_noinline int svm_node_vector_math(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_vector_math(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint type, diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h index 0064c5e643c..568dda3dddc 100644 --- a/intern/cycles/kernel/svm/svm_mix.h +++ b/intern/cycles/kernel/svm/svm_mix.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Node */ -ccl_device_noinline int svm_node_mix(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_mix(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint fac_offset, diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h index 8523f45b95f..decd29bbe13 100644 --- a/intern/cycles/kernel/svm/svm_musgrave.h +++ b/intern/cycles/kernel/svm/svm_musgrave.h @@ -700,7 +700,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_4d( return value; } -ccl_device_noinline int svm_node_tex_musgrave(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_tex_musgrave(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint offsets1, diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h index 61da8227efa..3fe33f72b59 100644 --- a/intern/cycles/kernel/svm/svm_noisetex.h +++ b/intern/cycles/kernel/svm/svm_noisetex.h @@ -140,7 +140,7 @@ ccl_device void noise_texture_4d(float4 co, } } -ccl_device_noinline int svm_node_tex_noise(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_tex_noise(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint dimensions, diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h index 0d1b4200d54..9bf64ed8823 100644 --- a/intern/cycles/kernel/svm/svm_normal.h +++ b/intern/cycles/kernel/svm/svm_normal.h @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -ccl_device_noinline int svm_node_normal(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_normal(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint in_normal_offset, diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h index ef8b0d103c1..d2dddf4c6eb 100644 --- a/intern/cycles/kernel/svm/svm_ramp.h +++ b/intern/cycles/kernel/svm/svm_ramp.h @@ -21,18 +21,14 @@ CCL_NAMESPACE_BEGIN /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ -ccl_device_inline float fetch_float(ccl_global const KernelGlobals *kg, int offset) +ccl_device_inline float fetch_float(KernelGlobals kg, int offset) { uint4 node = kernel_tex_fetch(__svm_nodes, offset); return __uint_as_float(node.x); } -ccl_device_inline float float_ramp_lookup(ccl_global const KernelGlobals *kg, - int offset, - float f, - bool interpolate, - bool extrapolate, - int table_size) +ccl_device_inline float float_ramp_lookup( + KernelGlobals kg, int offset, float f, bool interpolate, bool extrapolate, int table_size) { if ((f < 0.0f || f > 1.0f) && extrapolate) { float t0, dy; @@ -63,12 +59,8 @@ ccl_device_inline float float_ramp_lookup(ccl_global const KernelGlobals *kg, return a; } -ccl_device_inline float4 rgb_ramp_lookup(ccl_global const KernelGlobals *kg, - int offset, - float f, - bool interpolate, - bool extrapolate, - int table_size) +ccl_device_inline float4 rgb_ramp_lookup( + KernelGlobals kg, int offset, float f, bool interpolate, bool extrapolate, int table_size) { if ((f < 0.0f || f > 1.0f) && extrapolate) { float4 t0, dy; @@ -99,11 +91,8 @@ ccl_device_inline float4 rgb_ramp_lookup(ccl_global const KernelGlobals *kg, return a; } -ccl_device_noinline int svm_node_rgb_ramp(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - int offset) +ccl_device_noinline int svm_node_rgb_ramp( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) { uint fac_offset, color_offset, alpha_offset; uint interpolate = node.z; @@ -124,11 +113,8 @@ ccl_device_noinline int svm_node_rgb_ramp(ccl_global const KernelGlobals *kg, return offset; } -ccl_device_noinline int svm_node_curves(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - int offset) +ccl_device_noinline int svm_node_curves( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) { uint fac_offset, color_offset, out_offset; svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &out_offset); @@ -153,11 +139,8 @@ ccl_device_noinline int svm_node_curves(ccl_global const KernelGlobals *kg, return offset; } -ccl_device_noinline int svm_node_curve(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - int offset) +ccl_device_noinline int svm_node_curve( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) { uint fac_offset, value_in_offset, out_offset; svm_unpack_node_uchar3(node.y, &fac_offset, &value_in_offset, &out_offset); diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h index 3cd4ba87a55..bafa0456342 100644 --- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h +++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -ccl_device_noinline int svm_node_combine_hsv(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_combine_hsv(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint hue_in, @@ -39,7 +39,7 @@ ccl_device_noinline int svm_node_combine_hsv(ccl_global const KernelGlobals *kg, return offset; } -ccl_device_noinline int svm_node_separate_hsv(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_separate_hsv(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint color_in, diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h index 04db8109170..3ab7bc89c66 100644 --- a/intern/cycles/kernel/svm/svm_sky.h +++ b/intern/cycles/kernel/svm/svm_sky.h @@ -37,7 +37,7 @@ ccl_device float sky_perez_function(ccl_private float *lam, float theta, float g (1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma); } -ccl_device float3 sky_radiance_preetham(ccl_global const KernelGlobals *kg, +ccl_device float3 sky_radiance_preetham(KernelGlobals kg, float3 dir, float sunphi, float suntheta, @@ -90,7 +90,7 @@ ccl_device float sky_radiance_internal(ccl_private float *configuration, float t configuration[6] * mieM + configuration[7] * zenith); } -ccl_device float3 sky_radiance_hosek(ccl_global const KernelGlobals *kg, +ccl_device float3 sky_radiance_hosek(KernelGlobals kg, float3 dir, float sunphi, float suntheta, @@ -127,7 +127,7 @@ ccl_device float3 geographical_to_direction(float lat, float lon) return make_float3(cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat)); } -ccl_device float3 sky_radiance_nishita(ccl_global const KernelGlobals *kg, +ccl_device float3 sky_radiance_nishita(KernelGlobals kg, float3 dir, ccl_private float *nishita_data, uint texture_id) @@ -209,11 +209,8 @@ ccl_device float3 sky_radiance_nishita(ccl_global const KernelGlobals *kg, return xyz_to_rgb(kg, xyz); } -ccl_device_noinline int svm_node_tex_sky(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - int offset) +ccl_device_noinline int svm_node_tex_sky( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) { /* Load data */ uint dir_offset = node.y; diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index 295d5e9f65b..657a4bb32a8 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN /* Texture Coordinate Node */ -ccl_device_noinline int svm_node_tex_coord(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_tex_coord(KernelGlobals kg, ccl_private ShaderData *sd, int path_flag, ccl_private float *stack, @@ -103,7 +103,7 @@ ccl_device_noinline int svm_node_tex_coord(ccl_global const KernelGlobals *kg, return offset; } -ccl_device_noinline int svm_node_tex_coord_bump_dx(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg, ccl_private ShaderData *sd, int path_flag, ccl_private float *stack, @@ -188,7 +188,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(ccl_global const KernelGlobal #endif } -ccl_device_noinline int svm_node_tex_coord_bump_dy(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg, ccl_private ShaderData *sd, int path_flag, ccl_private float *stack, @@ -273,7 +273,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(ccl_global const KernelGlobal #endif } -ccl_device_noinline void svm_node_normal_map(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_normal_map(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) @@ -366,7 +366,7 @@ ccl_device_noinline void svm_node_normal_map(ccl_global const KernelGlobals *kg, stack_store_float3(stack, normal_offset, N); } -ccl_device_noinline void svm_node_tangent(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_tangent(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h index d1038bc072d..cc72961d0f6 100644 --- a/intern/cycles/kernel/svm/svm_value.h +++ b/intern/cycles/kernel/svm/svm_value.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Value Nodes */ -ccl_device void svm_node_value_f(ccl_global const KernelGlobals *kg, +ccl_device void svm_node_value_f(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint ivalue, @@ -27,7 +27,7 @@ ccl_device void svm_node_value_f(ccl_global const KernelGlobals *kg, stack_store_float(stack, out_offset, __uint_as_float(ivalue)); } -ccl_device int svm_node_value_v(ccl_global const KernelGlobals *kg, +ccl_device int svm_node_value_v(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint out_offset, diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h index b6c898c3952..4e0d36647da 100644 --- a/intern/cycles/kernel/svm/svm_vector_transform.h +++ b/intern/cycles/kernel/svm/svm_vector_transform.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Vector Transform */ -ccl_device_noinline void svm_node_vector_transform(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_vector_transform(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) diff --git a/intern/cycles/kernel/svm/svm_vertex_color.h b/intern/cycles/kernel/svm/svm_vertex_color.h index 3641f05ca43..a5fa15ee085 100644 --- a/intern/cycles/kernel/svm/svm_vertex_color.h +++ b/intern/cycles/kernel/svm/svm_vertex_color.h @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -ccl_device_noinline void svm_node_vertex_color(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_vertex_color(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint layer_id, @@ -35,7 +35,7 @@ ccl_device_noinline void svm_node_vertex_color(ccl_global const KernelGlobals *k } } -ccl_device_noinline void svm_node_vertex_color_bump_dx(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_vertex_color_bump_dx(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint layer_id, @@ -56,7 +56,7 @@ ccl_device_noinline void svm_node_vertex_color_bump_dx(ccl_global const KernelGl } } -ccl_device_noinline void svm_node_vertex_color_bump_dy(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_vertex_color_bump_dy(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint layer_id, diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h index 062a8bde415..b8067520770 100644 --- a/intern/cycles/kernel/svm/svm_voronoi.h +++ b/intern/cycles/kernel/svm/svm_voronoi.h @@ -917,7 +917,7 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, } template -ccl_device_noinline int svm_node_tex_voronoi(ccl_global const KernelGlobals *kg, +ccl_device_noinline int svm_node_tex_voronoi(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint dimensions, @@ -1013,7 +1013,8 @@ ccl_device_noinline int svm_node_tex_voronoi(ccl_global const KernelGlobals *kg, &position_out_2d); break; case NODE_VORONOI_SMOOTH_F1: - if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) { + IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA) + { voronoi_smooth_f1_2d(coord_2d, smoothness, exponent, @@ -1058,7 +1059,8 @@ ccl_device_noinline int svm_node_tex_voronoi(ccl_global const KernelGlobals *kg, &position_out); break; case NODE_VORONOI_SMOOTH_F1: - if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) { + IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA) + { voronoi_smooth_f1_3d(coord, smoothness, exponent, @@ -1092,7 +1094,8 @@ ccl_device_noinline int svm_node_tex_voronoi(ccl_global const KernelGlobals *kg, } case 4: { - if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) { + IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA) + { float4 coord_4d = make_float4(coord.x, coord.y, coord.z, w); float4 position_out_4d; switch (voronoi_feature) { diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h index 764fb71ba72..be4bb315145 100644 --- a/intern/cycles/kernel/svm/svm_voxel.h +++ b/intern/cycles/kernel/svm/svm_voxel.h @@ -19,11 +19,8 @@ CCL_NAMESPACE_BEGIN /* TODO(sergey): Think of making it more generic volume-type attribute * sampler. */ -ccl_device_noinline int svm_node_tex_voxel(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - int offset) +ccl_device_noinline int svm_node_tex_voxel( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) { uint co_offset, density_out_offset, color_out_offset, space; svm_unpack_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space); diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h index 1ac130e2006..d04b7aa3476 100644 --- a/intern/cycles/kernel/svm/svm_wave.h +++ b/intern/cycles/kernel/svm/svm_wave.h @@ -82,11 +82,8 @@ ccl_device_noinline_cpu float svm_wave(NodeWaveType type, } } -ccl_device_noinline int svm_node_tex_wave(ccl_global const KernelGlobals *kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - int offset) +ccl_device_noinline int svm_node_tex_wave( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) { uint4 node2 = read_node(kg, &offset); uint4 node3 = read_node(kg, &offset); diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h index e891744f276..4ef041f68d5 100644 --- a/intern/cycles/kernel/svm/svm_wavelength.h +++ b/intern/cycles/kernel/svm/svm_wavelength.h @@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN /* Wavelength to RGB */ -ccl_device_noinline void svm_node_wavelength(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_wavelength(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint wavelength, diff --git a/intern/cycles/kernel/svm/svm_white_noise.h b/intern/cycles/kernel/svm/svm_white_noise.h index ccc49bf1a7c..6c2c3d6a683 100644 --- a/intern/cycles/kernel/svm/svm_white_noise.h +++ b/intern/cycles/kernel/svm/svm_white_noise.h @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -ccl_device_noinline void svm_node_tex_white_noise(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_tex_white_noise(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint dimensions, diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h index 70d1211aa4a..d75976d23e1 100644 --- a/intern/cycles/kernel/svm/svm_wireframe.h +++ b/intern/cycles/kernel/svm/svm_wireframe.h @@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN /* Wireframe Node */ -ccl_device_inline float wireframe(ccl_global const KernelGlobals *kg, +ccl_device_inline float wireframe(KernelGlobals kg, ccl_private ShaderData *sd, float size, int pixel_size, @@ -91,7 +91,7 @@ ccl_device_inline float wireframe(ccl_global const KernelGlobals *kg, return 0.0f; } -ccl_device_noinline void svm_node_wireframe(ccl_global const KernelGlobals *kg, +ccl_device_noinline void svm_node_wireframe(KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node) -- cgit v1.2.3