diff options
Diffstat (limited to 'intern/cycles/kernel')
70 files changed, 4009 insertions, 3493 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index ccd694dfdfd..8ecdac6ee27 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -42,6 +42,7 @@ set(SRC_KERNEL_DEVICE_ONEAPI ) set(SRC_KERNEL_DEVICE_CPU_HEADERS + device/cpu/bvh.h device/cpu/compat.h device/cpu/image.h device/cpu/globals.h @@ -71,14 +72,17 @@ set(SRC_KERNEL_DEVICE_HIP_HEADERS ) set(SRC_KERNEL_DEVICE_OPTIX_HEADERS + device/optix/bvh.h device/optix/compat.h device/optix/globals.h ) set(SRC_KERNEL_DEVICE_METAL_HEADERS + device/metal/bvh.h device/metal/compat.h device/metal/context_begin.h device/metal/context_end.h + device/metal/function_constants.h device/metal/globals.h ) @@ -86,7 +90,6 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS device/oneapi/compat.h device/oneapi/context_begin.h device/oneapi/context_end.h - device/oneapi/device_id.h device/oneapi/globals.h device/oneapi/image.h device/oneapi/kernel.h @@ -155,6 +158,7 @@ set(SRC_KERNEL_SVM_HEADERS svm/math_util.h svm/mix.h svm/musgrave.h + svm/node_types_template.h svm/noise.h svm/noisetex.h svm/normal.h @@ -213,8 +217,6 @@ set(SRC_KERNEL_BVH_HEADERS bvh/util.h bvh/volume.h bvh/volume_all.h - bvh/embree.h - bvh/metal.h ) set(SRC_KERNEL_CAMERA_HEADERS @@ -283,6 +285,7 @@ set(SRC_KERNEL_UTIL_HEADERS set(SRC_KERNEL_TYPES_HEADERS data_arrays.h + data_template.h tables.h types.h ) @@ -314,6 +317,7 @@ set(SRC_UTIL_HEADERS ../util/math_float2.h ../util/math_float3.h ../util/math_float4.h + ../util/math_float8.h ../util/math_int2.h ../util/math_int3.h ../util/math_int4.h @@ -732,8 +736,6 @@ if(WITH_CYCLES_DEVICE_ONEAPI) -O2 -o ${cycles_kernel_oneapi_lib} -I${CMAKE_CURRENT_SOURCE_DIR}/.. 
- -I${LEVEL_ZERO_INCLUDE_DIR} - ${LEVEL_ZERO_LIBRARY} ${SYCL_CPP_FLAGS} ) @@ -847,10 +849,9 @@ if(WITH_CYCLES_DEVICE_ONEAPI) else() list(APPEND sycl_compiler_flags -fPIC) - # avoid getting __FAST_MATH__ to be defined for the graphics compiler on CentOS 7 until the compile-time issue it triggers gets fixed. - if(WITH_CYCLES_ONEAPI_BINARIES) - list(APPEND sycl_compiler_flags -fhonor-nans) - endif() + # We avoid getting __FAST_MATH__ to be defined when building on CentOS 7 until the compilation crash + # it triggers at either AoT or JIT stages gets fixed. + list(APPEND sycl_compiler_flags -fhonor-nans) # add $ORIGIN to cycles_kernel_oneapi.so rpath so libsycl.so and # libpi_level_zero.so can be placed next to it and get found. diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index a1d0e307170..bcefe5d970c 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -1,40 +1,46 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ -/* BVH - * - * Bounding volume hierarchy for ray tracing. We compile different variations - * of the same BVH traversal function for faster rendering when some types of - * primitives are not needed, using #includes to work around the lack of - * C++ templates in OpenCL. - * - * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs", - * the code has been extended and modified to support more primitives and work - * with CPU/CUDA/OpenCL. */ - #pragma once -#ifdef __EMBREE__ -# include "kernel/bvh/embree.h" -#endif - -#ifdef __METALRT__ -# include "kernel/bvh/metal.h" -#endif - #include "kernel/bvh/types.h" #include "kernel/bvh/util.h" #include "kernel/integrator/state_util.h" +/* Device specific accleration structures for ray tracing. 
*/ + +#if defined(__EMBREE__) +# include "kernel/device/cpu/bvh.h" +#elif defined(__METALRT__) +# include "kernel/device/metal/bvh.h" +#elif defined(__KERNEL_OPTIX__) +# include "kernel/device/optix/bvh.h" +#else +# define __BVH2__ +#endif + CCL_NAMESPACE_BEGIN -#if !defined(__KERNEL_GPU_RAYTRACING__) +#ifdef __BVH2__ -/* Regular BVH traversal */ +/* BVH2 + * + * Bounding volume hierarchy for ray tracing, when no native acceleration + * structure is available for the device. + + * We compile different variations of the same BVH traversal function for + * faster rendering when some types of primitives are not needed, using #includes + * to work around the lack of C++ templates in OpenCL. + * + * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs", + * the code has been extended and modified to support more primitives and work + * with CPU and various GPU kernel languages. */ # include "kernel/bvh/nodes.h" +/* Regular BVH traversal */ + # define BVH_FUNCTION_NAME bvh_intersect # define BVH_FUNCTION_FEATURES BVH_POINTCLOUD # include "kernel/bvh/traversal.h" @@ -57,261 +63,15 @@ CCL_NAMESPACE_BEGIN # include "kernel/bvh/traversal.h" # endif -/* Subsurface scattering BVH traversal */ - -# if defined(__BVH_LOCAL__) -# define BVH_FUNCTION_NAME bvh_intersect_local -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/local.h" - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_local_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/local.h" -# endif -# endif /* __BVH_LOCAL__ */ - -/* Volume BVH traversal */ - -# if defined(__VOLUME__) -# define BVH_FUNCTION_NAME bvh_intersect_volume -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/volume.h" - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/volume.h" -# endif -# endif /* __VOLUME__ */ - -/* Record all 
intersections - Shadow BVH traversal */ - -# if defined(__SHADOW_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all -# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" - -# if defined(__HAIR__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair -# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" -# endif - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" -# endif - -# if defined(__HAIR__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion -# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" -# endif - -# endif /* __SHADOW_RECORD_ALL__ */ - -/* Record all intersections - Volume BVH traversal. */ - -# if defined(__VOLUME_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/volume_all.h" - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/volume_all.h" -# endif -# endif /* __VOLUME_RECORD_ALL__ */ - -# undef BVH_FEATURE -# undef BVH_NAME_JOIN -# undef BVH_NAME_EVAL -# undef BVH_FUNCTION_FULL_NAME - -#endif /* !defined(__KERNEL_GPU_RAYTRACING__) */ - -ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray) -{ - /* NOTE: Due to some vectorization code non-finite origin point might - * cause lots of false-positive intersections which will overflow traversal - * stack. - * This code is a quick way to perform early output, to avoid crashes in - * such cases. - * From production scenes so far it seems it's enough to test first element - * only. 
- * Scene intersection may also called with empty rays for conditional trace - * calls that evaluate to false, so filter those out. - */ - return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f; -} - ccl_device_intersect bool scene_intersect(KernelGlobals kg, ccl_private const Ray *ray, const uint visibility, ccl_private Intersection *isect) { -#ifdef __KERNEL_OPTIX__ - uint p0 = 0; - uint p1 = 0; - uint p2 = 0; - uint p3 = 0; - uint p4 = visibility; - uint p5 = PRIMITIVE_NONE; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - uint ray_mask = visibility & 0xFF; - uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } - else if (visibility & PATH_RAY_SHADOW_OPAQUE) { - ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT; - } - - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - ray_mask, - ray_flags, - 0, /* SBT offset for PG_HITD */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - isect->t = __uint_as_float(p0); - isect->u = __uint_as_float(p1); - isect->v = __uint_as_float(p2); - isect->prim = p3; - isect->object = p4; - isect->type = p5; - - return p5 != PRIMITIVE_NONE; -#elif defined(__METALRT__) - - if (!scene_intersect_valid(ray)) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; + if (!intersection_ray_valid(ray)) { return false; } -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); - return false; - } - - if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; - kernel_assert(!"Invalid ift_default"); - return false; - } -# endif - - metal::raytracing::ray r(ray->P, ray->D, 
0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; - - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } - - MetalRTIntersectionPayload payload; - payload.self = ray->self; - payload.u = 0.0f; - payload.v = 0.0f; - payload.visibility = visibility; - - typename metalrt_intersector_type::result_type intersection; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - /* No further intersector setup required: Default MetalRT behavior is any-hit. */ - } - else if (visibility & PATH_RAY_SHADOW_OPAQUE) { - /* No further intersector setup required: Shadow ray early termination is controlled by the - * intersection handler */ - } - -# if defined(__METALRT_MOTION__) - payload.time = ray->time; - intersection = metalrt_intersect.intersect(r, - metal_ancillaries->accel_struct, - ray_mask, - ray->time, - metal_ancillaries->ift_default, - payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); -# endif - - if (intersection.type == intersection_type::none) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; - - return false; - } - - isect->t = intersection.distance; - - isect->prim = payload.prim; - isect->type = payload.type; - isect->object = intersection.user_instance_id; - - isect->t = intersection.distance; - if (intersection.type == intersection_type::triangle) { - isect->u = 1.0f - intersection.triangle_barycentric_coord.y - - intersection.triangle_barycentric_coord.x; - isect->v = intersection.triangle_barycentric_coord.x; - } - else { - isect->u = payload.u; - isect->v = payload.v; - } - - return isect->type != PRIMITIVE_NONE; - -#else - - if (!scene_intersect_valid(ray)) { - return false; - } - -# ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - isect->t = ray->t; - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); 
- IntersectContext rtc_ctx(&ctx); - RTCRayHit ray_hit; - ctx.ray = ray; - kernel_embree_setup_rayhit(*ray, ray_hit, visibility); - rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit); - if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && - ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) { - kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect); - return true; - } - return false; - } -# endif /* __EMBREE__ */ - # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { # ifdef __HAIR__ @@ -322,7 +82,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, return bvh_intersect_motion(kg, ray, isect, visibility); } -# endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ # ifdef __HAIR__ if (kernel_data.bvh.have_curves) { @@ -331,10 +91,22 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, # endif /* __HAIR__ */ return bvh_intersect(kg, ray, isect, visibility); -#endif /* __KERNEL_OPTIX__ */ } -#ifdef __BVH_LOCAL__ +/* Single object BVH traversal, for SSS/AO/bevel. 
*/ + +# ifdef __BVH_LOCAL__ + +# define BVH_FUNCTION_NAME bvh_intersect_local +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "kernel/bvh/local.h" + +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_local_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR +# include "kernel/bvh/local.h" +# endif + ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, ccl_private const Ray *ray, ccl_private LocalIntersection *local_isect, @@ -342,180 +114,48 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, ccl_private uint *lcg_state, int max_hits) { -# ifdef __KERNEL_OPTIX__ - uint p0 = pointer_pack_to_uint_0(lcg_state); - uint p1 = pointer_pack_to_uint_1(lcg_state); - uint p2 = pointer_pack_to_uint_0(local_isect); - uint p3 = pointer_pack_to_uint_1(local_isect); - uint p4 = local_object; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - /* Is set to zero on miss or if ray is aborted, so can be used as return value. */ - uint p5 = max_hits; - - if (local_isect) { - local_isect->num_hits = 0; /* Initialize hit count to zero. */ - } - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - 0xFF, - /* Need to always call into __anyhit__kernel_optix_local_hit. 
*/ - OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - 2, /* SBT offset for PG_HITL */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - return p5; -# elif defined(__METALRT__) - if (!scene_intersect_valid(ray)) { - if (local_isect) { - local_isect->num_hits = 0; - } - return false; - } - -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + if (!intersection_ray_valid(ray)) { if (local_isect) { local_isect->num_hits = 0; } - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); return false; } - if (is_null_intersection_function_table(metal_ancillaries->ift_local)) { - if (local_isect) { - local_isect->num_hits = 0; - } - kernel_assert(!"Invalid ift_local"); - return false; +# ifdef __OBJECT_MOTION__ + if (kernel_data.bvh.have_motion) { + return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits); } -# endif - - metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; +# endif /* __OBJECT_MOTION__ */ + return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits); +} +# endif - metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } +/* Transparent shadow BVH traversal, recording multiple intersections. 
*/ - MetalRTIntersectionLocalPayload payload; - payload.self = ray->self; - payload.local_object = local_object; - payload.max_hits = max_hits; - payload.local_isect.num_hits = 0; - if (lcg_state) { - payload.has_lcg_state = true; - payload.lcg_state = *lcg_state; - } - payload.result = false; +# ifdef __SHADOW_RECORD_ALL__ - typename metalrt_intersector_type::result_type intersection; +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all +# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" -# if defined(__METALRT_MOTION__) - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload); +# if defined(__HAIR__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair +# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" # endif - if (lcg_state) { - *lcg_state = payload.lcg_state; - } - *local_isect = payload.local_isect; - - return payload.result; - -# else - - if (!scene_intersect_valid(ray)) { - if (local_isect) { - local_isect->num_hits = 0; - } - return false; - } - -# ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) & - SD_OBJECT_TRANSFORM_APPLIED); - CCLIntersectContext ctx( - kg, has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL); - ctx.lcg_state = lcg_state; - ctx.max_hits = max_hits; - ctx.ray = ray; - ctx.local_isect = local_isect; - if (local_isect) { - local_isect->num_hits = 0; - } - ctx.local_object_id = local_object; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY); - - /* If this object has its own BVH, use it. 
*/ - if (has_bvh) { - RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2); - if (geom) { - float3 P = ray->P; - float3 dir = ray->D; - float3 idir = ray->D; - Transform ob_itfm; - rtc_ray.tfar = ray->t * - bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm); - /* bvh_instance_motion_push() returns the inverse transform but - * it's not needed here. */ - (void)ob_itfm; - - rtc_ray.org_x = P.x; - rtc_ray.org_y = P.y; - rtc_ray.org_z = P.z; - rtc_ray.dir_x = dir.x; - rtc_ray.dir_y = dir.y; - rtc_ray.dir_z = dir.z; - RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom); - kernel_assert(scene); - if (scene) { - rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray); - } - } - } - else { - rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); - } - - /* rtcOccluded1 sets tfar to -inf if a hit was found. */ - return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0); - ; - } -# endif /* __EMBREE__ */ +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" +# endif -# ifdef __OBJECT_MOTION__ - if (kernel_data.bvh.have_motion) { - return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits); - } -# endif /* __OBJECT_MOTION__ */ - return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits); -# endif /* __KERNEL_OPTIX__ */ -} -#endif +# if defined(__HAIR__) && defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion +# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" +# endif -#ifdef __SHADOW_RECORD_ALL__ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, IntegratorShadowState state, ccl_private const Ray *ray, @@ -524,132 +164,12 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, ccl_private 
uint *num_recorded_hits, ccl_private float *throughput) { -# ifdef __KERNEL_OPTIX__ - uint p0 = state; - uint p1 = __float_as_uint(1.0f); /* Throughput. */ - uint p2 = 0; /* Number of hits. */ - uint p3 = max_hits; - uint p4 = visibility; - uint p5 = false; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } - - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - ray_mask, - /* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */ - OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - 1, /* SBT offset for PG_HITS */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - *num_recorded_hits = uint16_unpack_from_uint_0(p2); - *throughput = __uint_as_float(p1); - - return p5; -# elif defined(__METALRT__) - - if (!scene_intersect_valid(ray)) { - return false; - } - -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); - return false; - } - - if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) { - kernel_assert(!"Invalid ift_shadow"); - return false; - } -# endif - - metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; - - metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } - - MetalRTIntersectionShadowPayload payload; - payload.self = ray->self; - payload.visibility = visibility; - payload.max_hits = max_hits; - payload.num_hits = 0; - payload.num_recorded_hits = 0; - payload.throughput = 1.0f; - payload.result = false; - payload.state = state; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask 
&& (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } - - typename metalrt_intersector_type::result_type intersection; - -# if defined(__METALRT_MOTION__) - payload.time = ray->time; - intersection = metalrt_intersect.intersect(r, - metal_ancillaries->accel_struct, - ray_mask, - ray->time, - metal_ancillaries->ift_shadow, - payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload); -# endif - - *num_recorded_hits = payload.num_recorded_hits; - *throughput = payload.throughput; - - return payload.result; - -# else - if (!scene_intersect_valid(ray)) { + if (!intersection_ray_valid(ray)) { *num_recorded_hits = 0; *throughput = 1.0f; return false; } -# ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL); - Intersection *isect_array = (Intersection *)state->shadow_isect; - ctx.isect_s = isect_array; - ctx.max_hits = max_hits; - ctx.ray = ray; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, visibility); - rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); - - *num_recorded_hits = ctx.num_recorded_hits; - *throughput = ctx.throughput; - return ctx.opaque_hit; - } -# endif /* __EMBREE__ */ - # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { # ifdef __HAIR__ @@ -662,7 +182,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, return bvh_intersect_shadow_all_motion( kg, ray, state, visibility, max_hits, num_recorded_hits, throughput); } -# endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ # ifdef __HAIR__ if (kernel_data.bvh.have_curves) { @@ -673,180 +193,83 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, return bvh_intersect_shadow_all( kg, ray, state, visibility, max_hits, num_recorded_hits, throughput); -# endif /* __KERNEL_OPTIX__ */ } -#endif /* __SHADOW_RECORD_ALL__ */ +# endif /* 
__SHADOW_RECORD_ALL__ */ + +/* Volume BVH traversal, for initializing or updating the volume stack. */ + +# if defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) + +# define BVH_FUNCTION_NAME bvh_intersect_volume +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "kernel/bvh/volume.h" + +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR +# include "kernel/bvh/volume.h" +# endif -#ifdef __VOLUME__ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect, const uint visibility) { -# ifdef __KERNEL_OPTIX__ - uint p0 = 0; - uint p1 = 0; - uint p2 = 0; - uint p3 = 0; - uint p4 = visibility; - uint p5 = PRIMITIVE_NONE; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } - - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - ray_mask, - /* Need to always call into __anyhit__kernel_optix_volume_test. 
*/ - OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - 3, /* SBT offset for PG_HITV */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - isect->t = __uint_as_float(p0); - isect->u = __uint_as_float(p1); - isect->v = __uint_as_float(p2); - isect->prim = p3; - isect->object = p4; - isect->type = p5; - - return p5 != PRIMITIVE_NONE; -# elif defined(__METALRT__) - - if (!scene_intersect_valid(ray)) { - return false; - } -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + if (!intersection_ray_valid(ray)) { return false; } - if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { - kernel_assert(!"Invalid ift_default"); - return false; +# ifdef __OBJECT_MOTION__ + if (kernel_data.bvh.have_motion) { + return bvh_intersect_volume_motion(kg, ray, isect, visibility); } -# endif - - metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; +# endif /* __OBJECT_MOTION__ */ - metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } + return bvh_intersect_volume(kg, ray, isect, visibility); +} +# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */ - MetalRTIntersectionPayload payload; - payload.self = ray->self; - payload.visibility = visibility; +/* Volume BVH traversal, for initializing or updating the volume stack. + * Variation that records multiple intersections at once. 
*/ - typename metalrt_intersector_type::result_type intersection; +# if defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } +# define BVH_FUNCTION_NAME bvh_intersect_volume_all +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "kernel/bvh/volume_all.h" -# if defined(__METALRT_MOTION__) - payload.time = ray->time; - intersection = metalrt_intersect.intersect(r, - metal_ancillaries->accel_struct, - ray_mask, - ray->time, - metal_ancillaries->ift_default, - payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR +# include "kernel/bvh/volume_all.h" # endif - if (intersection.type == intersection_type::none) { - return false; - } - - isect->prim = payload.prim; - isect->type = payload.type; - isect->object = intersection.user_instance_id; - - isect->t = intersection.distance; - if (intersection.type == intersection_type::triangle) { - isect->u = 1.0f - intersection.triangle_barycentric_coord.y - - intersection.triangle_barycentric_coord.x; - isect->v = intersection.triangle_barycentric_coord.x; - } - else { - isect->u = payload.u; - isect->v = payload.v; - } - - return isect->type != PRIMITIVE_NONE; - -# else - if (!scene_intersect_valid(ray)) { +ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint max_hits, + const uint visibility) +{ + if (!intersection_ray_valid(ray)) { return false; } # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { - return bvh_intersect_volume_motion(kg, ray, isect, visibility); + return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); } # endif /* __OBJECT_MOTION__ */ 
- return bvh_intersect_volume(kg, ray, isect, visibility); -# endif /* __KERNEL_OPTIX__ */ + return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); } -#endif /* __VOLUME__ */ -#ifdef __VOLUME_RECORD_ALL__ -ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg, - ccl_private const Ray *ray, - ccl_private Intersection *isect, - const uint max_hits, - const uint visibility) -{ - if (!scene_intersect_valid(ray)) { - return false; - } +# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */ -# ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL); - ctx.isect_s = isect; - ctx.max_hits = max_hits; - ctx.num_hits = 0; - ctx.ray = ray; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, visibility); - rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); - return ctx.num_hits; - } -# endif /* __EMBREE__ */ - -# ifdef __OBJECT_MOTION__ - if (kernel_data.bvh.have_motion) { - return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); - } -# endif /* __OBJECT_MOTION__ */ +# undef BVH_FEATURE +# undef BVH_NAME_JOIN +# undef BVH_NAME_EVAL +# undef BVH_FUNCTION_FULL_NAME - return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); -} -#endif /* __VOLUME_RECORD_ALL__ */ +#endif /* __BVH2__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/embree.h b/intern/cycles/kernel/bvh/embree.h deleted file mode 100644 index 1c6b9bc1e62..00000000000 --- a/intern/cycles/kernel/bvh/embree.h +++ /dev/null @@ -1,176 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2022 Blender Foundation. 
*/ - -#pragma once - -#include <embree3/rtcore_ray.h> -#include <embree3/rtcore_scene.h> - -#include "kernel/device/cpu/compat.h" -#include "kernel/device/cpu/globals.h" - -#include "kernel/bvh/util.h" - -#include "util/vector.h" - -CCL_NAMESPACE_BEGIN - -struct CCLIntersectContext { - typedef enum { - RAY_REGULAR = 0, - RAY_SHADOW_ALL = 1, - RAY_LOCAL = 2, - RAY_SSS = 3, - RAY_VOLUME_ALL = 4, - } RayType; - - KernelGlobals kg; - RayType type; - - /* For avoiding self intersections */ - const Ray *ray; - - /* for shadow rays */ - Intersection *isect_s; - uint max_hits; - uint num_hits; - uint num_recorded_hits; - float throughput; - float max_t; - bool opaque_hit; - - /* for SSS Rays: */ - LocalIntersection *local_isect; - int local_object_id; - uint *lcg_state; - - CCLIntersectContext(KernelGlobals kg_, RayType type_) - { - kg = kg_; - type = type_; - ray = NULL; - max_hits = 1; - num_hits = 0; - num_recorded_hits = 0; - throughput = 1.0f; - max_t = FLT_MAX; - opaque_hit = false; - isect_s = NULL; - local_isect = NULL; - local_object_id = -1; - lcg_state = NULL; - } -}; - -class IntersectContext { - public: - IntersectContext(CCLIntersectContext *ctx) - { - rtcInitIntersectContext(&context); - userRayExt = ctx; - } - RTCIntersectContext context; - CCLIntersectContext *userRayExt; -}; - -ccl_device_inline void kernel_embree_setup_ray(const Ray &ray, - RTCRay &rtc_ray, - const uint visibility) -{ - rtc_ray.org_x = ray.P.x; - rtc_ray.org_y = ray.P.y; - rtc_ray.org_z = ray.P.z; - rtc_ray.dir_x = ray.D.x; - rtc_ray.dir_y = ray.D.y; - rtc_ray.dir_z = ray.D.z; - rtc_ray.tnear = 0.0f; - rtc_ray.tfar = ray.t; - rtc_ray.time = ray.time; - rtc_ray.mask = visibility; -} - -ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray, - RTCRayHit &rayhit, - const uint visibility) -{ - kernel_embree_setup_ray(ray, rayhit.ray, visibility); - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; -} - -ccl_device_inline bool 
kernel_embree_is_self_intersection(const KernelGlobals kg, - const RTCHit *hit, - const Ray *ray) -{ - bool status = false; - if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { - const int oID = hit->instID[0] / 2; - if ((ray->self.object == oID) || (ray->self.light_object == oID)) { - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); - const int pID = hit->primID + - (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - status = intersection_skip_self_shadow(ray->self, oID, pID); - } - } - else { - const int oID = hit->geomID / 2; - if ((ray->self.object == oID) || (ray->self.light_object == oID)) { - const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); - status = intersection_skip_self_shadow(ray->self, oID, pID); - } - } - - return status; -} - -ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg, - const RTCRay *ray, - const RTCHit *hit, - Intersection *isect) -{ - isect->t = ray->tfar; - if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); - isect->prim = hit->primID + - (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - isect->object = hit->instID[0] / 2; - } - else { - isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); - isect->object = hit->geomID / 2; - } - - const bool is_hair = hit->geomID & 1; - if (is_hair) { - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim); - isect->type = segment.type; - isect->prim = segment.prim; - isect->u = hit->u; - isect->v = hit->v; - } - else { - isect->type = kernel_data_fetch(objects, isect->object).primitive_type; - isect->u = 1.0f - hit->v - hit->u; - isect->v = hit->u; - } -} - -ccl_device_inline void 
kernel_embree_convert_sss_hit( - KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object) -{ - isect->u = 1.0f - hit->v - hit->u; - isect->v = hit->u; - isect->t = ray->tfar; - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, object * 2)); - isect->prim = hit->primID + - (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - isect->object = object; - isect->type = kernel_data_fetch(objects, object).primitive_type; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/local.h b/intern/cycles/kernel/bvh/local.h index 3b6b30ea93d..add61adc126 100644 --- a/intern/cycles/kernel/bvh/local.h +++ b/intern/cycles/kernel/bvh/local.h @@ -47,8 +47,9 @@ ccl_device_inline float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + float tmin = ray->tmin; int object = OBJECT_NONE; - float isect_t = ray->t; + float isect_t = ray->tmax; if (local_isect != NULL) { local_isect->num_hits = 0; @@ -58,10 +59,9 @@ ccl_device_inline const int object_flag = kernel_data_fetch(object_flag, local_object); if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; - isect_t *= bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir); #else - isect_t *= bvh_instance_push(kg, local_object, ray, &P, &dir, &idir); + bvh_instance_push(kg, local_object, ray, &P, &dir, &idir); #endif object = local_object; } @@ -81,6 +81,7 @@ ccl_device_inline dir, #endif idir, + tmin, isect_t, node_addr, PATH_RAY_ALL_VISIBILITY, @@ -155,6 +156,7 @@ ccl_device_inline local_object, prim, prim_addr, + tmin, isect_t, lcg_state, max_hits)) { @@ -191,6 +193,7 @@ ccl_device_inline local_object, prim, prim_addr, + tmin, isect_t, lcg_state, max_hits)) { diff --git a/intern/cycles/kernel/bvh/metal.h b/intern/cycles/kernel/bvh/metal.h deleted file 
mode 100644 index 04289e259a7..00000000000 --- a/intern/cycles/kernel/bvh/metal.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2021-2022 Blender Foundation */ - -struct MetalRTIntersectionPayload { - RaySelfPrimitives self; - uint visibility; - float u, v; - int prim; - int type; -#if defined(__METALRT_MOTION__) - float time; -#endif -}; - -struct MetalRTIntersectionLocalPayload { - RaySelfPrimitives self; - uint local_object; - uint lcg_state; - short max_hits; - bool has_lcg_state; - bool result; - LocalIntersection local_isect; -}; - -struct MetalRTIntersectionShadowPayload { - RaySelfPrimitives self; - uint visibility; -#if defined(__METALRT_MOTION__) - float time; -#endif - int state; - float throughput; - short max_hits; - short num_hits; - short num_recorded_hits; - bool result; -}; diff --git a/intern/cycles/kernel/bvh/nodes.h b/intern/cycles/kernel/bvh/nodes.h index c19dea9223b..e02841fad16 100644 --- a/intern/cycles/kernel/bvh/nodes.h +++ b/intern/cycles/kernel/bvh/nodes.h @@ -18,7 +18,8 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, const float3 P, const float3 idir, - const float t, + const float tmin, + const float tmax, const int node_addr, const uint visibility, float dist[2]) @@ -39,8 +40,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, float c0hiy = (node1.z - P.y) * idir.y; float c0loz = (node2.x - P.z) * idir.z; float c0hiz = (node2.z - P.z) * idir.z; - float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); - float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); + float c0min = max4(tmin, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); + float c0max = min4(tmax, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); float c1lox = (node0.y - P.x) * idir.x; float c1hix = (node0.w - P.x) * idir.x; @@ -48,8 
+49,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, float c1hiy = (node1.w - P.y) * idir.y; float c1loz = (node2.y - P.z) * idir.z; float c1hiz = (node2.w - P.z) * idir.z; - float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); - float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); + float c1min = max4(tmin, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); + float c1max = min4(tmax, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); dist[0] = c0min; dist[1] = c1min; @@ -66,7 +67,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg, const float3 P, const float3 dir, - const float t, + const float tmin, + const float tmax, int node_addr, int child, float dist[2]) @@ -83,8 +85,8 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg, const float far_x = max(lower_xyz.x, upper_xyz.x); const float far_y = max(lower_xyz.y, upper_xyz.y); const float far_z = max(lower_xyz.z, upper_xyz.z); - const float tnear = max4(0.0f, near_x, near_y, near_z); - const float tfar = min4(t, far_x, far_y, far_z); + const float tnear = max4(tmin, near_x, near_y, near_z); + const float tfar = min4(tmax, far_x, far_y, far_z); *dist = tnear; return tnear <= tfar; } @@ -93,7 +95,8 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg, const float3 P, const float3 dir, const float3 idir, - const float t, + const float tmin, + const float tmax, const int node_addr, const uint visibility, float dist[2]) @@ -102,7 +105,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg, #ifdef __VISIBILITY_FLAG__ float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); #endif - if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) { + if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 0, 
&dist[0])) { #ifdef __VISIBILITY_FLAG__ if ((__float_as_uint(cnodes.x) & visibility)) #endif @@ -110,7 +113,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg, mask |= 1; } } - if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) { + if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 1, &dist[1])) { #ifdef __VISIBILITY_FLAG__ if ((__float_as_uint(cnodes.y) & visibility)) #endif @@ -125,16 +128,17 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg, const float3 P, const float3 dir, const float3 idir, - const float t, + const float tmin, + const float tmax, const int node_addr, const uint visibility, float dist[2]) { float4 node = kernel_data_fetch(bvh_nodes, node_addr); if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist); + return bvh_unaligned_node_intersect(kg, P, dir, idir, tmin, tmax, node_addr, visibility, dist); } else { - return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist); + return bvh_aligned_node_intersect(kg, P, idir, tmin, tmax, node_addr, visibility, dist); } } diff --git a/intern/cycles/kernel/bvh/shadow_all.h b/intern/cycles/kernel/bvh/shadow_all.h index e86fe867eac..f37af2a1e65 100644 --- a/intern/cycles/kernel/bvh/shadow_all.h +++ b/intern/cycles/kernel/bvh/shadow_all.h @@ -49,26 +49,15 @@ ccl_device_inline float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + float tmin = ray->tmin; int object = OBJECT_NONE; uint num_hits = 0; -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - /* Max distance in world space. May be dynamically reduced when max number of * recorded hits is exceeded and we no longer need to find hits beyond the max * distance found. */ - float t_max_world = ray->t; - - /* Current maximum distance to the intersection. 
- * Is calculated as a ray length, transformed to an object space when entering - * instance node. */ - float t_max_current = ray->t; - - /* Conversion from world to local space for the current instance if any, 1.0 - * otherwise. */ - float t_world_to_instance = 1.0f; + const float tmax = ray->tmax; + float tmax_hits = tmax; *r_num_recorded_hits = 0; *r_throughput = 1.0f; @@ -88,7 +77,8 @@ ccl_device_inline dir, #endif idir, - t_max_current, + tmin, + tmax, node_addr, visibility, dist); @@ -157,7 +147,7 @@ ccl_device_inline switch (type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { hit = triangle_intersect( - kg, &isect, P, dir, t_max_current, visibility, prim_object, prim, prim_addr); + kg, &isect, P, dir, tmin, tmax, visibility, prim_object, prim, prim_addr); break; } #if BVH_FEATURE(BVH_MOTION) @@ -166,7 +156,8 @@ ccl_device_inline &isect, P, dir, - t_max_current, + tmin, + tmax, ray->time, visibility, prim_object, @@ -190,7 +181,7 @@ ccl_device_inline const int curve_type = kernel_data_fetch(prim_type, prim_addr); hit = curve_intersect( - kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, curve_type); + kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, curve_type); break; } @@ -208,7 +199,7 @@ ccl_device_inline const int point_type = kernel_data_fetch(prim_type, prim_addr); hit = point_intersect( - kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, point_type); + kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, point_type); break; } #endif /* BVH_FEATURE(BVH_POINTCLOUD) */ @@ -220,9 +211,6 @@ ccl_device_inline /* shadow ray early termination */ if (hit) { - /* Convert intersection distance to world space. */ - isect.t /= t_world_to_instance; - /* detect if this surface has a shader with transparent shadows */ /* todo: optimize so primitive visibility flag indicates if * the primitive has a transparent shadow shader? 
*/ @@ -254,7 +242,7 @@ ccl_device_inline if (record_intersection) { /* Test if we need to record this transparent intersection. */ const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); - if (*r_num_recorded_hits < max_record_hits || isect.t < t_max_world) { + if (*r_num_recorded_hits < max_record_hits || isect.t < tmax_hits) { /* If maximum number of hits was reached, replace the intersection with the * highest distance. We want to find the N closest intersections. */ const uint num_recorded_hits = min(*r_num_recorded_hits, max_record_hits); @@ -276,7 +264,7 @@ ccl_device_inline } /* Limit the ray distance and stop counting hits beyond this. */ - t_max_world = max(isect.t, max_t); + tmax_hits = max(isect.t, max_t); } integrator_state_write_shadow_isect(state, &isect, isect_index); @@ -294,15 +282,11 @@ ccl_device_inline object = kernel_data_fetch(prim_object, -prim_addr - 1); #if BVH_FEATURE(BVH_MOTION) - t_world_to_instance = bvh_instance_motion_push( - kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif - /* Convert intersection to object space. */ - t_max_current *= t_world_to_instance; - ++stack_ptr; kernel_assert(stack_ptr < BVH_STACK_SIZE); traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; @@ -317,16 +301,12 @@ ccl_device_inline /* Instance pop. */ #if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir); #else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir); #endif - /* Restore world space ray length. 
*/ - t_max_current = ray->t; - object = OBJECT_NONE; - t_world_to_instance = 1.0f; node_addr = traversal_stack[stack_ptr]; --stack_ptr; } diff --git a/intern/cycles/kernel/bvh/traversal.h b/intern/cycles/kernel/bvh/traversal.h index 784fbf4fd11..9069d16912b 100644 --- a/intern/cycles/kernel/bvh/traversal.h +++ b/intern/cycles/kernel/bvh/traversal.h @@ -43,13 +43,10 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + const float tmin = ray->tmin; int object = OBJECT_NONE; -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; + isect->t = ray->tmax; isect->u = 0.0f; isect->v = 0.0f; isect->prim = PRIM_NONE; @@ -71,6 +68,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, dir, #endif idir, + tmin, isect->t, node_addr, visibility, @@ -133,8 +131,16 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, switch (type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { - if (triangle_intersect( - kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr)) { + if (triangle_intersect(kg, + isect, + P, + dir, + tmin, + isect->t, + visibility, + prim_object, + prim, + prim_addr)) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) return true; @@ -147,6 +153,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, isect, P, dir, + tmin, isect->t, ray->time, visibility, @@ -174,7 +181,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, const int curve_type = kernel_data_fetch(prim_type, prim_addr); const bool hit = curve_intersect( - kg, isect, P, dir, isect->t, prim_object, prim, ray->time, curve_type); + kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, curve_type); if (hit) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) @@ -195,7 +202,7 @@ ccl_device_noinline 
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, const int point_type = kernel_data_fetch(prim_type, prim_addr); const bool hit = point_intersect( - kg, isect, P, dir, isect->t, prim_object, prim, ray->time, point_type); + kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, point_type); if (hit) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) @@ -212,9 +219,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, object = kernel_data_fetch(prim_object, -prim_addr - 1); #if BVH_FEATURE(BVH_MOTION) - isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif ++stack_ptr; @@ -231,9 +238,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, /* instance pop */ #if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir); #else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir); #endif object = OBJECT_NONE; diff --git a/intern/cycles/kernel/bvh/util.h b/intern/cycles/kernel/bvh/util.h index 572e023db25..b67c9394bea 100644 --- a/intern/cycles/kernel/bvh/util.h +++ b/intern/cycles/kernel/bvh/util.h @@ -5,7 +5,35 @@ CCL_NAMESPACE_BEGIN -#if defined(__KERNEL_CPU__) +ccl_device_inline bool intersection_ray_valid(ccl_private const Ray *ray) +{ + /* NOTE: Due to some vectorization code non-finite origin point might + * cause lots of false-positive intersections which will overflow traversal + * stack. + * This code is a quick way to perform early output, to avoid crashes in + * such cases. + * From production scenes so far it seems it's enough to test first element + * only. 
+ * Scene intersection may also be called with empty rays for conditional trace + * calls that evaluate to false, so filter those out. + */ + return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f; +} + +/* Offset intersection distance by the smallest possible amount, to skip + * intersections at this distance. This works in cases where the ray start + * position is unchanged and only tmin is updated, since for self + * intersection we'll be comparing against the exact same distances. */ +ccl_device_forceinline float intersection_t_offset(const float t) +{ + /* This is a simplified version of `nextafterf(t, FLT_MAX)`, only dealing with + * non-negative and finite t. */ + kernel_assert(t >= 0.0f && isfinite_safe(t)); + const uint32_t bits = (t == 0.0f) ? 1 : __float_as_uint(t) + 1; + return __uint_as_float(bits); +} + +#ifndef __KERNEL_GPU__ ccl_device int intersections_compare(const void *a, const void *b) { const Intersection *isect_a = (const Intersection *)a; diff --git a/intern/cycles/kernel/bvh/volume.h b/intern/cycles/kernel/bvh/volume.h index 9715712a8f2..cc3915b4bf7 100644 --- a/intern/cycles/kernel/bvh/volume.h +++ b/intern/cycles/kernel/bvh/volume.h @@ -46,13 +46,10 @@ ccl_device_inline float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + const float tmin = ray->tmin; int object = OBJECT_NONE; -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; + isect->t = ray->tmax; isect->u = 0.0f; isect->v = 0.0f; isect->prim = PRIM_NONE; @@ -73,6 +70,7 @@ ccl_device_inline dir, #endif idir, + tmin, isect->t, node_addr, visibility, @@ -140,7 +138,7 @@ ccl_device_inline continue; } triangle_intersect( - kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr); + kg, isect, P, dir, tmin, isect->t, visibility, prim_object, prim, prim_addr); } break; } @@ -165,6 +163,7 @@ ccl_device_inline isect, P, dir, + tmin, isect->t, ray->time, visibility, @@ 
-186,9 +185,9 @@ ccl_device_inline int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_HAS_VOLUME) { #if BVH_FEATURE(BVH_MOTION) - isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif ++stack_ptr; @@ -212,9 +211,9 @@ ccl_device_inline /* instance pop */ #if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir); #else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir); #endif object = OBJECT_NONE; diff --git a/intern/cycles/kernel/bvh/volume_all.h b/intern/cycles/kernel/bvh/volume_all.h index d06ea8fe557..5cdea3e354c 100644 --- a/intern/cycles/kernel/bvh/volume_all.h +++ b/intern/cycles/kernel/bvh/volume_all.h @@ -44,21 +44,17 @@ ccl_device_inline int node_addr = kernel_data.bvh.root; /* ray parameters in registers */ - const float tmax = ray->t; float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + const float tmin = ray->tmin; int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif + float isect_t = ray->tmax; int num_hits_in_instance = 0; uint num_hits = 0; - isect_array->t = tmax; + isect_array->t = ray->tmax; /* traversal loop */ do { @@ -75,6 +71,7 @@ ccl_device_inline dir, #endif idir, + tmin, isect_t, node_addr, visibility, @@ -141,8 +138,16 @@ ccl_device_inline if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; } - hit = triangle_intersect( - kg, isect_array, P, dir, isect_t, visibility, prim_object, prim, prim_addr); + hit = triangle_intersect(kg, + isect_array, + P, + dir, + tmin, + isect_t, + 
visibility, + prim_object, + prim, + prim_addr); if (hit) { /* Move on to next entry in intersections array. */ isect_array++; @@ -150,18 +155,6 @@ ccl_device_inline num_hits_in_instance++; isect_array->t = isect_t; if (num_hits == max_hits) { - if (object != OBJECT_NONE) { -#if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -#else - Transform itfm = object_fetch_transform( - kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -#endif - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } return num_hits; } } @@ -189,6 +182,7 @@ ccl_device_inline isect_array, P, dir, + tmin, isect_t, ray->time, visibility, @@ -202,18 +196,6 @@ ccl_device_inline num_hits_in_instance++; isect_array->t = isect_t; if (num_hits == max_hits) { - if (object != OBJECT_NONE) { -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform( - kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } return num_hits; } } @@ -232,9 +214,9 @@ ccl_device_inline int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_HAS_VOLUME) { #if BVH_FEATURE(BVH_MOTION) - isect_t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - isect_t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif num_hits_in_instance = 0; @@ -260,28 +242,11 @@ ccl_device_inline kernel_assert(object != OBJECT_NONE); /* Instance pop. 
*/ - if (num_hits_in_instance) { - float t_fac; #if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir); #else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir); #endif - /* Scale isect->t to adjust for instancing. */ - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } - else { -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); -#else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); -#endif - } - - isect_t = tmax; - isect_array->t = isect_t; object = OBJECT_NONE; node_addr = traversal_stack[stack_ptr]; diff --git a/intern/cycles/kernel/camera/camera.h b/intern/cycles/kernel/camera/camera.h index 25960a94ddb..926ccf7b86f 100644 --- a/intern/cycles/kernel/camera/camera.h +++ b/intern/cycles/kernel/camera/camera.h @@ -165,9 +165,11 @@ ccl_device void camera_sample_perspective(KernelGlobals kg, float nearclip = kernel_data.cam.nearclip * z_inv; ray->P += nearclip * ray->D; ray->dP += nearclip * ray->dD; - ray->t = kernel_data.cam.cliplength * z_inv; + ray->tmin = 0.0f; + ray->tmax = kernel_data.cam.cliplength * z_inv; #else - ray->t = FLT_MAX; + ray->tmin = 0.0f; + ray->tmax = FLT_MAX; #endif } @@ -231,9 +233,11 @@ ccl_device void camera_sample_orthographic(KernelGlobals kg, #ifdef __CAMERA_CLIPPING__ /* clipping */ - ray->t = kernel_data.cam.cliplength; + ray->tmin = 0.0f; + ray->tmax = kernel_data.cam.cliplength; #else - ray->t = FLT_MAX; + ray->tmin = 0.0f; + ray->tmax = FLT_MAX; #endif } @@ -258,7 +262,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam, /* indicates ray should not receive any light, outside of the lens */ if (is_zero(D)) { - ray->t = 0.0f; + ray->tmax = 0.0f; return; } @@ -349,9 +353,11 @@ ccl_device_inline void 
camera_sample_panorama(ccl_constant KernelCamera *cam, float nearclip = cam->nearclip; ray->P += nearclip * ray->D; ray->dP += nearclip * ray->dD; - ray->t = cam->cliplength; + ray->tmin = 0.0f; + ray->tmax = cam->cliplength; #else - ray->t = FLT_MAX; + ray->tmin = 0.0f; + ray->tmax = FLT_MAX; #endif } @@ -368,7 +374,7 @@ ccl_device_inline void camera_sample(KernelGlobals kg, ccl_private Ray *ray) { /* pixel filter */ - int filter_table_offset = kernel_data.film.filter_table_offset; + int filter_table_offset = kernel_data.tables.filter_table_offset; float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE); float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE); diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h index 2cdf6c9f349..e7f24b89458 100644 --- a/intern/cycles/kernel/closure/bsdf_hair_principled.h +++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h @@ -3,7 +3,7 @@ #pragma once -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ # include <fenv.h> #endif diff --git a/intern/cycles/kernel/data_template.h b/intern/cycles/kernel/data_template.h new file mode 100644 index 00000000000..807d0650fc3 --- /dev/null +++ b/intern/cycles/kernel/data_template.h @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#ifndef KERNEL_STRUCT_BEGIN +# define KERNEL_STRUCT_BEGIN(name, parent) +#endif +#ifndef KERNEL_STRUCT_END +# define KERNEL_STRUCT_END(name) +#endif +#ifndef KERNEL_STRUCT_MEMBER +# define KERNEL_STRUCT_MEMBER(parent, type, name) +#endif + +/* Background. */ + +KERNEL_STRUCT_BEGIN(KernelBackground, background) +/* xyz store direction, w the angle. float4 instead of float3 is used + * to ensure consistent padding/alignment across devices. */ +KERNEL_STRUCT_MEMBER(background, float4, sun) +/* Only shader index. 
*/ +KERNEL_STRUCT_MEMBER(background, int, surface_shader) +KERNEL_STRUCT_MEMBER(background, int, volume_shader) +KERNEL_STRUCT_MEMBER(background, float, volume_step_size) +KERNEL_STRUCT_MEMBER(background, int, transparent) +KERNEL_STRUCT_MEMBER(background, float, transparent_roughness_squared_threshold) +/* Portal sampling. */ +KERNEL_STRUCT_MEMBER(background, float, portal_weight) +KERNEL_STRUCT_MEMBER(background, int, num_portals) +KERNEL_STRUCT_MEMBER(background, int, portal_offset) +/* Sun sampling. */ +KERNEL_STRUCT_MEMBER(background, float, sun_weight) +/* Importance map sampling. */ +KERNEL_STRUCT_MEMBER(background, float, map_weight) +KERNEL_STRUCT_MEMBER(background, int, map_res_x) +KERNEL_STRUCT_MEMBER(background, int, map_res_y) +/* Multiple importance sampling. */ +KERNEL_STRUCT_MEMBER(background, int, use_mis) +/* Lightgroup. */ +KERNEL_STRUCT_MEMBER(background, int, lightgroup) +/* Padding. */ +KERNEL_STRUCT_MEMBER(background, int, pad1) +KERNEL_STRUCT_MEMBER(background, int, pad2) +KERNEL_STRUCT_MEMBER(background, int, pad3) +KERNEL_STRUCT_END(KernelBackground) + +/* BVH: own BVH2 if no native device acceleration struct used. */ + +KERNEL_STRUCT_BEGIN(KernelBVH, bvh) +KERNEL_STRUCT_MEMBER(bvh, int, root) +KERNEL_STRUCT_MEMBER(bvh, int, have_motion) +KERNEL_STRUCT_MEMBER(bvh, int, have_curves) +KERNEL_STRUCT_MEMBER(bvh, int, bvh_layout) +KERNEL_STRUCT_MEMBER(bvh, int, use_bvh_steps) +KERNEL_STRUCT_MEMBER(bvh, int, curve_subdivisions) +KERNEL_STRUCT_MEMBER(bvh, int, pad1) +KERNEL_STRUCT_MEMBER(bvh, int, pad2) +KERNEL_STRUCT_END(KernelBVH) + +/* Film. */ + +KERNEL_STRUCT_BEGIN(KernelFilm, film) +/* XYZ to rendering color space transform. float4 instead of float3 to + * ensure consistent padding/alignment across devices. */ +KERNEL_STRUCT_MEMBER(film, float4, xyz_to_r) +KERNEL_STRUCT_MEMBER(film, float4, xyz_to_g) +KERNEL_STRUCT_MEMBER(film, float4, xyz_to_b) +KERNEL_STRUCT_MEMBER(film, float4, rgb_to_y) +/* Rec709 to rendering color space. 
*/ +KERNEL_STRUCT_MEMBER(film, float4, rec709_to_r) +KERNEL_STRUCT_MEMBER(film, float4, rec709_to_g) +KERNEL_STRUCT_MEMBER(film, float4, rec709_to_b) +KERNEL_STRUCT_MEMBER(film, int, is_rec709) +/* Exposure. */ +KERNEL_STRUCT_MEMBER(film, float, exposure) +/* Passes used. */ +KERNEL_STRUCT_MEMBER(film, int, pass_flag) +KERNEL_STRUCT_MEMBER(film, int, light_pass_flag) +/* Pass offsets. */ +KERNEL_STRUCT_MEMBER(film, int, pass_stride) +KERNEL_STRUCT_MEMBER(film, int, pass_combined) +KERNEL_STRUCT_MEMBER(film, int, pass_depth) +KERNEL_STRUCT_MEMBER(film, int, pass_position) +KERNEL_STRUCT_MEMBER(film, int, pass_normal) +KERNEL_STRUCT_MEMBER(film, int, pass_roughness) +KERNEL_STRUCT_MEMBER(film, int, pass_motion) +KERNEL_STRUCT_MEMBER(film, int, pass_motion_weight) +KERNEL_STRUCT_MEMBER(film, int, pass_uv) +KERNEL_STRUCT_MEMBER(film, int, pass_object_id) +KERNEL_STRUCT_MEMBER(film, int, pass_material_id) +KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_color) +KERNEL_STRUCT_MEMBER(film, int, pass_glossy_color) +KERNEL_STRUCT_MEMBER(film, int, pass_transmission_color) +KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_indirect) +KERNEL_STRUCT_MEMBER(film, int, pass_glossy_indirect) +KERNEL_STRUCT_MEMBER(film, int, pass_transmission_indirect) +KERNEL_STRUCT_MEMBER(film, int, pass_volume_indirect) +KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_direct) +KERNEL_STRUCT_MEMBER(film, int, pass_glossy_direct) +KERNEL_STRUCT_MEMBER(film, int, pass_transmission_direct) +KERNEL_STRUCT_MEMBER(film, int, pass_volume_direct) +KERNEL_STRUCT_MEMBER(film, int, pass_emission) +KERNEL_STRUCT_MEMBER(film, int, pass_background) +KERNEL_STRUCT_MEMBER(film, int, pass_ao) +KERNEL_STRUCT_MEMBER(film, float, pass_alpha_threshold) +KERNEL_STRUCT_MEMBER(film, int, pass_shadow) +KERNEL_STRUCT_MEMBER(film, float, pass_shadow_scale) +KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher) +KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_sample_count) +KERNEL_STRUCT_MEMBER(film, int, 
pass_shadow_catcher_matte) +/* Cryptomatte. */ +KERNEL_STRUCT_MEMBER(film, int, cryptomatte_passes) +KERNEL_STRUCT_MEMBER(film, int, cryptomatte_depth) +KERNEL_STRUCT_MEMBER(film, int, pass_cryptomatte) +/* Adaptive sampling. */ +KERNEL_STRUCT_MEMBER(film, int, pass_adaptive_aux_buffer) +KERNEL_STRUCT_MEMBER(film, int, pass_sample_count) +/* Mist. */ +KERNEL_STRUCT_MEMBER(film, int, pass_mist) +KERNEL_STRUCT_MEMBER(film, float, mist_start) +KERNEL_STRUCT_MEMBER(film, float, mist_inv_depth) +KERNEL_STRUCT_MEMBER(film, float, mist_falloff) +/* Denoising. */ +KERNEL_STRUCT_MEMBER(film, int, pass_denoising_normal) +KERNEL_STRUCT_MEMBER(film, int, pass_denoising_albedo) +KERNEL_STRUCT_MEMBER(film, int, pass_denoising_depth) +/* AOVs. */ +KERNEL_STRUCT_MEMBER(film, int, pass_aov_color) +KERNEL_STRUCT_MEMBER(film, int, pass_aov_value) +/* Light groups. */ +KERNEL_STRUCT_MEMBER(film, int, pass_lightgroup) +/* Baking. */ +KERNEL_STRUCT_MEMBER(film, int, pass_bake_primitive) +KERNEL_STRUCT_MEMBER(film, int, pass_bake_differential) +/* Shadow catcher. */ +KERNEL_STRUCT_MEMBER(film, int, use_approximate_shadow_catcher) +/* Padding. */ +KERNEL_STRUCT_MEMBER(film, int, pad1) +KERNEL_STRUCT_MEMBER(film, int, pad2) +KERNEL_STRUCT_END(KernelFilm) + +/* Integrator. */ + +KERNEL_STRUCT_BEGIN(KernelIntegrator, integrator) +/* Emission. */ +KERNEL_STRUCT_MEMBER(integrator, int, use_direct_light) +KERNEL_STRUCT_MEMBER(integrator, int, num_distribution) +KERNEL_STRUCT_MEMBER(integrator, int, num_all_lights) +KERNEL_STRUCT_MEMBER(integrator, float, pdf_triangles) +KERNEL_STRUCT_MEMBER(integrator, float, pdf_lights) +KERNEL_STRUCT_MEMBER(integrator, float, light_inv_rr_threshold) +/* Bounces. 
*/ +KERNEL_STRUCT_MEMBER(integrator, int, min_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, max_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, max_diffuse_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, max_glossy_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, max_transmission_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, max_volume_bounce) +/* AO bounces. */ +KERNEL_STRUCT_MEMBER(integrator, int, ao_bounces) +KERNEL_STRUCT_MEMBER(integrator, float, ao_bounces_distance) +KERNEL_STRUCT_MEMBER(integrator, float, ao_bounces_factor) +KERNEL_STRUCT_MEMBER(integrator, float, ao_additive_factor) +/* Transparency. */ +KERNEL_STRUCT_MEMBER(integrator, int, transparent_min_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, transparent_max_bounce) +KERNEL_STRUCT_MEMBER(integrator, int, transparent_shadows) +/* Caustics. */ +KERNEL_STRUCT_MEMBER(integrator, int, caustics_reflective) +KERNEL_STRUCT_MEMBER(integrator, int, caustics_refractive) +KERNEL_STRUCT_MEMBER(integrator, float, filter_glossy) +/* Seed. */ +KERNEL_STRUCT_MEMBER(integrator, int, seed) +/* Clamp. */ +KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_direct) +KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_indirect) +/* MIS. */ +KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis) +/* Caustics. */ +KERNEL_STRUCT_MEMBER(integrator, int, use_caustics) +/* Sampling pattern. */ +KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern) +KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance) +/* Volume render. */ +KERNEL_STRUCT_MEMBER(integrator, int, use_volumes) +KERNEL_STRUCT_MEMBER(integrator, int, volume_max_steps) +KERNEL_STRUCT_MEMBER(integrator, float, volume_step_rate) +/* Shadow catcher. */ +KERNEL_STRUCT_MEMBER(integrator, int, has_shadow_catcher) +/* Closure filter. */ +KERNEL_STRUCT_MEMBER(integrator, int, filter_closures) +/* MIS debugging. 
*/ +KERNEL_STRUCT_MEMBER(integrator, int, direct_light_sampling_type) +/* Padding */ +KERNEL_STRUCT_MEMBER(integrator, int, pad1) +KERNEL_STRUCT_END(KernelIntegrator) + +/* SVM. For shader specialization. */ + +KERNEL_STRUCT_BEGIN(KernelSVMUsage, svm_usage) +#define SHADER_NODE_TYPE(type) KERNEL_STRUCT_MEMBER(svm_usage, int, type) +#include "kernel/svm/node_types_template.h" +KERNEL_STRUCT_END(KernelSVMUsage) + +#undef KERNEL_STRUCT_BEGIN +#undef KERNEL_STRUCT_MEMBER +#undef KERNEL_STRUCT_END diff --git a/intern/cycles/kernel/device/cpu/bvh.h b/intern/cycles/kernel/device/cpu/bvh.h new file mode 100644 index 00000000000..b5ea3d831f4 --- /dev/null +++ b/intern/cycles/kernel/device/cpu/bvh.h @@ -0,0 +1,609 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Blender Foundation */ + +/* CPU Embree implementation of ray-scene intersection. */ + +#pragma once + +#include <embree3/rtcore_ray.h> +#include <embree3/rtcore_scene.h> + +#include "kernel/device/cpu/compat.h" +#include "kernel/device/cpu/globals.h" + +#include "kernel/bvh/types.h" +#include "kernel/bvh/util.h" +#include "kernel/geom/object.h" +#include "kernel/integrator/state.h" +#include "kernel/sample/lcg.h" + +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +#define EMBREE_IS_HAIR(x) (x & 1) + +/* Intersection context. 
*/ + +struct CCLIntersectContext { + typedef enum { + RAY_REGULAR = 0, + RAY_SHADOW_ALL = 1, + RAY_LOCAL = 2, + RAY_SSS = 3, + RAY_VOLUME_ALL = 4, + } RayType; + + KernelGlobals kg; + RayType type; + + /* For avoiding self intersections */ + const Ray *ray; + + /* for shadow rays */ + Intersection *isect_s; + uint max_hits; + uint num_hits; + uint num_recorded_hits; + float throughput; + float max_t; + bool opaque_hit; + + /* for SSS Rays: */ + LocalIntersection *local_isect; + int local_object_id; + uint *lcg_state; + + CCLIntersectContext(KernelGlobals kg_, RayType type_) + { + kg = kg_; + type = type_; + ray = NULL; + max_hits = 1; + num_hits = 0; + num_recorded_hits = 0; + throughput = 1.0f; + max_t = FLT_MAX; + opaque_hit = false; + isect_s = NULL; + local_isect = NULL; + local_object_id = -1; + lcg_state = NULL; + } +}; + +class IntersectContext { + public: + IntersectContext(CCLIntersectContext *ctx) + { + rtcInitIntersectContext(&context); + userRayExt = ctx; + } + RTCIntersectContext context; + CCLIntersectContext *userRayExt; +}; + +/* Utilities. 
*/ + +ccl_device_inline void kernel_embree_setup_ray(const Ray &ray, + RTCRay &rtc_ray, + const uint visibility) +{ + rtc_ray.org_x = ray.P.x; + rtc_ray.org_y = ray.P.y; + rtc_ray.org_z = ray.P.z; + rtc_ray.dir_x = ray.D.x; + rtc_ray.dir_y = ray.D.y; + rtc_ray.dir_z = ray.D.z; + rtc_ray.tnear = ray.tmin; + rtc_ray.tfar = ray.tmax; + rtc_ray.time = ray.time; + rtc_ray.mask = visibility; +} + +ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray, + RTCRayHit &rayhit, + const uint visibility) +{ + kernel_embree_setup_ray(ray, rayhit.ray, visibility); + rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; + rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; +} + +ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg, + const RTCHit *hit, + const Ray *ray) +{ + bool status = false; + if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { + const int oID = hit->instID[0] / 2; + if ((ray->self.object == oID) || (ray->self.light_object == oID)) { + RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.device_bvh, hit->instID[0])); + const int pID = hit->primID + + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); + status = intersection_skip_self_shadow(ray->self, oID, pID); + } + } + else { + const int oID = hit->geomID / 2; + if ((ray->self.object == oID) || (ray->self.light_object == oID)) { + const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.device_bvh, hit->geomID)); + status = intersection_skip_self_shadow(ray->self, oID, pID); + } + } + + return status; +} + +ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg, + const RTCRay *ray, + const RTCHit *hit, + Intersection *isect) +{ + isect->t = ray->tfar; + if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { + RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.device_bvh, hit->instID[0])); + isect->prim = hit->primID + + 
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); + isect->object = hit->instID[0] / 2; + } + else { + isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.device_bvh, hit->geomID)); + isect->object = hit->geomID / 2; + } + + const bool is_hair = hit->geomID & 1; + if (is_hair) { + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim); + isect->type = segment.type; + isect->prim = segment.prim; + isect->u = hit->u; + isect->v = hit->v; + } + else { + isect->type = kernel_data_fetch(objects, isect->object).primitive_type; + isect->u = 1.0f - hit->v - hit->u; + isect->v = hit->u; + } +} + +ccl_device_inline void kernel_embree_convert_sss_hit( + KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object) +{ + isect->u = 1.0f - hit->v - hit->u; + isect->v = hit->u; + isect->t = ray->tfar; + RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.device_bvh, object * 2)); + isect->prim = hit->primID + + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); + isect->object = object; + isect->type = kernel_data_fetch(objects, object).primitive_type; +} + +/* Ray filter functions. */ + +/* This gets called by Embree at every valid ray/object intersection. + * It only rejects hits that kernel_embree_is_self_intersection() reports for the ray stored in + * the intersection context. Recording of shadow, local and volume hits is done in + * kernel_embree_filter_occluded_func() instead. */ +ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args) +{ + /* Current implementation in Cycles assumes only single-ray intersection queries. 
*/ + assert(args->N == 1); + + RTCHit *hit = (RTCHit *)args->hit; + CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt; + const KernelGlobalsCPU *kg = ctx->kg; + const Ray *cray = ctx->ray; + + if (kernel_embree_is_self_intersection(kg, hit, cray)) { + *args->valid = 0; + } +} + +/* This gets called by Embree at every valid ray/object intersection. + * Things like recording subsurface or shadow hits for later evaluation + * as well as filtering for volume objects happen here. + * Cycles' own BVH does that directly inside the traversal calls. + */ +ccl_device void kernel_embree_filter_occluded_func(const RTCFilterFunctionNArguments *args) +{ + /* Current implementation in Cycles assumes only single-ray intersection queries. */ + assert(args->N == 1); + + const RTCRay *ray = (RTCRay *)args->ray; + RTCHit *hit = (RTCHit *)args->hit; + CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt; + const KernelGlobalsCPU *kg = ctx->kg; + const Ray *cray = ctx->ray; + + switch (ctx->type) { + case CCLIntersectContext::RAY_SHADOW_ALL: { + Intersection current_isect; + kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); + if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) { + *args->valid = 0; + return; + } + /* If no transparent shadows or max number of hits exceeded, all light is blocked. */ + const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type); + if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) { + ctx->opaque_hit = true; + return; + } + + ++ctx->num_hits; + + /* Always use baked shadow transparency for curves. 
*/ + if (current_isect.type & PRIMITIVE_CURVE) { + ctx->throughput *= intersection_curve_shadow_transparency( + kg, current_isect.object, current_isect.prim, current_isect.u); + + if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { + ctx->opaque_hit = true; + return; + } + else { + *args->valid = 0; + return; + } + } + + /* Test if we need to record this transparent intersection. */ + const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); + if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) { + /* If maximum number of hits was reached, replace the intersection with the + * highest distance. We want to find the N closest intersections. */ + const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits); + uint isect_index = num_recorded_hits; + if (num_recorded_hits + 1 >= max_record_hits) { + float max_t = ctx->isect_s[0].t; + uint max_recorded_hit = 0; + + for (uint i = 1; i < num_recorded_hits; ++i) { + if (ctx->isect_s[i].t > max_t) { + max_recorded_hit = i; + max_t = ctx->isect_s[i].t; + } + } + + if (num_recorded_hits >= max_record_hits) { + isect_index = max_recorded_hit; + } + + /* Limit the ray distance and stop counting hits beyond this. + * TODO: is there some way we can tell Embree to stop intersecting beyond + * this distance when max number of hits is reached?. Or maybe it will + * become irrelevant if we make max_hits a very high number on the CPU. */ + ctx->max_t = max(current_isect.t, max_t); + } + + ctx->isect_s[isect_index] = current_isect; + } + + /* Always increase the number of recorded hits, even beyond the maximum, + * so that we can detect this and trace another ray if needed. */ + ++ctx->num_recorded_hits; + + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + case CCLIntersectContext::RAY_LOCAL: + case CCLIntersectContext::RAY_SSS: { + /* Check if it's hitting the correct object. 
*/ + Intersection current_isect; + if (ctx->type == CCLIntersectContext::RAY_SSS) { + kernel_embree_convert_sss_hit(kg, ray, hit, ¤t_isect, ctx->local_object_id); + } + else { + kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); + if (ctx->local_object_id != current_isect.object) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + } + if (intersection_skip_self_local(cray->self, current_isect.prim)) { + *args->valid = 0; + return; + } + + /* No intersection information requested, just return a hit. */ + if (ctx->max_hits == 0) { + break; + } + + /* Ignore curves. */ + if (EMBREE_IS_HAIR(hit->geomID)) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + + LocalIntersection *local_isect = ctx->local_isect; + int hit_idx = 0; + + if (ctx->lcg_state) { + /* See triangle_intersect_subsurface() for the native equivalent. */ + for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (local_isect->hits[i].t == ray->tfar) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + return; + } + } + + local_isect->num_hits++; + + if (local_isect->num_hits <= ctx->max_hits) { + hit_idx = local_isect->num_hits - 1; + } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits; + + if (hit_idx >= ctx->max_hits) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + return; + } + } + } + else { + /* Record closest intersection only. */ + if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) { + *args->valid = 0; + return; + } + + local_isect->num_hits = 1; + } + + /* record intersection */ + local_isect->hits[hit_idx] = current_isect; + local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)); + /* This tells Embree to continue tracing. 
*/ + *args->valid = 0; + break; + } + case CCLIntersectContext::RAY_VOLUME_ALL: { + /* Append the intersection to the end of the array. */ + if (ctx->num_hits < ctx->max_hits) { + Intersection current_isect; + kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); + if (intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) { + *args->valid = 0; + return; + } + + Intersection *isect = &ctx->isect_s[ctx->num_hits]; + ++ctx->num_hits; + *isect = current_isect; + /* Only primitives from volume object. */ + uint tri_object = isect->object; + int object_flag = kernel_data_fetch(object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + --ctx->num_hits; + } + /* This tells Embree to continue tracing. */ + *args->valid = 0; + } + break; + } + case CCLIntersectContext::RAY_REGULAR: + default: + if (kernel_embree_is_self_intersection(kg, hit, cray)) { + *args->valid = 0; + return; + } + break; + } +} + +ccl_device void kernel_embree_filter_func_backface_cull(const RTCFilterFunctionNArguments *args) +{ + const RTCRay *ray = (RTCRay *)args->ray; + RTCHit *hit = (RTCHit *)args->hit; + + /* Always ignore back-facing intersections. */ + if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), + make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) { + *args->valid = 0; + return; + } + + CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt; + const KernelGlobalsCPU *kg = ctx->kg; + const Ray *cray = ctx->ray; + + if (kernel_embree_is_self_intersection(kg, hit, cray)) { + *args->valid = 0; + } +} + +ccl_device void kernel_embree_filter_occluded_func_backface_cull( + const RTCFilterFunctionNArguments *args) +{ + const RTCRay *ray = (RTCRay *)args->ray; + RTCHit *hit = (RTCHit *)args->hit; + + /* Always ignore back-facing intersections. 
*/ + if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), + make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) { + *args->valid = 0; + return; + } + + kernel_embree_filter_occluded_func(args); +} + +/* Scene intersection. */ + +ccl_device_intersect bool scene_intersect(KernelGlobals kg, + ccl_private const Ray *ray, + const uint visibility, + ccl_private Intersection *isect) +{ + if (!intersection_ray_valid(ray)) { + return false; + } + + if (!kernel_data.device_bvh) { + return false; + } + + isect->t = ray->tmax; + CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); + IntersectContext rtc_ctx(&ctx); + RTCRayHit ray_hit; + ctx.ray = ray; + kernel_embree_setup_rayhit(*ray, ray_hit, visibility); + rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit); + if (ray_hit.hit.geomID == RTC_INVALID_GEOMETRY_ID || + ray_hit.hit.primID == RTC_INVALID_GEOMETRY_ID) { + return false; + } + + kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect); + return true; +} + +#ifdef __BVH_LOCAL__ +ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private LocalIntersection *local_isect, + int local_object, + ccl_private uint *lcg_state, + int max_hits) +{ + if (!intersection_ray_valid(ray)) { + if (local_isect) { + local_isect->num_hits = 0; + } + return false; + } + + if (!kernel_data.device_bvh) { + return false; + } + + const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) & + SD_OBJECT_TRANSFORM_APPLIED); + CCLIntersectContext ctx(kg, + has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL); + ctx.lcg_state = lcg_state; + ctx.max_hits = max_hits; + ctx.ray = ray; + ctx.local_isect = local_isect; + if (local_isect) { + local_isect->num_hits = 0; + } + ctx.local_object_id = local_object; + IntersectContext rtc_ctx(&ctx); + RTCRay rtc_ray; + kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY); + + /* If this object has its own BVH, use it. 
*/ + if (has_bvh) { + RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2); + if (geom) { + float3 P = ray->P; + float3 dir = ray->D; + float3 idir = ray->D; + bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir); + + rtc_ray.org_x = P.x; + rtc_ray.org_y = P.y; + rtc_ray.org_z = P.z; + rtc_ray.dir_x = dir.x; + rtc_ray.dir_y = dir.y; + rtc_ray.dir_z = dir.z; + rtc_ray.tnear = ray->tmin; + rtc_ray.tfar = ray->tmax; + RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom); + kernel_assert(scene); + if (scene) { + rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray); + } + } + } + else { + rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray); + } + + /* rtcOccluded1 sets tfar to -inf if a hit was found. */ + return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0); +} +#endif + +#ifdef __SHADOW_RECORD_ALL__ +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, + IntegratorShadowStateCPU *state, + ccl_private const Ray *ray, + uint visibility, + uint max_hits, + ccl_private uint *num_recorded_hits, + ccl_private float *throughput) +{ + if (!intersection_ray_valid(ray)) { + *num_recorded_hits = 0; + *throughput = 1.0f; + return false; + } + + if (!kernel_data.device_bvh) { + return false; + } + + CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL); + Intersection *isect_array = (Intersection *)state->shadow_isect; + ctx.isect_s = isect_array; + ctx.max_hits = max_hits; + ctx.ray = ray; + IntersectContext rtc_ctx(&ctx); + RTCRay rtc_ray; + kernel_embree_setup_ray(*ray, rtc_ray, visibility); + rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray); + + *num_recorded_hits = ctx.num_recorded_hits; + *throughput = ctx.throughput; + return ctx.opaque_hit; +} +#endif + +#ifdef __VOLUME__ +ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint max_hits, + const uint visibility) +{ + if 
(!intersection_ray_valid(ray)) { + return false; + } + + if (!kernel_data.device_bvh) { + return false; + } + + CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL); + ctx.isect_s = isect; + ctx.max_hits = max_hits; + ctx.num_hits = 0; + ctx.ray = ray; + IntersectContext rtc_ctx(&ctx); + RTCRay rtc_ray; + kernel_embree_setup_ray(*ray, rtc_ray, visibility); + rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray); + return ctx.num_hits; +} +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h index 3bfc37e98ee..631e55e0d42 100644 --- a/intern/cycles/kernel/device/cpu/compat.h +++ b/intern/cycles/kernel/device/cpu/compat.h @@ -3,8 +3,6 @@ #pragma once -#define __KERNEL_CPU__ - /* Release kernel has too much false-positive maybe-uninitialized warnings, * which makes it possible to miss actual warnings. */ diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index b9a44ccad02..e1ab802aa80 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -246,7 +246,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_postfix #if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__) -constant int __dummy_constant [[function_constant(0)]]; +constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]]; #endif ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) diff --git a/intern/cycles/kernel/device/metal/bvh.h b/intern/cycles/kernel/device/metal/bvh.h new file mode 100644 index 00000000000..d3a0ab1b519 --- /dev/null +++ b/intern/cycles/kernel/device/metal/bvh.h @@ -0,0 +1,1123 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Blender Foundation */ + +/* MetalRT implementation of ray-scene intersection. 
*/ + +#pragma once + +#include "kernel/bvh/types.h" +#include "kernel/bvh/util.h" + +CCL_NAMESPACE_BEGIN + +/* Payload types. */ + +struct MetalRTIntersectionPayload { + RaySelfPrimitives self; + uint visibility; + float u, v; + int prim; + int type; +#if defined(__METALRT_MOTION__) + float time; +#endif +}; + +struct MetalRTIntersectionLocalPayload { + RaySelfPrimitives self; + uint local_object; + uint lcg_state; + short max_hits; + bool has_lcg_state; + bool result; + LocalIntersection local_isect; +}; + +struct MetalRTIntersectionShadowPayload { + RaySelfPrimitives self; + uint visibility; +#if defined(__METALRT_MOTION__) + float time; +#endif + int state; + float throughput; + short max_hits; + short num_hits; + short num_recorded_hits; + bool result; +}; + +/* Intersection return types. */ + +/* For a bounding box intersection function. */ +struct BoundingBoxIntersectionResult { + bool accept [[accept_intersection]]; + bool continue_search [[continue_search]]; + float distance [[distance]]; +}; + +/* For a triangle intersection function. */ +struct TriangleIntersectionResult { + bool accept [[accept_intersection]]; + bool continue_search [[continue_search]]; +}; + +enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX }; + +/* Utilities. */ + +ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives &self, + const int object, + const int prim) +{ + return (self.prim == prim) && (self.object == object); +} + +ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives &self, + const int object, + const int prim) +{ + return ((self.prim == prim) && (self.object == object)) || + ((self.light_prim == prim) && (self.light_object == object)); +} + +ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives &self, + const int prim) +{ + return (self.prim == prim); +} + +/* Hit functions. 
*/ + +template<typename TReturn, uint intersection_type> +TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload, + const uint object, + const uint primitive_id, + const float2 barycentrics, + const float ray_tmax) +{ + TReturn result; + +#ifdef __BVH_LOCAL__ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + + if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) { + /* Only intersect with matching object and skip self-intersection. */ + result.accept = false; + result.continue_search = true; + return result; + } + + const short max_hits = payload.max_hits; + if (max_hits == 0) { + /* Special case for when no hit information is requested, just report that something was hit */ + payload.result = true; + result.accept = true; + result.continue_search = false; + return result; + } + + int hit = 0; + if (payload.has_lcg_state) { + for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) { + if (ray_tmax == payload.local_isect.hits[i].t) { + result.accept = false; + result.continue_search = true; + return result; + } + } + + hit = payload.local_isect.num_hits++; + + if (payload.local_isect.num_hits > max_hits) { + hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits; + if (hit >= max_hits) { + result.accept = false; + result.continue_search = true; + return result; + } + } + } + else { + if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) { + /* Record closest intersection only. 
Do not terminate ray here, since there is no guarantee + * about distance ordering in any-hit */ + result.accept = false; + result.continue_search = true; + return result; + } + + payload.local_isect.num_hits = 1; + } + + ray_data Intersection *isect = &payload.local_isect.hits[hit]; + isect->t = ray_tmax; + isect->prim = prim; + isect->object = object; + isect->type = kernel_data_fetch(objects, object).primitive_type; + + isect->u = 1.0f - barycentrics.y - barycentrics.x; + isect->v = barycentrics.x; + + /* Record geometric normal */ + const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w; + const float3 tri_a = float3(kernel_data_fetch(tri_verts, tri_vindex + 0)); + const float3 tri_b = float3(kernel_data_fetch(tri_verts, tri_vindex + 1)); + const float3 tri_c = float3(kernel_data_fetch(tri_verts, tri_vindex + 2)); + payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); + + /* Continue tracing (without this the trace call would return after the first hit) */ + result.accept = false; + result.continue_search = true; + return result; +#endif +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult +__anyhit__cycles_metalrt_local_hit_tri( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]], + uint instance_id [[user_instance_id]], + uint primitive_id [[primitive_id]], + float2 barycentrics [[barycentric_coord]], + float ray_tmax [[distance]]) +{ + return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax); +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]]) +{ + /* unused function */ + BoundingBoxIntersectionResult result; + result.distance = ray_tmax; + result.accept = false; + 
result.continue_search = false; + return result; +} + +template<uint intersection_type> +bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + uint object, + uint prim, + const float2 barycentrics, + const float ray_tmax) +{ +#ifdef __SHADOW_RECORD_ALL__ +# ifdef __VISIBILITY_FLAG__ + const uint visibility = payload.visibility; + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + /* continue search */ + return true; + } +# endif + + if (intersection_skip_self_shadow(payload.self, object, prim)) { + /* continue search */ + return true; + } + + float u = 0.0f, v = 0.0f; + int type = 0; + if (intersection_type == METALRT_HIT_TRIANGLE) { + u = 1.0f - barycentrics.y - barycentrics.x; + v = barycentrics.x; + type = kernel_data_fetch(objects, object).primitive_type; + } +# ifdef __HAIR__ + else { + u = barycentrics.x; + v = barycentrics.y; + + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + type = segment.type; + prim = segment.prim; + + /* Filter out curve endcaps */ + if (u == 0.0f || u == 1.0f) { + /* continue search */ + return true; + } + } +# endif + +# ifndef __TRANSPARENT_SHADOWS__ + /* No transparent shadows support compiled in, make opaque. */ + payload.result = true; + /* terminate ray */ + return false; +# else + short max_hits = payload.max_hits; + short num_hits = payload.num_hits; + short num_recorded_hits = payload.num_recorded_hits; + + MetalKernelContext context(launch_params_metal); + + /* If no transparent shadows, all light is blocked and we can stop immediately. */ + if (num_hits >= max_hits || + !(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { + payload.result = true; + /* terminate ray */ + return false; + } + + /* Always use baked shadow transparency for curves. 
*/ + if (type & PRIMITIVE_CURVE) { + float throughput = payload.throughput; + throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u); + payload.throughput = throughput; + payload.num_hits += 1; + + if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { + /* Accept result and terminate if throughput is sufficiently low */ + payload.result = true; + return false; + } + else { + return true; + } + } + + payload.num_hits += 1; + payload.num_recorded_hits += 1; + + uint record_index = num_recorded_hits; + + const IntegratorShadowState state = payload.state; + + const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE); + if (record_index >= max_record_hits) { + /* If maximum number of hits reached, find a hit to replace. */ + float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); + uint max_recorded_hit = 0; + + for (int i = 1; i < max_record_hits; i++) { + const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); + if (isect_t > max_recorded_t) { + max_recorded_t = isect_t; + max_recorded_hit = i; + } + } + + if (ray_tmax >= max_recorded_t) { + /* Accept hit, so that we don't consider any more hits beyond the distance of the + * current hit anymore. */ + payload.result = true; + return true; + } + + record_index = max_recorded_hit; + } + + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; + + /* Continue tracing. 
*/ +# endif /* __TRANSPARENT_SHADOWS__ */ +#endif /* __SHADOW_RECORD_ALL__ */ + + return true; +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult +__anyhit__cycles_metalrt_shadow_all_hit_tri( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + unsigned int object [[user_instance_id]], + unsigned int primitive_id [[primitive_id]], + float2 barycentrics [[barycentric_coord]], + float ray_tmax [[distance]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + + TriangleIntersectionResult result; + result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, object, prim, barycentrics, ray_tmax); + result.accept = !result.continue_search; + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]]) +{ + /* unused function */ + BoundingBoxIntersectionResult result; + result.distance = ray_tmax; + result.accept = false; + result.continue_search = false; + return result; +} + +template<typename TReturnType, uint intersection_type> +inline TReturnType metalrt_visibility_test( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const float u) +{ + TReturnType result; + +#ifdef __HAIR__ + if (intersection_type == METALRT_HIT_BOUNDING_BOX) { + /* Filter out curve endcaps. 
*/ + if (u == 0.0f || u == 1.0f) { + result.accept = false; + result.continue_search = true; + return result; + } + } +#endif + + uint visibility = payload.visibility; +#ifdef __VISIBILITY_FLAG__ + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + result.accept = false; + result.continue_search = true; + return result; + } +#endif + + /* Shadow ray early termination. */ + if (visibility & PATH_RAY_SHADOW_OPAQUE) { + if (intersection_skip_self_shadow(payload.self, object, prim)) { + result.accept = false; + result.continue_search = true; + return result; + } + else { + result.accept = true; + result.continue_search = false; + return result; + } + } + else { + if (intersection_skip_self(payload.self, object, prim)) { + result.accept = false; + result.continue_search = true; + return result; + } + } + + result.accept = true; + result.continue_search = true; + return result; +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult +__anyhit__cycles_metalrt_visibility_test_tri( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + unsigned int object [[user_instance_id]], + unsigned int primitive_id [[primitive_id]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + TriangleIntersectionResult result = + metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, object, prim, 0.0f); + if (result.accept) { + payload.prim = prim; + payload.type = kernel_data_fetch(objects, object).primitive_type; + } + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]]) +{ + /* Unused function */ + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + 
return result; +} + +/* Primitive intersection functions. */ + +#ifdef __HAIR__ +ccl_device_inline void metalrt_intersection_curve( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_P, + const float3 ray_D, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ +# ifdef __VISIBILITY_FLAG__ + const uint visibility = payload.visibility; + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return; + } +# endif + + Intersection isect; + isect.t = ray_tmax; + + MetalKernelContext context(launch_params_metal); + if (context.curve_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, isect.u); + if (result.accept) { + result.distance = isect.t; + payload.u = isect.u; + payload.v = isect.v; + payload.prim = prim; + payload.type = type; + } + } +} + +ccl_device_inline void metalrt_intersection_curve_shadow( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + const uint object, + const uint prim, + const uint type, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ + const uint visibility = payload.visibility; + + Intersection isect; + isect.t = ray_tmax; + + MetalKernelContext context(launch_params_metal); + if (context.curve_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); + result.accept = !result.continue_search; + } +} + 
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload + [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + if (segment.type & PRIMITIVE_CURVE_RIBBON) { + metalrt_intersection_curve(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + } + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__curve_ribbon_shadow( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + if (segment.type & PRIMITIVE_CURVE_RIBBON) { + 
metalrt_intersection_curve_shadow(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + } + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload + [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + metalrt_intersection_curve(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__curve_all_shadow( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_P [[origin]], + const float3 ray_D [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + + 
BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + metalrt_intersection_curve_shadow(launch_params_metal, + payload, + object, + segment.prim, + segment.type, + ray_P, + ray_D, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + + return result; +} +#endif /* __HAIR__ */ + +#ifdef __POINTCLOUD__ +ccl_device_inline void metalrt_intersection_point( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_P, + const float3 ray_D, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ +# ifdef __VISIBILITY_FLAG__ + const uint visibility = payload.visibility; + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return; + } +# endif + + Intersection isect; + isect.t = ray_tmax; + + MetalKernelContext context(launch_params_metal); + if (context.point_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, isect.u); + if (result.accept) { + result.distance = isect.t; + payload.u = isect.u; + payload.v = isect.v; + payload.prim = prim; + payload.type = type; + } + } +} + +ccl_device_inline void metalrt_intersection_point_shadow( + constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_P, + const float3 ray_D, + float time, + const float ray_tmin, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ + const uint visibility = payload.visibility; + + Intersection isect; + isect.t = 
ray_tmax; + + MetalKernelContext context(launch_params_metal); + if (context.point_intersect( + NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); + result.accept = !result.continue_search; + + if (result.accept) { + result.distance = isect.t; + } + } +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax [[max_distance]]) +{ + const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const int type = kernel_data_fetch(objects, object).primitive_type; + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + metalrt_intersection_point(launch_params_metal, + payload, + object, + prim, + type, + ray_origin, + ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult +__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload + [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmin [[min_distance]], + const float ray_tmax 
[[max_distance]]) +{ + const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); + const int type = kernel_data_fetch(objects, object).primitive_type; + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + metalrt_intersection_point_shadow(launch_params_metal, + payload, + object, + prim, + type, + ray_origin, + ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmin, + ray_tmax, + result); + + return result; +} +#endif /* __POINTCLOUD__ */ + +/* Scene intersection. */ + +ccl_device_intersect bool scene_intersect(KernelGlobals kg, + ccl_private const Ray *ray, + const uint visibility, + ccl_private Intersection *isect) +{ + if (!scene_intersect_valid(ray)) { + isect->t = ray->tmax; + isect->type = PRIMITIVE_NONE; + return false; + } + +#if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + isect->t = ray->tmax; + isect->type = PRIMITIVE_NONE; + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { + isect->t = ray->tmax; + isect->type = PRIMITIVE_NONE; + kernel_assert(!"Invalid ift_default"); + return false; + } +#endif + + metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax); + metalrt_intersector_type metalrt_intersect; + + if (!kernel_data.bvh.have_curves) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionPayload payload; + payload.self = ray->self; + payload.u = 0.0f; + payload.v = 0.0f; + payload.visibility = visibility; + + typename metalrt_intersector_type::result_type intersection; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + /* No further intersector setup required: Default MetalRT behavior is any-hit. 
*/ + } + else if (visibility & PATH_RAY_SHADOW_OPAQUE) { + /* No further intersector setup required: Shadow ray early termination is controlled by the + * intersection handler */ + } + +#if defined(__METALRT_MOTION__) + payload.time = ray->time; + intersection = metalrt_intersect.intersect(r, + metal_ancillaries->accel_struct, + ray_mask, + ray->time, + metal_ancillaries->ift_default, + payload); +#else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); +#endif + + if (intersection.type == intersection_type::none) { + isect->t = ray->tmax; + isect->type = PRIMITIVE_NONE; + + return false; + } + + isect->t = intersection.distance; + + isect->prim = payload.prim; + isect->type = payload.type; + isect->object = intersection.user_instance_id; + + isect->t = intersection.distance; + if (intersection.type == intersection_type::triangle) { + isect->u = 1.0f - intersection.triangle_barycentric_coord.y - + intersection.triangle_barycentric_coord.x; + isect->v = intersection.triangle_barycentric_coord.x; + } + else { + isect->u = payload.u; + isect->v = payload.v; + } + + return isect->type != PRIMITIVE_NONE; +} + +#ifdef __BVH_LOCAL__ +ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private LocalIntersection *local_isect, + int local_object, + ccl_private uint *lcg_state, + int max_hits) +{ + if (!intersection_ray_valid(ray)) { + if (local_isect) { + local_isect->num_hits = 0; + } + return false; + } + +# if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + if (local_isect) { + local_isect->num_hits = 0; + } + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_local)) { + if (local_isect) { + local_isect->num_hits = 0; + } + kernel_assert(!"Invalid ift_local"); + return false; 
+ } +# endif + + metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax); + metalrt_intersector_type metalrt_intersect; + + metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); + if (!kernel_data.bvh.have_curves) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionLocalPayload payload; + payload.self = ray->self; + payload.local_object = local_object; + payload.max_hits = max_hits; + payload.local_isect.num_hits = 0; + if (lcg_state) { + payload.has_lcg_state = true; + payload.lcg_state = *lcg_state; + } + payload.result = false; + + typename metalrt_intersector_type::result_type intersection; + +# if defined(__METALRT_MOTION__) + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload); +# else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload); +# endif + + if (lcg_state) { + *lcg_state = payload.lcg_state; + } + *local_isect = payload.local_isect; + + return payload.result; +} +#endif + +#ifdef __SHADOW_RECORD_ALL__ +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, + IntegratorShadowState state, + ccl_private const Ray *ray, + uint visibility, + uint max_hits, + ccl_private uint *num_recorded_hits, + ccl_private float *throughput) +{ + if (!intersection_ray_valid(ray)) { + return false; + } + +# if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) { + kernel_assert(!"Invalid ift_shadow"); + return false; + } +# endif + + metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax); + metalrt_intersector_type metalrt_intersect; + + 
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); + if (!kernel_data.bvh.have_curves) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionShadowPayload payload; + payload.self = ray->self; + payload.visibility = visibility; + payload.max_hits = max_hits; + payload.num_hits = 0; + payload.num_recorded_hits = 0; + payload.throughput = 1.0f; + payload.result = false; + payload.state = state; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + + typename metalrt_intersector_type::result_type intersection; + +# if defined(__METALRT_MOTION__) + payload.time = ray->time; + intersection = metalrt_intersect.intersect(r, + metal_ancillaries->accel_struct, + ray_mask, + ray->time, + metal_ancillaries->ift_shadow, + payload); +# else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload); +# endif + + *num_recorded_hits = payload.num_recorded_hits; + *throughput = payload.throughput; + + return payload.result; +} +#endif + +#ifdef __VOLUME__ +ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint visibility) +{ + if (!intersection_ray_valid(ray)) { + return false; + } + +# if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { + kernel_assert(!"Invalid ift_default"); + return false; + } +# endif + + metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax); + metalrt_intersector_type metalrt_intersect; + + metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); + if (!kernel_data.bvh.have_curves) { + 
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionPayload payload; + payload.self = ray->self; + payload.visibility = visibility; + + typename metalrt_intersector_type::result_type intersection; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + +# if defined(__METALRT_MOTION__) + payload.time = ray->time; + intersection = metalrt_intersect.intersect(r, + metal_ancillaries->accel_struct, + ray_mask, + ray->time, + metal_ancillaries->ift_default, + payload); +# else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); +# endif + + if (intersection.type == intersection_type::none) { + return false; + } + + isect->prim = payload.prim; + isect->type = payload.type; + isect->object = intersection.user_instance_id; + + isect->t = intersection.distance; + if (intersection.type == intersection_type::triangle) { + isect->u = 1.0f - intersection.triangle_barycentric_coord.y - + intersection.triangle_barycentric_coord.x; + isect->v = intersection.triangle_barycentric_coord.x; + } + else { + isect->u = payload.u; + isect->v = payload.v; + } + + return isect->type != PRIMITIVE_NONE; +} +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 0ed52074a90..80ee8ef5b57 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -260,8 +260,6 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \ #ifdef __METALRT__ -# define __KERNEL_GPU_RAYTRACING__ - # if defined(__METALRT_MOTION__) # define METALRT_TAGS instancing, instance_motion, primitive_motion # else diff --git a/intern/cycles/kernel/device/metal/function_constants.h b/intern/cycles/kernel/device/metal/function_constants.h new file mode 100644 index 00000000000..3adf390c7f6 --- 
/dev/null +++ b/intern/cycles/kernel/device/metal/function_constants.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Blender Foundation */ + +enum { + Kernel_DummyConstant, +#define KERNEL_STRUCT_MEMBER(parent, type, name) KernelData_##parent##_##name, +#include "kernel/data_template.h" +}; + +#ifdef __KERNEL_METAL__ +# define KERNEL_STRUCT_MEMBER(parent, type, name) \ + constant type kernel_data_##parent##_##name \ + [[function_constant(KernelData_##parent##_##name)]]; +# include "kernel/data_template.h" +#endif diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal index 3c31dc3354c..3df81fcf369 100644 --- a/intern/cycles/kernel/device/metal/kernel.metal +++ b/intern/cycles/kernel/device/metal/kernel.metal @@ -5,748 +5,5 @@ #include "kernel/device/metal/compat.h" #include "kernel/device/metal/globals.h" +#include "kernel/device/metal/function_constants.h" #include "kernel/device/gpu/kernel.h" - -/* MetalRT intersection handlers */ -#ifdef __METALRT__ - -/* Return type for a bounding box intersection function. */ -struct BoundingBoxIntersectionResult -{ - bool accept [[accept_intersection]]; - bool continue_search [[continue_search]]; - float distance [[distance]]; -}; - -/* Return type for a triangle intersection function. 
*/ -struct TriangleIntersectionResult -{ - bool accept [[accept_intersection]]; - bool continue_search [[continue_search]]; -}; - -enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX }; - -ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives& self, - const int object, - const int prim) -{ - return (self.prim == prim) && (self.object == object); -} - -ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives& self, - const int object, - const int prim) -{ - return ((self.prim == prim) && (self.object == object)) || - ((self.light_prim == prim) && (self.light_object == object)); -} - -ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives& self, - const int prim) -{ - return (self.prim == prim); -} - -template<typename TReturn, uint intersection_type> -TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload, - const uint object, - const uint primitive_id, - const float2 barycentrics, - const float ray_tmax) -{ - TReturn result; - -#ifdef __BVH_LOCAL__ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - - if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) { - /* Only intersect with matching object and skip self-intersecton. 
*/ - result.accept = false; - result.continue_search = true; - return result; - } - - const short max_hits = payload.max_hits; - if (max_hits == 0) { - /* Special case for when no hit information is requested, just report that something was hit */ - payload.result = true; - result.accept = true; - result.continue_search = false; - return result; - } - - int hit = 0; - if (payload.has_lcg_state) { - for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) { - if (ray_tmax == payload.local_isect.hits[i].t) { - result.accept = false; - result.continue_search = true; - return result; - } - } - - hit = payload.local_isect.num_hits++; - - if (payload.local_isect.num_hits > max_hits) { - hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits; - if (hit >= max_hits) { - result.accept = false; - result.continue_search = true; - return result; - } - } - } - else { - if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) { - /* Record closest intersection only. 
Do not terminate ray here, since there is no guarantee about distance ordering in any-hit */ - result.accept = false; - result.continue_search = true; - return result; - } - - payload.local_isect.num_hits = 1; - } - - ray_data Intersection *isect = &payload.local_isect.hits[hit]; - isect->t = ray_tmax; - isect->prim = prim; - isect->object = object; - isect->type = kernel_data_fetch(objects, object).primitive_type; - - isect->u = 1.0f - barycentrics.y - barycentrics.x; - isect->v = barycentrics.x; - - /* Record geometric normal */ - const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w; - const float3 tri_a = float3(kernel_data_fetch(tri_verts, tri_vindex + 0)); - const float3 tri_b = float3(kernel_data_fetch(tri_verts, tri_vindex + 1)); - const float3 tri_c = float3(kernel_data_fetch(tri_verts, tri_vindex + 2)); - payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); - - /* Continue tracing (without this the trace call would return after the first hit) */ - result.accept = false; - result.continue_search = true; - return result; -#endif -} - -[[intersection(triangle, triangle_data, METALRT_TAGS)]] -TriangleIntersectionResult -__anyhit__cycles_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]], - uint instance_id [[user_instance_id]], - uint primitive_id [[primitive_id]], - float2 barycentrics [[barycentric_coord]], - float ray_tmax [[distance]]) -{ - return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( - launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax); -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult -__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]]) -{ - /* unused function */ - BoundingBoxIntersectionResult result; - result.distance = ray_tmax; - result.accept = false; - 
result.continue_search = false; - return result; -} - -template<uint intersection_type> -bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, - uint object, - uint prim, - const float2 barycentrics, - const float ray_tmax) -{ -#ifdef __SHADOW_RECORD_ALL__ -# ifdef __VISIBILITY_FLAG__ - const uint visibility = payload.visibility; - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - /* continue search */ - return true; - } -# endif - - if (intersection_skip_self_shadow(payload.self, object, prim)) { - /* continue search */ - return true; - } - - float u = 0.0f, v = 0.0f; - int type = 0; - if (intersection_type == METALRT_HIT_TRIANGLE) { - u = 1.0f - barycentrics.y - barycentrics.x; - v = barycentrics.x; - type = kernel_data_fetch(objects, object).primitive_type; - } -# ifdef __HAIR__ - else { - u = barycentrics.x; - v = barycentrics.y; - - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - type = segment.type; - prim = segment.prim; - - /* Filter out curve endcaps */ - if (u == 0.0f || u == 1.0f) { - /* continue search */ - return true; - } - } -# endif - -# ifndef __TRANSPARENT_SHADOWS__ - /* No transparent shadows support compiled in, make opaque. */ - payload.result = true; - /* terminate ray */ - return false; -# else - short max_hits = payload.max_hits; - short num_hits = payload.num_hits; - short num_recorded_hits = payload.num_recorded_hits; - - MetalKernelContext context(launch_params_metal); - - /* If no transparent shadows, all light is blocked and we can stop immediately. */ - if (num_hits >= max_hits || - !(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { - payload.result = true; - /* terminate ray */ - return false; - } - - /* Always use baked shadow transparency for curves. 
*/ - if (type & PRIMITIVE_CURVE) { - float throughput = payload.throughput; - throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u); - payload.throughput = throughput; - payload.num_hits += 1; - - if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { - /* Accept result and terminate if throughput is sufficiently low */ - payload.result = true; - return false; - } - else { - return true; - } - } - - payload.num_hits += 1; - payload.num_recorded_hits += 1; - - uint record_index = num_recorded_hits; - - const IntegratorShadowState state = payload.state; - - const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE); - if (record_index >= max_record_hits) { - /* If maximum number of hits reached, find a hit to replace. */ - float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); - uint max_recorded_hit = 0; - - for (int i = 1; i < max_record_hits; i++) { - const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); - if (isect_t > max_recorded_t) { - max_recorded_t = isect_t; - max_recorded_hit = i; - } - } - - if (ray_tmax >= max_recorded_t) { - /* Accept hit, so that we don't consider any more hits beyond the distance of the - * current hit anymore. */ - payload.result = true; - return true; - } - - record_index = max_recorded_hit; - } - - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; - - /* Continue tracing. 
*/ -# endif /* __TRANSPARENT_SHADOWS__ */ -#endif /* __SHADOW_RECORD_ALL__ */ - - return true; -} - -[[intersection(triangle, triangle_data, METALRT_TAGS)]] -TriangleIntersectionResult -__anyhit__cycles_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], - unsigned int object [[user_instance_id]], - unsigned int primitive_id [[primitive_id]], - float2 barycentrics [[barycentric_coord]], - float ray_tmax [[distance]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - - TriangleIntersectionResult result; - result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>( - launch_params_metal, payload, object, prim, barycentrics, ray_tmax); - result.accept = !result.continue_search; - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult -__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]]) -{ - /* unused function */ - BoundingBoxIntersectionResult result; - result.distance = ray_tmax; - result.accept = false; - result.continue_search = false; - return result; -} - -template<typename TReturnType, uint intersection_type> -inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, - const uint object, - const uint prim, - const float u) -{ - TReturnType result; - -# ifdef __HAIR__ - if (intersection_type == METALRT_HIT_BOUNDING_BOX) { - /* Filter out curve endcaps. 
*/ - if (u == 0.0f || u == 1.0f) { - result.accept = false; - result.continue_search = true; - return result; - } - } -# endif - - uint visibility = payload.visibility; -# ifdef __VISIBILITY_FLAG__ - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - result.accept = false; - result.continue_search = true; - return result; - } -# endif - - /* Shadow ray early termination. */ - if (visibility & PATH_RAY_SHADOW_OPAQUE) { - if (intersection_skip_self_shadow(payload.self, object, prim)) { - result.accept = false; - result.continue_search = true; - return result; - } - else { - result.accept = true; - result.continue_search = false; - return result; - } - } - else { - if (intersection_skip_self(payload.self, object, prim)) { - result.accept = false; - result.continue_search = true; - return result; - } - } - - result.accept = true; - result.continue_search = true; - return result; -} - -[[intersection(triangle, triangle_data, METALRT_TAGS)]] -TriangleIntersectionResult -__anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], - unsigned int object [[user_instance_id]], - unsigned int primitive_id [[primitive_id]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( - launch_params_metal, payload, object, prim, 0.0f); - if (result.accept) { - payload.prim = prim; - payload.type = kernel_data_fetch(objects, object).primitive_type; - } - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult -__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]]) -{ - /* Unused function */ - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - 
return result; -} - -#ifdef __HAIR__ -ccl_device_inline -void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, - const uint object, - const uint prim, - const uint type, - const float3 ray_origin, - const float3 ray_direction, - float time, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) -{ -# ifdef __VISIBILITY_FLAG__ - const uint visibility = payload.visibility; - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - return; - } -# endif - - float3 P = ray_origin; - float3 dir = ray_direction; - - /* The direction is not normalized by default, but the curve intersection routine expects that */ - float len; - dir = normalize_len(dir, &len); - - Intersection isect; - isect.t = ray_tmax; - /* Transform maximum distance into object space. */ - if (isect.t != FLT_MAX) - isect.t *= len; - - MetalKernelContext context(launch_params_metal); - if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { - result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, isect.u); - if (result.accept) { - result.distance = isect.t / len; - payload.u = isect.u; - payload.v = isect.v; - payload.prim = prim; - payload.type = type; - } - } -} - -ccl_device_inline -void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, - const uint object, - const uint prim, - const uint type, - const float3 ray_origin, - const float3 ray_direction, - float time, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) -{ - const uint visibility = payload.visibility; - - float3 P = ray_origin; - float3 dir = ray_direction; - - /* The direction is not normalized by default, but the curve intersection routine expects that */ - float len; - dir 
= normalize_len(dir, &len); - - Intersection isect; - isect.t = ray_tmax; - /* Transform maximum distance into object space */ - if (isect.t != FLT_MAX) - isect.t *= len; - - MetalKernelContext context(launch_params_metal); - if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { - result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); - result.accept = !result.continue_search; - - if (result.accept) { - result.distance = isect.t / len; - } - } -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult -__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmax [[max_distance]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - - if (segment.type & PRIMITIVE_CURVE_RIBBON) { - metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmax, result); - } - - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult -__intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id 
[[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmax [[max_distance]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - - if (segment.type & PRIMITIVE_CURVE_RIBBON) { - metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmax, result); - } - - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult -__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmax [[max_distance]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmax, result); - - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult -__intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload 
[[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmax [[max_distance]]) -{ - uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - - metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmax, result); - - return result; -} -#endif /* __HAIR__ */ - -#ifdef __POINTCLOUD__ -ccl_device_inline -void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, - const uint object, - const uint prim, - const uint type, - const float3 ray_origin, - const float3 ray_direction, - float time, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) -{ -# ifdef __VISIBILITY_FLAG__ - const uint visibility = payload.visibility; - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - return; - } -# endif - - float3 P = ray_origin; - float3 dir = ray_direction; - - /* The direction is not normalized by default, but the point intersection routine expects that */ - float len; - dir = normalize_len(dir, &len); - - Intersection isect; - isect.t = ray_tmax; - /* Transform maximum distance into object space. 
*/ - if (isect.t != FLT_MAX) - isect.t *= len; - - MetalKernelContext context(launch_params_metal); - if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { - result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, isect.u); - if (result.accept) { - result.distance = isect.t / len; - payload.u = isect.u; - payload.v = isect.v; - payload.prim = prim; - payload.type = type; - } - } -} - -ccl_device_inline -void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params_metal, - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, - const uint object, - const uint prim, - const uint type, - const float3 ray_origin, - const float3 ray_direction, - float time, - const float ray_tmax, - thread BoundingBoxIntersectionResult &result) -{ - const uint visibility = payload.visibility; - - float3 P = ray_origin; - float3 dir = ray_direction; - - /* The direction is not normalized by default, but the point intersection routine expects that */ - float len; - dir = normalize_len(dir, &len); - - Intersection isect; - isect.t = ray_tmax; - /* Transform maximum distance into object space */ - if (isect.t != FLT_MAX) - isect.t *= len; - - MetalKernelContext context(launch_params_metal); - if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { - result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( - launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); - result.accept = !result.continue_search; - - if (result.accept) { - result.distance = isect.t / len; - } - } -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult -__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], - const uint object 
[[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmax [[max_distance]]) -{ - const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const int type = kernel_data_fetch(objects, object).primitive_type; - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - - metalrt_intersection_point(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmax, result); - - return result; -} - -[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] -BoundingBoxIntersectionResult -__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], - ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], - const uint object [[user_instance_id]], - const uint primitive_id [[primitive_id]], - const float3 ray_origin [[origin]], - const float3 ray_direction [[direction]], - const float ray_tmax [[max_distance]]) -{ - const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object); - const int type = kernel_data_fetch(objects, object).primitive_type; - - BoundingBoxIntersectionResult result; - result.accept = false; - result.continue_search = true; - result.distance = ray_tmax; - - metalrt_intersection_point_shadow(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction, -# if defined(__METALRT_MOTION__) - payload.time, -# else - 0.0f, -# endif - ray_tmax, result); - - return result; -} -#endif /* __POINTCLOUD__ */ -#endif /* __METALRT__ */ diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h index 30b0f088ede..1b25259bcf5 100644 --- a/intern/cycles/kernel/device/oneapi/compat.h +++ b/intern/cycles/kernel/device/oneapi/compat.h @@ 
-193,7 +193,7 @@ ccl_always_inline float3 make_float3(float x) #include "util/types.h" /* NOTE(@nsirgien): Declaring these functions after types headers is very important because they - * include oneAPI headers, which transitively include math.h headers which will cause redefintions + * include oneAPI headers, which transitively include math.h headers which will cause redefinitions * of the math defines because math.h also uses them and having them defined before math.h include * is actually UB. */ /* Use fast math functions - get them from sycl::native namespace for native math function diff --git a/intern/cycles/kernel/device/oneapi/device_id.h b/intern/cycles/kernel/device/oneapi/device_id.h deleted file mode 100644 index b4c94ac27a2..00000000000 --- a/intern/cycles/kernel/device/oneapi/device_id.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2021-2022 Intel Corporation */ - -#pragma once - -/* from public source : - * https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/include/pci_ids/iris_pci_ids.h */ -const static std::set<uint32_t> intel_arc_alchemist_device_ids = { - 0x4f80, 0x4f81, 0x4f82, 0x4f83, 0x4f84, 0x4f87, 0x4f88, 0x5690, 0x5691, - 0x5692, 0x5693, 0x5694, 0x5695, 0x5696, 0x5697, 0x56a0, 0x56a1, 0x56a2, - 0x56a3, 0x56a4, 0x56a5, 0x56a6, 0x56b0, 0x56b1, 0x56b2, 0x56b3}; diff --git a/intern/cycles/kernel/device/oneapi/dll_interface_template.h b/intern/cycles/kernel/device/oneapi/dll_interface_template.h index 2d740b4c64a..662068c0fed 100644 --- a/intern/cycles/kernel/device/oneapi/dll_interface_template.h +++ b/intern/cycles/kernel/device/oneapi/dll_interface_template.h @@ -1,3 +1,6 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2022 Intel Corporation */ + /* device_capabilities() returns a C string that must be free'd with oneapi_free(). 
*/ DLL_INTERFACE_CALL(oneapi_device_capabilities, char *) DLL_INTERFACE_CALL(oneapi_free, void, void *) @@ -27,7 +30,7 @@ DLL_INTERFACE_CALL(oneapi_usm_memset, DLL_INTERFACE_CALL(oneapi_run_test_kernel, bool, SyclQueue *queue) /* Operation with Kernel globals structure - map of global/constant allocation - filled before - * render/kernel execution As we don't know in cycles sizeof this - Cycles will manage just as + * render/kernel execution As we don't know in cycles `sizeof` this - Cycles will manage just as * pointer. */ DLL_INTERFACE_CALL(oneapi_kernel_globals_size, bool, SyclQueue *queue, size_t &kernel_global_size) DLL_INTERFACE_CALL(oneapi_set_global_memory, diff --git a/intern/cycles/kernel/device/oneapi/image.h b/intern/cycles/kernel/device/oneapi/image.h index 892558d40bf..6681977a675 100644 --- a/intern/cycles/kernel/device/oneapi/image.h +++ b/intern/cycles/kernel/device/oneapi/image.h @@ -216,7 +216,7 @@ template<typename T> struct NanoVDBInterpolator { int nix, niy, niz; int pix, piy, piz; int nnix, nniy, nniz; - /* Tricubic b-spline interpolation. */ + /* Tri-cubic b-spline interpolation. */ const float tx = svm_image_texture_frac(x - 0.5f, &ix); const float ty = svm_image_texture_frac(y - 0.5f, &iy); const float tz = svm_image_texture_frac(z - 0.5f, &iz); @@ -355,7 +355,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals, int id, float3 P, in return r; } else { - /* Tricubic interpolation. */ + /* Tri-cubic interpolation. 
*/ int ix, iy, iz; float tx = svm_image_texture_frac(x - 0.5f, &ix); float ty = svm_image_texture_frac(y - 0.5f, &iy); diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp index 62affe6e58e..300e201600c 100644 --- a/intern/cycles/kernel/device/oneapi/kernel.cpp +++ b/intern/cycles/kernel/device/oneapi/kernel.cpp @@ -9,12 +9,9 @@ # include <map> # include <set> -# include <level_zero/ze_api.h> # include <CL/sycl.hpp> -# include <ext/oneapi/backend/level_zero.hpp> # include "kernel/device/oneapi/compat.h" -# include "kernel/device/oneapi/device_id.h" # include "kernel/device/oneapi/globals.h" # include "kernel/device/oneapi/kernel_templates.h" @@ -103,8 +100,12 @@ bool oneapi_usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_byte sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); oneapi_check_usm(queue_, dest, true); oneapi_check_usm(queue_, src, true); + sycl::event mem_event = queue->memcpy(dest, src, num_bytes); +# ifdef WITH_CYCLES_DEBUG try { - sycl::event mem_event = queue->memcpy(dest, src, num_bytes); + /* NOTE(@nsirgien) Waiting on memory operation may give more precise error + * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug. 
+ */ mem_event.wait_and_throw(); return true; } @@ -114,6 +115,20 @@ bool oneapi_usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_byte } return false; } +# else + sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context()); + sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context()); + bool from_device_to_host = dest_type == sycl::usm::alloc::host && + src_type == sycl::usm::alloc::device; + bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown || + src_type == sycl::usm::alloc::unknown; + /* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host + * may not wait until the end of the transfer before using the memory. + */ + if (from_device_to_host || host_or_device_memop_with_offset) + mem_event.wait(); + return true; +# endif } bool oneapi_usm_memset(SyclQueue *queue_, void *usm_ptr, unsigned char value, size_t num_bytes) @@ -121,8 +136,12 @@ bool oneapi_usm_memset(SyclQueue *queue_, void *usm_ptr, unsigned char value, si assert(queue_); sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); oneapi_check_usm(queue_, usm_ptr, true); + sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes); +# ifdef WITH_CYCLES_DEBUG try { - sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes); + /* NOTE(@nsirgien) Waiting on memory operation may give more precise error + * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug. + */ mem_event.wait_and_throw(); return true; } @@ -132,6 +151,10 @@ bool oneapi_usm_memset(SyclQueue *queue_, void *usm_ptr, unsigned char value, si } return false; } +# else + (void)mem_event; + return true; +# endif } bool oneapi_queue_synchronize(SyclQueue *queue_) @@ -328,8 +351,8 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context, int num_states = *((int *)(args[0])); /* Round up to the next work-group. 
*/ size_t groups_count = (num_states + local_size - 1) / local_size; - /* NOTE(@nsirgien): As for now non-uniform workgroups don't work on most oneAPI devices, we - * extend work size to fit uniformity requirements. */ + /* NOTE(@nsirgien): As for now non-uniform work-groups don't work on most oneAPI devices, + * we extend work size to fit uniformity requirements. */ global_size = groups_count * local_size; # ifdef WITH_ONEAPI_SYCL_HOST_ENABLED @@ -647,7 +670,7 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context, } static const int lowest_supported_driver_version_win = 1011660; -static const int lowest_supported_driver_version_neo = 20066; +static const int lowest_supported_driver_version_neo = 23570; static int parse_driver_build_version(const sycl::device &device) { @@ -726,21 +749,25 @@ static std::vector<sycl::device> oneapi_available_devices() else { bool filter_out = false; - /* For now we support all Intel(R) Arc(TM) devices - * and any future GPU with more than 128 execution units - * official support can be broaden to older and smaller GPUs once ready. */ + /* For now we support all Intel(R) Arc(TM) devices and likely any future GPU, + * assuming they have either more than 96 Execution Units or not 7 threads per EU. + * Official support can be broaden to older and smaller GPUs once ready. */ if (device.is_gpu() && platform.get_backend() == sycl::backend::ext_oneapi_level_zero) { - ze_device_handle_t ze_device = sycl::get_native<sycl::backend::ext_oneapi_level_zero>( - device); - ze_device_properties_t props = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; - zeDeviceGetProperties(ze_device, &props); - bool is_dg2 = (intel_arc_alchemist_device_ids.find(props.deviceId) != - intel_arc_alchemist_device_ids.end()); - int number_of_eus = props.numEUsPerSubslice * props.numSubslicesPerSlice * - props.numSlices; - if (!is_dg2 || number_of_eus < 128) + /* Filtered-out defaults in-case these values aren't available through too old L0 + * runtime. 
*/ + int number_of_eus = 96; + int threads_per_eu = 7; + if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) { + number_of_eus = device.get_info<sycl::info::device::ext_intel_gpu_eu_count>(); + } + if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) { + threads_per_eu = + device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>(); + } + /* This filters out all Level-Zero supported GPUs from older generation than Arc. */ + if (number_of_eus <= 96 && threads_per_eu == 7) { filter_out = true; - + } /* if not already filtered out, check driver version. */ if (!filter_out) { int driver_build_version = parse_driver_build_version(device); @@ -862,6 +889,9 @@ void oneapi_iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr) device.get_platform().get_info<sycl::info::platform::name>(); std::string name = device.get_info<sycl::info::device::name>(); std::string id = "ONEAPI_" + platform_name + "_" + name; + if (device.has(sycl::aspect::ext_intel_pci_address)) { + id.append("_" + device.get_info<sycl::info::device::ext_intel_pci_address>()); + } (cb)(id.c_str(), name.c_str(), num, user_ptr); num++; } diff --git a/intern/cycles/kernel/device/oneapi/kernel_templates.h b/intern/cycles/kernel/device/oneapi/kernel_templates.h index 2dfc96292ed..d8964d9b672 100644 --- a/intern/cycles/kernel/device/oneapi/kernel_templates.h +++ b/intern/cycles/kernel/device/oneapi/kernel_templates.h @@ -1,10 +1,12 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Intel Corporation */ + #pragma once /* Some macro magic to generate templates for kernel arguments. - The resulting oneapi_call() template allows to call a SYCL/C++ kernel - with typed arguments by only giving it a void **args as given by Cycles. - The template will automatically cast from void* to the expectd type. - */ + * The resulting oneapi_call() template allows to call a SYCL/C++ kernel + * with typed arguments by only giving it a void `**args` as given by Cycles. 
+ * The template will automatically cast from void* to the expected type. */ /* When expanded by the preprocessor, the generated templates will look like this example: */ #if 0 diff --git a/intern/cycles/kernel/device/optix/bvh.h b/intern/cycles/kernel/device/optix/bvh.h new file mode 100644 index 00000000000..a1621277ec7 --- /dev/null +++ b/intern/cycles/kernel/device/optix/bvh.h @@ -0,0 +1,646 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2021-2022 Blender Foundation */ + +/* OptiX implementation of ray-scene intersection. */ + +#pragma once + +#include "kernel/bvh/types.h" +#include "kernel/bvh/util.h" + +#define OPTIX_DEFINE_ABI_VERSION_ONLY +#include <optix_function_table.h> + +CCL_NAMESPACE_BEGIN + +/* Utilities. */ + +template<typename T> ccl_device_forceinline T *get_payload_ptr_0() +{ + return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1()); +} +template<typename T> ccl_device_forceinline T *get_payload_ptr_2() +{ + return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3()); +} + +template<typename T> ccl_device_forceinline T *get_payload_ptr_6() +{ + return (T *)(((uint64_t)optixGetPayload_7() << 32) | optixGetPayload_6()); +} + +ccl_device_forceinline int get_object_id() +{ +#ifdef __OBJECT_MOTION__ + /* Always get the instance ID from the TLAS + * There might be a motion transform node between TLAS and BLAS which does not have one. */ + return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0)); +#else + return optixGetInstanceId(); +#endif +} + +/* Hit/miss functions. */ + +extern "C" __global__ void __miss__kernel_optix_miss() +{ + /* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. 
*/ + optixSetPayload_0(__float_as_uint(optixGetRayTmax())); + optixSetPayload_5(PRIMITIVE_NONE); +} + +extern "C" __global__ void __anyhit__kernel_optix_local_hit() +{ +#if defined(__HAIR__) || defined(__POINTCLOUD__) + if (!optixIsTriangleHit()) { + /* Ignore curves and points. */ + return optixIgnoreIntersection(); + } +#endif + +#ifdef __BVH_LOCAL__ + const int object = get_object_id(); + if (object != optixGetPayload_4() /* local_object */) { + /* Only intersect with matching object. */ + return optixIgnoreIntersection(); + } + + const int prim = optixGetPrimitiveIndex(); + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); + if (intersection_skip_self_local(ray->self, prim)) { + return optixIgnoreIntersection(); + } + + const uint max_hits = optixGetPayload_5(); + if (max_hits == 0) { + /* Special case for when no hit information is requested, just report that something was hit */ + optixSetPayload_5(true); + return optixTerminateRay(); + } + + int hit = 0; + uint *const lcg_state = get_payload_ptr_0<uint>(); + LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>(); + + if (lcg_state) { + for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (optixGetRayTmax() == local_isect->hits[i].t) { + return optixIgnoreIntersection(); + } + } + + hit = local_isect->num_hits++; + + if (local_isect->num_hits > max_hits) { + hit = lcg_step_uint(lcg_state) % local_isect->num_hits; + if (hit >= max_hits) { + return optixIgnoreIntersection(); + } + } + } + else { + if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) { + /* Record closest intersection only. + * Do not terminate ray here, since there is no guarantee about distance ordering in any-hit. 
+ */ + return optixIgnoreIntersection(); + } + + local_isect->num_hits = 1; + } + + Intersection *isect = &local_isect->hits[hit]; + isect->t = optixGetRayTmax(); + isect->prim = prim; + isect->object = get_object_id(); + isect->type = kernel_data_fetch(objects, isect->object).primitive_type; + + const float2 barycentrics = optixGetTriangleBarycentrics(); + isect->u = 1.0f - barycentrics.y - barycentrics.x; + isect->v = barycentrics.x; + + /* Record geometric normal. */ + const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w; + const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0); + const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1); + const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2); + local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); + + /* Continue tracing (without this the trace call would return after the first hit). */ + optixIgnoreIntersection(); +#endif +} + +extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() +{ +#ifdef __SHADOW_RECORD_ALL__ + int prim = optixGetPrimitiveIndex(); + const uint object = get_object_id(); +# ifdef __VISIBILITY_FLAG__ + const uint visibility = optixGetPayload_4(); + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return optixIgnoreIntersection(); + } +# endif + + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); + if (intersection_skip_self_shadow(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } + + float u = 0.0f, v = 0.0f; + int type = 0; + if (optixIsTriangleHit()) { + const float2 barycentrics = optixGetTriangleBarycentrics(); + u = 1.0f - barycentrics.y - barycentrics.x; + v = barycentrics.x; + type = kernel_data_fetch(objects, object).primitive_type; + } +# ifdef __HAIR__ + else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) { + u = __uint_as_float(optixGetAttribute_0()); + v = __uint_as_float(optixGetAttribute_1()); + + const KernelCurveSegment segment = 
kernel_data_fetch(curve_segments, prim); + type = segment.type; + prim = segment.prim; + +# if OPTIX_ABI_VERSION < 55 + /* Filter out curve endcaps. */ + if (u == 0.0f || u == 1.0f) { + return optixIgnoreIntersection(); + } +# endif + } +# endif + else { + type = kernel_data_fetch(objects, object).primitive_type; + u = 0.0f; + v = 0.0f; + } + +# ifndef __TRANSPARENT_SHADOWS__ + /* No transparent shadows support compiled in, make opaque. */ + optixSetPayload_5(true); + return optixTerminateRay(); +# else + const uint max_hits = optixGetPayload_3(); + const uint num_hits_packed = optixGetPayload_2(); + const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed); + const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed); + + /* If no transparent shadows, all light is blocked and we can stop immediately. */ + if (num_hits >= max_hits || + !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { + optixSetPayload_5(true); + return optixTerminateRay(); + } + + /* Always use baked shadow transparency for curves. */ + if (type & PRIMITIVE_CURVE) { + float throughput = __uint_as_float(optixGetPayload_1()); + throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u); + optixSetPayload_1(__float_as_uint(throughput)); + optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1)); + + if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { + optixSetPayload_5(true); + return optixTerminateRay(); + } + else { + /* Continue tracing. */ + optixIgnoreIntersection(); + return; + } + } + + /* Record transparent intersection. */ + optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1)); + + uint record_index = num_recorded_hits; + + const IntegratorShadowState state = optixGetPayload_0(); + + const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); + if (record_index >= max_record_hits) { + /* If maximum number of hits reached, find a hit to replace. 
*/ + float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); + uint max_recorded_hit = 0; + + for (int i = 1; i < max_record_hits; i++) { + const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); + if (isect_t > max_recorded_t) { + max_recorded_t = isect_t; + max_recorded_hit = i; + } + } + + if (optixGetRayTmax() >= max_recorded_t) { + /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the + * current hit anymore. */ + return; + } + + record_index = max_recorded_hit; + } + + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax(); + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; + + /* Continue tracing. */ + optixIgnoreIntersection(); +# endif /* __TRANSPARENT_SHADOWS__ */ +#endif /* __SHADOW_RECORD_ALL__ */ +} + +extern "C" __global__ void __anyhit__kernel_optix_volume_test() +{ +#if defined(__HAIR__) || defined(__POINTCLOUD__) + if (!optixIsTriangleHit()) { + /* Ignore curves. 
*/ + return optixIgnoreIntersection(); + } +#endif + + const uint object = get_object_id(); +#ifdef __VISIBILITY_FLAG__ + const uint visibility = optixGetPayload_4(); + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return optixIgnoreIntersection(); + } +#endif + + if ((kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) { + return optixIgnoreIntersection(); + } + + const int prim = optixGetPrimitiveIndex(); + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); + if (intersection_skip_self(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } +} + +extern "C" __global__ void __anyhit__kernel_optix_visibility_test() +{ +#ifdef __HAIR__ +# if OPTIX_ABI_VERSION < 55 + if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) { + /* Filter out curve endcaps. */ + const float u = __uint_as_float(optixGetAttribute_0()); + if (u == 0.0f || u == 1.0f) { + return optixIgnoreIntersection(); + } + } +# endif +#endif + + const uint object = get_object_id(); + const uint visibility = optixGetPayload_4(); +#ifdef __VISIBILITY_FLAG__ + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return optixIgnoreIntersection(); + } +#endif + + const int prim = optixGetPrimitiveIndex(); + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); + + if (visibility & PATH_RAY_SHADOW_OPAQUE) { + if (intersection_skip_self_shadow(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } + else { + /* Shadow ray early termination. 
*/ + return optixTerminateRay(); + } + } + else { + if (intersection_skip_self(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } + } +} + +extern "C" __global__ void __closesthit__kernel_optix_hit() +{ + const int object = get_object_id(); + const int prim = optixGetPrimitiveIndex(); + + optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */ + optixSetPayload_4(object); + + if (optixIsTriangleHit()) { + const float2 barycentrics = optixGetTriangleBarycentrics(); + optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x)); + optixSetPayload_2(__float_as_uint(barycentrics.x)); + optixSetPayload_3(prim); + optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type); + } + else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) { + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */ + optixSetPayload_2(optixGetAttribute_1()); + optixSetPayload_3(segment.prim); + optixSetPayload_5(segment.type); + } + else { + optixSetPayload_1(0); + optixSetPayload_2(0); + optixSetPayload_3(prim); + optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type); + } +} + +/* Custom primitive intersection functions. 
*/ + +#ifdef __HAIR__ +ccl_device_inline void optix_intersection_curve(const int prim, const int type) +{ + const int object = get_object_id(); + +# ifdef __VISIBILITY_FLAG__ + const uint visibility = optixGetPayload_4(); + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return; + } +# endif + + const float3 ray_P = optixGetObjectRayOrigin(); + const float3 ray_D = optixGetObjectRayDirection(); + const float ray_tmin = optixGetRayTmin(); + +# ifdef __OBJECT_MOTION__ + const float time = optixGetRayTime(); +# else + const float time = 0.0f; +# endif + + Intersection isect; + isect.t = optixGetRayTmax(); + + if (curve_intersect(NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use"); + optixReportIntersection(isect.t, + type & PRIMITIVE_ALL, + __float_as_int(isect.u), /* Attribute_0 */ + __float_as_int(isect.v)); /* Attribute_1 */ + } +} + +extern "C" __global__ void __intersection__curve_ribbon() +{ + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex()); + const int prim = segment.prim; + const int type = segment.type; + if (type & PRIMITIVE_CURVE_RIBBON) { + optix_intersection_curve(prim, type); + } +} + +#endif + +#ifdef __POINTCLOUD__ +extern "C" __global__ void __intersection__point() +{ + const int prim = optixGetPrimitiveIndex(); + const int object = get_object_id(); + const int type = kernel_data_fetch(objects, object).primitive_type; + +# ifdef __VISIBILITY_FLAG__ + const uint visibility = optixGetPayload_4(); + if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { + return; + } +# endif + + const float3 ray_P = optixGetObjectRayOrigin(); + const float3 ray_D = optixGetObjectRayDirection(); + const float ray_tmin = optixGetRayTmin(); + +# ifdef __OBJECT_MOTION__ + const float time = optixGetRayTime(); +# else + const float time = 0.0f; +# endif + + Intersection 
isect; + isect.t = optixGetRayTmax(); + + if (point_intersect(NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) { + static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use"); + optixReportIntersection(isect.t, type & PRIMITIVE_ALL); + } +} +#endif + +/* Scene intersection. */ + +ccl_device_intersect bool scene_intersect(KernelGlobals kg, + ccl_private const Ray *ray, + const uint visibility, + ccl_private Intersection *isect) +{ + uint p0 = 0; + uint p1 = 0; + uint p2 = 0; + uint p3 = 0; + uint p4 = visibility; + uint p5 = PRIMITIVE_NONE; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; + + uint ray_mask = visibility & 0xFF; + uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + else if (visibility & PATH_RAY_SHADOW_OPAQUE) { + ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT; + } + + optixTrace(intersection_ray_valid(ray) ? 
kernel_data.device_bvh : 0, + ray->P, + ray->D, + ray->tmin, + ray->tmax, + ray->time, + ray_mask, + ray_flags, + 0, /* SBT offset for PG_HITD */ + 0, + 0, + p0, + p1, + p2, + p3, + p4, + p5, + p6, + p7); + + isect->t = __uint_as_float(p0); + isect->u = __uint_as_float(p1); + isect->v = __uint_as_float(p2); + isect->prim = p3; + isect->object = p4; + isect->type = p5; + + return p5 != PRIMITIVE_NONE; +} + +#ifdef __BVH_LOCAL__ +ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private LocalIntersection *local_isect, + int local_object, + ccl_private uint *lcg_state, + int max_hits) +{ + uint p0 = pointer_pack_to_uint_0(lcg_state); + uint p1 = pointer_pack_to_uint_1(lcg_state); + uint p2 = pointer_pack_to_uint_0(local_isect); + uint p3 = pointer_pack_to_uint_1(local_isect); + uint p4 = local_object; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; + + /* Is set to zero on miss or if ray is aborted, so can be used as return value. */ + uint p5 = max_hits; + + if (local_isect) { + local_isect->num_hits = 0; /* Initialize hit count to zero. */ + } + optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0, + ray->P, + ray->D, + ray->tmin, + ray->tmax, + ray->time, + 0xFF, + /* Need to always call into __anyhit__kernel_optix_local_hit. */ + OPTIX_RAY_FLAG_ENFORCE_ANYHIT, + 2, /* SBT offset for PG_HITL */ + 0, + 0, + p0, + p1, + p2, + p3, + p4, + p5, + p6, + p7); + + return p5; +} +#endif + +#ifdef __SHADOW_RECORD_ALL__ +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, + IntegratorShadowState state, + ccl_private const Ray *ray, + uint visibility, + uint max_hits, + ccl_private uint *num_recorded_hits, + ccl_private float *throughput) +{ + uint p0 = state; + uint p1 = __float_as_uint(1.0f); /* Throughput. */ + uint p2 = 0; /* Number of hits. 
*/ + uint p3 = max_hits; + uint p4 = visibility; + uint p5 = false; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + + optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0, + ray->P, + ray->D, + ray->tmin, + ray->tmax, + ray->time, + ray_mask, + /* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */ + OPTIX_RAY_FLAG_ENFORCE_ANYHIT, + 1, /* SBT offset for PG_HITS */ + 0, + 0, + p0, + p1, + p2, + p3, + p4, + p5, + p6, + p7); + + *num_recorded_hits = uint16_unpack_from_uint_0(p2); + *throughput = __uint_as_float(p1); + + return p5; +} +#endif + +#ifdef __VOLUME__ +ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint visibility) +{ + uint p0 = 0; + uint p1 = 0; + uint p2 = 0; + uint p3 = 0; + uint p4 = visibility; + uint p5 = PRIMITIVE_NONE; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + + optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0, + ray->P, + ray->D, + ray->tmin, + ray->tmax, + ray->time, + ray_mask, + /* Need to always call into __anyhit__kernel_optix_volume_test. 
*/ + OPTIX_RAY_FLAG_ENFORCE_ANYHIT, + 3, /* SBT offset for PG_HITV */ + 0, + 0, + p0, + p1, + p2, + p3, + p4, + p5, + p6, + p7); + + isect->t = __uint_as_float(p0); + isect->u = __uint_as_float(p1); + isect->v = __uint_as_float(p2); + isect->prim = p3; + isect->object = p4; + isect->type = p5; + + return p5 != PRIMITIVE_NONE; +} +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h index aa4a6321a8b..1a11a533b7e 100644 --- a/intern/cycles/kernel/device/optix/compat.h +++ b/intern/cycles/kernel/device/optix/compat.h @@ -8,7 +8,6 @@ #include <optix.h> #define __KERNEL_GPU__ -#define __KERNEL_GPU_RAYTRACING__ #define __KERNEL_CUDA__ /* OptiX kernels are implicitly CUDA kernels too */ #define __KERNEL_OPTIX__ #define CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index 949bf41d171..6abb5aeacb9 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -20,469 +20,39 @@ #include "kernel/integrator/intersect_volume_stack.h" // clang-format on -#define OPTIX_DEFINE_ABI_VERSION_ONLY -#include <optix_function_table.h> - -template<typename T> ccl_device_forceinline T *get_payload_ptr_0() -{ - return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1()); -} -template<typename T> ccl_device_forceinline T *get_payload_ptr_2() -{ - return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3()); -} - -template<typename T> ccl_device_forceinline T *get_payload_ptr_6() -{ - return (T *)(((uint64_t)optixGetPayload_7() << 32) | optixGetPayload_6()); -} - -ccl_device_forceinline int get_object_id() -{ -#ifdef __OBJECT_MOTION__ - /* Always get the instance ID from the TLAS - * There might be a motion transform node between TLAS and BLAS which does not have one. 
*/ - return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0)); -#else - return optixGetInstanceId(); -#endif -} - extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest() { const int global_index = optixGetLaunchIndex().x; - const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] : - global_index; + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; integrator_intersect_closest(nullptr, path_index, kernel_params.render_buffer); } extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow() { const int global_index = optixGetLaunchIndex().x; - const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] : - global_index; + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; integrator_intersect_shadow(nullptr, path_index); } extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_subsurface() { const int global_index = optixGetLaunchIndex().x; - const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] : - global_index; + const int path_index = (kernel_params.path_index_array) ? + kernel_params.path_index_array[global_index] : + global_index; integrator_intersect_subsurface(nullptr, path_index); } extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_stack() { const int global_index = optixGetLaunchIndex().x; - const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] : - global_index; + const int path_index = (kernel_params.path_index_array) ? 
+ kernel_params.path_index_array[global_index] : + global_index; integrator_intersect_volume_stack(nullptr, path_index); } -extern "C" __global__ void __miss__kernel_optix_miss() -{ - /* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */ - optixSetPayload_0(__float_as_uint(optixGetRayTmax())); - optixSetPayload_5(PRIMITIVE_NONE); -} - -extern "C" __global__ void __anyhit__kernel_optix_local_hit() -{ -#if defined(__HAIR__) || defined(__POINTCLOUD__) - if (!optixIsTriangleHit()) { - /* Ignore curves and points. */ - return optixIgnoreIntersection(); - } -#endif - -#ifdef __BVH_LOCAL__ - const int object = get_object_id(); - if (object != optixGetPayload_4() /* local_object */) { - /* Only intersect with matching object. */ - return optixIgnoreIntersection(); - } - - const int prim = optixGetPrimitiveIndex(); - ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); - if (intersection_skip_self_local(ray->self, prim)) { - return optixIgnoreIntersection(); - } - - const uint max_hits = optixGetPayload_5(); - if (max_hits == 0) { - /* Special case for when no hit information is requested, just report that something was hit */ - optixSetPayload_5(true); - return optixTerminateRay(); - } - - int hit = 0; - uint *const lcg_state = get_payload_ptr_0<uint>(); - LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>(); - - if (lcg_state) { - for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { - if (optixGetRayTmax() == local_isect->hits[i].t) { - return optixIgnoreIntersection(); - } - } - - hit = local_isect->num_hits++; - - if (local_isect->num_hits > max_hits) { - hit = lcg_step_uint(lcg_state) % local_isect->num_hits; - if (hit >= max_hits) { - return optixIgnoreIntersection(); - } - } - } - else { - if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) { - /* Record closest intersection only. 
- * Do not terminate ray here, since there is no guarantee about distance ordering in any-hit. - */ - return optixIgnoreIntersection(); - } - - local_isect->num_hits = 1; - } - - Intersection *isect = &local_isect->hits[hit]; - isect->t = optixGetRayTmax(); - isect->prim = prim; - isect->object = get_object_id(); - isect->type = kernel_data_fetch(objects, isect->object).primitive_type; - - const float2 barycentrics = optixGetTriangleBarycentrics(); - isect->u = 1.0f - barycentrics.y - barycentrics.x; - isect->v = barycentrics.x; - - /* Record geometric normal. */ - const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w; - const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0); - const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1); - const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2); - local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); - - /* Continue tracing (without this the trace call would return after the first hit). 
*/ - optixIgnoreIntersection(); -#endif -} - -extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() -{ -#ifdef __SHADOW_RECORD_ALL__ - int prim = optixGetPrimitiveIndex(); - const uint object = get_object_id(); -# ifdef __VISIBILITY_FLAG__ - const uint visibility = optixGetPayload_4(); - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - return optixIgnoreIntersection(); - } -# endif - - ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); - if (intersection_skip_self_shadow(ray->self, object, prim)) { - return optixIgnoreIntersection(); - } - - float u = 0.0f, v = 0.0f; - int type = 0; - if (optixIsTriangleHit()) { - const float2 barycentrics = optixGetTriangleBarycentrics(); - u = 1.0f - barycentrics.y - barycentrics.x; - v = barycentrics.x; - type = kernel_data_fetch(objects, object).primitive_type; - } -# ifdef __HAIR__ - else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) { - u = __uint_as_float(optixGetAttribute_0()); - v = __uint_as_float(optixGetAttribute_1()); - - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - type = segment.type; - prim = segment.prim; - -# if OPTIX_ABI_VERSION < 55 - /* Filter out curve endcaps. */ - if (u == 0.0f || u == 1.0f) { - return optixIgnoreIntersection(); - } -# endif - } -# endif - else { - type = kernel_data_fetch(objects, object).primitive_type; - u = 0.0f; - v = 0.0f; - } - -# ifndef __TRANSPARENT_SHADOWS__ - /* No transparent shadows support compiled in, make opaque. */ - optixSetPayload_5(true); - return optixTerminateRay(); -# else - const uint max_hits = optixGetPayload_3(); - const uint num_hits_packed = optixGetPayload_2(); - const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed); - const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed); - - /* If no transparent shadows, all light is blocked and we can stop immediately. 
*/ - if (num_hits >= max_hits || - !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { - optixSetPayload_5(true); - return optixTerminateRay(); - } - - /* Always use baked shadow transparency for curves. */ - if (type & PRIMITIVE_CURVE) { - float throughput = __uint_as_float(optixGetPayload_1()); - throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u); - optixSetPayload_1(__float_as_uint(throughput)); - optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1)); - - if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { - optixSetPayload_5(true); - return optixTerminateRay(); - } - else { - /* Continue tracing. */ - optixIgnoreIntersection(); - return; - } - } - - /* Record transparent intersection. */ - optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1)); - - uint record_index = num_recorded_hits; - - const IntegratorShadowState state = optixGetPayload_0(); - - const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); - if (record_index >= max_record_hits) { - /* If maximum number of hits reached, find a hit to replace. */ - float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); - uint max_recorded_hit = 0; - - for (int i = 1; i < max_record_hits; i++) { - const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); - if (isect_t > max_recorded_t) { - max_recorded_t = isect_t; - max_recorded_hit = i; - } - } - - if (optixGetRayTmax() >= max_recorded_t) { - /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the - * current hit anymore. 
*/ - return; - } - - record_index = max_recorded_hit; - } - - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax(); - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; - - /* Continue tracing. */ - optixIgnoreIntersection(); -# endif /* __TRANSPARENT_SHADOWS__ */ -#endif /* __SHADOW_RECORD_ALL__ */ -} - -extern "C" __global__ void __anyhit__kernel_optix_volume_test() -{ -#if defined(__HAIR__) || defined(__POINTCLOUD__) - if (!optixIsTriangleHit()) { - /* Ignore curves. */ - return optixIgnoreIntersection(); - } -#endif - - const uint object = get_object_id(); -#ifdef __VISIBILITY_FLAG__ - const uint visibility = optixGetPayload_4(); - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - return optixIgnoreIntersection(); - } -#endif - - if ((kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) { - return optixIgnoreIntersection(); - } - - const int prim = optixGetPrimitiveIndex(); - ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); - if (intersection_skip_self(ray->self, object, prim)) { - return optixIgnoreIntersection(); - } -} - -extern "C" __global__ void __anyhit__kernel_optix_visibility_test() -{ -#ifdef __HAIR__ -# if OPTIX_ABI_VERSION < 55 - if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) { - /* Filter out curve endcaps. 
*/ - const float u = __uint_as_float(optixGetAttribute_0()); - if (u == 0.0f || u == 1.0f) { - return optixIgnoreIntersection(); - } - } -# endif -#endif - - const uint object = get_object_id(); - const uint visibility = optixGetPayload_4(); -#ifdef __VISIBILITY_FLAG__ - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - return optixIgnoreIntersection(); - } -#endif - - const int prim = optixGetPrimitiveIndex(); - ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); - - if (visibility & PATH_RAY_SHADOW_OPAQUE) { - if (intersection_skip_self_shadow(ray->self, object, prim)) { - return optixIgnoreIntersection(); - } - else { - /* Shadow ray early termination. */ - return optixTerminateRay(); - } - } - else { - if (intersection_skip_self(ray->self, object, prim)) { - return optixIgnoreIntersection(); - } - } -} - -extern "C" __global__ void __closesthit__kernel_optix_hit() -{ - const int object = get_object_id(); - const int prim = optixGetPrimitiveIndex(); - - optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */ - optixSetPayload_4(object); - - if (optixIsTriangleHit()) { - const float2 barycentrics = optixGetTriangleBarycentrics(); - optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x)); - optixSetPayload_2(__float_as_uint(barycentrics.x)); - optixSetPayload_3(prim); - optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type); - } - else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) { - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); - optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */ - optixSetPayload_2(optixGetAttribute_1()); - optixSetPayload_3(segment.prim); - optixSetPayload_5(segment.type); - } - else { - optixSetPayload_1(0); - optixSetPayload_2(0); - optixSetPayload_3(prim); - optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type); - } -} - -#ifdef __HAIR__ -ccl_device_inline 
void optix_intersection_curve(const int prim, const int type) -{ - const int object = get_object_id(); - -# ifdef __VISIBILITY_FLAG__ - const uint visibility = optixGetPayload_4(); - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - return; - } -# endif - - float3 P = optixGetObjectRayOrigin(); - float3 dir = optixGetObjectRayDirection(); - - /* The direction is not normalized by default, but the curve intersection routine expects that */ - float len; - dir = normalize_len(dir, &len); - -# ifdef __OBJECT_MOTION__ - const float time = optixGetRayTime(); -# else - const float time = 0.0f; -# endif - - Intersection isect; - isect.t = optixGetRayTmax(); - /* Transform maximum distance into object space. */ - if (isect.t != FLT_MAX) - isect.t *= len; - - if (curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { - static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use"); - optixReportIntersection(isect.t / len, - type & PRIMITIVE_ALL, - __float_as_int(isect.u), /* Attribute_0 */ - __float_as_int(isect.v)); /* Attribute_1 */ - } -} - -extern "C" __global__ void __intersection__curve_ribbon() -{ - const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex()); - const int prim = segment.prim; - const int type = segment.type; - if (type & PRIMITIVE_CURVE_RIBBON) { - optix_intersection_curve(prim, type); - } -} - -#endif - -#ifdef __POINTCLOUD__ -extern "C" __global__ void __intersection__point() -{ - const int prim = optixGetPrimitiveIndex(); - const int object = get_object_id(); - const int type = kernel_data_fetch(objects, object).primitive_type; - -# ifdef __VISIBILITY_FLAG__ - const uint visibility = optixGetPayload_4(); - if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) { - return; - } -# endif - - float3 P = optixGetObjectRayOrigin(); - float3 dir = optixGetObjectRayDirection(); - - /* The direction is not normalized by default, the point 
intersection routine expects that. */ - float len; - dir = normalize_len(dir, &len); - -# ifdef __OBJECT_MOTION__ - const float time = optixGetRayTime(); -# else - const float time = 0.0f; -# endif - - Intersection isect; - isect.t = optixGetRayTmax(); - /* Transform maximum distance into object space. */ - if (isect.t != FLT_MAX) { - isect.t *= len; - } - - if (point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { - static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use"); - optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL); - } -} -#endif diff --git a/intern/cycles/kernel/geom/curve_intersect.h b/intern/cycles/kernel/geom/curve_intersect.h index 001bec01749..97644aacaa8 100644 --- a/intern/cycles/kernel/geom/curve_intersect.h +++ b/intern/cycles/kernel/geom/curve_intersect.h @@ -72,7 +72,7 @@ ccl_device_inline float sqr_point_to_line_distance(const float3 PmQ0, const floa ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, const float3 cylinder_end, const float cylinder_radius, - const float3 ray_dir, + const float3 ray_D, ccl_private float2 *t_o, ccl_private float *u0_o, ccl_private float3 *Ng0_o, @@ -82,7 +82,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, /* Calculate quadratic equation to solve. */ const float rl = 1.0f / len(cylinder_end - cylinder_start); const float3 P0 = cylinder_start, dP = (cylinder_end - cylinder_start) * rl; - const float3 O = -P0, dO = ray_dir; + const float3 O = -P0, dO = ray_D; const float dOdO = dot(dO, dO); const float OdO = dot(dO, O); @@ -123,7 +123,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, /* Calculates u and Ng for near hit. 
*/ { *u0_o = (t0 * dOz + Oz) * rl; - const float3 Pr = t0 * ray_dir; + const float3 Pr = t0 * ray_D; const float3 Pl = (*u0_o) * (cylinder_end - cylinder_start) + cylinder_start; *Ng0_o = Pr - Pl; } @@ -131,7 +131,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, /* Calculates u and Ng for far hit. */ { *u1_o = (t1 * dOz + Oz) * rl; - const float3 Pr = t1 * ray_dir; + const float3 Pr = t1 * ray_D; const float3 Pl = (*u1_o) * (cylinder_end - cylinder_start) + cylinder_start; *Ng1_o = Pr - Pl; } @@ -141,10 +141,10 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, return true; } -ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_dir) +ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_D) { const float3 O = -P; - const float3 D = ray_dir; + const float3 D = ray_D; const float ON = dot(O, N); const float DN = dot(D, N); const float min_rcp_input = 1e-18f; @@ -155,8 +155,9 @@ ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, co return make_float2(lower, upper); } -ccl_device bool curve_intersect_iterative(const float3 ray_dir, - ccl_private float *ray_tfar, +ccl_device bool curve_intersect_iterative(const float3 ray_D, + const float ray_tmin, + ccl_private float *ray_tmax, const float dt, const float4 curve[4], float u, @@ -164,7 +165,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir, const bool use_backfacing, ccl_private Intersection *isect) { - const float length_ray_dir = len(ray_dir); + const float length_ray_D = len(ray_D); /* Error of curve evaluations is proportional to largest coordinate. 
*/ const float4 box_min = min(min(curve[0], curve[1]), min(curve[2], curve[3])); @@ -175,9 +176,9 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir, const float radius_max = box_max.w; for (int i = 0; i < CURVE_NUM_JACOBIAN_ITERATIONS; i++) { - const float3 Q = ray_dir * t; - const float3 dQdt = ray_dir; - const float Q_err = 16.0f * FLT_EPSILON * length_ray_dir * t; + const float3 Q = ray_D * t; + const float3 dQdt = ray_D; + const float Q_err = 16.0f * FLT_EPSILON * length_ray_D * t; const float4 P4 = catmull_rom_basis_eval(curve, u); const float4 dPdu4 = catmull_rom_basis_derivative(curve, u); @@ -220,7 +221,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir, if (fabsf(f) < f_err && fabsf(g) < g_err) { t += dt; - if (!(0.0f <= t && t <= *ray_tfar)) { + if (!(t >= ray_tmin && t <= *ray_tmax)) { return false; /* Rejects NaNs */ } if (!(u >= 0.0f && u <= 1.0f)) { @@ -232,12 +233,12 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir, const float3 U = dradiusdu * R + dPdu; const float3 V = cross(dPdu, R); const float3 Ng = cross(V, U); - if (!use_backfacing && dot(ray_dir, Ng) > 0.0f) { + if (!use_backfacing && dot(ray_D, Ng) > 0.0f) { return false; } /* Record intersection. */ - *ray_tfar = t; + *ray_tmax = t; isect->t = t; isect->u = u; isect->v = 0.0f; @@ -248,16 +249,17 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir, return false; } -ccl_device bool curve_intersect_recursive(const float3 ray_orig, - const float3 ray_dir, - float ray_tfar, +ccl_device bool curve_intersect_recursive(const float3 ray_P, + const float3 ray_D, + const float ray_tmin, + float ray_tmax, float4 curve[4], ccl_private Intersection *isect) { /* Move ray closer to make intersection stable. 
*/ const float3 center = float4_to_float3(0.25f * (curve[0] + curve[1] + curve[2] + curve[3])); - const float dt = dot(center - ray_orig, ray_dir) / dot(ray_dir, ray_dir); - const float3 ref = ray_orig + ray_dir * dt; + const float dt = dot(center - ray_P, ray_D) / dot(ray_D, ray_D); + const float3 ref = ray_P + ray_D * dt; const float4 ref4 = make_float4(ref.x, ref.y, ref.z, 0.0f); curve[0] -= ref4; curve[1] -= ref4; @@ -320,7 +322,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig, valid = cylinder_intersect(float4_to_float3(P0), float4_to_float3(P3), r_outer, - ray_dir, + ray_D, &tc_outer, &u_outer0, &Ng_outer0, @@ -331,13 +333,12 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig, } /* Intersect with cap-planes. */ - float2 tp = make_float2(-dt, ray_tfar - dt); + float2 tp = make_float2(ray_tmin - dt, ray_tmax - dt); tp = make_float2(max(tp.x, tc_outer.x), min(tp.y, tc_outer.y)); - const float2 h0 = half_plane_intersect( - float4_to_float3(P0), float4_to_float3(dP0du), ray_dir); + const float2 h0 = half_plane_intersect(float4_to_float3(P0), float4_to_float3(dP0du), ray_D); tp = make_float2(max(tp.x, h0.x), min(tp.y, h0.y)); const float2 h1 = half_plane_intersect( - float4_to_float3(P3), -float4_to_float3(dP3du), ray_dir); + float4_to_float3(P3), -float4_to_float3(dP3du), ray_D); tp = make_float2(max(tp.x, h1.x), min(tp.y, h1.y)); valid = tp.x <= tp.y; if (!valid) { @@ -357,7 +358,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig, const bool valid_inner = cylinder_intersect(float4_to_float3(P0), float4_to_float3(P3), r_inner, - ray_dir, + ray_D, &tc_inner, &u_inner0, &Ng_inner0, @@ -367,9 +368,9 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig, /* At the unstable area we subdivide deeper. 
*/ # if 0 const bool unstable0 = (!valid_inner) | - (fabsf(dot(normalize(ray_dir), normalize(Ng_inner0))) < 0.3f); + (fabsf(dot(normalize(ray_D), normalize(Ng_inner0))) < 0.3f); const bool unstable1 = (!valid_inner) | - (fabsf(dot(normalize(ray_dir), normalize(Ng_inner1))) < 0.3f); + (fabsf(dot(normalize(ray_D), normalize(Ng_inner1))) < 0.3f); # else /* On the GPU appears to be a little faster if always enabled. */ (void)valid_inner; @@ -394,19 +395,20 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig, CURVE_NUM_BEZIER_SUBDIVISIONS; if (depth >= termDepth) { found |= curve_intersect_iterative( - ray_dir, &ray_tfar, dt, curve, u_outer0, tp0.x, use_backfacing, isect); + ray_D, ray_tmin, &ray_tmax, dt, curve, u_outer0, tp0.x, use_backfacing, isect); } else { recurse = true; } } - if (valid1 && (tp1.x + dt <= ray_tfar)) { + const float t1 = tp1.x + dt; + if (valid1 && (t1 >= ray_tmin && t1 <= ray_tmax)) { const int termDepth = unstable1 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE : CURVE_NUM_BEZIER_SUBDIVISIONS; if (depth >= termDepth) { found |= curve_intersect_iterative( - ray_dir, &ray_tfar, dt, curve, u_outer1, tp1.y, use_backfacing, isect); + ray_D, ray_tmin, &ray_tmax, dt, curve, u_outer1, tp1.y, use_backfacing, isect); } else { recurse = true; @@ -456,7 +458,8 @@ ccl_device_inline bool cylinder_culling_test(const float2 p1, const float2 p2, c * v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two * triangles gets intersected. */ -ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar, +ccl_device_inline bool ribbon_intersect_quad(const float ray_tmin, + const float ray_tmax, const float3 quad_v0, const float3 quad_v1, const float3 quad_v2, @@ -497,7 +500,7 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar, /* Perform depth test? 
*/ const float t = rcpDen * dot(v0, Ng); - if (!(0.0f <= t && t <= ray_tfar)) { + if (!(t >= ray_tmin && t <= ray_tmax)) { return false; } @@ -515,13 +518,16 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar, return true; } -ccl_device_inline void ribbon_ray_space(const float3 ray_dir, float3 ray_space[3]) +ccl_device_inline void ribbon_ray_space(const float3 ray_D, + const float ray_D_invlen, + float3 ray_space[3]) { - const float3 dx0 = make_float3(0, ray_dir.z, -ray_dir.y); - const float3 dx1 = make_float3(-ray_dir.z, 0, ray_dir.x); + const float3 D = ray_D * ray_D_invlen; + const float3 dx0 = make_float3(0, D.z, -D.y); + const float3 dx1 = make_float3(-D.z, 0, D.x); ray_space[0] = normalize(dot(dx0, dx0) > dot(dx1, dx1) ? dx0 : dx1); - ray_space[1] = normalize(cross(ray_dir, ray_space[0])); - ray_space[2] = ray_dir; + ray_space[1] = normalize(cross(D, ray_space[0])); + ray_space[2] = D * ray_D_invlen; } ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3], @@ -533,15 +539,17 @@ ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3], } ccl_device_inline bool ribbon_intersect(const float3 ray_org, - const float3 ray_dir, - float ray_tfar, + const float3 ray_D, + const float ray_tmin, + float ray_tmax, const int N, float4 curve[4], ccl_private Intersection *isect) { /* Transform control points into ray space. */ + const float ray_D_invlen = 1.0f / len(ray_D); float3 ray_space[3]; - ribbon_ray_space(ray_dir, ray_space); + ribbon_ray_space(ray_D, ray_D_invlen, ray_space); curve[0] = ribbon_to_ray_space(ray_space, ray_org, curve[0]); curve[1] = ribbon_to_ray_space(ray_space, ray_org, curve[1]); @@ -582,21 +590,21 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org, /* Intersect quad. 
*/ float vu, vv, vt; - bool valid0 = ribbon_intersect_quad(ray_tfar, lp0, lp1, up1, up0, &vu, &vv, &vt); + bool valid0 = ribbon_intersect_quad(ray_tmin, ray_tmax, lp0, lp1, up1, up0, &vu, &vv, &vt); if (valid0) { /* ignore self intersections */ const float avoidance_factor = 2.0f; if (avoidance_factor != 0.0f) { float r = mix(p0.w, p1.w, vu); - valid0 = vt > avoidance_factor * r; + valid0 = vt > avoidance_factor * r * ray_D_invlen; } if (valid0) { vv = 2.0f * vv - 1.0f; /* Record intersection. */ - ray_tfar = vt; + ray_tmax = vt; isect->t = vt; isect->u = u + vu * step_size; isect->v = vv; @@ -614,8 +622,9 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org, ccl_device_forceinline bool curve_intersect(KernelGlobals kg, ccl_private Intersection *isect, - const float3 P, - const float3 dir, + const float3 ray_P, + const float3 ray_D, + const float tmin, const float tmax, int object, int prim, @@ -645,7 +654,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg, if (type & PRIMITIVE_CURVE_RIBBON) { /* todo: adaptive number of subdivisions could help performance here. 
*/ const int subdivisions = kernel_data.bvh.curve_subdivisions; - if (ribbon_intersect(P, dir, tmax, subdivisions, curve, isect)) { + if (ribbon_intersect(ray_P, ray_D, tmin, tmax, subdivisions, curve, isect)) { isect->prim = prim; isect->object = object; isect->type = type; @@ -655,7 +664,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg, return false; } else { - if (curve_intersect_recursive(P, dir, tmax, curve, isect)) { + if (curve_intersect_recursive(ray_P, ray_D, tmin, tmax, curve, isect)) { isect->prim = prim; isect->object = object; isect->type = type; diff --git a/intern/cycles/kernel/geom/motion_triangle_intersect.h b/intern/cycles/kernel/geom/motion_triangle_intersect.h index 6eea5096567..b59c5c43c20 100644 --- a/intern/cycles/kernel/geom/motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/motion_triangle_intersect.h @@ -46,6 +46,7 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg, ccl_private Intersection *isect, float3 P, float3 dir, + float tmin, float tmax, float time, uint visibility, @@ -58,7 +59,7 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg, motion_triangle_vertices(kg, object, prim, time, verts); /* Ray-triangle intersection, unoptimized. */ float t, u, v; - if (ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) { + if (ray_triangle_intersect(P, dir, tmin, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) { #ifdef __VISIBILITY_FLAG__ /* Visibility flag test. we do it here under the assumption * that most triangles are culled by node flags. @@ -92,6 +93,7 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg, int object, int prim, int prim_addr, + float tmin, float tmax, ccl_private uint *lcg_state, int max_hits) @@ -101,7 +103,7 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg, motion_triangle_vertices(kg, object, prim, time, verts); /* Ray-triangle intersection, unoptimized. 
*/ float t, u, v; - if (!ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) { + if (!ray_triangle_intersect(P, dir, tmin, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) { return false; } diff --git a/intern/cycles/kernel/geom/object.h b/intern/cycles/kernel/geom/object.h index b15f6b5dda5..badfd311985 100644 --- a/intern/cycles/kernel/geom/object.h +++ b/intern/cycles/kernel/geom/object.h @@ -86,7 +86,7 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals kg, Transform tfm = object_fetch_transform_motion(kg, object, time); if (itfm) - *itfm = transform_quick_inverse(tfm); + *itfm = transform_inverse(tfm); return tfm; } @@ -488,59 +488,30 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir) /* Transform ray into object space to enter static object in BVH */ -ccl_device_inline float bvh_instance_push(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir) +ccl_device_inline void bvh_instance_push(KernelGlobals kg, + int object, + ccl_private const Ray *ray, + ccl_private float3 *P, + ccl_private float3 *dir, + ccl_private float3 *idir) { Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); *P = transform_point(&tfm, ray->P); - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); + *dir = bvh_clamp_direction(transform_direction(&tfm, ray->D)); *idir = bvh_inverse_direction(*dir); - - return len; } /* Transform ray to exit static object in BVH. 
*/ -ccl_device_inline float bvh_instance_pop(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - float t) +ccl_device_inline void bvh_instance_pop(KernelGlobals kg, + int object, + ccl_private const Ray *ray, + ccl_private float3 *P, + ccl_private float3 *dir, + ccl_private float3 *idir) { - if (t != FLT_MAX) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - t /= len(transform_direction(&tfm, ray->D)); - } - - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); - - return t; -} - -/* Same as above, but returns scale factor to apply to multiple intersection distances */ - -ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - ccl_private float *t_fac) -{ - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - *t_fac = 1.0f / len(transform_direction(&tfm, ray->D)); - *P = ray->P; *dir = bvh_clamp_direction(ray->D); *idir = bvh_inverse_direction(*dir); @@ -549,59 +520,31 @@ ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg, #ifdef __OBJECT_MOTION__ /* Transform ray into object space to enter motion blurred object in BVH */ -ccl_device_inline float bvh_instance_motion_push(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - ccl_private Transform *itfm) -{ - object_fetch_transform_motion_test(kg, object, ray->time, itfm); - - *P = transform_point(itfm, ray->P); - - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); - - return len; -} - -/* Transform ray to exit motion blurred object in BVH. 
*/ - -ccl_device_inline float bvh_instance_motion_pop(KernelGlobals kg, +ccl_device_inline void bvh_instance_motion_push(KernelGlobals kg, int object, ccl_private const Ray *ray, ccl_private float3 *P, ccl_private float3 *dir, - ccl_private float3 *idir, - float t, - ccl_private Transform *itfm) + ccl_private float3 *idir) { - if (t != FLT_MAX) { - t /= len(transform_direction(itfm, ray->D)); - } + Transform tfm; + object_fetch_transform_motion_test(kg, object, ray->time, &tfm); - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); + *P = transform_point(&tfm, ray->P); - return t; + *dir = bvh_clamp_direction(transform_direction(&tfm, ray->D)); + *idir = bvh_inverse_direction(*dir); } -/* Same as above, but returns scale factor to apply to multiple intersection distances */ +/* Transform ray to exit motion blurred object in BVH. */ -ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - ccl_private float *t_fac, - ccl_private Transform *itfm) +ccl_device_inline void bvh_instance_motion_pop(KernelGlobals kg, + int object, + ccl_private const Ray *ray, + ccl_private float3 *P, + ccl_private float3 *dir, + ccl_private float3 *idir) { - *t_fac = 1.0f / len(transform_direction(itfm, ray->D)); *P = ray->P; *dir = bvh_clamp_direction(ray->D); *idir = bvh_inverse_direction(*dir); diff --git a/intern/cycles/kernel/geom/point_intersect.h b/intern/cycles/kernel/geom/point_intersect.h index dfd9d9a015b..15fb814c58d 100644 --- a/intern/cycles/kernel/geom/point_intersect.h +++ b/intern/cycles/kernel/geom/point_intersect.h @@ -9,17 +9,21 @@ CCL_NAMESPACE_BEGIN #ifdef __POINTCLOUD__ -ccl_device_forceinline bool point_intersect_test( - const float4 point, const float3 P, const float3 dir, const float tmax, ccl_private float *t) +ccl_device_forceinline bool point_intersect_test(const float4 point, + const 
float3 ray_P, + const float3 ray_D, + const float ray_tmin, + const float ray_tmax, + ccl_private float *t) { const float3 center = float4_to_float3(point); const float radius = point.w; - const float rd2 = 1.0f / dot(dir, dir); + const float rd2 = 1.0f / dot(ray_D, ray_D); - const float3 c0 = center - P; - const float projC0 = dot(c0, dir) * rd2; - const float3 perp = c0 - projC0 * dir; + const float3 c0 = center - ray_P; + const float projC0 = dot(c0, ray_D) * rd2; + const float3 perp = c0 - projC0 * ray_D; const float l2 = dot(perp, perp); const float r2 = radius * radius; if (!(l2 <= r2)) { @@ -28,12 +32,12 @@ ccl_device_forceinline bool point_intersect_test( const float td = sqrt((r2 - l2) * rd2); const float t_front = projC0 - td; - const bool valid_front = (0.0f <= t_front) & (t_front <= tmax); + const bool valid_front = (ray_tmin <= t_front) & (t_front <= ray_tmax); /* Always back-face culling for now. */ # if 0 const float t_back = projC0 + td; - const bool valid_back = (0.0f <= t_back) & (t_back <= tmax); + const bool valid_back = (ray_tmin <= t_back) & (t_back <= ray_tmax); /* check if there is a first hit */ const bool valid_first = valid_front | valid_back; @@ -54,9 +58,10 @@ ccl_device_forceinline bool point_intersect_test( ccl_device_forceinline bool point_intersect(KernelGlobals kg, ccl_private Intersection *isect, - const float3 P, - const float3 dir, - const float tmax, + const float3 ray_P, + const float3 ray_D, + const float ray_tmin, + const float ray_tmax, const int object, const int prim, const float time, @@ -65,7 +70,7 @@ ccl_device_forceinline bool point_intersect(KernelGlobals kg, const float4 point = (type & PRIMITIVE_MOTION) ? 
motion_point(kg, object, prim, time) : kernel_data_fetch(points, prim); - if (!point_intersect_test(point, P, dir, tmax, &isect->t)) { + if (!point_intersect_test(point, ray_P, ray_D, ray_tmin, ray_tmax, &isect->t)) { return false; } diff --git a/intern/cycles/kernel/geom/shader_data.h b/intern/cycles/kernel/geom/shader_data.h index e5dbeac5e66..5af89b45f20 100644 --- a/intern/cycles/kernel/geom/shader_data.h +++ b/intern/cycles/kernel/geom/shader_data.h @@ -18,7 +18,7 @@ ccl_device void shader_setup_object_transforms(KernelGlobals kg, { if (sd->object_flag & SD_OBJECT_MOTION) { sd->ob_tfm_motion = object_fetch_transform_motion(kg, sd->object, time); - sd->ob_itfm_motion = transform_quick_inverse(sd->ob_tfm_motion); + sd->ob_itfm_motion = transform_inverse(sd->ob_tfm_motion); } } #endif @@ -407,7 +407,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg, { /* vectors */ - sd->P = ray->P; + sd->P = ray->P + ray->D * ray->tmin; sd->N = -ray->D; sd->Ng = -ray->D; sd->I = -ray->D; @@ -441,7 +441,6 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg, /* for NDC coordinates */ sd->ray_P = ray->P; - sd->ray_dP = ray->dP; } #endif /* __VOLUME__ */ diff --git a/intern/cycles/kernel/geom/triangle_intersect.h b/intern/cycles/kernel/geom/triangle_intersect.h index 0c76de9ccc7..f968e537cfa 100644 --- a/intern/cycles/kernel/geom/triangle_intersect.h +++ b/intern/cycles/kernel/geom/triangle_intersect.h @@ -17,6 +17,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg, ccl_private Intersection *isect, float3 P, float3 dir, + float tmin, float tmax, uint visibility, int object, @@ -28,7 +29,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg, tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1), tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2); float t, u, v; - if (ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) { + if (ray_triangle_intersect(P, dir, tmin, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) { 
#ifdef __VISIBILITY_FLAG__ /* Visibility flag test. we do it here under the assumption * that most triangles are culled by node flags. @@ -62,6 +63,7 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg, int object, int prim, int prim_addr, + float tmin, float tmax, ccl_private uint *lcg_state, int max_hits) @@ -71,7 +73,7 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg, tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1), tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2); float t, u, v; - if (!ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) { + if (!ray_triangle_intersect(P, dir, tmin, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) { return false; } diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h index c63684d58e6..bf3f41b52b9 100644 --- a/intern/cycles/kernel/integrator/init_from_bake.h +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -174,14 +174,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, Ray ray ccl_optional_struct_init; ray.P = zero_float3(); ray.D = normalize(P); - ray.t = FLT_MAX; + ray.tmin = 0.0f; + ray.tmax = FLT_MAX; ray.time = 0.5f; ray.dP = differential_zero_compact(); ray.dD = differential_zero_compact(); integrator_state_write_ray(kg, state, &ray); /* Setup next kernel to execute. */ - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } else { /* Surface baking. */ @@ -210,7 +211,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, Ray ray ccl_optional_struct_init; ray.P = P + N; ray.D = -N; - ray.t = FLT_MAX; + ray.tmin = 0.0f; + ray.tmax = FLT_MAX; ray.time = 0.5f; /* Setup differentials. 
*/ @@ -247,13 +249,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index); + integrator_path_init_sorted( + kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); + integrator_path_init_sorted( + kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); } else { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index); } } diff --git a/intern/cycles/kernel/integrator/init_from_camera.h b/intern/cycles/kernel/integrator/init_from_camera.h index 9fe27cdda9a..e89ab3991c7 100644 --- a/intern/cycles/kernel/integrator/init_from_camera.h +++ b/intern/cycles/kernel/integrator/init_from_camera.h @@ -86,7 +86,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, /* Generate camera ray. */ Ray ray; integrate_camera_sample(kg, sample, x, y, rng_hash, &ray); - if (ray.t == 0.0f) { + if (ray.tmax == 0.0f) { return true; } @@ -100,10 +100,10 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, /* Continue with intersect_closest kernel, optionally initializing volume * stack before that if the camera may be inside a volume. 
*/ if (kernel_data.cam.is_inside_volume) { - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); } else { - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } return true; diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index 621aa05f46b..60299f2cb2f 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -109,14 +109,14 @@ ccl_device_forceinline void integrator_split_shadow_catcher( /* If using background pass, schedule background shading kernel so that we have a background * to alpha-over on. The background kernel will then continue the path afterwards. */ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND; - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); return; } if (!integrator_state_volume_stack_is_empty(kg, state)) { /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher * objects from it, and then continue shading volume and shadow catcher surface after. 
*/ - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); return; } @@ -128,18 +128,19 @@ ccl_device_forceinline void integrator_split_shadow_catcher( const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_init_sorted( + kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } } /* Schedule next kernel to be executed after updating volume stack for shadow catcher. 
*/ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume( KernelGlobals kg, IntegratorState state) { @@ -156,20 +157,21 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } } /* Schedule next kernel to be executed after executing background shader for shadow catcher. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background( KernelGlobals kg, IntegratorState state) { @@ -177,7 +179,8 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche if (!integrator_state_volume_stack_is_empty(kg, state)) { /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher * objects from it, and then continue shading volume and shadow catcher surface after. 
*/ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_next( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); return; } @@ -190,7 +193,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche * * Note that current_kernel is a template value since making this a variable * leads to poor performance with CUDA atomics. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel( KernelGlobals kg, IntegratorState state, @@ -206,10 +209,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel( const int flags = (hit_surface) ? kernel_data_fetch(shaders, shader).flags : 0; if (!integrator_intersect_terminate(kg, state, flags)) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } return; } @@ -218,7 +221,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel( if (hit) { /* Hit a surface, continue with light or surface kernel. */ if (isect->type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); } else { /* Hit a surface, continue with surface kernel unless terminated. 
*/ @@ -231,16 +234,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel( (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } #ifdef __SHADOW_CATCHER__ @@ -249,13 +252,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel( #endif } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } } } else { /* Nothing hit, continue with background kernel. */ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } } @@ -263,7 +266,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel( * * The logic here matches integrator_intersect_next_kernel, except that * volume shading and termination testing have already been done. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( KernelGlobals kg, IntegratorState state, @@ -273,7 +276,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( if (isect->prim != PRIM_NONE) { /* Hit a surface, continue with light or surface kernel. 
*/ if (isect->type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); return; } else { @@ -286,16 +289,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } #ifdef __SHADOW_CATCHER__ @@ -307,7 +310,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( } else { /* Nothing hit, continue with background kernel. */ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); return; } } @@ -321,7 +324,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, /* Read ray from integrator state into local memory. 
*/ Ray ray ccl_optional_struct_init; integrator_state_read_ray(kg, state, &ray); - kernel_assert(ray.t != 0.0f); + kernel_assert(ray.tmax != 0.0f); const uint visibility = path_state_ray_visibility(state); const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim); @@ -329,12 +332,12 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, /* Trick to use short AO rays to approximate indirect light at the end of the path. */ if (path_state_ao_bounce(kg, state)) { - ray.t = kernel_data.integrator.ao_bounces_distance; + ray.tmax = kernel_data.integrator.ao_bounces_distance; if (last_isect_object != OBJECT_NONE) { const float object_ao_distance = kernel_data_fetch(objects, last_isect_object).ao_distance; if (object_ao_distance != 0.0f) { - ray.t = object_ao_distance; + ray.tmax = object_ao_distance; } } } diff --git a/intern/cycles/kernel/integrator/intersect_shadow.h b/intern/cycles/kernel/integrator/intersect_shadow.h index 3e746998225..25ff3d5b23f 100644 --- a/intern/cycles/kernel/integrator/intersect_shadow.h +++ b/intern/cycles/kernel/integrator/intersect_shadow.h @@ -51,7 +51,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals k } #ifdef __TRANSPARENT_SHADOWS__ -# if defined(__KERNEL_CPU__) +# ifndef __KERNEL_GPU__ ccl_device int shadow_intersections_compare(const void *a, const void *b) { const Intersection *isect_a = (const Intersection *)a; @@ -162,7 +162,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt if (opaque_hit) { /* Hit an opaque surface, shadow path ends here. */ - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); return; } else { @@ -171,7 +171,9 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt * * TODO: could also write to render buffer directly if no transparent shadows? 
* Could save a kernel execution for the common case. */ - INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, + integrator_shadow_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } diff --git a/intern/cycles/kernel/integrator/intersect_subsurface.h b/intern/cycles/kernel/integrator/intersect_subsurface.h index 0a2c4ad680d..f439d6905a0 100644 --- a/intern/cycles/kernel/integrator/intersect_subsurface.h +++ b/intern/cycles/kernel/integrator/intersect_subsurface.h @@ -17,7 +17,7 @@ ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorStat } #endif - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h index 49ef01dc870..b53bee11312 100644 --- a/intern/cycles/kernel/integrator/intersect_volume_stack.h +++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h @@ -24,7 +24,8 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, Ray volume_ray ccl_optional_struct_init; volume_ray.P = from_P; - volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t); + volume_ray.D = normalize_len(to_P - from_P, &volume_ray.tmax); + volume_ray.tmin = 0.0f; volume_ray.self.object = INTEGRATOR_STATE(state, isect, object); volume_ray.self.prim = INTEGRATOR_STATE(state, isect, prim); volume_ray.self.light_object = OBJECT_NONE; @@ -37,8 +38,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, #ifdef __VOLUME_RECORD_ALL__ Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all( - kg, &volume_ray, hits, 2 * volume_stack_size, visibility); + uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, 
visibility); if (num_hits > 0) { Intersection *isect = hits; @@ -58,12 +58,9 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, volume_stack_enter_exit(kg, state, stack_sd); /* Move ray forward. */ - volume_ray.P = stack_sd->P; + volume_ray.tmin = intersection_t_offset(isect.t); volume_ray.self.object = isect.object; volume_ray.self.prim = isect.prim; - if (volume_ray.t != FLT_MAX) { - volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t); - } ++step; } #endif @@ -82,7 +79,8 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s /* Trace ray in random direction. Any direction works, Z up is a guess to get the * fewest hits. */ volume_ray.D = make_float3(0.0f, 0.0f, 1.0f); - volume_ray.t = FLT_MAX; + volume_ray.tmin = 0.0f; + volume_ray.tmax = FLT_MAX; volume_ray.self.object = OBJECT_NONE; volume_ray.self.prim = PRIM_NONE; volume_ray.self.light_object = OBJECT_NONE; @@ -109,8 +107,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s #ifdef __VOLUME_RECORD_ALL__ Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all( - kg, &volume_ray, hits, 2 * volume_stack_size, visibility); + uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility); if (num_hits > 0) { int enclosed_volumes[MAX_VOLUME_STACK_SIZE]; Intersection *isect = hits; @@ -199,7 +196,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s } /* Move ray forward. */ - volume_ray.P = stack_sd->P; + volume_ray.tmin = intersection_t_offset(isect.t); volume_ray.self.object = isect.object; volume_ray.self.prim = isect.prim; ++step; @@ -222,7 +219,9 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt } else { /* Volume stack init for camera rays, continue with intersection of camera ray. 
*/ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, + integrator_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } } diff --git a/intern/cycles/kernel/integrator/mnee.h b/intern/cycles/kernel/integrator/mnee.h index 67505b9b612..7a6f866b1a0 100644 --- a/intern/cycles/kernel/integrator/mnee.h +++ b/intern/cycles/kernel/integrator/mnee.h @@ -137,8 +137,14 @@ ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg, } } else if (ls->type == LIGHT_AREA) { + float invarea = fabsf(klight->area.invarea); ls->D = normalize_len(ls->P - P, &ls->t); - ls->pdf = fabsf(klight->area.invarea); + ls->pdf = invarea; + if (klight->area.tan_spread > 0.f) { + ls->eval_fac = 0.25f * invarea; + ls->eval_fac *= light_spread_attenuation( + ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread); + } } ls->pdf *= kernel_data.integrator.pdf_lights; @@ -436,6 +442,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, projection_ray.self.light_prim = PRIM_NONE; projection_ray.dP = differential_make_compact(sd->dP); projection_ray.dD = differential_zero_compact(); + projection_ray.tmin = 0.0f; projection_ray.time = sd->time; Intersection projection_isect; @@ -499,8 +506,8 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, projection_ray.self.prim = pv.prim; projection_ray.P = pv.p; } - projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.t); - projection_ray.t *= MNEE_PROJECTION_DISTANCE_MULTIPLIER; + projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.tmax); + projection_ray.tmax *= MNEE_PROJECTION_DISTANCE_MULTIPLIER; bool projection_success = false; for (int isect_count = 0; isect_count < MNEE_MAX_INTERSECTION_COUNT; isect_count++) { @@ -519,8 +526,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, projection_ray.self.object = projection_isect.object; 
projection_ray.self.prim = projection_isect.prim; - projection_ray.P += projection_isect.t * projection_ray.D; - projection_ray.t -= projection_isect.t; + projection_ray.tmin = intersection_t_offset(projection_isect.t); } if (!projection_success) { reduce_stepsize = true; @@ -852,6 +858,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, Ray probe_ray; probe_ray.self.light_object = ls->object; probe_ray.self.light_prim = ls->prim; + probe_ray.tmin = 0.0f; probe_ray.dP = differential_make_compact(sd->dP); probe_ray.dD = differential_zero_compact(); probe_ray.time = sd->time; @@ -867,13 +874,13 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, ccl_private const ManifoldVertex &v = vertices[vi]; /* Check visibility. */ - probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.t); + probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.tmax); if (scene_intersect(kg, &probe_ray, PATH_RAY_TRANSMIT, &probe_isect)) { int hit_object = (probe_isect.object == OBJECT_NONE) ? kernel_data_fetch(prim_object, probe_isect.prim) : probe_isect.object; /* Test whether the ray hit the appropriate object at its intended location. */ - if (hit_object != v.object || fabsf(probe_ray.t - probe_isect.t) > MNEE_MIN_DISTANCE) + if (hit_object != v.object || fabsf(probe_ray.tmax - probe_isect.t) > MNEE_MIN_DISTANCE) return false; } probe_ray.self.object = v.object; @@ -952,15 +959,16 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, probe_ray.self.light_object = ls->object; probe_ray.self.light_prim = ls->prim; probe_ray.P = sd->P; + probe_ray.tmin = 0.0f; if (ls->t == FLT_MAX) { /* Distant / env light. */ probe_ray.D = ls->D; - probe_ray.t = ls->t; + probe_ray.tmax = ls->t; } else { /* Other lights, avoid self-intersection. 
*/ probe_ray.D = ls->P - probe_ray.P; - probe_ray.D = normalize_len(probe_ray.D, &probe_ray.t); + probe_ray.D = normalize_len(probe_ray.D, &probe_ray.tmax); } probe_ray.dP = differential_make_compact(sd->dP); probe_ray.dD = differential_zero_compact(); @@ -1042,9 +1050,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, probe_ray.self.object = probe_isect.object; probe_ray.self.prim = probe_isect.prim; - probe_ray.P += probe_isect.t * probe_ray.D; - if (ls->t != FLT_MAX) - probe_ray.t -= probe_isect.t; + probe_ray.tmin = intersection_t_offset(probe_isect.t); }; /* Mark the manifold walk invalid to keep mollification on by default. */ diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h index 1a085506a70..b09bc117d78 100644 --- a/intern/cycles/kernel/integrator/path_state.h +++ b/intern/cycles/kernel/integrator/path_state.h @@ -13,7 +13,7 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void path_state_init_queues(IntegratorState state) { INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0; INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0; #endif @@ -52,7 +52,6 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | PATH_RAY_TRANSPARENT_BACKGROUND; INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX; INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f; INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f); diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h index 4791a963ae6..a7edfffd175 100644 --- 
a/intern/cycles/kernel/integrator/shade_background.h +++ b/intern/cycles/kernel/integrator/shade_background.h @@ -62,11 +62,10 @@ ccl_device float3 integrator_eval_background_shader(KernelGlobals kg, const float3 ray_P = INTEGRATOR_STATE(state, ray, P); const float3 ray_D = INTEGRATOR_STATE(state, ray, D); const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); /* multiple importance sampling, get background light pdf for ray * direction, and compute weight with respect to BSDF pdf */ - const float pdf = background_light_pdf(kg, ray_P - ray_D * mis_ray_t, ray_D); + const float pdf = background_light_pdf(kg, ray_P, ray_D); const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf); L *= mis_weight; } @@ -213,7 +212,7 @@ ccl_device void integrator_shade_background(KernelGlobals kg, } #endif - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h index be926c78439..910e3383f51 100644 --- a/intern/cycles/kernel/integrator/shade_light.h +++ b/intern/cycles/kernel/integrator/shade_light.h @@ -22,19 +22,8 @@ ccl_device_inline void integrate_light(KernelGlobals kg, const float3 ray_D = INTEGRATOR_STATE(state, ray, D); const float ray_time = INTEGRATOR_STATE(state, ray, time); - /* Advance ray beyond light. */ - /* TODO: can we make this more numerically robust to avoid reintersecting the - * same light in some cases? Ray should not intersect surface anymore as the - * object and prim ids will prevent self intersection. */ - const float3 new_ray_P = ray_P + ray_D * isect.t; - INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P; - INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t; - - /* Set position to where the BSDF was sampled, for correct MIS PDF. 
*/ - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); - ray_P -= ray_D * mis_ray_t; - isect.t += mis_ray_t; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = isect.t; + /* Advance ray to new start distance. */ + INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(isect.t); LightSample ls ccl_optional_struct_init; const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls); @@ -99,11 +88,13 @@ ccl_device void integrator_shade_light(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); return; } else { - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, + integrator_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); return; } diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h index 2b929b7b62e..4b002a47bee 100644 --- a/intern/cycles/kernel/integrator/shade_shadow.h +++ b/intern/cycles/kernel/integrator/shade_shadow.h @@ -75,13 +75,9 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, ray.self.light_object = OBJECT_NONE; ray.self.light_prim = PRIM_NONE; /* Modify ray position and length to match current segment. */ - const float start_t = (hit == 0) ? 0.0f : - INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t); - const float end_t = (hit < num_recorded_hits) ? - INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) : - ray.t; - ray.P += start_t * ray.D; - ray.t = end_t - start_t; + ray.tmin = (hit == 0) ? ray.tmin : INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t); + ray.tmax = (hit < num_recorded_hits) ? 
INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) : + ray.tmax; shader_setup_from_volume(kg, shadow_sd, &ray); @@ -137,10 +133,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, /* There are more hits that we could not recorded due to memory usage, * adjust ray to intersect again from the last hit. */ const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t); - const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P); - const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D); - INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_P + last_hit_t * ray_D; - INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t; + INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = intersection_t_offset(last_hit_t); } return false; @@ -158,20 +151,22 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg, /* Evaluate transparent shadows. */ const bool opaque = integrate_transparent_shadow(kg, state, num_hits); if (opaque) { - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } #endif if (shadow_intersections_has_remaining(num_hits)) { /* More intersections to find, continue shadow ray. 
*/ - INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, + integrator_shadow_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); return; } else { kernel_accum_light(kg, state, render_buffer); - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } } diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index 57b88b806a4..1514b3956ad 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -77,7 +77,7 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, # endif { const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t); + const float t = sd->ray_length; /* Multiple importance sampling, get triangle light pdf, * and compute weight with respect to BSDF pdf. */ @@ -190,8 +190,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, const bool is_light = light_sample_is_light(&ls); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT( - shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false); /* Copy volume stack and enter/exit volume. */ integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); @@ -323,16 +323,21 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( return LABEL_NONE; } - /* Setup ray. Note that clipping works through transparent bounces. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; - INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in); - INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ? 
- INTEGRATOR_STATE(state, ray, t) - sd->ray_length : - FLT_MAX; + if (label & LABEL_TRANSPARENT) { + /* Only need to modify start distance for transparent. */ + INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length); + } + else { + /* Setup ray with changed origin and direction. */ + INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; + INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in); + INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; #ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in); + INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); + INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in); #endif + } /* Update throughput. */ float3 throughput = INTEGRATOR_STATE(state, path, throughput); @@ -349,12 +354,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( } /* Update path state */ - if (label & LABEL_TRANSPARENT) { - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; - } - else { + if (!(label & LABEL_TRANSPARENT)) { INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); } @@ -371,17 +372,8 @@ ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState return LABEL_NONE; } - /* Setup ray position, direction stays unchanged. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; - - /* Clipping works through transparent. 
*/ - INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length; - -# ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); -# endif - - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; + /* Only modify start distance. */ + INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length); return LABEL_TRANSMIT | LABEL_TRANSPARENT; } @@ -432,7 +424,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, Ray ray ccl_optional_struct_init; ray.P = shadow_ray_offset(kg, sd, ao_D, &skip_self); ray.D = ao_D; - ray.t = kernel_data.integrator.ao_bounces_distance; + ray.tmin = 0.0f; + ray.tmax = kernel_data.integrator.ao_bounces_distance; ray.time = sd->time; ray.self.object = (skip_self) ? sd->object : OBJECT_NONE; ray.self.prim = (skip_self) ? sd->prim : PRIM_NONE; @@ -442,7 +435,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, ray.dD = differential_zero_compact(); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, true); /* Copy volume stack and enter/exit volume. 
*/ integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); @@ -604,22 +598,23 @@ ccl_device bool integrate_surface(KernelGlobals kg, } template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE, - int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE> + DeviceKernel current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE> ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) { if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) { if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); + integrator_path_next( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); } else { - kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f); - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + kernel_assert(INTEGRATOR_STATE(state, ray, tmax) != 0.0f); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } } diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 6cf80f4ddc5..4aab097a7d8 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -114,7 +114,8 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg, ccl_device_forceinline void volume_step_init(KernelGlobals kg, ccl_private const RNGState *rng_state, const float object_step_size, - float t, + const float tmin, + const float tmax, ccl_private float *step_size, ccl_private float *step_shade_offset, ccl_private float *steps_offset, @@ -122,7 +123,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, { if (object_step_size 
== FLT_MAX) { /* Homogeneous volume. */ - *step_size = t; + *step_size = tmax - tmin; *step_shade_offset = 0.0f; *steps_offset = 1.0f; *max_steps = 1; @@ -130,6 +131,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, else { /* Heterogeneous volume. */ *max_steps = kernel_data.integrator.volume_max_steps; + const float t = tmax - tmin; float step = min(object_step_size, t); /* compute exact steps in advance for malloc */ @@ -165,7 +167,7 @@ ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState stat float3 sigma_t = zero_float3(); if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) { - *throughput *= volume_color_transmittance(sigma_t, ray->t); + *throughput *= volume_color_transmittance(sigma_t, ray->tmax - ray->tmin); } } # endif @@ -194,7 +196,8 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, volume_step_init(kg, &rng_state, object_step_size, - ray->t, + ray->tmin, + ray->tmax, &step_size, &step_shade_offset, &unused, @@ -202,13 +205,13 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, const float steps_offset = 1.0f; /* compute extinction at the start */ - float t = 0.0f; + float t = ray->tmin; float3 sum = zero_float3(); for (int i = 0; i < max_steps; i++) { /* advance to new position */ - float new_t = min(ray->t, (i + steps_offset) * step_size); + float new_t = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size); float dt = new_t - t; float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset); @@ -233,7 +236,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, /* stop if at the end of the volume */ t = new_t; - if (t == ray->t) { + if (t == ray->tmax) { /* Update throughput in case we haven't done it above */ tp = *throughput * exp(sum); break; @@ -257,15 +260,16 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r const float xi, ccl_private float *pdf) { - const float t = ray->t; + const float tmin = ray->tmin; + const float 
tmax = ray->tmax; const float delta = dot((light_P - ray->P), ray->D); const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); if (UNLIKELY(D == 0.0f)) { *pdf = 0.0f; return 0.0f; } - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); + const float theta_a = atan2f(tmin - delta, D); + const float theta_b = atan2f(tmax - delta, D); const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a); if (UNLIKELY(theta_b == theta_a)) { *pdf = 0.0f; @@ -273,7 +277,7 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r } *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); - return min(t, delta + t_); /* min is only for float precision errors */ + return clamp(delta + t_, tmin, tmax); /* clamp is only for float precision errors */ } ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray, @@ -286,11 +290,12 @@ ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray, return 0.0f; } - const float t = ray->t; + const float tmin = ray->tmin; + const float tmax = ray->tmax; const float t_ = sample_t - delta; - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); + const float theta_a = atan2f(tmin - delta, D); + const float theta_b = atan2f(tmax - delta, D); if (UNLIKELY(theta_b == theta_a)) { return 0.0f; } @@ -310,11 +315,12 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray, return 0.0f; } - const float t = ray->t; + const float tmin = ray->tmin; + const float tmax = ray->tmax; const float t_ = sample_t - delta; - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); + const float theta_a = atan2f(tmin - delta, D); + const float theta_b = atan2f(tmax - delta, D); if (UNLIKELY(theta_b == theta_a)) { return 0.0f; } @@ -390,8 +396,8 @@ ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients typedef struct 
VolumeIntegrateState { /* Volume segment extents. */ - float start_t; - float end_t; + float tmin; + float tmax; /* If volume is absorption-only up to this point, and no probabilistic * scattering or termination has been used yet. */ @@ -426,9 +432,9 @@ ccl_device_forceinline void volume_integrate_step_scattering( /* Equiangular sampling for direct lighting. */ if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) { - if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t && + if (result.direct_t >= vstate.tmin && result.direct_t <= vstate.tmax && vstate.equiangular_pdf > VOLUME_SAMPLE_PDF_CUTOFF) { - const float new_dt = result.direct_t - vstate.start_t; + const float new_dt = result.direct_t - vstate.tmin; const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); result.direct_scatter = true; @@ -458,7 +464,7 @@ ccl_device_forceinline void volume_integrate_step_scattering( /* compute sampling distance */ const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel); const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t; - const float new_t = vstate.start_t + new_dt; + const float new_t = vstate.tmin + new_dt; /* transmittance and pdf */ const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); @@ -528,7 +534,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous( volume_step_init(kg, rng_state, object_step_size, - ray->t, + ray->tmin, + ray->tmax, &step_size, &step_shade_offset, &steps_offset, @@ -536,8 +543,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Initialize volume integration state. 
*/ VolumeIntegrateState vstate ccl_optional_struct_init; - vstate.start_t = 0.0f; - vstate.end_t = 0.0f; + vstate.tmin = ray->tmin; + vstate.tmax = ray->tmin; vstate.absorption_only = true; vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE); vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL); @@ -578,8 +585,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous( for (int i = 0; i < max_steps; i++) { /* Advance to new position */ - vstate.end_t = min(ray->t, (i + steps_offset) * step_size); - const float shade_t = vstate.start_t + (vstate.end_t - vstate.start_t) * step_shade_offset; + vstate.tmax = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size); + const float shade_t = vstate.tmin + (vstate.tmax - vstate.tmin) * step_shade_offset; sd->P = ray->P + ray->D * shade_t; /* compute segment */ @@ -588,7 +595,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( const int closure_flag = sd->flag; /* Evaluate transmittance over segment. */ - const float dt = (vstate.end_t - vstate.start_t); + const float dt = (vstate.tmax - vstate.tmin); const float3 transmittance = (closure_flag & SD_EXTINCTION) ? volume_color_transmittance(coeff.sigma_t, dt) : one_float3(); @@ -645,8 +652,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous( } /* Stop if at the end of the volume. */ - vstate.start_t = vstate.end_t; - if (vstate.start_t == ray->t) { + vstate.tmin = vstate.tmax; + if (vstate.tmin == ray->tmax) { break; } } @@ -774,8 +781,8 @@ ccl_device_forceinline void integrate_volume_direct_light( const bool is_light = light_sample_is_light(ls); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT( - shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false); /* Write shadow ray and associated state to global memory. 
*/ integrator_state_write_shadow_ray(kg, shadow_state, &ray); @@ -880,7 +887,8 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( /* Setup ray. */ INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in); - INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; # ifdef __RAY_DIFFERENTIALS__ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in); @@ -901,7 +909,6 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( /* Update path state */ INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); @@ -1021,7 +1028,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg, integrator_state_read_isect(kg, state, &isect); /* Set ray length to current segment. */ - ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX; + ray.tmax = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX; /* Clean volume stack for background rays. */ if (isect.prim == PRIM_NONE) { @@ -1032,13 +1039,15 @@ ccl_device void integrator_shade_volume(KernelGlobals kg, if (event == VOLUME_PATH_SCATTERED) { /* Queue intersect_closest kernel. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, + integrator_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); return; } else if (event == VOLUME_PATH_MISSED) { /* End path. 
*/ - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); return; } else { diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h index 42d44580f80..ff63625aceb 100644 --- a/intern/cycles/kernel/integrator/shadow_catcher.h +++ b/intern/cycles/kernel/integrator/shadow_catcher.h @@ -50,7 +50,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg, ConstIntegratorState state) { - if (INTEGRATOR_PATH_IS_TERMINATED) { + if (integrator_path_is_terminated(state)) { return false; } diff --git a/intern/cycles/kernel/integrator/shadow_state_template.h b/intern/cycles/kernel/integrator/shadow_state_template.h index eaee65ada40..c340467606d 100644 --- a/intern/cycles/kernel/integrator/shadow_state_template.h +++ b/intern/cycles/kernel/integrator/shadow_state_template.h @@ -47,7 +47,8 @@ KERNEL_STRUCT_END(shadow_path) KERNEL_STRUCT_BEGIN(shadow_ray) KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, tmin, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, tmax, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, int, object, KERNEL_FEATURE_PATH_TRACING) diff --git a/intern/cycles/kernel/integrator/state.h b/intern/cycles/kernel/integrator/state.h index d6fef27f344..d1907bd6e16 100644 --- a/intern/cycles/kernel/integrator/state.h +++ b/intern/cycles/kernel/integrator/state.h @@ -127,6 +127,9 @@ typedef struct IntegratorStateGPU { /* 
Index of main path which will be used by a next shadow catcher split. */ ccl_global int *next_main_path_index; + + /* Divisor used to partition active indices by locality when sorting by material. */ + uint sort_partition_divisor; } IntegratorStateGPU; /* Abstraction @@ -137,7 +140,7 @@ typedef struct IntegratorStateGPU { * happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors * from a kernel which operates on a shadow catcher state will cause bad memory access. */ -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ /* Scalar access on CPU. */ @@ -156,7 +159,7 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState; # define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ ((state)->nested_struct[array_index].member) -#else /* __KERNEL_CPU__ */ +#else /* !__KERNEL_GPU__ */ /* Array access on GPU with Structure-of-Arrays. */ @@ -177,6 +180,6 @@ typedef int ConstIntegratorShadowState; # define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) -#endif /* __KERNEL_CPU__ */ +#endif /* !__KERNEL_GPU__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/state_flow.h b/intern/cycles/kernel/integrator/state_flow.h index fed74d49434..4b03c665e17 100644 --- a/intern/cycles/kernel/integrator/state_flow.h +++ b/intern/cycles/kernel/integrator/state_flow.h @@ -10,125 +10,196 @@ CCL_NAMESPACE_BEGIN /* Control Flow * - * Utilities for control flow between kernels. The implementation may differ per device - * or even be handled on the host side. To abstract such differences, experiment with - * different implementations and for debugging, this is abstracted using macros. + * Utilities for control flow between kernels. The implementation is different between CPU and + * GPU devices. For the latter part of the logic is handled on the host side with wavefronts. 
* * There is a main path for regular path tracing camera for path tracing. Shadows for next * event estimation branch off from this into their own path, that may be computed in - * parallel while the main path continues. + * parallel while the main path continues. Additionally, shading kernels are sorted using + * a key for coherence. * * Each kernel on the main path must call one of these functions. These may not be called * multiple times from the same kernel. * - * INTEGRATOR_PATH_INIT(next_kernel) - * INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) - * INTEGRATOR_PATH_TERMINATE(current_kernel) + * integrator_path_init(kg, state, next_kernel) + * integrator_path_next(kg, state, current_kernel, next_kernel) + * integrator_path_terminate(kg, state, current_kernel) * * For the shadow path similar functions are used, and again each shadow kernel must call * one of them, and only once. */ -#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0) -#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \ - (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0) +ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state) +{ + return INTEGRATOR_STATE(state, path, queued_kernel) == 0; +} + +ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state) +{ + return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0; +} #ifdef __KERNEL_GPU__ -# define INTEGRATOR_PATH_INIT(next_kernel) \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) 
= next_kernel; -# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; - -# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ - IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.next_shadow_path_index[0], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; - -# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ - { \ - const int key_ = key; \ - atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ - 1); \ - } -# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ - { \ - const int key_ = key; \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - 
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ - 1); \ - } +ccl_device_forceinline void integrator_path_init(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel) +{ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_next(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; +} + +ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init( + KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao) +{ + IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( + &kernel_integrator_state.next_shadow_path_index[0], 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; + return shadow_state; +} + +ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + 
atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; +} + +/* Sort first by truncated state index (for good locality), then by key (for good coherence). */ +# define INTEGRATOR_SORT_KEY(key, state) \ + (key + kernel_data.max_shaders * (state / kernel_integrator_state.sort_partition_divisor)) + +ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel, + const uint32_t key) +{ + const int key_ = INTEGRATOR_SORT_KEY(key, state); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; + atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1); +} + +ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel, + const uint32_t key) +{ + const int key_ = INTEGRATOR_SORT_KEY(key, state); + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; + 
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1); +} #else -# define INTEGRATOR_PATH_INIT(next_kernel) \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)key; \ - } -# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)current_kernel; \ - } -# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \ - (void)current_kernel; \ - } -# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)key; \ - (void)current_kernel; \ - } - -# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ - IntegratorShadowState shadow_state = &state->shadow_type; \ - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \ - (void)current_kernel; \ - } -# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \ - (void)current_kernel; \ - } +ccl_device_forceinline void integrator_path_init(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel, + const uint32_t key) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)key; +} + +ccl_device_forceinline void integrator_path_next(KernelGlobals kg, + IntegratorState 
state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel, + const uint32_t key) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)key; + (void)current_kernel; +} + +ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init( + KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao) +{ + IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; + return shadow_state; +} + +ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel) +{ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; + (void)current_kernel; +} #endif diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h index e7e6db037b0..5c2af131945 100644 --- a/intern/cycles/kernel/integrator/state_template.h +++ b/intern/cycles/kernel/integrator/state_template.h @@ -37,11 +37,10 @@ KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) /* enum PathRayMNEE */ 
KERNEL_STRUCT_MEMBER(path, uint8_t, mnee, KERNEL_FEATURE_PATH_TRACING) /* Multiple importance sampling - * The PDF of BSDF sampling at the last scatter point, and distance to the - * last scatter point minus the last ray segment. This distance lets us - * compute the complete distance through transparent surfaces and volumes. */ + * The PDF of BSDF sampling at the last scatter point, which is at ray distance + * zero and distance. Note that transparency and volume attenuation increase + * the ray tmin but keep P unmodified so that this works. */ KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING) /* Filter glossy. */ KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING) /* Continuation probability for path termination. */ @@ -63,7 +62,8 @@ KERNEL_STRUCT_END(path) KERNEL_STRUCT_BEGIN(ray) KERNEL_STRUCT_MEMBER(ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, tmin, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, tmax, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING) diff --git a/intern/cycles/kernel/integrator/state_util.h b/intern/cycles/kernel/integrator/state_util.h index 280db2d1aac..168122d3a78 100644 --- a/intern/cycles/kernel/integrator/state_util.h +++ b/intern/cycles/kernel/integrator/state_util.h @@ -17,7 +17,8 @@ ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg, { INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P; INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D; - INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(state, ray, tmin) = 
ray->tmin; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = ray->tmax; INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time; INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP; INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD; @@ -29,7 +30,8 @@ ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg, { ray->P = INTEGRATOR_STATE(state, ray, P); ray->D = INTEGRATOR_STATE(state, ray, D); - ray->t = INTEGRATOR_STATE(state, ray, t); + ray->tmin = INTEGRATOR_STATE(state, ray, tmin); + ray->tmax = INTEGRATOR_STATE(state, ray, tmax); ray->time = INTEGRATOR_STATE(state, ray, time); ray->dP = INTEGRATOR_STATE(state, ray, dP); ray->dD = INTEGRATOR_STATE(state, ray, dD); @@ -42,7 +44,8 @@ ccl_device_forceinline void integrator_state_write_shadow_ray( { INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P; INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D; - INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = ray->tmin; + INTEGRATOR_STATE_WRITE(state, shadow_ray, tmax) = ray->tmax; INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time; INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP; } @@ -53,7 +56,8 @@ ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg, { ray->P = INTEGRATOR_STATE(state, shadow_ray, P); ray->D = INTEGRATOR_STATE(state, shadow_ray, D); - ray->t = INTEGRATOR_STATE(state, shadow_ray, t); + ray->tmin = INTEGRATOR_STATE(state, shadow_ray, tmin); + ray->tmax = INTEGRATOR_STATE(state, shadow_ray, tmax); ray->time = INTEGRATOR_STATE(state, shadow_ray, time); ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP); ray->dD = differential_zero_compact(); @@ -334,7 +338,7 @@ ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGl return to_state; } -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int) { return INTEGRATOR_STATE(state, path, bounce); diff --git 
a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h index 1e6fcf4aff0..2f96f215d8a 100644 --- a/intern/cycles/kernel/integrator/subsurface.h +++ b/intern/cycles/kernel/integrator/subsurface.h @@ -38,7 +38,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg, /* Setup ray into surface. */ INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N; - INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact(); @@ -160,7 +161,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat /* Pretend ray is coming from the outside towards the exit point. This ensures * correct front/back facing normals. * TODO: find a more elegant solution? */ - ray.P += ray.D * ray.t * 2.0f; + ray.P += ray.D * ray.tmax * 2.0f; ray.D = -ray.D; integrator_state_write_isect(kg, state, &ss_isect.hits[0]); @@ -177,17 +178,23 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, 
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } diff --git a/intern/cycles/kernel/integrator/subsurface_disk.h b/intern/cycles/kernel/integrator/subsurface_disk.h index ae857c50493..60b63c075a0 100644 --- a/intern/cycles/kernel/integrator/subsurface_disk.h +++ b/intern/cycles/kernel/integrator/subsurface_disk.h @@ -82,7 +82,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, /* Create ray. */ ray.P = P + disk_N * disk_height + disk_P; ray.D = -disk_N; - ray.t = 2.0f * disk_height; + ray.tmin = 0.0f; + ray.tmax = 2.0f * disk_height; ray.dP = ray_dP; ray.dD = differential_zero_compact(); ray.time = time; @@ -125,17 +126,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { /* Transform normal to world space. */ Transform itfm; - Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm); + object_fetch_transform_motion_test(kg, object, time, &itfm); hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng)); - - /* Transform t to world space, except for OptiX and MetalRT where it already is. */ -#ifdef __KERNEL_GPU_RAYTRACING__ - (void)tfm; -#else - float3 D = transform_direction(&itfm, ray.D); - D = normalize(D) * ss_isect.hits[hit].t; - ss_isect.hits[hit].t = len(transform_direction(&tfm, D)); -#endif } /* Quickly retrieve P and Ng without setting up ShaderData. */ @@ -188,7 +180,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, ray.P = ray.P + ray.D * ss_isect.hits[hit].t; ray.D = ss_isect.Ng[hit]; - ray.t = 1.0f; + ray.tmin = 0.0f; + ray.tmax = 1.0f; return true; } diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h index 8094bf7159e..e43bbb3c50a 100644 --- a/intern/cycles/kernel/integrator/subsurface_random_walk.h +++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h @@ -195,7 +195,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* Setup ray. 
*/ ray.P = P; ray.D = D; - ray.t = FLT_MAX; + ray.tmin = 0.0f; + ray.tmax = FLT_MAX; ray.time = time; ray.dP = ray_dP; ray.dD = differential_zero_compact(); @@ -204,12 +205,6 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, ray.self.light_object = OBJECT_NONE; ray.self.light_prim = PRIM_NONE; -#ifndef __KERNEL_GPU_RAYTRACING__ - /* Compute or fetch object transforms. */ - Transform ob_itfm ccl_optional_struct_init; - Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm); -#endif - /* Convert subsurface to volume coefficients. * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */ const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo); @@ -370,10 +365,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, * chance of connecting to it. * TODO: Maybe use less than 10 times the mean free path? */ if (bounce == 0) { - ray.t = max(t, 10.0f / (reduce_min(sigma_t))); + ray.tmax = max(t, 10.0f / (reduce_min(sigma_t))); } else { - ray.t = t; + ray.tmax = t; /* After the first bounce the object can intersect the same surface again */ ray.self.object = OBJECT_NONE; ray.self.prim = PRIM_NONE; @@ -382,31 +377,23 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, hit = (ss_isect.num_hits > 0); if (hit) { -#ifdef __KERNEL_GPU_RAYTRACING__ - /* t is always in world space with OptiX and MetalRT. */ - ray.t = ss_isect.hits[0].t; -#else - /* Compute world space distance to surface hit. */ - float3 D = transform_direction(&ob_itfm, ray.D); - D = normalize(D) * ss_isect.hits[0].t; - ray.t = len(transform_direction(&ob_tfm, D)); -#endif + ray.tmax = ss_isect.hits[0].t; } if (bounce == 0) { /* Check if we hit the opposite side. 
*/ if (hit) { have_opposite_interface = true; - opposite_distance = dot(ray.P + ray.t * ray.D - P, -N); + opposite_distance = dot(ray.P + ray.tmax * ray.D - P, -N); } /* Apart from the opposite side check, we were supposed to only trace up to distance t, * so check if there would have been a hit in that case. */ - hit = ray.t < t; + hit = ray.tmax < t; } /* Use the distance to the exit point for the throughput update if we found one. */ if (hit) { - t = ray.t; + t = ray.tmax; } /* Advance to new scatter location. */ diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h index 1e7a333d013..b939489bb18 100644 --- a/intern/cycles/kernel/light/light.h +++ b/intern/cycles/kernel/light/light.h @@ -270,31 +270,26 @@ ccl_device bool lights_intersect(KernelGlobals kg, if (type == LIGHT_SPOT) { /* Spot/Disk light. */ - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); - const float3 ray_P = ray->P - ray->D * mis_ray_t; - const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]); const float radius = klight->spot.radius; if (radius == 0.0f) { continue; } /* disk oriented normal */ - const float3 lightN = normalize(ray_P - lightP); + const float3 lightN = normalize(ray->P - lightP); /* One sided. */ if (dot(ray->D, lightN) >= 0.0f) { continue; } float3 P; - if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lightN, radius, &P, &t)) { + if (!ray_disk_intersect( + ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) { continue; } } else if (type == LIGHT_POINT) { /* Sphere light (aka, aligned disk light). 
*/ - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); - const float3 ray_P = ray->P - ray->D * mis_ray_t; - const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]); const float radius = klight->spot.radius; if (radius == 0.0f) { @@ -302,9 +297,10 @@ ccl_device bool lights_intersect(KernelGlobals kg, } /* disk oriented normal */ - const float3 lightN = normalize(ray_P - lightP); + const float3 lightN = normalize(ray->P - lightP); float3 P; - if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lightN, radius, &P, &t)) { + if (!ray_disk_intersect( + ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) { continue; } } @@ -330,8 +326,19 @@ ccl_device bool lights_intersect(KernelGlobals kg, const float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]); float3 P; - if (!ray_quad_intersect( - ray->P, ray->D, 0.0f, ray->t, light_P, axisu, axisv, Ng, &P, &t, &u, &v, is_round)) { + if (!ray_quad_intersect(ray->P, + ray->D, + ray->tmin, + ray->tmax, + light_P, + axisu, + axisv, + Ng, + &P, + &t, + &u, + &v, + is_round)) { continue; } } @@ -775,7 +782,8 @@ ccl_device_forceinline void triangle_light_sample(KernelGlobals kg, ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B); /* calculate intersection with the planar triangle */ - if (!ray_triangle_intersect(P, ls->D, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) { + if (!ray_triangle_intersect( + P, ls->D, 0.0f, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) { ls->pdf = 0.0f; return; } diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h index 5cf7dce683a..210bb1b35c2 100644 --- a/intern/cycles/kernel/light/sample.h +++ b/intern/cycles/kernel/light/sample.h @@ -227,23 +227,24 @@ ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restri if (ls->shader & SHADER_CAST_SHADOW) { /* setup ray */ ray->P = P; + ray->tmin = 0.0f; if (ls->t == FLT_MAX) { /* distant 
light */ ray->D = ls->D; - ray->t = ls->t; + ray->tmax = ls->t; } else { /* other lights, avoid self-intersection */ ray->D = ls->P - P; - ray->D = normalize_len(ray->D, &ray->t); + ray->D = normalize_len(ray->D, &ray->tmax); } } else { /* signal to not cast shadow ray */ ray->P = zero_float3(); ray->D = zero_float3(); - ray->t = 0.0f; + ray->tmax = 0.0f; } ray->dP = differential_make_compact(sd->dP); diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp index 6e75ae54f33..6b7981b7f3a 100644 --- a/intern/cycles/kernel/osl/services.cpp +++ b/intern/cycles/kernel/osl/services.cpp @@ -1094,10 +1094,8 @@ bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg, ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P); if (derivatives) { - ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)) - - ndc[0]; - ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)) - - ndc[0]; + ndc[1] = zero_float3(); + ndc[2] = zero_float3(); } } else { @@ -1671,7 +1669,8 @@ bool OSLRenderServices::trace(TraceOpt &options, ray.P = TO_FLOAT3(P); ray.D = TO_FLOAT3(R); - ray.t = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist; + ray.tmin = 0.0f; + ray.tmax = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist; ray.time = sd->time; ray.self.object = OBJECT_NONE; ray.self.prim = PRIM_NONE; @@ -1710,12 +1709,12 @@ bool OSLRenderServices::trace(TraceOpt &options, const KernelGlobalsCPU *kg = sd->osl_globals; - /* Can't raytrace from shaders like displacement, before BVH exists. */ + /* Can't ray-trace from shaders like displacement, before BVH exists. */ if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { return false; } - /* Raytrace, leaving out shadow opaque to avoid early exit. */ + /* Ray-trace, leaving out shadow opaque to avoid early exit. 
*/ uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE; tracedata->hit = scene_intersect(kg, &ray, visibility, &tracedata->isect); return tracedata->hit; diff --git a/intern/cycles/kernel/svm/ao.h b/intern/cycles/kernel/svm/ao.h index b477855dca3..c57c68d6230 100644 --- a/intern/cycles/kernel/svm/ao.h +++ b/intern/cycles/kernel/svm/ao.h @@ -31,7 +31,7 @@ ccl_device float svm_ao( return 1.0f; } - /* Can't raytrace from shaders like displacement, before BVH exists. */ + /* Can't ray-trace from shaders like displacement, before BVH exists. */ if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { return 1.0f; } @@ -59,7 +59,8 @@ ccl_device float svm_ao( Ray ray; ray.P = sd->P; ray.D = D.x * T + D.y * B + D.z * N; - ray.t = max_dist; + ray.tmin = 0.0f; + ray.tmax = max_dist; ray.time = sd->time; ray.self.object = sd->object; ray.self.prim = sd->prim; diff --git a/intern/cycles/kernel/svm/bevel.h b/intern/cycles/kernel/svm/bevel.h index f79bcae5cd2..4617a056a52 100644 --- a/intern/cycles/kernel/svm/bevel.h +++ b/intern/cycles/kernel/svm/bevel.h @@ -103,7 +103,7 @@ ccl_device float3 svm_bevel( return sd->N; } - /* Can't raytrace from shaders like displacement, before BVH exists. */ + /* Can't ray-trace from shaders like displacement, before BVH exists. 
*/ if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { return sd->N; } @@ -179,7 +179,8 @@ ccl_device float3 svm_bevel( Ray ray ccl_optional_struct_init; ray.P = sd->P + disk_N * disk_height + disk_P; ray.D = -disk_N; - ray.t = 2.0f * disk_height; + ray.tmin = 0.0f; + ray.tmax = 2.0f * disk_height; ray.dP = differential_zero_compact(); ray.dD = differential_zero_compact(); ray.time = sd->time; diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h index 305bd404d27..99a8fdd3be9 100644 --- a/intern/cycles/kernel/svm/closure.h +++ b/intern/cycles/kernel/svm/closure.h @@ -395,7 +395,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) # endif { - /* This is to prevent mnee from receiving a null bsdf. */ + /* This is to prevent MNEE from receiving a null BSDF. */ float refraction_fresnel = fmaxf(0.0001f, 1.0f - fresnel); ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( sd, sizeof(MicrofacetBsdf), base_color * glass_weight * refraction_fresnel); @@ -676,7 +676,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) #endif { - /* This is to prevent mnee from receiving a null bsdf. */ + /* This is to prevent MNEE from receiving a null BSDF. 
*/ float refraction_fresnel = fmaxf(0.0001f, 1.0f - fresnel); ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( sd, sizeof(MicrofacetBsdf), weight * refraction_fresnel); diff --git a/intern/cycles/kernel/svm/node_types_template.h b/intern/cycles/kernel/svm/node_types_template.h new file mode 100644 index 00000000000..39d279be4cb --- /dev/null +++ b/intern/cycles/kernel/svm/node_types_template.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#ifndef SHADER_NODE_TYPE +# define SHADER_NODE_TYPE(name) +#endif + +/* NOTE: for best OpenCL performance, item definition in the enum must + * match the switch case order in `svm.h`. */ + +SHADER_NODE_TYPE(NODE_END) +SHADER_NODE_TYPE(NODE_SHADER_JUMP) +SHADER_NODE_TYPE(NODE_CLOSURE_BSDF) +SHADER_NODE_TYPE(NODE_CLOSURE_EMISSION) +SHADER_NODE_TYPE(NODE_CLOSURE_BACKGROUND) +SHADER_NODE_TYPE(NODE_CLOSURE_SET_WEIGHT) +SHADER_NODE_TYPE(NODE_CLOSURE_WEIGHT) +SHADER_NODE_TYPE(NODE_EMISSION_WEIGHT) +SHADER_NODE_TYPE(NODE_MIX_CLOSURE) +SHADER_NODE_TYPE(NODE_JUMP_IF_ZERO) +SHADER_NODE_TYPE(NODE_JUMP_IF_ONE) +SHADER_NODE_TYPE(NODE_GEOMETRY) +SHADER_NODE_TYPE(NODE_CONVERT) +SHADER_NODE_TYPE(NODE_TEX_COORD) +SHADER_NODE_TYPE(NODE_VALUE_F) +SHADER_NODE_TYPE(NODE_VALUE_V) +SHADER_NODE_TYPE(NODE_ATTR) +SHADER_NODE_TYPE(NODE_VERTEX_COLOR) +SHADER_NODE_TYPE(NODE_GEOMETRY_BUMP_DX) +SHADER_NODE_TYPE(NODE_GEOMETRY_BUMP_DY) +SHADER_NODE_TYPE(NODE_SET_DISPLACEMENT) +SHADER_NODE_TYPE(NODE_DISPLACEMENT) +SHADER_NODE_TYPE(NODE_VECTOR_DISPLACEMENT) +SHADER_NODE_TYPE(NODE_TEX_IMAGE) +SHADER_NODE_TYPE(NODE_TEX_IMAGE_BOX) +SHADER_NODE_TYPE(NODE_TEX_NOISE) +SHADER_NODE_TYPE(NODE_SET_BUMP) +SHADER_NODE_TYPE(NODE_ATTR_BUMP_DX) +SHADER_NODE_TYPE(NODE_ATTR_BUMP_DY) +SHADER_NODE_TYPE(NODE_VERTEX_COLOR_BUMP_DX) +SHADER_NODE_TYPE(NODE_VERTEX_COLOR_BUMP_DY) +SHADER_NODE_TYPE(NODE_TEX_COORD_BUMP_DX) +SHADER_NODE_TYPE(NODE_TEX_COORD_BUMP_DY) +SHADER_NODE_TYPE(NODE_CLOSURE_SET_NORMAL) 
+SHADER_NODE_TYPE(NODE_ENTER_BUMP_EVAL) +SHADER_NODE_TYPE(NODE_LEAVE_BUMP_EVAL) +SHADER_NODE_TYPE(NODE_HSV) +SHADER_NODE_TYPE(NODE_CLOSURE_HOLDOUT) +SHADER_NODE_TYPE(NODE_FRESNEL) +SHADER_NODE_TYPE(NODE_LAYER_WEIGHT) +SHADER_NODE_TYPE(NODE_CLOSURE_VOLUME) +SHADER_NODE_TYPE(NODE_PRINCIPLED_VOLUME) +SHADER_NODE_TYPE(NODE_MATH) +SHADER_NODE_TYPE(NODE_VECTOR_MATH) +SHADER_NODE_TYPE(NODE_RGB_RAMP) +SHADER_NODE_TYPE(NODE_GAMMA) +SHADER_NODE_TYPE(NODE_BRIGHTCONTRAST) +SHADER_NODE_TYPE(NODE_LIGHT_PATH) +SHADER_NODE_TYPE(NODE_OBJECT_INFO) +SHADER_NODE_TYPE(NODE_PARTICLE_INFO) +SHADER_NODE_TYPE(NODE_HAIR_INFO) +SHADER_NODE_TYPE(NODE_POINT_INFO) +SHADER_NODE_TYPE(NODE_TEXTURE_MAPPING) +SHADER_NODE_TYPE(NODE_MAPPING) +SHADER_NODE_TYPE(NODE_MIN_MAX) +SHADER_NODE_TYPE(NODE_CAMERA) +SHADER_NODE_TYPE(NODE_TEX_ENVIRONMENT) +SHADER_NODE_TYPE(NODE_TEX_SKY) +SHADER_NODE_TYPE(NODE_TEX_GRADIENT) +SHADER_NODE_TYPE(NODE_TEX_VORONOI) +SHADER_NODE_TYPE(NODE_TEX_MUSGRAVE) +SHADER_NODE_TYPE(NODE_TEX_WAVE) +SHADER_NODE_TYPE(NODE_TEX_MAGIC) +SHADER_NODE_TYPE(NODE_TEX_CHECKER) +SHADER_NODE_TYPE(NODE_TEX_BRICK) +SHADER_NODE_TYPE(NODE_TEX_WHITE_NOISE) +SHADER_NODE_TYPE(NODE_NORMAL) +SHADER_NODE_TYPE(NODE_LIGHT_FALLOFF) +SHADER_NODE_TYPE(NODE_IES) +SHADER_NODE_TYPE(NODE_CURVES) +SHADER_NODE_TYPE(NODE_TANGENT) +SHADER_NODE_TYPE(NODE_NORMAL_MAP) +SHADER_NODE_TYPE(NODE_INVERT) +SHADER_NODE_TYPE(NODE_MIX) +SHADER_NODE_TYPE(NODE_SEPARATE_COLOR) +SHADER_NODE_TYPE(NODE_COMBINE_COLOR) +SHADER_NODE_TYPE(NODE_SEPARATE_VECTOR) +SHADER_NODE_TYPE(NODE_COMBINE_VECTOR) +SHADER_NODE_TYPE(NODE_SEPARATE_HSV) +SHADER_NODE_TYPE(NODE_COMBINE_HSV) +SHADER_NODE_TYPE(NODE_VECTOR_ROTATE) +SHADER_NODE_TYPE(NODE_VECTOR_TRANSFORM) +SHADER_NODE_TYPE(NODE_WIREFRAME) +SHADER_NODE_TYPE(NODE_WAVELENGTH) +SHADER_NODE_TYPE(NODE_BLACKBODY) +SHADER_NODE_TYPE(NODE_MAP_RANGE) +SHADER_NODE_TYPE(NODE_VECTOR_MAP_RANGE) +SHADER_NODE_TYPE(NODE_CLAMP) +SHADER_NODE_TYPE(NODE_BEVEL) +SHADER_NODE_TYPE(NODE_AMBIENT_OCCLUSION) 
+SHADER_NODE_TYPE(NODE_TEX_VOXEL) +SHADER_NODE_TYPE(NODE_AOV_START) +SHADER_NODE_TYPE(NODE_AOV_COLOR) +SHADER_NODE_TYPE(NODE_AOV_VALUE) +SHADER_NODE_TYPE(NODE_FLOAT_CURVE) + +/* Padding for struct alignment. */ +SHADER_NODE_TYPE(NODE_PAD1) + +#undef SHADER_NODE_TYPE diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 8fd41ec8531..9d6d3e9222c 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -204,6 +204,15 @@ CCL_NAMESPACE_END CCL_NAMESPACE_BEGIN +#ifdef __KERNEL_USE_DATA_CONSTANTS__ +# define SVM_CASE(node) \ + case node: \ + if (!kernel_data_svm_usage_##node) \ + break; +#else +# define SVM_CASE(node) case node: +#endif + /* Main Interpreter Loop */ template<uint node_feature_mask, ShaderType type, typename ConstIntegratorGenericState> ccl_device void svm_eval_nodes(KernelGlobals kg, @@ -219,9 +228,10 @@ ccl_device void svm_eval_nodes(KernelGlobals kg, uint4 node = read_node(kg, &offset); switch (node.x) { - case NODE_END: - return; - case NODE_SHADER_JUMP: { + SVM_CASE(NODE_END) + return; + SVM_CASE(NODE_SHADER_JUMP) + { if (type == SHADER_TYPE_SURFACE) offset = node.y; else if (type == SHADER_TYPE_VOLUME) @@ -232,351 +242,349 @@ ccl_device void svm_eval_nodes(KernelGlobals kg, return; break; } - case NODE_CLOSURE_BSDF: - offset = svm_node_closure_bsdf<node_feature_mask, type>( - kg, sd, stack, node, path_flag, offset); - break; - case NODE_CLOSURE_EMISSION: - IF_KERNEL_NODES_FEATURE(EMISSION) - { - svm_node_closure_emission(sd, stack, node); - } - break; - case NODE_CLOSURE_BACKGROUND: - IF_KERNEL_NODES_FEATURE(EMISSION) - { - svm_node_closure_background(sd, stack, node); - } - break; - case NODE_CLOSURE_SET_WEIGHT: - svm_node_closure_set_weight(sd, node.y, node.z, node.w); - break; - case NODE_CLOSURE_WEIGHT: - svm_node_closure_weight(sd, stack, node.y); - break; - case NODE_EMISSION_WEIGHT: - IF_KERNEL_NODES_FEATURE(EMISSION) - { - svm_node_emission_weight(kg, sd, stack, node); - } - break; - 
case NODE_MIX_CLOSURE: - svm_node_mix_closure(sd, stack, node); - break; - case NODE_JUMP_IF_ZERO: - if (stack_load_float(stack, node.z) <= 0.0f) - offset += node.y; - break; - case NODE_JUMP_IF_ONE: - if (stack_load_float(stack, node.z) >= 1.0f) - offset += node.y; - break; - case NODE_GEOMETRY: - svm_node_geometry(kg, sd, stack, node.y, node.z); - break; - case NODE_CONVERT: - svm_node_convert(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_TEX_COORD: - offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); - break; - case NODE_VALUE_F: - svm_node_value_f(kg, sd, stack, node.y, node.z); - break; - case NODE_VALUE_V: - offset = svm_node_value_v(kg, sd, stack, node.y, offset); - break; - case NODE_ATTR: - svm_node_attr<node_feature_mask>(kg, sd, stack, node); - break; - case NODE_VERTEX_COLOR: - svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_GEOMETRY_BUMP_DX: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z); - } - break; - case NODE_GEOMETRY_BUMP_DY: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z); - } - break; - case NODE_SET_DISPLACEMENT: - svm_node_set_displacement<node_feature_mask>(kg, sd, stack, node.y); - break; - case NODE_DISPLACEMENT: - svm_node_displacement<node_feature_mask>(kg, sd, stack, node); - break; - case NODE_VECTOR_DISPLACEMENT: - offset = svm_node_vector_displacement<node_feature_mask>(kg, sd, stack, node, offset); - break; - case NODE_TEX_IMAGE: - offset = svm_node_tex_image(kg, sd, stack, node, offset); - break; - case NODE_TEX_IMAGE_BOX: - svm_node_tex_image_box(kg, sd, stack, node); - break; - case NODE_TEX_NOISE: - offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_SET_BUMP: - svm_node_set_bump<node_feature_mask>(kg, sd, stack, node); - break; - case NODE_ATTR_BUMP_DX: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_attr_bump_dx(kg, sd, 
stack, node); - } - break; - case NODE_ATTR_BUMP_DY: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_attr_bump_dy(kg, sd, stack, node); - } - break; - case NODE_VERTEX_COLOR_BUMP_DX: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w); - } - break; - case NODE_VERTEX_COLOR_BUMP_DY: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w); - } - break; - case NODE_TEX_COORD_BUMP_DX: - IF_KERNEL_NODES_FEATURE(BUMP) - { - offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset); - } - break; - case NODE_TEX_COORD_BUMP_DY: - IF_KERNEL_NODES_FEATURE(BUMP) - { - offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset); - } - break; - case NODE_CLOSURE_SET_NORMAL: - IF_KERNEL_NODES_FEATURE(BUMP) - { - svm_node_set_normal(kg, sd, stack, node.y, node.z); - } - break; - case NODE_ENTER_BUMP_EVAL: - IF_KERNEL_NODES_FEATURE(BUMP_STATE) - { - svm_node_enter_bump_eval(kg, sd, stack, node.y); - } - break; - case NODE_LEAVE_BUMP_EVAL: - IF_KERNEL_NODES_FEATURE(BUMP_STATE) - { - svm_node_leave_bump_eval(kg, sd, stack, node.y); - } - break; - case NODE_HSV: - svm_node_hsv(kg, sd, stack, node); - break; - - case NODE_CLOSURE_HOLDOUT: - svm_node_closure_holdout(sd, stack, node); - break; - case NODE_FRESNEL: - svm_node_fresnel(sd, stack, node.y, node.z, node.w); - break; - case NODE_LAYER_WEIGHT: - svm_node_layer_weight(sd, stack, node); - break; - case NODE_CLOSURE_VOLUME: - IF_KERNEL_NODES_FEATURE(VOLUME) - { - svm_node_closure_volume<type>(kg, sd, stack, node); - } - break; - case NODE_PRINCIPLED_VOLUME: - IF_KERNEL_NODES_FEATURE(VOLUME) - { - offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset); - } - break; - case NODE_MATH: - svm_node_math(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_VECTOR_MATH: - offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case 
NODE_RGB_RAMP: - offset = svm_node_rgb_ramp(kg, sd, stack, node, offset); - break; - case NODE_GAMMA: - svm_node_gamma(sd, stack, node.y, node.z, node.w); - break; - case NODE_BRIGHTCONTRAST: - svm_node_brightness(sd, stack, node.y, node.z, node.w); - break; - case NODE_LIGHT_PATH: - svm_node_light_path<node_feature_mask>(kg, state, sd, stack, node.y, node.z, path_flag); - break; - case NODE_OBJECT_INFO: - svm_node_object_info(kg, sd, stack, node.y, node.z); - break; - case NODE_PARTICLE_INFO: - svm_node_particle_info(kg, sd, stack, node.y, node.z); - break; + SVM_CASE(NODE_CLOSURE_BSDF) + offset = svm_node_closure_bsdf<node_feature_mask, type>( + kg, sd, stack, node, path_flag, offset); + break; + SVM_CASE(NODE_CLOSURE_EMISSION) + IF_KERNEL_NODES_FEATURE(EMISSION) + { + svm_node_closure_emission(sd, stack, node); + } + break; + SVM_CASE(NODE_CLOSURE_BACKGROUND) + IF_KERNEL_NODES_FEATURE(EMISSION) + { + svm_node_closure_background(sd, stack, node); + } + break; + SVM_CASE(NODE_CLOSURE_SET_WEIGHT) + svm_node_closure_set_weight(sd, node.y, node.z, node.w); + break; + SVM_CASE(NODE_CLOSURE_WEIGHT) + svm_node_closure_weight(sd, stack, node.y); + break; + SVM_CASE(NODE_EMISSION_WEIGHT) + IF_KERNEL_NODES_FEATURE(EMISSION) + { + svm_node_emission_weight(kg, sd, stack, node); + } + break; + SVM_CASE(NODE_MIX_CLOSURE) + svm_node_mix_closure(sd, stack, node); + break; + SVM_CASE(NODE_JUMP_IF_ZERO) + if (stack_load_float(stack, node.z) <= 0.0f) + offset += node.y; + break; + SVM_CASE(NODE_JUMP_IF_ONE) + if (stack_load_float(stack, node.z) >= 1.0f) + offset += node.y; + break; + SVM_CASE(NODE_GEOMETRY) + svm_node_geometry(kg, sd, stack, node.y, node.z); + break; + SVM_CASE(NODE_CONVERT) + svm_node_convert(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_TEX_COORD) + offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); + break; + SVM_CASE(NODE_VALUE_F) + svm_node_value_f(kg, sd, stack, node.y, node.z); + break; + SVM_CASE(NODE_VALUE_V) + offset = 
svm_node_value_v(kg, sd, stack, node.y, offset); + break; + SVM_CASE(NODE_ATTR) + svm_node_attr<node_feature_mask>(kg, sd, stack, node); + break; + SVM_CASE(NODE_VERTEX_COLOR) + svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_GEOMETRY_BUMP_DX) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z); + } + break; + SVM_CASE(NODE_GEOMETRY_BUMP_DY) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z); + } + break; + SVM_CASE(NODE_SET_DISPLACEMENT) + svm_node_set_displacement<node_feature_mask>(kg, sd, stack, node.y); + break; + SVM_CASE(NODE_DISPLACEMENT) + svm_node_displacement<node_feature_mask>(kg, sd, stack, node); + break; + SVM_CASE(NODE_VECTOR_DISPLACEMENT) + offset = svm_node_vector_displacement<node_feature_mask>(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_IMAGE) + offset = svm_node_tex_image(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_IMAGE_BOX) + svm_node_tex_image_box(kg, sd, stack, node); + break; + SVM_CASE(NODE_TEX_NOISE) + offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_SET_BUMP) + svm_node_set_bump<node_feature_mask>(kg, sd, stack, node); + break; + SVM_CASE(NODE_ATTR_BUMP_DX) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_attr_bump_dx(kg, sd, stack, node); + } + break; + SVM_CASE(NODE_ATTR_BUMP_DY) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_attr_bump_dy(kg, sd, stack, node); + } + break; + SVM_CASE(NODE_VERTEX_COLOR_BUMP_DX) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w); + } + break; + SVM_CASE(NODE_VERTEX_COLOR_BUMP_DY) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w); + } + break; + SVM_CASE(NODE_TEX_COORD_BUMP_DX) + IF_KERNEL_NODES_FEATURE(BUMP) + { + offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset); 
+ } + break; + SVM_CASE(NODE_TEX_COORD_BUMP_DY) + IF_KERNEL_NODES_FEATURE(BUMP) + { + offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset); + } + break; + SVM_CASE(NODE_CLOSURE_SET_NORMAL) + IF_KERNEL_NODES_FEATURE(BUMP) + { + svm_node_set_normal(kg, sd, stack, node.y, node.z); + } + break; + SVM_CASE(NODE_ENTER_BUMP_EVAL) + IF_KERNEL_NODES_FEATURE(BUMP_STATE) + { + svm_node_enter_bump_eval(kg, sd, stack, node.y); + } + break; + SVM_CASE(NODE_LEAVE_BUMP_EVAL) + IF_KERNEL_NODES_FEATURE(BUMP_STATE) + { + svm_node_leave_bump_eval(kg, sd, stack, node.y); + } + break; + SVM_CASE(NODE_HSV) + svm_node_hsv(kg, sd, stack, node); + break; + SVM_CASE(NODE_CLOSURE_HOLDOUT) + svm_node_closure_holdout(sd, stack, node); + break; + SVM_CASE(NODE_FRESNEL) + svm_node_fresnel(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_LAYER_WEIGHT) + svm_node_layer_weight(sd, stack, node); + break; + SVM_CASE(NODE_CLOSURE_VOLUME) + IF_KERNEL_NODES_FEATURE(VOLUME) + { + svm_node_closure_volume<type>(kg, sd, stack, node); + } + break; + SVM_CASE(NODE_PRINCIPLED_VOLUME) + IF_KERNEL_NODES_FEATURE(VOLUME) + { + offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset); + } + break; + SVM_CASE(NODE_MATH) + svm_node_math(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_VECTOR_MATH) + offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_RGB_RAMP) + offset = svm_node_rgb_ramp(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_GAMMA) + svm_node_gamma(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_BRIGHTCONTRAST) + svm_node_brightness(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_LIGHT_PATH) + svm_node_light_path<node_feature_mask>(kg, state, sd, stack, node.y, node.z, path_flag); + break; + SVM_CASE(NODE_OBJECT_INFO) + svm_node_object_info(kg, sd, stack, node.y, node.z); + break; + SVM_CASE(NODE_PARTICLE_INFO) + svm_node_particle_info(kg, sd, stack, 
node.y, node.z); + break; #if defined(__HAIR__) - case NODE_HAIR_INFO: - svm_node_hair_info(kg, sd, stack, node.y, node.z); - break; + SVM_CASE(NODE_HAIR_INFO) + svm_node_hair_info(kg, sd, stack, node.y, node.z); + break; #endif #if defined(__POINTCLOUD__) - case NODE_POINT_INFO: - svm_node_point_info(kg, sd, stack, node.y, node.z); - break; + SVM_CASE(NODE_POINT_INFO) + svm_node_point_info(kg, sd, stack, node.y, node.z); + break; #endif - case NODE_TEXTURE_MAPPING: - offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset); - break; - case NODE_MAPPING: - svm_node_mapping(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_MIN_MAX: - offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset); - break; - case NODE_CAMERA: - svm_node_camera(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_TEX_ENVIRONMENT: - svm_node_tex_environment(kg, sd, stack, node); - break; - case NODE_TEX_SKY: - offset = svm_node_tex_sky(kg, sd, stack, node, offset); - break; - case NODE_TEX_GRADIENT: - svm_node_tex_gradient(sd, stack, node); - break; - case NODE_TEX_VORONOI: - offset = svm_node_tex_voronoi<node_feature_mask>( - kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_TEX_MUSGRAVE: - offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_TEX_WAVE: - offset = svm_node_tex_wave(kg, sd, stack, node, offset); - break; - case NODE_TEX_MAGIC: - offset = svm_node_tex_magic(kg, sd, stack, node, offset); - break; - case NODE_TEX_CHECKER: - svm_node_tex_checker(kg, sd, stack, node); - break; - case NODE_TEX_BRICK: - offset = svm_node_tex_brick(kg, sd, stack, node, offset); - break; - case NODE_TEX_WHITE_NOISE: - svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_NORMAL: - offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_LIGHT_FALLOFF: - svm_node_light_falloff(sd, stack, node); - break; - case NODE_IES: - 
svm_node_ies(kg, sd, stack, node); - break; - case NODE_RGB_CURVES: - case NODE_VECTOR_CURVES: - offset = svm_node_curves(kg, sd, stack, node, offset); - break; - case NODE_FLOAT_CURVE: - offset = svm_node_curve(kg, sd, stack, node, offset); - break; - case NODE_TANGENT: - svm_node_tangent(kg, sd, stack, node); - break; - case NODE_NORMAL_MAP: - svm_node_normal_map(kg, sd, stack, node); - break; - case NODE_INVERT: - svm_node_invert(sd, stack, node.y, node.z, node.w); - break; - case NODE_MIX: - offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_SEPARATE_COLOR: - svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_COMBINE_COLOR: - svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_SEPARATE_VECTOR: - svm_node_separate_vector(sd, stack, node.y, node.z, node.w); - break; - case NODE_COMBINE_VECTOR: - svm_node_combine_vector(sd, stack, node.y, node.z, node.w); - break; - case NODE_SEPARATE_HSV: - offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_COMBINE_HSV: - offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_VECTOR_ROTATE: - svm_node_vector_rotate(sd, stack, node.y, node.z, node.w); - break; - case NODE_VECTOR_TRANSFORM: - svm_node_vector_transform(kg, sd, stack, node); - break; - case NODE_WIREFRAME: - svm_node_wireframe(kg, sd, stack, node); - break; - case NODE_WAVELENGTH: - svm_node_wavelength(kg, sd, stack, node.y, node.z); - break; - case NODE_BLACKBODY: - svm_node_blackbody(kg, sd, stack, node.y, node.z); - break; - case NODE_MAP_RANGE: - offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_VECTOR_MAP_RANGE: - offset = svm_node_vector_map_range(kg, sd, stack, node.y, node.z, node.w, offset); - break; - case NODE_CLAMP: - offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset); - break; + 
SVM_CASE(NODE_TEXTURE_MAPPING) + offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset); + break; + SVM_CASE(NODE_MAPPING) + svm_node_mapping(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_MIN_MAX) + offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset); + break; + SVM_CASE(NODE_CAMERA) + svm_node_camera(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_TEX_ENVIRONMENT) + svm_node_tex_environment(kg, sd, stack, node); + break; + SVM_CASE(NODE_TEX_SKY) + offset = svm_node_tex_sky(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_GRADIENT) + svm_node_tex_gradient(sd, stack, node); + break; + SVM_CASE(NODE_TEX_VORONOI) + offset = svm_node_tex_voronoi<node_feature_mask>( + kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_TEX_MUSGRAVE) + offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_TEX_WAVE) + offset = svm_node_tex_wave(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_MAGIC) + offset = svm_node_tex_magic(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_CHECKER) + svm_node_tex_checker(kg, sd, stack, node); + break; + SVM_CASE(NODE_TEX_BRICK) + offset = svm_node_tex_brick(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TEX_WHITE_NOISE) + svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_NORMAL) + offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_LIGHT_FALLOFF) + svm_node_light_falloff(sd, stack, node); + break; + SVM_CASE(NODE_IES) + svm_node_ies(kg, sd, stack, node); + break; + SVM_CASE(NODE_CURVES) + offset = svm_node_curves(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_FLOAT_CURVE) + offset = svm_node_curve(kg, sd, stack, node, offset); + break; + SVM_CASE(NODE_TANGENT) + svm_node_tangent(kg, sd, stack, node); + break; + SVM_CASE(NODE_NORMAL_MAP) + svm_node_normal_map(kg, sd, stack, node); + 
break; + SVM_CASE(NODE_INVERT) + svm_node_invert(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_MIX) + offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_SEPARATE_COLOR) + svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_COMBINE_COLOR) + svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_SEPARATE_VECTOR) + svm_node_separate_vector(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_COMBINE_VECTOR) + svm_node_combine_vector(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_SEPARATE_HSV) + offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_COMBINE_HSV) + offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_VECTOR_ROTATE) + svm_node_vector_rotate(sd, stack, node.y, node.z, node.w); + break; + SVM_CASE(NODE_VECTOR_TRANSFORM) + svm_node_vector_transform(kg, sd, stack, node); + break; + SVM_CASE(NODE_WIREFRAME) + svm_node_wireframe(kg, sd, stack, node); + break; + SVM_CASE(NODE_WAVELENGTH) + svm_node_wavelength(kg, sd, stack, node.y, node.z); + break; + SVM_CASE(NODE_BLACKBODY) + svm_node_blackbody(kg, sd, stack, node.y, node.z); + break; + SVM_CASE(NODE_MAP_RANGE) + offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_VECTOR_MAP_RANGE) + offset = svm_node_vector_map_range(kg, sd, stack, node.y, node.z, node.w, offset); + break; + SVM_CASE(NODE_CLAMP) + offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset); + break; #ifdef __SHADER_RAYTRACE__ - case NODE_BEVEL: - svm_node_bevel<node_feature_mask>(kg, state, sd, stack, node); - break; - case NODE_AMBIENT_OCCLUSION: - svm_node_ao<node_feature_mask>(kg, state, sd, stack, node); - break; + SVM_CASE(NODE_BEVEL) + svm_node_bevel<node_feature_mask>(kg, state, sd, stack, node); + break; + 
SVM_CASE(NODE_AMBIENT_OCCLUSION) + svm_node_ao<node_feature_mask>(kg, state, sd, stack, node); + break; #endif - case NODE_TEX_VOXEL: - IF_KERNEL_NODES_FEATURE(VOLUME) - { - offset = svm_node_tex_voxel(kg, sd, stack, node, offset); - } - break; - case NODE_AOV_START: - if (!svm_node_aov_check(path_flag, render_buffer)) { - return; - } - break; - case NODE_AOV_COLOR: - svm_node_aov_color<node_feature_mask>(kg, state, sd, stack, node, render_buffer); - break; - case NODE_AOV_VALUE: - svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer); - break; + SVM_CASE(NODE_TEX_VOXEL) + IF_KERNEL_NODES_FEATURE(VOLUME) + { + offset = svm_node_tex_voxel(kg, sd, stack, node, offset); + } + break; + SVM_CASE(NODE_AOV_START) + if (!svm_node_aov_check(path_flag, render_buffer)) { + return; + } + break; + SVM_CASE(NODE_AOV_COLOR) + svm_node_aov_color<node_feature_mask>(kg, state, sd, stack, node, render_buffer); + break; + SVM_CASE(NODE_AOV_VALUE) + svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer); + break; default: kernel_assert(!"Unknown node type was passed to the SVM machine"); return; diff --git a/intern/cycles/kernel/svm/tex_coord.h b/intern/cycles/kernel/svm/tex_coord.h index d9138796c45..2a0130e11d4 100644 --- a/intern/cycles/kernel/svm/tex_coord.h +++ b/intern/cycles/kernel/svm/tex_coord.h @@ -138,7 +138,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg, case NODE_TEXCO_WINDOW: { if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)); + data = camera_world_to_ndc(kg, sd, sd->ray_P); else data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx); data.z = 0.0f; @@ -223,7 +223,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg, case NODE_TEXCO_WINDOW: { if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == 
CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)); + data = camera_world_to_ndc(kg, sd, sd->ray_P); else data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy); data.z = 0.0f; diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h index 82109ec4c4f..12d0ec141e6 100644 --- a/intern/cycles/kernel/svm/types.h +++ b/intern/cycles/kernel/svm/types.h @@ -17,104 +17,9 @@ CCL_NAMESPACE_BEGIN /* Nodes */ typedef enum ShaderNodeType { - NODE_END = 0, - NODE_SHADER_JUMP, - NODE_CLOSURE_BSDF, - NODE_CLOSURE_EMISSION, - NODE_CLOSURE_BACKGROUND, - NODE_CLOSURE_SET_WEIGHT, - NODE_CLOSURE_WEIGHT, - NODE_EMISSION_WEIGHT, - NODE_MIX_CLOSURE, - NODE_JUMP_IF_ZERO, - NODE_JUMP_IF_ONE, - NODE_GEOMETRY, - NODE_CONVERT, - NODE_TEX_COORD, - NODE_VALUE_F, - NODE_VALUE_V, - NODE_ATTR, - NODE_VERTEX_COLOR, - NODE_GEOMETRY_BUMP_DX, - NODE_GEOMETRY_BUMP_DY, - NODE_SET_DISPLACEMENT, - NODE_DISPLACEMENT, - NODE_VECTOR_DISPLACEMENT, - NODE_TEX_IMAGE, - NODE_TEX_IMAGE_BOX, - NODE_TEX_NOISE, - NODE_SET_BUMP, - NODE_ATTR_BUMP_DX, - NODE_ATTR_BUMP_DY, - NODE_VERTEX_COLOR_BUMP_DX, - NODE_VERTEX_COLOR_BUMP_DY, - NODE_TEX_COORD_BUMP_DX, - NODE_TEX_COORD_BUMP_DY, - NODE_CLOSURE_SET_NORMAL, - NODE_ENTER_BUMP_EVAL, - NODE_LEAVE_BUMP_EVAL, - NODE_HSV, - NODE_CLOSURE_HOLDOUT, - NODE_FRESNEL, - NODE_LAYER_WEIGHT, - NODE_CLOSURE_VOLUME, - NODE_PRINCIPLED_VOLUME, - NODE_MATH, - NODE_VECTOR_MATH, - NODE_RGB_RAMP, - NODE_GAMMA, - NODE_BRIGHTCONTRAST, - NODE_LIGHT_PATH, - NODE_OBJECT_INFO, - NODE_PARTICLE_INFO, - NODE_HAIR_INFO, - NODE_POINT_INFO, - NODE_TEXTURE_MAPPING, - NODE_MAPPING, - NODE_MIN_MAX, - NODE_CAMERA, - NODE_TEX_ENVIRONMENT, - NODE_TEX_SKY, - NODE_TEX_GRADIENT, - NODE_TEX_VORONOI, - NODE_TEX_MUSGRAVE, - NODE_TEX_WAVE, - NODE_TEX_MAGIC, - NODE_TEX_CHECKER, - NODE_TEX_BRICK, - NODE_TEX_WHITE_NOISE, - NODE_NORMAL, - NODE_LIGHT_FALLOFF, - NODE_IES, - NODE_RGB_CURVES, - NODE_VECTOR_CURVES, - NODE_TANGENT, - NODE_NORMAL_MAP, 
- NODE_INVERT, - NODE_MIX, - NODE_SEPARATE_COLOR, - NODE_COMBINE_COLOR, - NODE_SEPARATE_VECTOR, - NODE_COMBINE_VECTOR, - NODE_SEPARATE_HSV, - NODE_COMBINE_HSV, - NODE_VECTOR_ROTATE, - NODE_VECTOR_TRANSFORM, - NODE_WIREFRAME, - NODE_WAVELENGTH, - NODE_BLACKBODY, - NODE_MAP_RANGE, - NODE_VECTOR_MAP_RANGE, - NODE_CLAMP, - NODE_BEVEL, - NODE_AMBIENT_OCCLUSION, - NODE_TEX_VOXEL, - NODE_AOV_START, - NODE_AOV_COLOR, - NODE_AOV_VALUE, - NODE_FLOAT_CURVE, - /* NOTE: for best OpenCL performance, item definition in the enum must - * match the switch case order in `svm.h`. */ +#define SHADER_NODE_TYPE(name) name, +#include "node_types_template.h" + NODE_NUM } ShaderNodeType; typedef enum NodeAttributeOutputType { diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index f2e61d25002..7762c95275e 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -19,10 +19,6 @@ #include "kernel/svm/types.h" -#ifndef __KERNEL_GPU__ -# define __KERNEL_CPU__ -#endif - CCL_NAMESPACE_BEGIN /* Constants */ @@ -51,10 +47,10 @@ CCL_NAMESPACE_BEGIN #define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024U #define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4U -#ifdef __KERNEL_CPU__ -# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU -#else +#ifdef __KERNEL_GPU__ # define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU +#else +# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU #endif /* Kernel features */ @@ -83,7 +79,6 @@ CCL_NAMESPACE_BEGIN #define __LAMP_MIS__ #define __CAMERA_MOTION__ #define __OBJECT_MOTION__ -#define __BAKING__ #define __PRINCIPLED__ #define __SUBSURFACE__ #define __VOLUME__ @@ -92,16 +87,12 @@ CCL_NAMESPACE_BEGIN #define __BRANCHED_PATH__ /* Device specific features */ -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ # ifdef WITH_OSL # define __OSL__ # endif # define __VOLUME_RECORD_ALL__ -#endif /* __KERNEL_CPU__ */ - -#ifdef __KERNEL_GPU_RAYTRACING__ -# undef __BAKING__ -#endif /* 
__KERNEL_GPU_RAYTRACING__ */ +#endif /* !__KERNEL_GPU__ */ /* MNEE currently causes "Compute function exceeds available temporary registers" * on Metal, disabled for now. */ @@ -129,9 +120,6 @@ CCL_NAMESPACE_BEGIN # if !(__KERNEL_FEATURES & KERNEL_FEATURE_SUBSURFACE) # undef __SUBSURFACE__ # endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_BAKING) -# undef __BAKING__ -# endif # if !(__KERNEL_FEATURES & KERNEL_FEATURE_PATCH_EVALUATION) # undef __PATCH_EVAL__ # endif @@ -535,7 +523,8 @@ typedef struct RaySelfPrimitives { typedef struct Ray { float3 P; /* origin */ float3 D; /* direction */ - float t; /* length of the ray */ + float tmin; /* start distance */ + float tmax; /* end distance */ float time; /* time (for motion blur) */ RaySelfPrimitives self; @@ -729,7 +718,7 @@ typedef struct ccl_align(16) ShaderClosure { SHADER_CLOSURE_BASE; -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ float pad[2]; #endif float data[10]; @@ -1072,94 +1061,6 @@ typedef struct KernelCamera { } KernelCamera; static_assert_align(KernelCamera, 16); -typedef struct KernelFilm { - float exposure; - int pass_flag; - - int light_pass_flag; - int pass_stride; - - int pass_combined; - int pass_depth; - int pass_position; - int pass_normal; - int pass_roughness; - int pass_motion; - - int pass_motion_weight; - int pass_uv; - int pass_object_id; - int pass_material_id; - - int pass_diffuse_color; - int pass_glossy_color; - int pass_transmission_color; - - int pass_diffuse_indirect; - int pass_glossy_indirect; - int pass_transmission_indirect; - int pass_volume_indirect; - - int pass_diffuse_direct; - int pass_glossy_direct; - int pass_transmission_direct; - int pass_volume_direct; - - int pass_emission; - int pass_background; - int pass_ao; - float pass_alpha_threshold; - - int pass_shadow; - float pass_shadow_scale; - - int pass_shadow_catcher; - int pass_shadow_catcher_sample_count; - int pass_shadow_catcher_matte; - - int filter_table_offset; - - int cryptomatte_passes; - int 
cryptomatte_depth; - int pass_cryptomatte; - - int pass_adaptive_aux_buffer; - int pass_sample_count; - - int pass_mist; - float mist_start; - float mist_inv_depth; - float mist_falloff; - - int pass_denoising_normal; - int pass_denoising_albedo; - int pass_denoising_depth; - - int pass_aov_color; - int pass_aov_value; - int pass_lightgroup; - - /* XYZ to rendering color space transform. float4 instead of float3 to - * ensure consistent padding/alignment across devices. */ - float4 xyz_to_r; - float4 xyz_to_g; - float4 xyz_to_b; - float4 rgb_to_y; - /* Rec709 to rendering color space. */ - float4 rec709_to_r; - float4 rec709_to_g; - float4 rec709_to_b; - int is_rec709; - - int pass_bake_primitive; - int pass_bake_differential; - - int use_approximate_shadow_catcher; - - int pad1; -} KernelFilm; -static_assert_align(KernelFilm, 16); - typedef struct KernelFilmConvert { int pass_offset; int pass_stride; @@ -1201,108 +1102,6 @@ typedef struct KernelFilmConvert { } KernelFilmConvert; static_assert_align(KernelFilmConvert, 16); -typedef struct KernelBackground { - /* only shader index */ - int surface_shader; - int volume_shader; - float volume_step_size; - int transparent; - float transparent_roughness_squared_threshold; - - /* portal sampling */ - float portal_weight; - int num_portals; - int portal_offset; - - /* sun sampling */ - float sun_weight; - /* xyz store direction, w the angle. float4 instead of float3 is used - * to ensure consistent padding/alignment across devices. 
*/ - float4 sun; - - /* map sampling */ - float map_weight; - int map_res_x; - int map_res_y; - - int use_mis; - - int lightgroup; - - /* Padding */ - int pad1, pad2; -} KernelBackground; -static_assert_align(KernelBackground, 16); - -typedef struct KernelIntegrator { - /* emission */ - int use_direct_light; - int num_distribution; - int num_all_lights; - float pdf_triangles; - float pdf_lights; - float light_inv_rr_threshold; - - /* bounces */ - int min_bounce; - int max_bounce; - - int max_diffuse_bounce; - int max_glossy_bounce; - int max_transmission_bounce; - int max_volume_bounce; - - /* AO bounces */ - int ao_bounces; - float ao_bounces_distance; - float ao_bounces_factor; - float ao_additive_factor; - - /* transparent */ - int transparent_min_bounce; - int transparent_max_bounce; - int transparent_shadows; - - /* caustics */ - int caustics_reflective; - int caustics_refractive; - float filter_glossy; - - /* seed */ - int seed; - - /* clamp */ - float sample_clamp_direct; - float sample_clamp_indirect; - - /* mis */ - int use_lamp_mis; - - /* caustics */ - int use_caustics; - - /* sampler */ - int sampling_pattern; - - /* volume render */ - int use_volumes; - int volume_max_steps; - float volume_step_rate; - - int has_shadow_catcher; - float scrambling_distance; - - /* Closure filter. */ - int filter_closures; - - /* MIS debugging. */ - int direct_light_sampling_type; - - /* padding */ - int pad1; -} KernelIntegrator; -static_assert_align(KernelIntegrator, 16); - typedef enum KernelBVHLayout { BVH_LAYOUT_NONE = 0, @@ -1320,36 +1119,25 @@ typedef enum KernelBVHLayout { BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL, } KernelBVHLayout; -typedef struct KernelBVH { - /* Own BVH */ - int root; - int have_motion; - int have_curves; - int bvh_layout; - int use_bvh_steps; - int curve_subdivisions; +/* Specialized struct that can become constants in dynamic compilation. 
*/ +#define KERNEL_STRUCT_BEGIN(name, parent) struct name { +#define KERNEL_STRUCT_END(name) \ + } \ + ; \ + static_assert_align(name, 16); - /* Custom BVH */ -#ifdef __KERNEL_OPTIX__ - OptixTraversableHandle scene; -#elif defined __METALRT__ - metalrt_as_type scene; +#ifdef __KERNEL_USE_DATA_CONSTANTS__ +# define KERNEL_STRUCT_MEMBER(parent, type, name) type __unused_##name; #else -# ifdef __EMBREE__ - RTCScene scene; -# ifndef __KERNEL_64_BIT__ - int pad2; -# endif -# else - int scene, pad2; -# endif +# define KERNEL_STRUCT_MEMBER(parent, type, name) type name; #endif -} KernelBVH; -static_assert_align(KernelBVH, 16); + +#include "kernel/data_template.h" typedef struct KernelTables { int beckmann_offset; - int pad1, pad2, pad3; + int filter_table_offset; + int pad1, pad2; } KernelTables; static_assert_align(KernelTables, 16); @@ -1362,18 +1150,37 @@ typedef struct KernelBake { static_assert_align(KernelBake, 16); typedef struct KernelData { + /* Features and limits. */ uint kernel_features; uint max_closures; uint max_shaders; uint volume_stack_size; + /* Always dynamic data members. */ KernelCamera cam; - KernelFilm film; - KernelBackground background; - KernelIntegrator integrator; - KernelBVH bvh; - KernelTables tables; KernelBake bake; + KernelTables tables; + + /* Potentially specialized data members. */ +#define KERNEL_STRUCT_BEGIN(name, parent) name parent; +#include "kernel/data_template.h" + + /* Device specific BVH. */ +#ifdef __KERNEL_OPTIX__ + OptixTraversableHandle device_bvh; +#elif defined __METALRT__ + metalrt_as_type device_bvh; +#else +# ifdef __EMBREE__ + RTCScene device_bvh; +# ifndef __KERNEL_64_BIT__ + int pad1; +# endif +# else + int device_bvh, pad1; +# endif +#endif + int pad2, pad3; } KernelData; static_assert_align(KernelData, 16); @@ -1729,15 +1536,15 @@ enum KernelFeatureFlag : uint32_t { /* Must be constexpr on the CPU to avoid compile errors because the state types * are different depending on the main, shadow or null path. 
For GPU we don't have * C++17 everywhere so can't use it. */ -#ifdef __KERNEL_CPU__ +#ifdef __KERNEL_GPU__ +# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) +# define IF_KERNEL_NODES_FEATURE(feature) \ + if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) +#else # define IF_KERNEL_FEATURE(feature) \ if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) # define IF_KERNEL_NODES_FEATURE(feature) \ if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) -#else -# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) -# define IF_KERNEL_NODES_FEATURE(feature) \ - if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) #endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/profiling.h b/intern/cycles/kernel/util/profiling.h index 39cabd35967..b8afaf1166d 100644 --- a/intern/cycles/kernel/util/profiling.h +++ b/intern/cycles/kernel/util/profiling.h @@ -3,13 +3,13 @@ #pragma once -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ # include "util/profiling.h" #endif CCL_NAMESPACE_BEGIN -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ # define PROFILING_INIT(kg, event) \ ProfilingHelper profiling_helper((ProfilingState *)&kg->profiler, event) # define PROFILING_EVENT(event) profiling_helper.set_event(event) @@ -22,6 +22,6 @@ CCL_NAMESPACE_BEGIN # define PROFILING_EVENT(event) # define PROFILING_INIT_FOR_SHADER(kg, event) # define PROFILING_SHADER(object, shader) -#endif /* __KERNEL_CPU__ */ +#endif /* !__KERNEL_GPU__ */ CCL_NAMESPACE_END |