diff options
Diffstat (limited to 'intern/cycles/kernel/bvh')
-rw-r--r-- | intern/cycles/kernel/bvh/bvh.h | 823 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/embree.h | 176 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/local.h | 31 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/metal.h | 37 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/nodes.h | 50 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/shadow_all.h | 76 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/traversal.h | 57 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/util.h | 92 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/volume.h | 47 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/volume_all.h | 101 |
10 files changed, 360 insertions, 1130 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index 04ccb7ceff5..29789a15b28 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -1,40 +1,47 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ -/* BVH - * - * Bounding volume hierarchy for ray tracing. We compile different variations - * of the same BVH traversal function for faster rendering when some types of - * primitives are not needed, using #includes to work around the lack of - * C++ templates in OpenCL. - * - * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs", - * the code has been extended and modified to support more primitives and work - * with CPU/CUDA/OpenCL. */ - #pragma once -#ifdef __EMBREE__ -# include "kernel/bvh/embree.h" -#endif - -#ifdef __METALRT__ -# include "kernel/bvh/metal.h" -#endif - #include "kernel/bvh/types.h" #include "kernel/bvh/util.h" #include "kernel/integrator/state_util.h" +/* Device specific acceleration structures for ray tracing. */ + +#if defined(__EMBREE__) +# include "kernel/device/cpu/bvh.h" +# define __BVH2__ +#elif defined(__METALRT__) +# include "kernel/device/metal/bvh.h" +#elif defined(__KERNEL_OPTIX__) +# include "kernel/device/optix/bvh.h" +#else +# define __BVH2__ +#endif + CCL_NAMESPACE_BEGIN -#if !defined(__KERNEL_GPU_RAYTRACING__) +#ifdef __BVH2__ -/* Regular BVH traversal */ +/* BVH2 + * + * Bounding volume hierarchy for ray tracing, when no native acceleration + * structure is available for the device. + * + * We compile different variations of the same BVH traversal function for + * faster rendering when some types of primitives are not needed, using #includes + * to work around the lack of C++ templates in OpenCL. + * + * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs", + * the code has been extended and modified to support more primitives and work + * with CPU and various GPU kernel languages. */ # include "kernel/bvh/nodes.h" +/* Regular BVH traversal */ + # define BVH_FUNCTION_NAME bvh_intersect # define BVH_FUNCTION_FEATURES BVH_POINTCLOUD # include "kernel/bvh/traversal.h" @@ -57,260 +64,20 @@ CCL_NAMESPACE_BEGIN # include "kernel/bvh/traversal.h" # endif -/* Subsurface scattering BVH traversal */ - -# if defined(__BVH_LOCAL__) -# define BVH_FUNCTION_NAME bvh_intersect_local -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/local.h" - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_local_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/local.h" -# endif -# endif /* __BVH_LOCAL__ */ - -/* Volume BVH traversal */ - -# if defined(__VOLUME__) -# define BVH_FUNCTION_NAME bvh_intersect_volume -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/volume.h" - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/volume.h" -# endif -# endif /* __VOLUME__ */ - -/* Record all intersections - Shadow BVH traversal */ - -# if defined(__SHADOW_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all -# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" - -# if defined(__HAIR__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair -# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" -# endif - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" -# endif - -# if defined(__HAIR__) && defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion -# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD -# include "kernel/bvh/shadow_all.h" -# endif - -# endif /* __SHADOW_RECORD_ALL__ */ - -/* Record all intersections - Volume BVH traversal. */ - -# if defined(__VOLUME_RECORD_ALL__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all -# define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/volume_all.h" - -# if defined(__OBJECT_MOTION__) -# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/volume_all.h" -# endif -# endif /* __VOLUME_RECORD_ALL__ */ - -# undef BVH_FEATURE -# undef BVH_NAME_JOIN -# undef BVH_NAME_EVAL -# undef BVH_FUNCTION_FULL_NAME - -#endif /* !defined(__KERNEL_GPU_RAYTRACING__) */ - -ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray) -{ - /* NOTE: Due to some vectorization code non-finite origin point might - * cause lots of false-positive intersections which will overflow traversal - * stack. - * This code is a quick way to perform early output, to avoid crashes in - * such cases. - * From production scenes so far it seems it's enough to test first element - * only. - * Scene intersection may also called with empty rays for conditional trace - * calls that evaluate to false, so filter those out. - */ - return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f; -} - ccl_device_intersect bool scene_intersect(KernelGlobals kg, ccl_private const Ray *ray, const uint visibility, ccl_private Intersection *isect) { -#ifdef __KERNEL_OPTIX__ - uint p0 = 0; - uint p1 = 0; - uint p2 = 0; - uint p3 = 0; - uint p4 = visibility; - uint p5 = PRIMITIVE_NONE; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - uint ray_mask = visibility & 0xFF; - uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } - else if (visibility & PATH_RAY_SHADOW_OPAQUE) { - ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT; - } - - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - ray_mask, - ray_flags, - 0, /* SBT offset for PG_HITD */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - isect->t = __uint_as_float(p0); - isect->u = __uint_as_float(p1); - isect->v = __uint_as_float(p2); - isect->prim = p3; - isect->object = p4; - isect->type = p5; - - return p5 != PRIMITIVE_NONE; -#elif defined(__METALRT__) - - if (!scene_intersect_valid(ray)) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; - return false; - } - -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); - return false; - } - - if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; - kernel_assert(!"Invalid ift_default"); - return false; - } -# endif - - metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; - - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } - - MetalRTIntersectionPayload payload; - payload.self = ray->self; - payload.u = 0.0f; - payload.v = 0.0f; - payload.visibility = visibility; - - typename metalrt_intersector_type::result_type intersection; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - /* No further intersector setup required: Default MetalRT behavior is any-hit. */ - } - else if (visibility & PATH_RAY_SHADOW_OPAQUE) { - /* No further intersector setup required: Shadow ray early termination is controlled by the - * intersection handler */ - } - -# if defined(__METALRT_MOTION__) - payload.time = ray->time; - intersection = metalrt_intersect.intersect(r, - metal_ancillaries->accel_struct, - ray_mask, - ray->time, - metal_ancillaries->ift_default, - payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); -# endif - - if (intersection.type == intersection_type::none) { - isect->t = ray->t; - isect->type = PRIMITIVE_NONE; - - return false; - } - - isect->t = intersection.distance; - - isect->prim = payload.prim; - isect->type = payload.type; - isect->object = intersection.user_instance_id; - - isect->t = intersection.distance; - if (intersection.type == intersection_type::triangle) { - isect->u = 1.0f - intersection.triangle_barycentric_coord.y - - intersection.triangle_barycentric_coord.x; - isect->v = intersection.triangle_barycentric_coord.x; - } - else { - isect->u = payload.u; - isect->v = payload.v; - } - - return isect->type != PRIMITIVE_NONE; - -#else - - if (!scene_intersect_valid(ray)) { + if (!intersection_ray_valid(ray)) { return false; } # ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - isect->t = ray->t; - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); - IntersectContext rtc_ctx(&ctx); - RTCRayHit ray_hit; - ctx.ray = ray; - kernel_embree_setup_rayhit(*ray, ray_hit, visibility); - rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit); - if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && - ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) { - kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect); - return true; - } - return false; + if (kernel_data.device_bvh) { + return kernel_embree_intersect(kg, ray, visibility, isect); } -# endif /* __EMBREE__ */ +# endif # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { @@ -322,7 +89,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, return bvh_intersect_motion(kg, ray, isect, visibility); } -# endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ # ifdef __HAIR__ if (kernel_data.bvh.have_curves) { @@ -331,10 +98,22 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, # endif /* __HAIR__ */ return bvh_intersect(kg, ray, isect, visibility); -#endif /* __KERNEL_OPTIX__ */ } -#ifdef __BVH_LOCAL__ +/* Single object BVH traversal, for SSS/AO/bevel. */ + +# ifdef __BVH_LOCAL__ + +# define BVH_FUNCTION_NAME bvh_intersect_local +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "kernel/bvh/local.h" + +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_local_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR +# include "kernel/bvh/local.h" +# endif + ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, ccl_private const Ray *ray, ccl_private LocalIntersection *local_isect, @@ -342,108 +121,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, ccl_private uint *lcg_state, int max_hits) { -# ifdef __KERNEL_OPTIX__ - uint p0 = pointer_pack_to_uint_0(lcg_state); - uint p1 = pointer_pack_to_uint_1(lcg_state); - uint p2 = pointer_pack_to_uint_0(local_isect); - uint p3 = pointer_pack_to_uint_1(local_isect); - uint p4 = local_object; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - /* Is set to zero on miss or if ray is aborted, so can be used as return value. */ - uint p5 = max_hits; - - if (local_isect) { - local_isect->num_hits = 0; /* Initialize hit count to zero. */ - } - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - 0xFF, - /* Need to always call into __anyhit__kernel_optix_local_hit. */ - OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - 2, /* SBT offset for PG_HITL */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - return p5; -# elif defined(__METALRT__) - if (!scene_intersect_valid(ray)) { - if (local_isect) { - local_isect->num_hits = 0; - } - return false; - } - -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { - if (local_isect) { - local_isect->num_hits = 0; - } - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); - return false; - } - - if (is_null_intersection_function_table(metal_ancillaries->ift_local)) { - if (local_isect) { - local_isect->num_hits = 0; - } - kernel_assert(!"Invalid ift_local"); - return false; - } -# endif - - metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; - - metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } - - MetalRTIntersectionLocalPayload payload; - payload.self = ray->self; - payload.local_object = local_object; - payload.max_hits = max_hits; - payload.local_isect.num_hits = 0; - if (lcg_state) { - payload.has_lcg_state = true; - payload.lcg_state = *lcg_state; - } - payload.result = false; - - typename metalrt_intersector_type::result_type intersection; - -# if defined(__METALRT_MOTION__) - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload); -# endif - - if (lcg_state) { - *lcg_state = payload.lcg_state; - } - *local_isect = payload.local_isect; - - return payload.result; - -# else - - if (!scene_intersect_valid(ray)) { + if (!intersection_ray_valid(ray)) { if (local_isect) { local_isect->num_hits = 0; } @@ -451,59 +129,10 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, } # ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - const bool has_bvh = !(kernel_tex_fetch(__object_flag, local_object) & - SD_OBJECT_TRANSFORM_APPLIED); - CCLIntersectContext ctx( - kg, has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL); - ctx.lcg_state = lcg_state; - ctx.max_hits = max_hits; - ctx.ray = ray; - ctx.local_isect = local_isect; - if (local_isect) { - local_isect->num_hits = 0; - } - ctx.local_object_id = local_object; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY); - - /* If this object has its own BVH, use it. */ - if (has_bvh) { - RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2); - if (geom) { - float3 P = ray->P; - float3 dir = ray->D; - float3 idir = ray->D; - Transform ob_itfm; - rtc_ray.tfar = ray->t * - bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm); - /* bvh_instance_motion_push() returns the inverse transform but - * it's not needed here. */ - (void)ob_itfm; - - rtc_ray.org_x = P.x; - rtc_ray.org_y = P.y; - rtc_ray.org_z = P.z; - rtc_ray.dir_x = dir.x; - rtc_ray.dir_y = dir.y; - rtc_ray.dir_z = dir.z; - RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom); - kernel_assert(scene); - if (scene) { - rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray); - } - } - } - else { - rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); - } - - /* rtcOccluded1 sets tfar to -inf if a hit was found. */ - return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0); - ; + if (kernel_data.device_bvh) { + return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits); } -# endif /* __EMBREE__ */ +# endif # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { @@ -511,144 +140,55 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, } # endif /* __OBJECT_MOTION__ */ return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits); -# endif /* __KERNEL_OPTIX__ */ } -#endif +# endif -#ifdef __SHADOW_RECORD_ALL__ -ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, - IntegratorShadowState state, - ccl_private const Ray *ray, - uint visibility, - uint max_hits, - ccl_private uint *num_recorded_hits, - ccl_private float *throughput) -{ -# ifdef __KERNEL_OPTIX__ - uint p0 = state; - uint p1 = __float_as_uint(1.0f); /* Throughput. */ - uint p2 = 0; /* Number of hits. */ - uint p3 = max_hits; - uint p4 = visibility; - uint p5 = false; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } +/* Transparent shadow BVH traversal, recording multiple intersections. */ - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - ray_mask, - /* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */ - OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - 1, /* SBT offset for PG_HITS */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - *num_recorded_hits = uint16_unpack_from_uint_0(p2); - *throughput = __uint_as_float(p1); - - return p5; -# elif defined(__METALRT__) - - if (!scene_intersect_valid(ray)) { - return false; - } +# ifdef __SHADOW_RECORD_ALL__ -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); - return false; - } +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all +# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" - if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) { - kernel_assert(!"Invalid ift_shadow"); - return false; - } +# if defined(__HAIR__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair +# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" # endif - metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; - - metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } - - MetalRTIntersectionShadowPayload payload; - payload.self = ray->self; - payload.visibility = visibility; - payload.max_hits = max_hits; - payload.num_hits = 0; - payload.num_recorded_hits = 0; - payload.throughput = 1.0f; - payload.result = false; - payload.state = state; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } - - typename metalrt_intersector_type::result_type intersection; - -# if defined(__METALRT_MOTION__) - payload.time = ray->time; - intersection = metalrt_intersect.intersect(r, - metal_ancillaries->accel_struct, - ray_mask, - ray->time, - metal_ancillaries->ift_shadow, - payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload); +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" # endif - *num_recorded_hits = payload.num_recorded_hits; - *throughput = payload.throughput; - - return payload.result; +# if defined(__HAIR__) && defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion +# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD +# include "kernel/bvh/shadow_all.h" +# endif -# else - if (!scene_intersect_valid(ray)) { +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, + IntegratorShadowState state, + ccl_private const Ray *ray, + uint visibility, + uint max_hits, + ccl_private uint *num_recorded_hits, + ccl_private float *throughput) +{ + if (!intersection_ray_valid(ray)) { *num_recorded_hits = 0; *throughput = 1.0f; return false; } # ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL); - Intersection *isect_array = (Intersection *)state->shadow_isect; - ctx.isect_s = isect_array; - ctx.max_hits = max_hits; - ctx.ray = ray; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, visibility); - rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); - - *num_recorded_hits = ctx.num_recorded_hits; - *throughput = ctx.throughput; - return ctx.opaque_hit; + if (kernel_data.device_bvh) { + return kernel_embree_intersect_shadow_all( + kg, state, ray, visibility, max_hits, num_recorded_hits, throughput); } -# endif /* __EMBREE__ */ +# endif # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { @@ -662,7 +202,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, return bvh_intersect_shadow_all_motion( kg, ray, state, visibility, max_hits, num_recorded_hits, throughput); } -# endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ # ifdef __HAIR__ if (kernel_data.bvh.have_curves) { @@ -673,180 +213,89 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, return bvh_intersect_shadow_all( kg, ray, state, visibility, max_hits, num_recorded_hits, throughput); -# endif /* __KERNEL_OPTIX__ */ } -#endif /* __SHADOW_RECORD_ALL__ */ +# endif /* __SHADOW_RECORD_ALL__ */ + +/* Volume BVH traversal, for initializing or updating the volume stack. */ + +# if defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) + +# define BVH_FUNCTION_NAME bvh_intersect_volume +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "kernel/bvh/volume.h" + +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR +# include "kernel/bvh/volume.h" +# endif -#ifdef __VOLUME__ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, ccl_private const Ray *ray, ccl_private Intersection *isect, const uint visibility) { -# ifdef __KERNEL_OPTIX__ - uint p0 = 0; - uint p1 = 0; - uint p2 = 0; - uint p3 = 0; - uint p4 = visibility; - uint p5 = PRIMITIVE_NONE; - uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; - uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; - - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } - - optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, - ray->P, - ray->D, - 0.0f, - ray->t, - ray->time, - ray_mask, - /* Need to always call into __anyhit__kernel_optix_volume_test. */ - OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - 3, /* SBT offset for PG_HITV */ - 0, - 0, - p0, - p1, - p2, - p3, - p4, - p5, - p6, - p7); - - isect->t = __uint_as_float(p0); - isect->u = __uint_as_float(p1); - isect->v = __uint_as_float(p2); - isect->prim = p3; - isect->object = p4; - isect->type = p5; - - return p5 != PRIMITIVE_NONE; -# elif defined(__METALRT__) - - if (!scene_intersect_valid(ray)) { - return false; - } -# if defined(__KERNEL_DEBUG__) - if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { - kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + if (!intersection_ray_valid(ray)) { return false; } - if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { - kernel_assert(!"Invalid ift_default"); - return false; +# ifdef __OBJECT_MOTION__ + if (kernel_data.bvh.have_motion) { + return bvh_intersect_volume_motion(kg, ray, isect, visibility); } -# endif - - metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); - metalrt_intersector_type metalrt_intersect; +# endif /* __OBJECT_MOTION__ */ - metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); - if (!kernel_data.bvh.have_curves) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); - } + return bvh_intersect_volume(kg, ray, isect, visibility); +} +# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */ - MetalRTIntersectionPayload payload; - payload.self = ray->self; - payload.visibility = visibility; +/* Volume BVH traversal, for initializing or updating the volume stack. + * Variation that records multiple intersections at once. */ - typename metalrt_intersector_type::result_type intersection; +# if defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) - uint ray_mask = visibility & 0xFF; - if (0 == ray_mask && (visibility & ~0xFF) != 0) { - ray_mask = 0xFF; - } +# define BVH_FUNCTION_NAME bvh_intersect_volume_all +# define BVH_FUNCTION_FEATURES BVH_HAIR +# include "kernel/bvh/volume_all.h" -# if defined(__METALRT_MOTION__) - payload.time = ray->time; - intersection = metalrt_intersect.intersect(r, - metal_ancillaries->accel_struct, - ray_mask, - ray->time, - metal_ancillaries->ift_default, - payload); -# else - intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); +# if defined(__OBJECT_MOTION__) +# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR +# include "kernel/bvh/volume_all.h" # endif - if (intersection.type == intersection_type::none) { +ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint max_hits, + const uint visibility) +{ + if (!intersection_ray_valid(ray)) { return false; } - isect->prim = payload.prim; - isect->type = payload.type; - isect->object = intersection.user_instance_id; - - isect->t = intersection.distance; - if (intersection.type == intersection_type::triangle) { - isect->u = 1.0f - intersection.triangle_barycentric_coord.y - - intersection.triangle_barycentric_coord.x; - isect->v = intersection.triangle_barycentric_coord.x; - } - else { - isect->u = payload.u; - isect->v = payload.v; - } - - return isect->type != PRIMITIVE_NONE; - -# else - if (!scene_intersect_valid(ray)) { - return false; +# ifdef __EMBREE__ + if (kernel_data.device_bvh) { + return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility); } +# endif # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { - return bvh_intersect_volume_motion(kg, ray, isect, visibility); + return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); } # endif /* __OBJECT_MOTION__ */ - return bvh_intersect_volume(kg, ray, isect, visibility); -# endif /* __KERNEL_OPTIX__ */ + return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); } -#endif /* __VOLUME__ */ -#ifdef __VOLUME_RECORD_ALL__ -ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg, - ccl_private const Ray *ray, - ccl_private Intersection *isect, - const uint max_hits, - const uint visibility) -{ - if (!scene_intersect_valid(ray)) { - return false; - } +# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */ -# ifdef __EMBREE__ - if (kernel_data.bvh.scene) { - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL); - ctx.isect_s = isect; - ctx.max_hits = max_hits; - ctx.num_hits = 0; - ctx.ray = ray; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, visibility); - rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); - return ctx.num_hits; - } -# endif /* __EMBREE__ */ - -# ifdef __OBJECT_MOTION__ - if (kernel_data.bvh.have_motion) { - return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); - } -# endif /* __OBJECT_MOTION__ */ +# undef BVH_FEATURE +# undef BVH_NAME_JOIN +# undef BVH_NAME_EVAL +# undef BVH_FUNCTION_FULL_NAME - return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); -} -#endif /* __VOLUME_RECORD_ALL__ */ +#endif /* __BVH2__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/embree.h b/intern/cycles/kernel/bvh/embree.h deleted file mode 100644 index 4f7e6435daf..00000000000 --- a/intern/cycles/kernel/bvh/embree.h +++ /dev/null @@ -1,176 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2022 Blender Foundation. */ - -#pragma once - -#include <embree3/rtcore_ray.h> -#include <embree3/rtcore_scene.h> - -#include "kernel/device/cpu/compat.h" -#include "kernel/device/cpu/globals.h" - -#include "kernel/bvh/util.h" - -#include "util/vector.h" - -CCL_NAMESPACE_BEGIN - -struct CCLIntersectContext { - typedef enum { - RAY_REGULAR = 0, - RAY_SHADOW_ALL = 1, - RAY_LOCAL = 2, - RAY_SSS = 3, - RAY_VOLUME_ALL = 4, - } RayType; - - KernelGlobals kg; - RayType type; - - /* For avoiding self intersections */ - const Ray *ray; - - /* for shadow rays */ - Intersection *isect_s; - uint max_hits; - uint num_hits; - uint num_recorded_hits; - float throughput; - float max_t; - bool opaque_hit; - - /* for SSS Rays: */ - LocalIntersection *local_isect; - int local_object_id; - uint *lcg_state; - - CCLIntersectContext(KernelGlobals kg_, RayType type_) - { - kg = kg_; - type = type_; - ray = NULL; - max_hits = 1; - num_hits = 0; - num_recorded_hits = 0; - throughput = 1.0f; - max_t = FLT_MAX; - opaque_hit = false; - isect_s = NULL; - local_isect = NULL; - local_object_id = -1; - lcg_state = NULL; - } -}; - -class IntersectContext { - public: - IntersectContext(CCLIntersectContext *ctx) - { - rtcInitIntersectContext(&context); - userRayExt = ctx; - } - RTCIntersectContext context; - CCLIntersectContext *userRayExt; -}; - -ccl_device_inline void kernel_embree_setup_ray(const Ray &ray, - RTCRay &rtc_ray, - const uint visibility) -{ - rtc_ray.org_x = ray.P.x; - rtc_ray.org_y = ray.P.y; - rtc_ray.org_z = ray.P.z; - rtc_ray.dir_x = ray.D.x; - rtc_ray.dir_y = ray.D.y; - rtc_ray.dir_z = ray.D.z; - rtc_ray.tnear = 0.0f; - rtc_ray.tfar = ray.t; - rtc_ray.time = ray.time; - rtc_ray.mask = visibility; -} - -ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray, - RTCRayHit &rayhit, - const uint visibility) -{ - kernel_embree_setup_ray(ray, rayhit.ray, visibility); - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; -} - -ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg, - const RTCHit *hit, - const Ray *ray) -{ - bool status = false; - if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { - const int oID = hit->instID[0] / 2; - if ((ray->self.object == oID) || (ray->self.light_object == oID)) { - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); - const int pID = hit->primID + - (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - status = intersection_skip_self_shadow(ray->self, oID, pID); - } - } - else { - const int oID = hit->geomID / 2; - if ((ray->self.object == oID) || (ray->self.light_object == oID)) { - const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); - status = intersection_skip_self_shadow(ray->self, oID, pID); - } - } - - return status; -} - -ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg, - const RTCRay *ray, - const RTCHit *hit, - Intersection *isect) -{ - isect->t = ray->tfar; - if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); - isect->prim = hit->primID + - (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - isect->object = hit->instID[0] / 2; - } - else { - isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); - isect->object = hit->geomID / 2; - } - - const bool is_hair = hit->geomID & 1; - if (is_hair) { - const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, isect->prim); - isect->type = segment.type; - isect->prim = segment.prim; - isect->u = hit->u; - isect->v = hit->v; - } - else { - isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type; - isect->u = 1.0f - hit->v - hit->u; - isect->v = hit->u; - } -} - -ccl_device_inline void kernel_embree_convert_sss_hit( - KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object) -{ - isect->u = 1.0f - hit->v - hit->u; - isect->v = hit->u; - isect->t = ray->tfar; - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, object * 2)); - isect->prim = hit->primID + - (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - isect->object = object; - isect->type = kernel_tex_fetch(__objects, object).primitive_type; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/local.h b/intern/cycles/kernel/bvh/local.h index 0d05e09d75f..add61adc126 100644 --- a/intern/cycles/kernel/bvh/local.h +++ b/intern/cycles/kernel/bvh/local.h @@ -41,27 +41,27 @@ ccl_device_inline /* traversal variables in registers */ int stack_ptr = 0; - int node_addr = kernel_tex_fetch(__object_node, local_object); + int node_addr = kernel_data_fetch(object_node, local_object); /* ray parameters in registers */ float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + float tmin = ray->tmin; int object = OBJECT_NONE; - float isect_t = ray->t; + float isect_t = ray->tmax; if (local_isect != NULL) { local_isect->num_hits = 0; } kernel_assert((local_isect == NULL) == (max_hits == 0)); - const int object_flag = kernel_tex_fetch(__object_flag, local_object); + const int object_flag = kernel_data_fetch(object_flag, local_object); if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; - isect_t *= bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir); #else - isect_t *= bvh_instance_push(kg, local_object, ray, &P, &dir, &idir); + bvh_instance_push(kg, local_object, ray, &P, &dir, &idir); #endif object = local_object; } @@ -73,7 +73,7 @@ ccl_device_inline while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { int node_addr_child1, traverse_mask; float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); traverse_mask = NODE_INTERSECT(kg, P, @@ -81,6 +81,7 @@ ccl_device_inline dir, #endif idir, + tmin, isect_t, node_addr, PATH_RAY_ALL_VISIBILITY, @@ -117,7 +118,7 @@ ccl_device_inline /* if node is leaf, fetch triangle list */ if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + float4 leaf = kernel_data_fetch(bvh_leaf_nodes, (-node_addr - 1)); int prim_addr = __float_as_int(leaf.x); const int prim_addr2 = __float_as_int(leaf.y); @@ -132,18 +133,18 @@ ccl_device_inline case PRIMITIVE_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* Only intersect with matching object, for instanced objects we * already know we are only intersecting the right object. */ if (object == OBJECT_NONE) { - if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { + if (kernel_data_fetch(prim_object, prim_addr) != local_object) { continue; } } /* Skip self intersection. */ - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self_local(ray->self, prim)) { continue; } @@ -155,6 +156,7 @@ ccl_device_inline local_object, prim, prim_addr, + tmin, isect_t, lcg_state, max_hits)) { @@ -167,18 +169,18 @@ ccl_device_inline case PRIMITIVE_MOTION_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* Only intersect with matching object, for instanced objects we * already know we are only intersecting the right object. */ if (object == OBJECT_NONE) { - if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { + if (kernel_data_fetch(prim_object, prim_addr) != local_object) { continue; } } /* Skip self intersection. */ - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self_local(ray->self, prim)) { continue; } @@ -191,6 +193,7 @@ ccl_device_inline local_object, prim, prim_addr, + tmin, isect_t, lcg_state, max_hits)) { diff --git a/intern/cycles/kernel/bvh/metal.h b/intern/cycles/kernel/bvh/metal.h deleted file mode 100644 index 04289e259a7..00000000000 --- a/intern/cycles/kernel/bvh/metal.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2021-2022 Blender Foundation */ - -struct MetalRTIntersectionPayload { - RaySelfPrimitives self; - uint visibility; - float u, v; - int prim; - int type; -#if defined(__METALRT_MOTION__) - float time; -#endif -}; - -struct MetalRTIntersectionLocalPayload { - RaySelfPrimitives self; - uint local_object; - uint lcg_state; - short max_hits; - bool has_lcg_state; - bool result; - LocalIntersection local_isect; -}; - -struct MetalRTIntersectionShadowPayload { - RaySelfPrimitives self; - uint visibility; -#if defined(__METALRT_MOTION__) - float time; -#endif - int state; - float throughput; - short max_hits; - short num_hits; - short num_recorded_hits; - bool result; -}; diff --git a/intern/cycles/kernel/bvh/nodes.h b/intern/cycles/kernel/bvh/nodes.h index fd475dcd5e9..e02841fad16 100644 --- a/intern/cycles/kernel/bvh/nodes.h +++ b/intern/cycles/kernel/bvh/nodes.h @@ -9,16 +9,17 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg { Transform space; const int child_addr = node_addr + child * 3; - space.x = kernel_tex_fetch(__bvh_nodes, child_addr + 1); - space.y = kernel_tex_fetch(__bvh_nodes, child_addr + 2); - space.z = kernel_tex_fetch(__bvh_nodes, child_addr + 3); + space.x = kernel_data_fetch(bvh_nodes, child_addr + 1); + space.y = kernel_data_fetch(bvh_nodes, child_addr + 2); + space.z = kernel_data_fetch(bvh_nodes, child_addr + 3); return space; } ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, const float3 P, const float3 idir, - const float t, + const float tmin, + const float tmax, const int node_addr, const uint visibility, float dist[2]) @@ -26,11 +27,11 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, /* fetch node data */ #ifdef __VISIBILITY_FLAG__ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); #endif - float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1); - float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2); - float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3); + float4 node0 = kernel_data_fetch(bvh_nodes, node_addr + 1); + float4 node1 = kernel_data_fetch(bvh_nodes, node_addr + 2); + float4 node2 = kernel_data_fetch(bvh_nodes, node_addr + 3); /* intersect ray against child nodes */ float c0lox = (node0.x - P.x) * idir.x; @@ -39,8 +40,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, float c0hiy = (node1.z - P.y) * idir.y; float c0loz = (node2.x - P.z) * idir.z; float c0hiz = (node2.z - P.z) * idir.z; - float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); - float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); + float c0min = max4(tmin, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); + float c0max = min4(tmax, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); float c1lox = (node0.y - P.x) * idir.x; float c1hix = (node0.w - P.x) * idir.x; @@ -48,8 +49,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, float c1hiy = (node1.w - P.y) * idir.y; float c1loz = (node2.y - P.z) * idir.z; float c1hiz = (node2.w - P.z) * idir.z; - float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); - float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); + float c1min = max4(tmin, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); + float c1max = min4(tmax, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); dist[0] = c0min; dist[1] = c1min; @@ -66,7 +67,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg, const float3 P, const float3 dir, - const float t, + const float tmin, + const float tmax, int node_addr, int child, float dist[2]) @@ -83,8 +85,8 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg, const float far_x = max(lower_xyz.x, upper_xyz.x); const float far_y = max(lower_xyz.y, upper_xyz.y); const float far_z = max(lower_xyz.z, upper_xyz.z); - const float tnear = max4(0.0f, near_x, near_y, near_z); - const float tfar = min4(t, far_x, far_y, far_z); + const float tnear = max4(tmin, near_x, near_y, near_z); + const float tfar = min4(tmax, far_x, far_y, far_z); *dist = tnear; return tnear <= tfar; } @@ -93,16 +95,17 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg, const float3 P, const float3 dir, const float3 idir, - const float t, + const float tmin, + const float tmax, const int node_addr, const uint visibility, float dist[2]) { int mask = 0; #ifdef __VISIBILITY_FLAG__ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); #endif - if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) { + if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 0, &dist[0])) { #ifdef __VISIBILITY_FLAG__ if ((__float_as_uint(cnodes.x) & visibility)) #endif @@ -110,7 +113,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg, mask |= 1; } } - if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) { + if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 1, &dist[1])) { #ifdef __VISIBILITY_FLAG__ if ((__float_as_uint(cnodes.y) & visibility)) #endif @@ -125,16 +128,17 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg, const float3 P, const float3 dir, const float3 idir, - const float t, + const float tmin, + const float tmax, const int node_addr, const uint visibility, float dist[2]) { - float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); + float4 node = kernel_data_fetch(bvh_nodes, node_addr); if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist); + return bvh_unaligned_node_intersect(kg, P, dir, idir, tmin, tmax, node_addr, visibility, dist); } else { - return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist); + return bvh_aligned_node_intersect(kg, P, idir, tmin, tmax, node_addr, visibility, dist); } } diff --git a/intern/cycles/kernel/bvh/shadow_all.h b/intern/cycles/kernel/bvh/shadow_all.h index 2f58929c1e5..2ffe1496c72 100644 --- a/intern/cycles/kernel/bvh/shadow_all.h +++ b/intern/cycles/kernel/bvh/shadow_all.h @@ -49,26 +49,15 @@ ccl_device_inline float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + float tmin = ray->tmin; int object = OBJECT_NONE; uint num_hits = 0; -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - /* Max distance in world space. May be dynamically reduced when max number of * recorded hits is exceeded and we no longer need to find hits beyond the max * distance found. */ - float t_max_world = ray->t; - - /* Current maximum distance to the intersection. - * Is calculated as a ray length, transformed to an object space when entering - * instance node. */ - float t_max_current = ray->t; - - /* Conversion from world to local space for the current instance if any, 1.0 - * otherwise. */ - float t_world_to_instance = 1.0f; + const float tmax = ray->tmax; + float tmax_hits = tmax; *r_num_recorded_hits = 0; *r_throughput = 1.0f; @@ -80,7 +69,7 @@ ccl_device_inline while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { int node_addr_child1, traverse_mask; float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); traverse_mask = NODE_INTERSECT(kg, P, @@ -88,7 +77,8 @@ ccl_device_inline dir, #endif idir, - t_max_current, + tmin, + tmax, node_addr, visibility, dist); @@ -124,7 +114,7 @@ ccl_device_inline /* if node is leaf, fetch triangle list */ if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + float4 leaf = kernel_data_fetch(bvh_leaf_nodes, (-node_addr - 1)); int prim_addr = __float_as_int(leaf.x); if (prim_addr >= 0) { @@ -137,7 +127,7 @@ ccl_device_inline /* primitive intersection */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == + kernel_assert((kernel_data_fetch(prim_type, prim_addr) & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); bool hit; @@ -147,9 +137,9 @@ ccl_device_inline Intersection isect ccl_optional_struct_init; const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self_shadow(ray->self, prim_object, prim)) { continue; } @@ -157,7 +147,7 @@ ccl_device_inline switch (type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { hit = triangle_intersect( - kg, &isect, P, dir, t_max_current, visibility, prim_object, prim, prim_addr); + kg, &isect, P, dir, tmin, tmax, visibility, prim_object, prim, prim_addr); break; } #if BVH_FEATURE(BVH_MOTION) @@ -166,7 +156,8 @@ ccl_device_inline &isect, P, dir, - t_max_current, + tmin, + tmax, ray->time, visibility, prim_object, @@ -181,16 +172,16 @@ ccl_device_inline case PRIMITIVE_CURVE_RIBBON: case PRIMITIVE_MOTION_CURVE_RIBBON: { if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + const float2 prim_time = kernel_data_fetch(prim_time, prim_addr); if (ray->time < prim_time.x || ray->time > prim_time.y) { hit = false; break; } } - const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); + const int curve_type = kernel_data_fetch(prim_type, prim_addr); hit = curve_intersect( - kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, curve_type); + kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, curve_type); break; } @@ -199,16 +190,16 @@ ccl_device_inline case PRIMITIVE_POINT: case PRIMITIVE_MOTION_POINT: { if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + const float2 prim_time = kernel_data_fetch(prim_time, prim_addr); if (ray->time < prim_time.x || ray->time > prim_time.y) { hit = false; break; } } - const int point_type = kernel_tex_fetch(__prim_type, prim_addr); + const int point_type = kernel_data_fetch(prim_type, prim_addr); hit = point_intersect( - kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, point_type); + kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, point_type); break; } #endif /* BVH_FEATURE(BVH_POINTCLOUD) */ @@ -220,9 +211,6 @@ ccl_device_inline /* shadow ray early termination */ if (hit) { - /* Convert intersection distance to world space. */ - isect.t /= t_world_to_instance; - /* detect if this surface has a shader with transparent shadows */ /* todo: optimize so primitive visibility flag indicates if * the primitive has a transparent shadow shader? */ @@ -254,7 +242,7 @@ ccl_device_inline if (record_intersection) { /* Test if we need to record this transparent intersection. */ const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); - if (*r_num_recorded_hits < max_record_hits || isect.t < t_max_world) { + if (*r_num_recorded_hits < max_record_hits || isect.t < tmax_hits) { /* If maximum number of hits was reached, replace the intersection with the * highest distance. We want to find the N closest intersections. */ const uint num_recorded_hits = min(*r_num_recorded_hits, max_record_hits); @@ -276,7 +264,7 @@ ccl_device_inline } /* Limit the ray distance and stop counting hits beyond this. */ - t_max_world = max(isect.t, max_t); + tmax_hits = max(isect.t, max_t); } integrator_state_write_shadow_isect(state, &isect, isect_index); @@ -291,23 +279,19 @@ ccl_device_inline } else { /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + object = kernel_data_fetch(prim_object, -prim_addr - 1); #if BVH_FEATURE(BVH_MOTION) - t_world_to_instance = bvh_instance_motion_push( - kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif - /* Convert intersection to object space. */ - t_max_current *= t_world_to_instance; - ++stack_ptr; kernel_assert(stack_ptr < BVH_STACK_SIZE); traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - node_addr = kernel_tex_fetch(__object_node, object); + node_addr = kernel_data_fetch(object_node, object); } } } while (node_addr != ENTRYPOINT_SENTINEL); @@ -316,17 +300,9 @@ ccl_device_inline kernel_assert(object != OBJECT_NONE); /* Instance pop. */ -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); -#else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); -#endif - - /* Restore world space ray length. */ - t_max_current = ray->t; + bvh_instance_pop(ray, &P, &dir, &idir); object = OBJECT_NONE; - t_world_to_instance = 1.0f; node_addr = traversal_stack[stack_ptr]; --stack_ptr; } diff --git a/intern/cycles/kernel/bvh/traversal.h b/intern/cycles/kernel/bvh/traversal.h index 1181d4bfdee..f3744aca5c0 100644 --- a/intern/cycles/kernel/bvh/traversal.h +++ b/intern/cycles/kernel/bvh/traversal.h @@ -43,13 +43,10 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + const float tmin = ray->tmin; int object = OBJECT_NONE; -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; + isect->t = ray->tmax; isect->u = 0.0f; isect->v = 0.0f; isect->prim = PRIM_NONE; @@ -62,7 +59,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { int node_addr_child1, traverse_mask; float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); { traverse_mask = NODE_INTERSECT(kg, @@ -71,6 +68,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, dir, #endif idir, + tmin, isect->t, node_addr, visibility, @@ -108,7 +106,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, /* if node is leaf, fetch triangle list */ if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + float4 leaf = kernel_data_fetch(bvh_leaf_nodes, (-node_addr - 1)); int prim_addr = __float_as_int(leaf.x); if (prim_addr >= 0) { @@ -121,20 +119,28 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, /* primitive intersection */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self_shadow(ray->self, prim_object, prim)) { continue; } switch (type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { - if (triangle_intersect( - kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr)) { + if (triangle_intersect(kg, + isect, + P, + dir, + tmin, + isect->t, + visibility, + prim_object, + prim, + prim_addr)) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) return true; @@ -147,6 +153,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, isect, P, dir, + tmin, isect->t, ray->time, visibility, @@ -166,15 +173,15 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, case PRIMITIVE_CURVE_RIBBON: case PRIMITIVE_MOTION_CURVE_RIBBON: { if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + const float2 prim_time = kernel_data_fetch(prim_time, prim_addr); if (ray->time < prim_time.x || ray->time > prim_time.y) { break; } } - const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); + const int curve_type = kernel_data_fetch(prim_type, prim_addr); const bool hit = curve_intersect( - kg, isect, P, dir, isect->t, prim_object, prim, ray->time, curve_type); + kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, curve_type); if (hit) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) @@ -187,15 +194,15 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, case PRIMITIVE_POINT: case PRIMITIVE_MOTION_POINT: { if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + const float2 prim_time = kernel_data_fetch(prim_time, prim_addr); if (ray->time < prim_time.x || ray->time > prim_time.y) { break; } } - const int point_type = kernel_tex_fetch(__prim_type, prim_addr); + const int point_type = kernel_data_fetch(prim_type, prim_addr); const bool hit = point_intersect( - kg, isect, P, dir, isect->t, prim_object, prim, ray->time, point_type); + kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, point_type); if (hit) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) @@ -209,19 +216,19 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, } else { /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + object = kernel_data_fetch(prim_object, -prim_addr - 1); #if BVH_FEATURE(BVH_MOTION) - isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif ++stack_ptr; kernel_assert(stack_ptr < BVH_STACK_SIZE); traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - node_addr = kernel_tex_fetch(__object_node, object); + node_addr = kernel_data_fetch(object_node, object); } } } while (node_addr != ENTRYPOINT_SENTINEL); @@ -230,11 +237,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, kernel_assert(object != OBJECT_NONE); /* instance pop */ -#if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); -#else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); -#endif + bvh_instance_pop(ray, &P, &dir, &idir); object = OBJECT_NONE; node_addr = traversal_stack[stack_ptr]; diff --git a/intern/cycles/kernel/bvh/util.h b/intern/cycles/kernel/bvh/util.h index 71045157372..a57703a8b8c 100644 --- a/intern/cycles/kernel/bvh/util.h +++ b/intern/cycles/kernel/bvh/util.h @@ -5,7 +5,59 @@ CCL_NAMESPACE_BEGIN -#if defined(__KERNEL_CPU__) +ccl_device_inline bool intersection_ray_valid(ccl_private const Ray *ray) +{ + /* NOTE: Due to some vectorization code non-finite origin point might + * cause lots of false-positive intersections which will overflow traversal + * stack. + * This code is a quick way to perform early output, to avoid crashes in + * such cases. + * From production scenes so far it seems it's enough to test first element + * only. + * Scene intersection may also called with empty rays for conditional trace + * calls that evaluate to false, so filter those out. + */ + return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f; +} + +/* Offset intersection distance by the smallest possible amount, to skip + * intersections at this distance. This works in cases where the ray start + * position is unchanged and only tmin is updated, since for self + * intersection we'll be comparing against the exact same distances. */ +ccl_device_forceinline float intersection_t_offset(const float t) +{ + /* This is a simplified version of `nextafterf(t, FLT_MAX)`, only dealing with + * non-negative and finite t. */ + kernel_assert(t >= 0.0f && isfinite_safe(t)); + const uint32_t bits = (t == 0.0f) ? 1 : __float_as_uint(t) + 1; + return __uint_as_float(bits); +} + +/* Ray offset to avoid self intersection. + * + * This function can be used to compute a modified ray start position for rays + * leaving from a surface. This is from: + * "A Fast and Robust Method for Avoiding Self-Intersection" + * Ray Tracing Gems, chapter 6. + */ +ccl_device_inline float3 ray_offset(const float3 P, const float3 Ng) +{ + const float int_scale = 256.0f; + const int3 of_i = make_int3( + (int)(int_scale * Ng.x), (int)(int_scale * Ng.y), (int)(int_scale * Ng.z)); + + const float3 p_i = make_float3( + __int_as_float(__float_as_int(P.x) + ((P.x < 0) ? -of_i.x : of_i.x)), + __int_as_float(__float_as_int(P.y) + ((P.y < 0) ? -of_i.y : of_i.y)), + __int_as_float(__float_as_int(P.z) + ((P.z < 0) ? -of_i.z : of_i.z))); + const float origin = 1.0f / 32.0f; + const float float_scale = 1.0f / 65536.0f; + return make_float3(fabsf(P.x) < origin ? P.x + float_scale * Ng.x : p_i.x, + fabsf(P.y) < origin ? P.y + float_scale * Ng.y : p_i.y, + fabsf(P.z) < origin ? P.z + float_scale * Ng.z : p_i.z); +} + +#ifndef __KERNEL_GPU__ ccl_device int intersections_compare(const void *a, const void *b) { const Intersection *isect_a = (const Intersection *)a; @@ -53,20 +105,20 @@ ccl_device_forceinline int intersection_get_shader_flags(KernelGlobals kg, int shader = 0; if (type & PRIMITIVE_TRIANGLE) { - shader = kernel_tex_fetch(__tri_shader, prim); + shader = kernel_data_fetch(tri_shader, prim); } #ifdef __POINTCLOUD__ else if (type & PRIMITIVE_POINT) { - shader = kernel_tex_fetch(__points_shader, prim); + shader = kernel_data_fetch(points_shader, prim); } #endif #ifdef __HAIR__ else if (type & PRIMITIVE_CURVE) { - shader = kernel_tex_fetch(__curves, prim).shader_id; + shader = kernel_data_fetch(curves, prim).shader_id; } #endif - return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; + return kernel_data_fetch(shaders, (shader & SHADER_MASK)).flags; } ccl_device_forceinline int intersection_get_shader_from_isect_prim(KernelGlobals kg, @@ -76,16 +128,16 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim(KernelGlobals int shader = 0; if (isect_type & PRIMITIVE_TRIANGLE) { - shader = kernel_tex_fetch(__tri_shader, prim); + shader = kernel_data_fetch(tri_shader, prim); } #ifdef __POINTCLOUD__ else if (isect_type & PRIMITIVE_POINT) { - shader = kernel_tex_fetch(__points_shader, prim); + shader = kernel_data_fetch(points_shader, prim); } #endif #ifdef __HAIR__ else if (isect_type & PRIMITIVE_CURVE) { - shader = kernel_tex_fetch(__curves, prim).shader_id; + shader = kernel_data_fetch(curves, prim).shader_id; } #endif @@ -101,7 +153,7 @@ ccl_device_forceinline int intersection_get_shader( ccl_device_forceinline int intersection_get_object_flags( KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect) { - return kernel_tex_fetch(__object_flag, isect->object); + return kernel_data_fetch(object_flag, isect->object); } /* TODO: find a better (faster) solution for this. Maybe store offset per object for @@ -110,27 +162,27 @@ ccl_device_inline int intersection_find_attribute(KernelGlobals kg, const int object, const uint id) { - uint attr_offset = kernel_tex_fetch(__objects, object).attribute_map_offset; - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + uint attr_offset = kernel_data_fetch(objects, object).attribute_map_offset; + AttributeMap attr_map = kernel_data_fetch(attributes_map, attr_offset); - while (attr_map.x != id) { - if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) { - if (UNLIKELY(attr_map.y == 0)) { + while (attr_map.id != id) { + if (UNLIKELY(attr_map.id == ATTR_STD_NONE)) { + if (UNLIKELY(attr_map.element == 0)) { return (int)ATTR_STD_NOT_FOUND; } else { /* Chain jump to a different part of the table. */ - attr_offset = attr_map.z; + attr_offset = attr_map.offset; } } else { attr_offset += ATTR_PRIM_TYPES; } - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + attr_map = kernel_data_fetch(attributes_map, attr_offset); } /* return result */ - return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + return (attr_map.element == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.offset; } /* Transparent Shadows */ @@ -151,12 +203,12 @@ ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg, } /* Interpolate transparency between curve keys. */ - const KernelCurve kcurve = kernel_tex_fetch(__curves, prim); + const KernelCurve kcurve = kernel_data_fetch(curves, prim); const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(kcurve.type); const int k1 = k0 + 1; - const float f0 = kernel_tex_fetch(__attributes_float, offset + k0); - const float f1 = kernel_tex_fetch(__attributes_float, offset + k1); + const float f0 = kernel_data_fetch(attributes_float, offset + k0); + const float f1 = kernel_data_fetch(attributes_float, offset + k1); return (1.0f - u) * f0 + u * f1; } diff --git a/intern/cycles/kernel/bvh/volume.h b/intern/cycles/kernel/bvh/volume.h index d711b3abbf4..664c692dd3d 100644 --- a/intern/cycles/kernel/bvh/volume.h +++ b/intern/cycles/kernel/bvh/volume.h @@ -46,13 +46,10 @@ ccl_device_inline float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + const float tmin = ray->tmin; int object = OBJECT_NONE; -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; + isect->t = ray->tmax; isect->u = 0.0f; isect->v = 0.0f; isect->prim = PRIM_NONE; @@ -65,7 +62,7 @@ ccl_device_inline while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { int node_addr_child1, traverse_mask; float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); traverse_mask = NODE_INTERSECT(kg, P, @@ -73,6 +70,7 @@ ccl_device_inline dir, #endif idir, + tmin, isect->t, node_addr, visibility, @@ -109,7 +107,7 @@ ccl_device_inline /* if node is leaf, fetch triangle list */ if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + float4 leaf = kernel_data_fetch(bvh_leaf_nodes, (-node_addr - 1)); int prim_addr = __float_as_int(leaf.x); if (prim_addr >= 0) { @@ -125,22 +123,22 @@ ccl_device_inline case PRIMITIVE_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* only primitives from volume object */ const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self(ray->self, prim_object, prim)) { continue; } - int object_flag = kernel_tex_fetch(__object_flag, prim_object); + int object_flag = kernel_data_fetch(object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; } triangle_intersect( - kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr); + kg, isect, P, dir, tmin, isect->t, visibility, prim_object, prim, prim_addr); } break; } @@ -148,16 +146,16 @@ ccl_device_inline case PRIMITIVE_MOTION_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* only primitives from volume object */ const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self(ray->self, prim_object, prim)) { continue; } - int object_flag = kernel_tex_fetch(__object_flag, prim_object); + int object_flag = kernel_data_fetch(object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; } @@ -165,6 +163,7 @@ ccl_device_inline isect, P, dir, + tmin, isect->t, ray->time, visibility, @@ -182,20 +181,20 @@ ccl_device_inline } else { /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); - int object_flag = kernel_tex_fetch(__object_flag, object); + object = kernel_data_fetch(prim_object, -prim_addr - 1); + int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_HAS_VOLUME) { #if BVH_FEATURE(BVH_MOTION) - isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif ++stack_ptr; kernel_assert(stack_ptr < BVH_STACK_SIZE); traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - node_addr = kernel_tex_fetch(__object_node, object); + node_addr = kernel_data_fetch(object_node, object); } else { /* pop */ @@ -211,11 +210,7 @@ ccl_device_inline kernel_assert(object != OBJECT_NONE); /* instance pop */ -#if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); -#else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); -#endif + bvh_instance_pop(ray, &P, &dir, &idir); object = OBJECT_NONE; node_addr = traversal_stack[stack_ptr]; diff --git a/intern/cycles/kernel/bvh/volume_all.h b/intern/cycles/kernel/bvh/volume_all.h index a969bae14a1..721eb555d4d 100644 --- a/intern/cycles/kernel/bvh/volume_all.h +++ b/intern/cycles/kernel/bvh/volume_all.h @@ -44,21 +44,17 @@ ccl_device_inline int node_addr = kernel_data.bvh.root; /* ray parameters in registers */ - const float tmax = ray->t; float3 P = ray->P; float3 dir = bvh_clamp_direction(ray->D); float3 idir = bvh_inverse_direction(dir); + const float tmin = ray->tmin; int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif + float isect_t = ray->tmax; int num_hits_in_instance = 0; uint num_hits = 0; - isect_array->t = tmax; + isect_array->t = ray->tmax; /* traversal loop */ do { @@ -67,7 +63,7 @@ ccl_device_inline while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { int node_addr_child1, traverse_mask; float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0); traverse_mask = NODE_INTERSECT(kg, P, @@ -75,6 +71,7 @@ ccl_device_inline dir, #endif idir, + tmin, isect_t, node_addr, visibility, @@ -111,7 +108,7 @@ ccl_device_inline /* if node is leaf, fetch triangle list */ if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + float4 leaf = kernel_data_fetch(bvh_leaf_nodes, (-node_addr - 1)); int prim_addr = __float_as_int(leaf.x); if (prim_addr >= 0) { @@ -128,21 +125,29 @@ ccl_device_inline case PRIMITIVE_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* only primitives from volume object */ const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self(ray->self, prim_object, prim)) { continue; } - int object_flag = kernel_tex_fetch(__object_flag, prim_object); + int object_flag = kernel_data_fetch(object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; } - hit = triangle_intersect( - kg, isect_array, P, dir, isect_t, visibility, prim_object, prim, prim_addr); + hit = triangle_intersect(kg, + isect_array, + P, + dir, + tmin, + isect_t, + visibility, + prim_object, + prim, + prim_addr); if (hit) { /* Move on to next entry in intersections array. */ isect_array++; @@ -150,18 +155,6 @@ ccl_device_inline num_hits_in_instance++; isect_array->t = isect_t; if (num_hits == max_hits) { - if (object != OBJECT_NONE) { -#if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -#else - Transform itfm = object_fetch_transform( - kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -#endif - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } return num_hits; } } @@ -172,16 +165,16 @@ ccl_device_inline case PRIMITIVE_MOTION_TRIANGLE: { /* intersect ray against primitive */ for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + kernel_assert(kernel_data_fetch(prim_type, prim_addr) == type); /* only primitives from volume object */ const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : + kernel_data_fetch(prim_object, prim_addr) : object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const int prim = kernel_data_fetch(prim_index, prim_addr); if (intersection_skip_self(ray->self, prim_object, prim)) { continue; } - int object_flag = kernel_tex_fetch(__object_flag, prim_object); + int object_flag = kernel_data_fetch(object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; } @@ -189,6 +182,7 @@ ccl_device_inline isect_array, P, dir, + tmin, isect_t, ray->time, visibility, @@ -202,18 +196,6 @@ ccl_device_inline num_hits_in_instance++; isect_array->t = isect_t; if (num_hits == max_hits) { - if (object != OBJECT_NONE) { -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform( - kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } return num_hits; } } @@ -228,13 +210,13 @@ ccl_device_inline } else { /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); - int object_flag = kernel_tex_fetch(__object_flag, object); + object = kernel_data_fetch(prim_object, -prim_addr - 1); + int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_HAS_VOLUME) { #if BVH_FEATURE(BVH_MOTION) - isect_t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir); #else - isect_t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); + bvh_instance_push(kg, object, ray, &P, &dir, &idir); #endif num_hits_in_instance = 0; @@ -244,7 +226,7 @@ ccl_device_inline kernel_assert(stack_ptr < BVH_STACK_SIZE); traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - node_addr = kernel_tex_fetch(__object_node, object); + node_addr = kernel_data_fetch(object_node, object); } else { /* pop */ @@ -260,28 +242,7 @@ ccl_device_inline kernel_assert(object != OBJECT_NONE); /* Instance pop. */ - if (num_hits_in_instance) { - float t_fac; -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); -#else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); -#endif - /* Scale isect->t to adjust for instancing. */ - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } - else { -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); -#else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); -#endif - } - - isect_t = tmax; - isect_array->t = isect_t; + bvh_instance_pop(ray, &P, &dir, &idir); object = OBJECT_NONE; node_addr = traversal_stack[stack_ptr]; |