diff options
24 files changed, 1010 insertions, 32 deletions
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index ae6655eb27b..d3c8e4db6d0 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -40,8 +40,11 @@ const char *bvh_layout_name(BVHLayout layout) return "EMBREE"; case BVH_LAYOUT_OPTIX: return "OPTIX"; + case BVH_LAYOUT_METAL: + return "METAL"; case BVH_LAYOUT_MULTI_OPTIX: case BVH_LAYOUT_MULTI_OPTIX_EMBREE: + case BVH_LAYOUT_MULTI_METAL_EMBREE: return "MULTI"; case BVH_LAYOUT_ALL: return "ALL"; @@ -105,7 +108,10 @@ BVH *BVH::create(const BVHParams ¶ms, #endif case BVH_LAYOUT_MULTI_OPTIX: case BVH_LAYOUT_MULTI_OPTIX_EMBREE: + case BVH_LAYOUT_MULTI_METAL_EMBREE: return new BVHMulti(params, geometry, objects); + case BVH_LAYOUT_METAL: + /* host-side changes for BVH_LAYOUT_METAL are imminent */ case BVH_LAYOUT_NONE: case BVH_LAYOUT_ALL: break; diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp index 2ad76de70ca..62b9cc93dae 100644 --- a/intern/cycles/device/cpu/device_impl.cpp +++ b/intern/cycles/device/cpu/device_impl.cpp @@ -274,7 +274,8 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) { #ifdef WITH_EMBREE if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE || - bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) { + bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE || + bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE) { BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh); if (refit) { bvh_embree->refit(progress); diff --git a/intern/cycles/device/multi/device.cpp b/intern/cycles/device/multi/device.cpp index e319246d4f4..2513df63489 100644 --- a/intern/cycles/device/multi/device.cpp +++ b/intern/cycles/device/multi/device.cpp @@ -129,6 +129,10 @@ class MultiDevice : public Device { if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) { return BVH_LAYOUT_MULTI_OPTIX_EMBREE; } + const BVHLayoutMask BVH_LAYOUT_METAL_EMBREE = (BVH_LAYOUT_METAL | BVH_LAYOUT_EMBREE); + if ((bvh_layout_mask_all & BVH_LAYOUT_METAL_EMBREE) == BVH_LAYOUT_METAL_EMBREE) { + return BVH_LAYOUT_MULTI_METAL_EMBREE; + } return bvh_layout_mask; } @@ -151,7 +155,8 @@ class MultiDevice : public Device { } assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX || - bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE); + bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE || + bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE); BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh); bvh_multi->sub_bvhs.resize(devices.size()); diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index d759399b04d..674eb702814 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -207,6 +207,7 @@ set(SRC_KERNEL_BVH_HEADERS bvh/volume.h bvh/volume_all.h bvh/embree.h + bvh/metal.h ) set(SRC_KERNEL_CAMERA_HEADERS diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index 0e083812355..33d2e44471a 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -31,6 +31,10 @@ # include "kernel/bvh/embree.h" #endif +#ifdef __METALRT__ +# include "kernel/bvh/metal.h" +#endif + #include "kernel/bvh/types.h" #include "kernel/bvh/util.h" @@ -38,7 +42,7 @@ CCL_NAMESPACE_BEGIN -#ifndef __KERNEL_OPTIX__ +#if !defined(__KERNEL_GPU_RAYTRACING__) /* Regular BVH traversal */ @@ -139,7 +143,7 @@ CCL_NAMESPACE_BEGIN # undef BVH_NAME_EVAL # undef BVH_FUNCTION_FULL_NAME -#endif /* __KERNEL_OPTIX__ */ +#endif /* !defined(__KERNEL_GPU_RAYTRACING__) */ ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray) { @@ -205,7 +209,95 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, isect->type = p5; return p5 != PRIMITIVE_NONE; -#else /* __KERNEL_OPTIX__ */ +#elif defined(__METALRT__) + + if (!scene_intersect_valid(ray)) { + isect->t = ray->t; + isect->type = PRIMITIVE_NONE; + return false; + } + +# if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + isect->t = ray->t; + isect->type = PRIMITIVE_NONE; + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { + isect->t = ray->t; + isect->type = PRIMITIVE_NONE; + kernel_assert(!"Invalid ift_default"); + return false; + } +# endif + + metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); + metalrt_intersector_type metalrt_intersect; + + if (!kernel_data.bvh.have_curves) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionPayload payload; + payload.u = 0.0f; + payload.v = 0.0f; + payload.visibility = visibility; + + typename metalrt_intersector_type::result_type intersection; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + /* No further intersector setup required: Default MetalRT behaviour is anyhit */ + } + else if (visibility & PATH_RAY_SHADOW_OPAQUE) { + /* No further intersector setup required: Shadow ray early termination is controlled by the + * intersection handler */ + } + +# if defined(__METALRT_MOTION__) + payload.time = ray->time; + intersection = metalrt_intersect.intersect(r, + metal_ancillaries->accel_struct, + ray_mask, + ray->time, + metal_ancillaries->ift_default, + payload); +# else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); +# endif + + if (intersection.type == intersection_type::none) { + isect->t = ray->t; + isect->type = PRIMITIVE_NONE; + + return false; + } + + isect->t = intersection.distance; + + isect->prim = payload.prim; + isect->type = payload.type; + isect->object = intersection.user_instance_id; + + isect->t = intersection.distance; + if (intersection.type == intersection_type::triangle) { + isect->u = 1.0f - intersection.triangle_barycentric_coord.y - + intersection.triangle_barycentric_coord.x; + isect->v = intersection.triangle_barycentric_coord.x; + } + else { + isect->u = payload.u; + isect->v = payload.v; + } + + return isect->type != PRIMITIVE_NONE; + +#else + if (!scene_intersect_valid(ray)) { return false; } @@ -289,7 +381,69 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, p5); return p5; -# else /* __KERNEL_OPTIX__ */ +# elif defined(__METALRT__) + if (!scene_intersect_valid(ray)) { + if (local_isect) { + local_isect->num_hits = 0; + } + return false; + } + +# if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + if (local_isect) { + local_isect->num_hits = 0; + } + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_local)) { + if (local_isect) { + local_isect->num_hits = 0; + } + kernel_assert(!"Invalid ift_local"); + return false; + } +# endif + + metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); + metalrt_intersector_type metalrt_intersect; + + metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); + if (!kernel_data.bvh.have_curves) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionLocalPayload payload; + payload.local_object = local_object; + payload.max_hits = max_hits; + payload.local_isect.num_hits = 0; + if (lcg_state) { + payload.has_lcg_state = true; + payload.lcg_state = *lcg_state; + } + payload.result = false; + + typename metalrt_intersector_type::result_type intersection; + +# if defined(__METALRT_MOTION__) + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload); +# else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload); +# endif + + if (lcg_state) { + *lcg_state = payload.lcg_state; + } + *local_isect = payload.local_isect; + + return payload.result; + +# else + if (!scene_intersect_valid(ray)) { if (local_isect) { local_isect->num_hits = 0; @@ -406,7 +560,67 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, *throughput = __uint_as_float(p1); return p5; -# else /* __KERNEL_OPTIX__ */ +# elif defined(__METALRT__) + + if (!scene_intersect_valid(ray)) { + return false; + } + +# if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) { + kernel_assert(!"Invalid ift_shadow"); + return false; + } +# endif + + metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); + metalrt_intersector_type metalrt_intersect; + + metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); + if (!kernel_data.bvh.have_curves) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionShadowPayload payload; + payload.visibility = visibility; + payload.max_hits = max_hits; + payload.num_hits = 0; + payload.num_recorded_hits = 0; + payload.throughput = 1.0f; + payload.result = false; + payload.state = state; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + + typename metalrt_intersector_type::result_type intersection; + +# if defined(__METALRT_MOTION__) + payload.time = ray->time; + intersection = metalrt_intersect.intersect(r, + metal_ancillaries->accel_struct, + ray_mask, + ray->time, + metal_ancillaries->ift_shadow, + payload); +# else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload); +# endif + + *num_recorded_hits = payload.num_recorded_hits; + *throughput = payload.throughput; + + return payload.result; + +# else if (!scene_intersect_valid(ray)) { *num_recorded_hits = 0; *throughput = 1.0f; @@ -503,7 +717,76 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, isect->type = p5; return p5 != PRIMITIVE_NONE; -# else /* __KERNEL_OPTIX__ */ +# elif defined(__METALRT__) + + if (!scene_intersect_valid(ray)) { + return false; + } +# if defined(__KERNEL_DEBUG__) + if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) { + kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer"); + return false; + } + + if (is_null_intersection_function_table(metal_ancillaries->ift_default)) { + kernel_assert(!"Invalid ift_default"); + return false; + } +# endif + + metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t); + metalrt_intersector_type metalrt_intersect; + + metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); + if (!kernel_data.bvh.have_curves) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + MetalRTIntersectionPayload payload; + payload.visibility = visibility; + + typename metalrt_intersector_type::result_type intersection; + + uint ray_mask = visibility & 0xFF; + if (0 == ray_mask && (visibility & ~0xFF) != 0) { + ray_mask = 0xFF; + } + +# if defined(__METALRT_MOTION__) + payload.time = ray->time; + intersection = metalrt_intersect.intersect(r, + metal_ancillaries->accel_struct, + ray_mask, + ray->time, + metal_ancillaries->ift_default, + payload); +# else + intersection = metalrt_intersect.intersect( + r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload); +# endif + + if (intersection.type == intersection_type::none) { + return false; + } + + isect->prim = payload.prim; + isect->type = payload.type; + isect->object = intersection.user_instance_id; + + isect->t = intersection.distance; + if (intersection.type == intersection_type::triangle) { + isect->u = 1.0f - intersection.triangle_barycentric_coord.y - + intersection.triangle_barycentric_coord.x; + isect->v = intersection.triangle_barycentric_coord.x; + } + else { + isect->u = payload.u; + isect->v = payload.v; + } + + return isect->type != PRIMITIVE_NONE; + +# else if (!scene_intersect_valid(ray)) { return false; } diff --git a/intern/cycles/kernel/bvh/metal.h b/intern/cycles/kernel/bvh/metal.h new file mode 100644 index 00000000000..55456d15f50 --- /dev/null +++ b/intern/cycles/kernel/bvh/metal.h @@ -0,0 +1,47 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +struct MetalRTIntersectionPayload { + uint visibility; + float u, v; + int prim; + int type; +#if defined(__METALRT_MOTION__) + float time; +#endif +}; + +struct MetalRTIntersectionLocalPayload { + uint local_object; + uint lcg_state; + short max_hits; + bool has_lcg_state; + bool result; + LocalIntersection local_isect; +}; + +struct MetalRTIntersectionShadowPayload { + uint visibility; +#if defined(__METALRT_MOTION__) + float time; +#endif + int state; + float throughput; + short max_hits; + short num_hits; + short num_recorded_hits; + bool result; +}; diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 24702de496c..0f88063e3b7 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -21,6 +21,8 @@ #include "kernel/device/gpu/parallel_sorted_index.h" #include "kernel/device/gpu/work_stealing.h" +#include "kernel/sample/lcg.h" + /* Include constant tables before entering Metal's context class scope (context_begin.h) */ #include "kernel/tables.h" diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 61597a4acfc..a80965ba267 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -32,6 +32,10 @@ using namespace metal; +#ifdef __METALRT__ +using namespace metal::raytracing; +#endif + #pragma clang diagnostic ignored "-Wunused-variable" #pragma clang diagnostic ignored "-Wsign-compare" #pragma clang diagnostic ignored "-Wuninitialized" @@ -47,7 +51,7 @@ using namespace metal; #define ccl_global device #define ccl_inline_constant static constant constexpr #define ccl_device_constant constant -#define ccl_constant const device +#define ccl_constant constant #define ccl_gpu_shared threadgroup #define ccl_private thread #define ccl_may_alias @@ -246,6 +250,22 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \ #define __device__ +#ifdef __METALRT__ + +# define __KERNEL_GPU_RAYTRACING__ + +# if defined(__METALRT_MOTION__) +# define METALRT_TAGS instancing, instance_motion, primitive_motion +# else +# define METALRT_TAGS instancing +# endif /* __METALRT_MOTION__ */ + +typedef acceleration_structure<METALRT_TAGS> metalrt_as_type; +typedef intersection_function_table<triangle_data, METALRT_TAGS> metalrt_ift_type; +typedef metal::raytracing::intersector<triangle_data, METALRT_TAGS> metalrt_intersector_type; + +#endif /* __METALRT__ */ + /* texture bindings and sampler setup */ struct Texture2DParamsMetal { @@ -258,6 +278,13 @@ struct Texture3DParamsMetal { struct MetalAncillaries { device Texture2DParamsMetal *textures_2d; device Texture3DParamsMetal *textures_3d; + +#ifdef __METALRT__ + metalrt_as_type accel_struct; + metalrt_ift_type ift_default; + metalrt_ift_type ift_shadow; + metalrt_ift_type ift_local; +#endif }; #include "util/half.h" diff --git a/intern/cycles/kernel/device/metal/context_begin.h b/intern/cycles/kernel/device/metal/context_begin.h index 8c9e1c54077..2e91e93f088 100644 --- a/intern/cycles/kernel/device/metal/context_begin.h +++ b/intern/cycles/kernel/device/metal/context_begin.h @@ -26,6 +26,10 @@ class MetalKernelContext { MetalKernelContext(constant KernelParamsMetal &_launch_params_metal, constant MetalAncillaries * _metal_ancillaries) : launch_params_metal(_launch_params_metal), metal_ancillaries(_metal_ancillaries) {} + + MetalKernelContext(constant KernelParamsMetal &_launch_params_metal) + : launch_params_metal(_launch_params_metal) + {} /* texture fetch adapter functions */ typedef uint64_t ccl_gpu_tex_object; diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal index feca20ff475..ba80238bb84 100644 --- a/intern/cycles/kernel/device/metal/kernel.metal +++ b/intern/cycles/kernel/device/metal/kernel.metal @@ -16,10 +16,566 @@ /* Metal kernel entry points */ -// clang-format off - #include "kernel/device/metal/compat.h" #include "kernel/device/metal/globals.h" #include "kernel/device/gpu/kernel.h" -// clang-format on
\ No newline at end of file +/* MetalRT intersection handlers */ +#ifdef __METALRT__ + +/* Return type for a bounding box intersection function. */ +struct BoundingBoxIntersectionResult +{ + bool accept [[accept_intersection]]; + bool continue_search [[continue_search]]; + float distance [[distance]]; +}; + +/* Return type for a triangle intersection function. */ +struct TriangleIntersectionResult +{ + bool accept [[accept_intersection]]; + bool continue_search [[continue_search]]; +}; + +enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX }; + +template<typename TReturn, uint intersection_type> +TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload, + const uint object, + const uint primitive_id, + const float2 barycentrics, + const float ray_tmax) +{ + TReturn result; + +#ifdef __BVH_LOCAL__ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + + if (object != payload.local_object) { + /* Only intersect with matching object */ + result.accept = false; + result.continue_search = true; + return result; + } + + const short max_hits = payload.max_hits; + if (max_hits == 0) { + /* Special case for when no hit information is requested, just report that something was hit */ + payload.result = true; + result.accept = true; + result.continue_search = false; + return result; + } + + int hit = 0; + if (payload.has_lcg_state) { + for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) { + if (ray_tmax == payload.local_isect.hits[i].t) { + result.accept = false; + result.continue_search = true; + return result; + } + } + + hit = payload.local_isect.num_hits++; + + if (payload.local_isect.num_hits > max_hits) { + hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits; + if (hit >= max_hits) { + result.accept = false; + result.continue_search = true; + return result; + } + } + } + else { + if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) { + /* Record closest intersection only. Do not terminate ray here, since there is no guarantee about distance ordering in any-hit */ + result.accept = false; + result.continue_search = true; + return result; + } + + payload.local_isect.num_hits = 1; + } + + ray_data Intersection *isect = &payload.local_isect.hits[hit]; + isect->t = ray_tmax; + isect->prim = prim; + isect->object = object; + isect->type = kernel_tex_fetch(__objects, object).primitive_type; + + isect->u = 1.0f - barycentrics.y - barycentrics.x; + isect->v = barycentrics.x; + + /* Record geometric normal */ + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect->prim).w; + const float3 tri_a = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)); + const float3 tri_b = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)); + const float3 tri_c = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); + payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); + + /* Continue tracing (without this the trace call would return after the first hit) */ + result.accept = false; + result.continue_search = true; + return result; +#endif +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] +TriangleIntersectionResult +__anyhit__kernel_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]], + uint instance_id [[user_instance_id]], + uint primitive_id [[primitive_id]], + float2 barycentrics [[barycentric_coord]], + float ray_tmax [[distance]]) +{ + return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax); +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__anyhit__kernel_metalrt_local_hit_box(const float ray_tmax [[max_distance]]) +{ + /* unused function */ + BoundingBoxIntersectionResult result; + result.distance = ray_tmax; + result.accept = false; + result.continue_search = false; + return result; +} + +template<uint intersection_type> +bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + uint object, + uint prim, + const float2 barycentrics, + const float ray_tmax) +{ +#ifdef __SHADOW_RECORD_ALL__ +# ifdef __VISIBILITY_FLAG__ + const uint visibility = payload.visibility; + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + /* continue search */ + return true; + } +# endif + + float u = 0.0f, v = 0.0f; + int type = 0; + if (intersection_type == METALRT_HIT_TRIANGLE) { + u = 1.0f - barycentrics.y - barycentrics.x; + v = barycentrics.x; + type = kernel_tex_fetch(__objects, object).primitive_type; + } +# ifdef __HAIR__ + else { + u = barycentrics.x; + v = barycentrics.y; + + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + type = segment.type; + prim = segment.prim; + + /* Filter out curve endcaps */ + if (u == 0.0f || u == 1.0f) { + /* continue search */ + return true; + } + } +# endif + +# ifndef __TRANSPARENT_SHADOWS__ + /* No transparent shadows support compiled in, make opaque. */ + payload.result = true; + /* terminate ray */ + return false; +# else + short max_hits = payload.max_hits; + short num_hits = payload.num_hits; + short num_recorded_hits = payload.num_recorded_hits; + + MetalKernelContext context(launch_params_metal); + + /* If no transparent shadows, all light is blocked and we can stop immediately. */ + if (num_hits >= max_hits || + !(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { + payload.result = true; + /* terminate ray */ + return false; + } + + /* Always use baked shadow transparency for curves. */ + if (type & PRIMITIVE_ALL_CURVE) { + float throughput = payload.throughput; + throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u); + payload.throughput = throughput; + payload.num_hits += 1; + + if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { + /* Accept result and terminate if throughput is sufficiently low */ + payload.result = true; + return false; + } + else { + return true; + } + } + + payload.num_hits += 1; + payload.num_recorded_hits += 1; + + uint record_index = num_recorded_hits; + + const IntegratorShadowState state = payload.state; + + const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE); + if (record_index >= max_record_hits) { + /* If maximum number of hits reached, find a hit to replace. */ + float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); + uint max_recorded_hit = 0; + + for (int i = 1; i < max_record_hits; i++) { + const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); + if (isect_t > max_recorded_t) { + max_recorded_t = isect_t; + max_recorded_hit = i; + } + } + + if (ray_tmax >= max_recorded_t) { + /* Accept hit, so that we don't consider any more hits beyond the distance of the + * current hit anymore. */ + payload.result = true; + return true; + } + + record_index = max_recorded_hit; + } + + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; + + /* Continue tracing. */ +# endif /* __TRANSPARENT_SHADOWS__ */ +#endif /* __SHADOW_RECORD_ALL__ */ + + return true; +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] +TriangleIntersectionResult +__anyhit__kernel_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + unsigned int object [[user_instance_id]], + unsigned int primitive_id [[primitive_id]], + float2 barycentrics [[barycentric_coord]], + float ray_tmax [[distance]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + + TriangleIntersectionResult result; + result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, object, prim, barycentrics, ray_tmax); + result.accept = !result.continue_search; + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__anyhit__kernel_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]]) +{ + /* unused function */ + BoundingBoxIntersectionResult result; + result.distance = ray_tmax; + result.accept = false; + result.continue_search = false; + return result; +} + +template<typename TReturnType, uint intersection_type> +inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const float u) +{ + TReturnType result; + +# ifdef __HAIR__ + if (intersection_type == METALRT_HIT_BOUNDING_BOX) { + /* Filter out curve endcaps. */ + if (u == 0.0f || u == 1.0f) { + result.accept = false; + result.continue_search = true; + return result; + } + } +# endif + +# ifdef __VISIBILITY_FLAG__ + uint visibility = payload.visibility; + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + result.accept = false; + result.continue_search = true; + return result; + } + + /* Shadow ray early termination. */ + if (visibility & PATH_RAY_SHADOW_OPAQUE) { + result.accept = true; + result.continue_search = false; + return result; + } +# endif + + result.accept = true; + result.continue_search = true; + return result; +} + +[[intersection(triangle, triangle_data, METALRT_TAGS)]] +TriangleIntersectionResult +__anyhit__kernel_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + unsigned int object [[user_instance_id]], + unsigned int primitive_id [[primitive_id]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>( + launch_params_metal, payload, object, prim, 0.0f); + if (result.accept) { + payload.prim = prim; + payload.type = kernel_tex_fetch(__objects, object).primitive_type; + } + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__anyhit__kernel_metalrt_visibility_test_box(const float ray_tmax [[max_distance]]) +{ + /* Unused function */ + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + return result; +} + +#ifdef __HAIR__ +ccl_device_inline +void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_origin, + const float3 ray_direction, + float time, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ +# ifdef __VISIBILITY_FLAG__ + const uint visibility = payload.visibility; + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + return; + } +# endif + + float3 P = ray_origin; + float3 dir = ray_direction; + + /* The direction is not normalized by default, but the curve intersection routine expects that */ + float len; + dir = normalize_len(dir, &len); + + Intersection isect; + isect.t = ray_tmax; + /* Transform maximum distance into object space. */ + if (isect.t != FLT_MAX) + isect.t *= len; + + MetalKernelContext context(launch_params_metal); + if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { + result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, isect.u); + if (result.accept) { + result.distance = isect.t / len; + payload.u = isect.u; + payload.v = isect.v; + payload.prim = prim; + payload.type = type; + } + } +} + +ccl_device_inline +void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_origin, + const float3 ray_direction, + float time, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ + const uint visibility = payload.visibility; + + float3 P = ray_origin; + float3 dir = ray_direction; + + /* The direction is not normalized by default, but the curve intersection routine expects that */ + float len; + dir = normalize_len(dir, &len); + + Intersection isect; + isect.t = ray_tmax; + /* Transform maximum distance into object space */ + if (isect.t != FLT_MAX) + isect.t *= len; + + MetalKernelContext context(launch_params_metal); + if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { + result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); + result.accept = !result.continue_search; + + if (result.accept) { + result.distance = isect.t / len; + } + } +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + if (segment.type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { + metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmax, result); + } + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + if (segment.type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { + metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmax, result); + } + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmax, result); + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmax [[max_distance]]) +{ + uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmax, result); + + return result; +} + +#endif /* __HAIR__ */ +#endif /* __METALRT__ */ diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h index 0619c135c39..db4233624b9 100644 --- a/intern/cycles/kernel/device/optix/compat.h +++ b/intern/cycles/kernel/device/optix/compat.h @@ -21,6 +21,7 @@ #include <optix.h> #define __KERNEL_GPU__ +#define __KERNEL_GPU_RAYTRACING__ #define __KERNEL_CUDA__ /* OptiX kernels are implicitly CUDA kernels too */ #define __KERNEL_OPTIX__ #define CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/geom/motion_triangle_intersect.h b/intern/cycles/kernel/geom/motion_triangle_intersect.h index 72ad237eeeb..3bbb7be685d 100644 --- a/intern/cycles/kernel/geom/motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/motion_triangle_intersect.h @@ -101,8 +101,8 @@ ccl_device_inline const int isect_prim, float3 verts[3]) { -# ifdef __KERNEL_OPTIX__ - /* t is always in world space with OptiX. */ +# if defined(__KERNEL_GPU_RAYTRACING__) + /* t is always in world space with OptiX and MetalRT. */ return motion_triangle_refine(kg, sd, P, D, t, isect_object, isect_prim, verts); # else # ifdef __INTERSECTION_REFINE__ diff --git a/intern/cycles/kernel/geom/triangle_intersect.h b/intern/cycles/kernel/geom/triangle_intersect.h index 57a6ae7fe72..4a7f38131da 100644 --- a/intern/cycles/kernel/geom/triangle_intersect.h +++ b/intern/cycles/kernel/geom/triangle_intersect.h @@ -227,8 +227,8 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals kg, const int isect_object, const int isect_prim) { -#ifdef __KERNEL_OPTIX__ - /* t is always in world space with OptiX. */ +#if defined(__KERNEL_GPU_RAYTRACING__) + /* t is always in world space with OptiX and MetalRT. */ return triangle_refine(kg, sd, P, D, t, isect_object, isect_prim); #else if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { diff --git a/intern/cycles/kernel/integrator/subsurface_disk.h b/intern/cycles/kernel/integrator/subsurface_disk.h index 22327268e02..cc6f5048cda 100644 --- a/intern/cycles/kernel/integrator/subsurface_disk.h +++ b/intern/cycles/kernel/integrator/subsurface_disk.h @@ -137,8 +137,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm); hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng)); - /* Transform t to world space, except for OptiX where it already is. */ -#ifdef __KERNEL_OPTIX__ + /* Transform t to world space, except for OptiX and MetalRT where it already is. */ +#ifdef __KERNEL_GPU_RAYTRACING__ (void)tfm; #else float3 D = transform_direction(&itfm, ray.D); diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h index f0712758174..7a8b467e199 100644 --- a/intern/cycles/kernel/integrator/subsurface_random_walk.h +++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h @@ -212,7 +212,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, ray.dP = ray_dP; ray.dD = differential_zero_compact(); -#ifndef __KERNEL_OPTIX__ +#ifndef __KERNEL_GPU_RAYTRACING__ /* Compute or fetch object transforms. */ Transform ob_itfm ccl_optional_struct_init; Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm); @@ -382,8 +382,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, hit = (ss_isect.num_hits > 0); if (hit) { -#ifdef __KERNEL_OPTIX__ - /* t is always in world space with OptiX. */ +#ifdef __KERNEL_GPU_RAYTRACING__ + /* t is always in world space with OptiX and MetalRT. */ ray.t = ss_isect.hits[0].t; #else /* Compute world space distance to surface hit. */ diff --git a/intern/cycles/kernel/textures.h b/intern/cycles/kernel/textures.h index 2e3ae29a19a..58edb239007 100644 --- a/intern/cycles/kernel/textures.h +++ b/intern/cycles/kernel/textures.h @@ -34,6 +34,7 @@ KERNEL_TEX(Transform, __object_motion_pass) KERNEL_TEX(DecomposedTransform, __object_motion) KERNEL_TEX(uint, __object_flag) KERNEL_TEX(float, __object_volume_step) +KERNEL_TEX(uint, __object_prim_offset) /* cameras */ KERNEL_TEX(DecomposedTransform, __camera_motion) diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 4a730dbfaaa..b15230e627f 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -110,9 +110,9 @@ CCL_NAMESPACE_BEGIN # define __VOLUME_RECORD_ALL__ #endif /* __KERNEL_CPU__ */ -#ifdef __KERNEL_OPTIX__ +#ifdef __KERNEL_GPU_RAYTRACING__ # undef __BAKING__ -#endif /* __KERNEL_OPTIX__ */ +#endif /* __KERNEL_GPU_RAYTRACING__ */ /* Scene-based selective features compilation. */ #ifdef __KERNEL_FEATURES__ @@ -1220,10 +1220,12 @@ typedef enum KernelBVHLayout { BVH_LAYOUT_OPTIX = (1 << 2), BVH_LAYOUT_MULTI_OPTIX = (1 << 3), BVH_LAYOUT_MULTI_OPTIX_EMBREE = (1 << 4), + BVH_LAYOUT_METAL = (1 << 5), + BVH_LAYOUT_MULTI_METAL_EMBREE = (1 << 6), /* Default BVH layout to use for CPU. */ BVH_LAYOUT_AUTO = BVH_LAYOUT_EMBREE, - BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX, + BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL, } KernelBVHLayout; typedef struct KernelBVH { @@ -1238,6 +1240,8 @@ typedef struct KernelBVH { /* Custom BVH */ #ifdef __KERNEL_OPTIX__ OptixTraversableHandle scene; +#elif defined __METALRT__ + metalrt_as_type scene; #else # ifdef __EMBREE__ RTCScene scene; diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp index bf426fc49f6..346b030817f 100644 --- a/intern/cycles/scene/geometry.cpp +++ b/intern/cycles/scene/geometry.cpp @@ -165,7 +165,8 @@ int Geometry::motion_step(float time) const bool Geometry::need_build_bvh(BVHLayout layout) const { return is_instanced() || layout == BVH_LAYOUT_OPTIX || layout == BVH_LAYOUT_MULTI_OPTIX || - layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE; + layout == BVH_LAYOUT_METAL || layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE || + layout == BVH_LAYOUT_MULTI_METAL_EMBREE; } bool Geometry::is_instanced() const @@ -1247,7 +1248,8 @@ void GeometryManager::device_update_bvh(Device *device, VLOG(1) << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout."; const bool can_refit = scene->bvh != nullptr && - (bparams.bvh_layout == BVHLayout::BVH_LAYOUT_OPTIX); + (bparams.bvh_layout == BVHLayout::BVH_LAYOUT_OPTIX || + bparams.bvh_layout == BVHLayout::BVH_LAYOUT_METAL); BVH *bvh = scene->bvh; if (!scene->bvh) { diff --git a/intern/cycles/scene/object.cpp b/intern/cycles/scene/object.cpp index 69a2365f17c..bf224a81af5 100644 --- a/intern/cycles/scene/object.cpp +++ b/intern/cycles/scene/object.cpp @@ -530,6 +530,34 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s } } +void ObjectManager::device_update_prim_offsets(Device *device, DeviceScene *dscene, Scene *scene) +{ + BVHLayoutMask layout_mask = device->get_bvh_layout_mask(); + if (layout_mask != BVH_LAYOUT_METAL && layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE) { + return; + } + + /* On MetalRT, primitive / curve segment offsets can't be baked at BVH build time. Intersection + * handlers need to apply the offset manually. */ + uint *object_prim_offset = dscene->object_prim_offset.alloc(scene->objects.size()); + foreach (Object *ob, scene->objects) { + uint32_t prim_offset = 0; + if (Geometry *const geom = ob->geometry) { + if (geom->geometry_type == Geometry::HAIR) { + prim_offset = ((Hair *const)geom)->curve_segment_offset; + } + else { + prim_offset = geom->prim_offset; + } + } + uint obj_index = ob->get_device_index(); + object_prim_offset[obj_index] = prim_offset; + } + + dscene->object_prim_offset.copy_to_device(); + dscene->object_prim_offset.clear_modified(); +} + void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene, Progress &progress) { UpdateObjectTransformState state; @@ -840,6 +868,7 @@ void ObjectManager::device_free(Device *, DeviceScene *dscene, bool force_free) dscene->object_motion.free_if_need_realloc(force_free); dscene->object_flag.free_if_need_realloc(force_free); dscene->object_volume_step.free_if_need_realloc(force_free); + dscene->object_prim_offset.free_if_need_realloc(force_free); } void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, Progress &progress) diff --git a/intern/cycles/scene/object.h b/intern/cycles/scene/object.h index f6dc57ee8b9..f983b58b59c 100644 --- a/intern/cycles/scene/object.h +++ b/intern/cycles/scene/object.h @@ -155,6 +155,7 @@ class ObjectManager { void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); void device_update_transforms(DeviceScene *dscene, Scene *scene, Progress &progress); + void device_update_prim_offsets(Device *device, DeviceScene *dscene, Scene *scene); void device_update_flags(Device *device, DeviceScene *dscene, diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp index 4230abe9a1b..452b5215836 100644 --- a/intern/cycles/scene/scene.cpp +++ b/intern/cycles/scene/scene.cpp @@ -69,6 +69,7 @@ DeviceScene::DeviceScene(Device *device) object_motion(device, "__object_motion", MEM_GLOBAL), object_flag(device, "__object_flag", MEM_GLOBAL), object_volume_step(device, "__object_volume_step", MEM_GLOBAL), + object_prim_offset(device, "__object_prim_offset", MEM_GLOBAL), camera_motion(device, "__camera_motion", MEM_GLOBAL), attributes_map(device, "__attributes_map", MEM_GLOBAL), attributes_float(device, "__attributes_float", MEM_GLOBAL), @@ -312,6 +313,12 @@ void Scene::device_update(Device *device_, Progress &progress) if (progress.get_cancel() || device->have_error()) return; + progress.set_status("Updating Primitive Offsets"); + object_manager->device_update_prim_offsets(device, &dscene, this); + + if (progress.get_cancel() || device->have_error()) + return; + progress.set_status("Updating Images"); image_manager->device_update(device, this, progress); diff --git a/intern/cycles/scene/scene.h b/intern/cycles/scene/scene.h index 4af05349dd3..f8f672a079a 100644 --- a/intern/cycles/scene/scene.h +++ b/intern/cycles/scene/scene.h @@ -100,6 +100,7 @@ class DeviceScene { device_vector<DecomposedTransform> object_motion; device_vector<uint> object_flag; device_vector<float> object_volume_step; + device_vector<uint> object_prim_offset; /* cameras */ device_vector<DecomposedTransform> camera_motion; diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h index 1a0213f2a6d..74f1c98e649 100644 --- a/intern/cycles/util/math_float3.h +++ b/intern/cycles/util/math_float3.h @@ -233,7 +233,7 @@ ccl_device_inline float3 operator/=(float3 &a, float f) return a = a * invf; } -#if !(defined(__KERNEL_METAL__) || defined(__KERNEL_CUDA__)) +# if !(defined(__KERNEL_METAL__) || defined(__KERNEL_CUDA__)) ccl_device_inline packed_float3 operator*=(packed_float3 &a, const float3 &b) { a = float3(a) * b; @@ -257,7 +257,7 @@ ccl_device_inline packed_float3 operator/=(packed_float3 &a, float f) a = float3(a) / f; return a; } -#endif +# endif ccl_device_inline bool operator==(const float3 &a, const float3 &b) { diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h index 1d78dfd1385..80cd37d35e2 100644 --- a/intern/cycles/util/transform.h +++ b/intern/cycles/util/transform.h @@ -366,10 +366,10 @@ ccl_device_inline Transform transform_empty() ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) { - /* Optix is using lerp to interpolate motion transformations. */ -#ifdef __KERNEL_OPTIX__ + /* Optix and MetalRT are using lerp to interpolate motion transformations. */ +#if defined(__KERNEL_GPU_RAYTRACING__) return normalize((1.0f - t) * q1 + t * q2); -#else /* __KERNEL_OPTIX__ */ +#else /* defined(__KERNEL_GPU_RAYTRACING__) */ /* note: this does not ensure rotation around shortest angle, q1 and q2 * are assumed to be matched already in transform_motion_decompose */ float costheta = dot(q1, q2); @@ -387,7 +387,7 @@ ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) float thetap = theta * t; return q1 * cosf(thetap) + qperp * sinf(thetap); } -#endif /* __KERNEL_OPTIX__ */ +#endif /* defined(__KERNEL_GPU_RAYTRACING__) */ } ccl_device_inline Transform transform_quick_inverse(Transform M) |