Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/metal/bvh.mm')
-rw-r--r--intern/cycles/device/metal/bvh.mm813
1 files changed, 813 insertions, 0 deletions
diff --git a/intern/cycles/device/metal/bvh.mm b/intern/cycles/device/metal/bvh.mm
new file mode 100644
index 00000000000..1953102cb41
--- /dev/null
+++ b/intern/cycles/device/metal/bvh.mm
@@ -0,0 +1,813 @@
+/*
+ * Copyright 2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_METAL
+
+# include "scene/hair.h"
+# include "scene/mesh.h"
+# include "scene/object.h"
+
+# include "util/progress.h"
+
+# include "device/metal/bvh.h"
+
+CCL_NAMESPACE_BEGIN
+
+# define BVH_status(...) \
+ { \
+ string str = string_printf(__VA_ARGS__); \
+ progress.set_substatus(str); \
+ }
+
+BVHMetal::BVHMetal(const BVHParams &params_,
+ const vector<Geometry *> &geometry_,
+ const vector<Object *> &objects_,
+ Device *device)
+ : BVH(params_, geometry_, objects_), stats(device->stats)
+{
+}
+
+BVHMetal::~BVHMetal()
+{
+ if (@available(macos 12.0, *)) {
+ if (accel_struct) {
+ stats.mem_free(accel_struct.allocatedSize);
+ [accel_struct release];
+ }
+ }
+}
+
+bool BVHMetal::build_BLAS_mesh(Progress &progress,
+ id<MTLDevice> device,
+ id<MTLCommandQueue> queue,
+ Geometry *const geom,
+ bool refit)
+{
+ if (@available(macos 12.0, *)) {
+ /* Build BLAS for triangle primitives */
+ Mesh *const mesh = static_cast<Mesh *const>(geom);
+ if (mesh->num_triangles() == 0) {
+ return false;
+ }
+
+ /*------------------------------------------------*/
+ BVH_status(
+ "Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());
+ /*------------------------------------------------*/
+
+ const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
+
+ const array<float3> &verts = mesh->get_verts();
+ const array<int> &tris = mesh->get_triangles();
+ const size_t num_verts = verts.size();
+ const size_t num_indices = tris.size();
+
+ size_t num_motion_steps = 1;
+ Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
+ num_motion_steps = mesh->get_motion_steps();
+ }
+
+ MTLResourceOptions storage_mode;
+ if (device.hasUnifiedMemory) {
+ storage_mode = MTLResourceStorageModeShared;
+ }
+ else {
+ storage_mode = MTLResourceStorageModeManaged;
+ }
+
+ /* Upload the mesh data to the GPU */
+ id<MTLBuffer> posBuf = nil;
+ id<MTLBuffer> indexBuf = [device newBufferWithBytes:tris.data()
+ length:num_indices * sizeof(tris.data()[0])
+ options:storage_mode];
+
+ if (num_motion_steps == 1) {
+ posBuf = [device newBufferWithBytes:verts.data()
+ length:num_verts * sizeof(verts.data()[0])
+ options:storage_mode];
+ }
+ else {
+ posBuf = [device newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
+ options:storage_mode];
+ float3 *dest_data = (float3 *)[posBuf contents];
+ size_t center_step = (num_motion_steps - 1) / 2;
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ const float3 *verts = mesh->get_verts().data();
+
+ /* The center step for motion vertices is not stored in the attribute. */
+ if (step != center_step) {
+ verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
+ }
+ memcpy(dest_data + num_verts * step, verts, num_verts * sizeof(float3));
+ }
+ if (storage_mode == MTLResourceStorageModeManaged) {
+ [posBuf didModifyRange:NSMakeRange(0, posBuf.length)];
+ }
+ }
+
+ /* Create an acceleration structure. */
+ MTLAccelerationStructureGeometryDescriptor *geomDesc;
+ if (num_motion_steps > 1) {
+ std::vector<MTLMotionKeyframeData *> vertex_ptrs;
+ vertex_ptrs.reserve(num_motion_steps);
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
+ k.buffer = posBuf;
+ k.offset = num_verts * step * sizeof(float3);
+ vertex_ptrs.push_back(k);
+ }
+
+ MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
+ [MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
+ geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
+ count:vertex_ptrs.size()];
+ geomDescMotion.vertexStride = sizeof(verts.data()[0]);
+ geomDescMotion.indexBuffer = indexBuf;
+ geomDescMotion.indexBufferOffset = 0;
+ geomDescMotion.indexType = MTLIndexTypeUInt32;
+ geomDescMotion.triangleCount = num_indices / 3;
+ geomDescMotion.intersectionFunctionTableOffset = 0;
+
+ geomDesc = geomDescMotion;
+ }
+ else {
+ MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
+ [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
+ geomDescNoMotion.vertexBuffer = posBuf;
+ geomDescNoMotion.vertexBufferOffset = 0;
+ geomDescNoMotion.vertexStride = sizeof(verts.data()[0]);
+ geomDescNoMotion.indexBuffer = indexBuf;
+ geomDescNoMotion.indexBufferOffset = 0;
+ geomDescNoMotion.indexType = MTLIndexTypeUInt32;
+ geomDescNoMotion.triangleCount = num_indices / 3;
+ geomDescNoMotion.intersectionFunctionTableOffset = 0;
+
+ geomDesc = geomDescNoMotion;
+ }
+
+ /* Force a single any-hit call, so shadow record-all behavior works correctly */
+ /* (Match optix behavior: unsigned int build_flags =
+ * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
+ geomDesc.allowDuplicateIntersectionFunctionInvocation = false;
+
+ MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
+ [MTLPrimitiveAccelerationStructureDescriptor descriptor];
+ accelDesc.geometryDescriptors = @[ geomDesc ];
+ if (num_motion_steps > 1) {
+ accelDesc.motionStartTime = 0.0f;
+ accelDesc.motionEndTime = 1.0f;
+ accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
+ accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
+ accelDesc.motionKeyframeCount = num_motion_steps;
+ }
+
+ if (!use_fast_trace_bvh) {
+ accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
+ MTLAccelerationStructureUsagePreferFastBuild);
+ }
+
+ MTLAccelerationStructureSizes accelSizes = [device
+ accelerationStructureSizesWithDescriptor:accelDesc];
+ id<MTLAccelerationStructure> accel_uncompressed = [device
+ newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
+ id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
+ options:MTLResourceStorageModePrivate];
+ id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
+ id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
+ id<MTLAccelerationStructureCommandEncoder> accelEnc =
+ [accelCommands accelerationStructureCommandEncoder];
+ if (refit) {
+ [accelEnc refitAccelerationStructure:accel_struct
+ descriptor:accelDesc
+ destination:accel_uncompressed
+ scratchBuffer:scratchBuf
+ scratchBufferOffset:0];
+ }
+ else {
+ [accelEnc buildAccelerationStructure:accel_uncompressed
+ descriptor:accelDesc
+ scratchBuffer:scratchBuf
+ scratchBufferOffset:0];
+ }
+ if (use_fast_trace_bvh) {
+ [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
+ toBuffer:sizeBuf
+ offset:0
+ sizeDataType:MTLDataTypeULong];
+ }
+ [accelEnc endEncoding];
+ [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
+ /* free temp resources */
+ [scratchBuf release];
+ [indexBuf release];
+ [posBuf release];
+
+ if (use_fast_trace_bvh) {
+ /* Compact the accel structure */
+ uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
+
+ dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
+ id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
+ id<MTLAccelerationStructureCommandEncoder> accelEnc =
+ [accelCommands accelerationStructureCommandEncoder];
+ id<MTLAccelerationStructure> accel = [device
+ newAccelerationStructureWithSize:compressed_size];
+ [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
+ toAccelerationStructure:accel];
+ [accelEnc endEncoding];
+ [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
+ uint64_t allocated_size = [accel allocatedSize];
+ stats.mem_alloc(allocated_size);
+ accel_struct = accel;
+ [accel_uncompressed release];
+ accel_struct_building = false;
+ }];
+ [accelCommands commit];
+ });
+ }
+ else {
+ /* set our acceleration structure to the uncompressed structure */
+ accel_struct = accel_uncompressed;
+
+ uint64_t allocated_size = [accel_struct allocatedSize];
+ stats.mem_alloc(allocated_size);
+ accel_struct_building = false;
+ }
+ [sizeBuf release];
+ }];
+
+ accel_struct_building = true;
+ [accelCommands commit];
+
+ return true;
+ }
+ return false;
+}
+
+bool BVHMetal::build_BLAS_hair(Progress &progress,
+ id<MTLDevice> device,
+ id<MTLCommandQueue> queue,
+ Geometry *const geom,
+ bool refit)
+{
+ if (@available(macos 12.0, *)) {
+ /* Build BLAS for hair curves */
+ Hair *hair = static_cast<Hair *>(geom);
+ if (hair->num_curves() == 0) {
+ return false;
+ }
+
+ /*------------------------------------------------*/
+ BVH_status(
+ "Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());
+ /*------------------------------------------------*/
+
+ const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
+ const size_t num_segments = hair->num_segments();
+
+ size_t num_motion_steps = 1;
+ Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
+ num_motion_steps = hair->get_motion_steps();
+ }
+
+ const size_t num_aabbs = num_segments * num_motion_steps;
+
+ MTLResourceOptions storage_mode;
+ if (device.hasUnifiedMemory) {
+ storage_mode = MTLResourceStorageModeShared;
+ }
+ else {
+ storage_mode = MTLResourceStorageModeManaged;
+ }
+
+ /* Allocate a GPU buffer for the AABB data and populate it */
+ id<MTLBuffer> aabbBuf = [device
+ newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
+ options:storage_mode];
+ MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
+
+ /* Get AABBs for each motion step */
+ size_t center_step = (num_motion_steps - 1) / 2;
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ /* The center step for motion vertices is not stored in the attribute */
+ const float3 *keys = hair->get_curve_keys().data();
+ if (step != center_step) {
+ size_t attr_offset = (step > center_step) ? step - 1 : step;
+ /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4) */
+ keys = motion_keys->data_float3() + attr_offset * hair->get_curve_keys().size();
+ }
+
+ for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) {
+ const Hair::Curve curve = hair->get_curve(j);
+
+ for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
+ {
+ BoundBox bounds = BoundBox::empty;
+ curve.bounds_grow(segment, keys, hair->get_curve_radius().data(), bounds);
+
+ const size_t index = step * num_segments + i;
+ aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
+ aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
+ }
+ }
+ }
+ }
+
+ if (storage_mode == MTLResourceStorageModeManaged) {
+ [aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)];
+ }
+
+# if 0
+ for (size_t i=0; i<num_aabbs && i < 400; i++) {
+ MTLAxisAlignedBoundingBox& bb = aabb_data[i];
+ printf(" %d: %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n", int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z);
+ }
+# endif
+
+ MTLAccelerationStructureGeometryDescriptor *geomDesc;
+ if (motion_blur) {
+ std::vector<MTLMotionKeyframeData *> aabb_ptrs;
+ aabb_ptrs.reserve(num_motion_steps);
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
+ k.buffer = aabbBuf;
+ k.offset = step * num_segments * sizeof(MTLAxisAlignedBoundingBox);
+ aabb_ptrs.push_back(k);
+ }
+
+ MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
+ [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
+ geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
+ count:aabb_ptrs.size()];
+ geomDescMotion.boundingBoxCount = num_segments;
+ geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
+ geomDescMotion.intersectionFunctionTableOffset = 1;
+
+ /* Force a single any-hit call, so shadow record-all behavior works correctly */
+ /* (Match optix behavior: unsigned int build_flags =
+ * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
+ geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
+ geomDescMotion.opaque = true;
+ geomDesc = geomDescMotion;
+ }
+ else {
+ MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
+ [MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
+ geomDescNoMotion.boundingBoxBuffer = aabbBuf;
+ geomDescNoMotion.boundingBoxBufferOffset = 0;
+ geomDescNoMotion.boundingBoxCount = int(num_aabbs);
+ geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
+ geomDescNoMotion.intersectionFunctionTableOffset = 1;
+
+ /* Force a single any-hit call, so shadow record-all behavior works correctly */
+ /* (Match optix behavior: unsigned int build_flags =
+ * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
+ geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
+ geomDescNoMotion.opaque = true;
+ geomDesc = geomDescNoMotion;
+ }
+
+ MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
+ [MTLPrimitiveAccelerationStructureDescriptor descriptor];
+ accelDesc.geometryDescriptors = @[ geomDesc ];
+
+ if (motion_blur) {
+ accelDesc.motionStartTime = 0.0f;
+ accelDesc.motionEndTime = 1.0f;
+ accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
+ accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
+ accelDesc.motionKeyframeCount = num_motion_steps;
+ }
+
+ if (!use_fast_trace_bvh) {
+ accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
+ MTLAccelerationStructureUsagePreferFastBuild);
+ }
+
+ MTLAccelerationStructureSizes accelSizes = [device
+ accelerationStructureSizesWithDescriptor:accelDesc];
+ id<MTLAccelerationStructure> accel_uncompressed = [device
+ newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
+ id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
+ options:MTLResourceStorageModePrivate];
+ id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
+ id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
+ id<MTLAccelerationStructureCommandEncoder> accelEnc =
+ [accelCommands accelerationStructureCommandEncoder];
+ if (refit) {
+ [accelEnc refitAccelerationStructure:accel_struct
+ descriptor:accelDesc
+ destination:accel_uncompressed
+ scratchBuffer:scratchBuf
+ scratchBufferOffset:0];
+ }
+ else {
+ [accelEnc buildAccelerationStructure:accel_uncompressed
+ descriptor:accelDesc
+ scratchBuffer:scratchBuf
+ scratchBufferOffset:0];
+ }
+ if (use_fast_trace_bvh) {
+ [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
+ toBuffer:sizeBuf
+ offset:0
+ sizeDataType:MTLDataTypeULong];
+ }
+ [accelEnc endEncoding];
+ [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
+ /* free temp resources */
+ [scratchBuf release];
+ [aabbBuf release];
+
+ if (use_fast_trace_bvh) {
+ /* Compact the accel structure */
+ uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
+
+ dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
+ id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
+ id<MTLAccelerationStructureCommandEncoder> accelEnc =
+ [accelCommands accelerationStructureCommandEncoder];
+ id<MTLAccelerationStructure> accel = [device
+ newAccelerationStructureWithSize:compressed_size];
+ [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
+ toAccelerationStructure:accel];
+ [accelEnc endEncoding];
+ [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
+ uint64_t allocated_size = [accel allocatedSize];
+ stats.mem_alloc(allocated_size);
+ accel_struct = accel;
+ [accel_uncompressed release];
+ accel_struct_building = false;
+ }];
+ [accelCommands commit];
+ });
+ }
+ else {
+ /* set our acceleration structure to the uncompressed structure */
+ accel_struct = accel_uncompressed;
+
+ uint64_t allocated_size = [accel_struct allocatedSize];
+ stats.mem_alloc(allocated_size);
+ accel_struct_building = false;
+ }
+ [sizeBuf release];
+ }];
+
+ accel_struct_building = true;
+ [accelCommands commit];
+ return true;
+ }
+ return false;
+}
+
+bool BVHMetal::build_BLAS(Progress &progress,
+ id<MTLDevice> device,
+ id<MTLCommandQueue> queue,
+ bool refit)
+{
+ if (@available(macos 12.0, *)) {
+ assert(objects.size() == 1 && geometry.size() == 1);
+
+ /* Build bottom level acceleration structures (BLAS) */
+ Geometry *const geom = geometry[0];
+ switch (geom->geometry_type) {
+ case Geometry::VOLUME:
+ case Geometry::MESH:
+ return build_BLAS_mesh(progress, device, queue, geom, refit);
+ case Geometry::HAIR:
+ return build_BLAS_hair(progress, device, queue, geom, refit);
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
+bool BVHMetal::build_TLAS(Progress &progress,
+ id<MTLDevice> device,
+ id<MTLCommandQueue> queue,
+ bool refit)
+{
+ if (@available(macos 12.0, *)) {
+
+ /* we need to sync here and ensure that all BLAS have completed async generation by both GCD
+ * and Metal */
+ {
+ __block bool complete_bvh = false;
+ while (!complete_bvh) {
+ dispatch_sync(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
+ complete_bvh = true;
+ for (Object *ob : objects) {
+ /* Skip non-traceable objects */
+ if (!ob->is_traceable())
+ continue;
+
+ Geometry const *geom = ob->get_geometry();
+ BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
+ if (blas->accel_struct_building) {
+ complete_bvh = false;
+
+ /* We're likely waiting on a command buffer that's in flight to complete.
+ * Queue up a command buffer and wait for it complete before checking the BLAS again
+ */
+ id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
+ [command_buffer commit];
+ [command_buffer waitUntilCompleted];
+ break;
+ }
+ }
+ });
+ }
+ }
+
+ uint32_t num_instances = 0;
+ uint32_t num_motion_transforms = 0;
+ for (Object *ob : objects) {
+ /* Skip non-traceable objects */
+ if (!ob->is_traceable())
+ continue;
+ num_instances++;
+
+ if (ob->use_motion()) {
+ num_motion_transforms += max(1, ob->get_motion().size());
+ }
+ else {
+ num_motion_transforms++;
+ }
+ }
+
+ /*------------------------------------------------*/
+ BVH_status("Building TLAS | %7d instances", (int)num_instances);
+ /*------------------------------------------------*/
+
+ const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
+
+ NSMutableArray *all_blas = [NSMutableArray array];
+ unordered_map<BVHMetal const *, int> instance_mapping;
+
+ /* Lambda function to build/retrieve the BLAS index mapping */
+ auto get_blas_index = [&](BVHMetal const *blas) {
+ auto it = instance_mapping.find(blas);
+ if (it != instance_mapping.end()) {
+ return it->second;
+ }
+ else {
+ int blas_index = (int)[all_blas count];
+ instance_mapping[blas] = blas_index;
+ if (@available(macos 12.0, *)) {
+ [all_blas addObject:blas->accel_struct];
+ }
+ return blas_index;
+ }
+ };
+
+ MTLResourceOptions storage_mode;
+ if (device.hasUnifiedMemory) {
+ storage_mode = MTLResourceStorageModeShared;
+ }
+ else {
+ storage_mode = MTLResourceStorageModeManaged;
+ }
+
+ size_t instance_size;
+ if (motion_blur) {
+ instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
+ }
+ else {
+ instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
+ }
+
+ /* Allocate a GPU buffer for the instance data and populate it */
+ id<MTLBuffer> instanceBuf = [device newBufferWithLength:num_instances * instance_size
+ options:storage_mode];
+ id<MTLBuffer> motion_transforms_buf = nil;
+ MTLPackedFloat4x3 *motion_transforms = nullptr;
+ if (motion_blur && num_motion_transforms) {
+ motion_transforms_buf = [device
+ newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
+ options:storage_mode];
+ motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
+ }
+
+ uint32_t instance_index = 0;
+ uint32_t motion_transform_index = 0;
+ for (Object *ob : objects) {
+ /* Skip non-traceable objects */
+ if (!ob->is_traceable())
+ continue;
+
+ Geometry const *geom = ob->get_geometry();
+
+ BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
+ uint32_t accel_struct_index = get_blas_index(blas);
+
+ /* Add some of the object visibility bits to the mask.
+ * __prim_visibility contains the combined visibility bits of all instances, so is not
+ * reliable if they differ between instances.
+ *
+ * METAL_WIP: OptiX visibility mask can only contain 8 bits, so have to trade-off here
+ * and select just a few important ones.
+ */
+ uint32_t mask = ob->visibility_for_tracing() & 0xFF;
+
+ /* Have to have at least one bit in the mask, or else instance would always be culled. */
+ if (0 == mask) {
+ mask = 0xFF;
+ }
+
+ /* Set user instance ID to object index */
+ int object_index = ob->get_device_index();
+ uint32_t user_id = uint32_t(object_index);
+
+ /* Bake into the appropriate descriptor */
+ if (motion_blur) {
+ MTLAccelerationStructureMotionInstanceDescriptor *instances =
+ (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
+ MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];
+
+ desc.accelerationStructureIndex = accel_struct_index;
+ desc.userID = user_id;
+ desc.mask = mask;
+ desc.motionStartTime = 0.0f;
+ desc.motionEndTime = 1.0f;
+ desc.motionTransformsStartIndex = motion_transform_index;
+ desc.motionStartBorderMode = MTLMotionBorderModeVanish;
+ desc.motionEndBorderMode = MTLMotionBorderModeVanish;
+ desc.intersectionFunctionTableOffset = 0;
+
+ int key_count = ob->get_motion().size();
+ if (key_count) {
+ desc.motionTransformsCount = key_count;
+
+ Transform *keys = ob->get_motion().data();
+ for (int i = 0; i < key_count; i++) {
+ float *t = (float *)&motion_transforms[motion_transform_index++];
+ /* Transpose transform */
+ auto src = (float const *)&keys[i];
+ for (int i = 0; i < 12; i++) {
+ t[i] = src[(i / 3) + 4 * (i % 3)];
+ }
+ }
+ }
+ else {
+ desc.motionTransformsCount = 1;
+
+ float *t = (float *)&motion_transforms[motion_transform_index++];
+ if (ob->get_geometry()->is_instanced()) {
+ /* Transpose transform */
+ auto src = (float const *)&ob->get_tfm();
+ for (int i = 0; i < 12; i++) {
+ t[i] = src[(i / 3) + 4 * (i % 3)];
+ }
+ }
+ else {
+ /* Clear transform to identity matrix */
+ t[0] = t[4] = t[8] = 1.0f;
+ }
+ }
+ }
+ else {
+ MTLAccelerationStructureUserIDInstanceDescriptor *instances =
+ (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
+ MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];
+
+ desc.accelerationStructureIndex = accel_struct_index;
+ desc.userID = user_id;
+ desc.mask = mask;
+ desc.intersectionFunctionTableOffset = 0;
+
+ float *t = (float *)&desc.transformationMatrix;
+ if (ob->get_geometry()->is_instanced()) {
+ /* Transpose transform */
+ auto src = (float const *)&ob->get_tfm();
+ for (int i = 0; i < 12; i++) {
+ t[i] = src[(i / 3) + 4 * (i % 3)];
+ }
+ }
+ else {
+ /* Clear transform to identity matrix */
+ t[0] = t[4] = t[8] = 1.0f;
+ }
+ }
+ }
+
+ if (storage_mode == MTLResourceStorageModeManaged) {
+ [instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)];
+ if (motion_transforms_buf) {
+ [motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)];
+ assert(num_motion_transforms == motion_transform_index);
+ }
+ }
+
+ MTLInstanceAccelerationStructureDescriptor *accelDesc =
+ [MTLInstanceAccelerationStructureDescriptor descriptor];
+ accelDesc.instanceCount = num_instances;
+ accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
+ accelDesc.instanceDescriptorBuffer = instanceBuf;
+ accelDesc.instanceDescriptorBufferOffset = 0;
+ accelDesc.instanceDescriptorStride = instance_size;
+ accelDesc.instancedAccelerationStructures = all_blas;
+
+ if (motion_blur) {
+ accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
+ accelDesc.motionTransformBuffer = motion_transforms_buf;
+ accelDesc.motionTransformCount = num_motion_transforms;
+ }
+
+ if (!use_fast_trace_bvh) {
+ accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
+ MTLAccelerationStructureUsagePreferFastBuild);
+ }
+
+ MTLAccelerationStructureSizes accelSizes = [device
+ accelerationStructureSizesWithDescriptor:accelDesc];
+ id<MTLAccelerationStructure> accel = [device
+ newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
+ id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
+ options:MTLResourceStorageModePrivate];
+ id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
+ id<MTLAccelerationStructureCommandEncoder> accelEnc =
+ [accelCommands accelerationStructureCommandEncoder];
+ if (refit) {
+ [accelEnc refitAccelerationStructure:accel_struct
+ descriptor:accelDesc
+ destination:accel
+ scratchBuffer:scratchBuf
+ scratchBufferOffset:0];
+ }
+ else {
+ [accelEnc buildAccelerationStructure:accel
+ descriptor:accelDesc
+ scratchBuffer:scratchBuf
+ scratchBufferOffset:0];
+ }
+ [accelEnc endEncoding];
+ [accelCommands commit];
+ [accelCommands waitUntilCompleted];
+
+ if (motion_transforms_buf) {
+ [motion_transforms_buf release];
+ }
+ [instanceBuf release];
+ [scratchBuf release];
+
+ uint64_t allocated_size = [accel allocatedSize];
+ stats.mem_alloc(allocated_size);
+
+ /* Cache top and bottom-level acceleration structs */
+ accel_struct = accel;
+ blas_array.clear();
+ blas_array.reserve(all_blas.count);
+ for (id<MTLAccelerationStructure> blas in all_blas) {
+ blas_array.push_back(blas);
+ }
+
+ return true;
+ }
+ return false;
+}
+
+bool BVHMetal::build(Progress &progress,
+ id<MTLDevice> device,
+ id<MTLCommandQueue> queue,
+ bool refit)
+{
+ if (@available(macos 12.0, *)) {
+ if (refit && params.bvh_type != BVH_TYPE_STATIC) {
+ assert(accel_struct);
+ }
+ else {
+ if (accel_struct) {
+ stats.mem_free(accel_struct.allocatedSize);
+ [accel_struct release];
+ accel_struct = nil;
+ }
+ }
+ }
+
+ if (!params.top_level) {
+ return build_BLAS(progress, device, queue, refit);
+ }
+ else {
+ return build_TLAS(progress, device, queue, refit);
+ }
+}
+
+CCL_NAMESPACE_END
+
+#endif /* WITH_METAL */