diff options
author | Siddhartha Jejurkar <f20180617@goa.bits-pilani.ac.in> | 2021-12-17 16:01:32 +0300 |
---|---|---|
committer | Siddhartha Jejurkar <f20180617@goa.bits-pilani.ac.in> | 2021-12-17 16:01:32 +0300 |
commit | dbc41b30f88b96f7d8c6e995b17f5930eb55cc77 (patch) | |
tree | c6c495328443ea3621e5df2ef483b0e0dd504496 /intern/cycles/device/metal/bvh.mm | |
parent | 99a2af76d10e05a18987be5d554ada197b1ca086 (diff) | |
parent | 7c9e4099854a4fc8eab4db97173c1aacd25f9e08 (diff) |
Merge branch 'master' into soc-2021-uv-edge-select-supportsoc-2021-uv-edge-select-support
Diffstat (limited to 'intern/cycles/device/metal/bvh.mm')
-rw-r--r-- | intern/cycles/device/metal/bvh.mm | 813 |
1 files changed, 813 insertions, 0 deletions
diff --git a/intern/cycles/device/metal/bvh.mm b/intern/cycles/device/metal/bvh.mm new file mode 100644 index 00000000000..1953102cb41 --- /dev/null +++ b/intern/cycles/device/metal/bvh.mm @@ -0,0 +1,813 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef WITH_METAL + +# include "scene/hair.h" +# include "scene/mesh.h" +# include "scene/object.h" + +# include "util/progress.h" + +# include "device/metal/bvh.h" + +CCL_NAMESPACE_BEGIN + +# define BVH_status(...) \ + { \ + string str = string_printf(__VA_ARGS__); \ + progress.set_substatus(str); \ + } + +BVHMetal::BVHMetal(const BVHParams ¶ms_, + const vector<Geometry *> &geometry_, + const vector<Object *> &objects_, + Device *device) + : BVH(params_, geometry_, objects_), stats(device->stats) +{ +} + +BVHMetal::~BVHMetal() +{ + if (@available(macos 12.0, *)) { + if (accel_struct) { + stats.mem_free(accel_struct.allocatedSize); + [accel_struct release]; + } + } +} + +bool BVHMetal::build_BLAS_mesh(Progress &progress, + id<MTLDevice> device, + id<MTLCommandQueue> queue, + Geometry *const geom, + bool refit) +{ + if (@available(macos 12.0, *)) { + /* Build BLAS for triangle primitives */ + Mesh *const mesh = static_cast<Mesh *const>(geom); + if (mesh->num_triangles() == 0) { + return false; + } + + /*------------------------------------------------*/ + BVH_status( + "Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str()); + /*------------------------------------------------*/ + + const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC); + + const array<float3> &verts = mesh->get_verts(); + const array<int> &tris = mesh->get_triangles(); + const size_t num_verts = verts.size(); + const size_t num_indices = tris.size(); + + size_t num_motion_steps = 1; + Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (motion_blur && mesh->get_use_motion_blur() && motion_keys) { + num_motion_steps = mesh->get_motion_steps(); + } + + MTLResourceOptions storage_mode; + if (device.hasUnifiedMemory) { + storage_mode = MTLResourceStorageModeShared; + } + else { + storage_mode = MTLResourceStorageModeManaged; + } + + /* Upload the mesh data to the GPU */ + id<MTLBuffer> posBuf = nil; + id<MTLBuffer> indexBuf = [device newBufferWithBytes:tris.data() + length:num_indices * sizeof(tris.data()[0]) + options:storage_mode]; + + if (num_motion_steps == 1) { + posBuf = [device newBufferWithBytes:verts.data() + length:num_verts * sizeof(verts.data()[0]) + options:storage_mode]; + } + else { + posBuf = [device newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0]) + options:storage_mode]; + float3 *dest_data = (float3 *)[posBuf contents]; + size_t center_step = (num_motion_steps - 1) / 2; + for (size_t step = 0; step < num_motion_steps; ++step) { + const float3 *verts = mesh->get_verts().data(); + + /* The center step for motion vertices is not stored in the attribute. */ + if (step != center_step) { + verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts; + } + memcpy(dest_data + num_verts * step, verts, num_verts * sizeof(float3)); + } + if (storage_mode == MTLResourceStorageModeManaged) { + [posBuf didModifyRange:NSMakeRange(0, posBuf.length)]; + } + } + + /* Create an acceleration structure. */ + MTLAccelerationStructureGeometryDescriptor *geomDesc; + if (num_motion_steps > 1) { + std::vector<MTLMotionKeyframeData *> vertex_ptrs; + vertex_ptrs.reserve(num_motion_steps); + for (size_t step = 0; step < num_motion_steps; ++step) { + MTLMotionKeyframeData *k = [MTLMotionKeyframeData data]; + k.buffer = posBuf; + k.offset = num_verts * step * sizeof(float3); + vertex_ptrs.push_back(k); + } + + MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion = + [MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor]; + geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data() + count:vertex_ptrs.size()]; + geomDescMotion.vertexStride = sizeof(verts.data()[0]); + geomDescMotion.indexBuffer = indexBuf; + geomDescMotion.indexBufferOffset = 0; + geomDescMotion.indexType = MTLIndexTypeUInt32; + geomDescMotion.triangleCount = num_indices / 3; + geomDescMotion.intersectionFunctionTableOffset = 0; + + geomDesc = geomDescMotion; + } + else { + MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion = + [MTLAccelerationStructureTriangleGeometryDescriptor descriptor]; + geomDescNoMotion.vertexBuffer = posBuf; + geomDescNoMotion.vertexBufferOffset = 0; + geomDescNoMotion.vertexStride = sizeof(verts.data()[0]); + geomDescNoMotion.indexBuffer = indexBuf; + geomDescNoMotion.indexBufferOffset = 0; + geomDescNoMotion.indexType = MTLIndexTypeUInt32; + geomDescNoMotion.triangleCount = num_indices / 3; + geomDescNoMotion.intersectionFunctionTableOffset = 0; + + geomDesc = geomDescNoMotion; + } + + /* Force a single any-hit call, so shadow record-all behavior works correctly */ + /* (Match optix behavior: unsigned int build_flags = + * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */ + geomDesc.allowDuplicateIntersectionFunctionInvocation = false; + + MTLPrimitiveAccelerationStructureDescriptor *accelDesc = + [MTLPrimitiveAccelerationStructureDescriptor descriptor]; + accelDesc.geometryDescriptors = @[ geomDesc ]; + if (num_motion_steps > 1) { + accelDesc.motionStartTime = 0.0f; + accelDesc.motionEndTime = 1.0f; + accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp; + accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp; + accelDesc.motionKeyframeCount = num_motion_steps; + } + + if (!use_fast_trace_bvh) { + accelDesc.usage |= (MTLAccelerationStructureUsageRefit | + MTLAccelerationStructureUsagePreferFastBuild); + } + + MTLAccelerationStructureSizes accelSizes = [device + accelerationStructureSizesWithDescriptor:accelDesc]; + id<MTLAccelerationStructure> accel_uncompressed = [device + newAccelerationStructureWithSize:accelSizes.accelerationStructureSize]; + id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize + options:MTLResourceStorageModePrivate]; + id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared]; + id<MTLCommandBuffer> accelCommands = [queue commandBuffer]; + id<MTLAccelerationStructureCommandEncoder> accelEnc = + [accelCommands accelerationStructureCommandEncoder]; + if (refit) { + [accelEnc refitAccelerationStructure:accel_struct + descriptor:accelDesc + destination:accel_uncompressed + scratchBuffer:scratchBuf + scratchBufferOffset:0]; + } + else { + [accelEnc buildAccelerationStructure:accel_uncompressed + descriptor:accelDesc + scratchBuffer:scratchBuf + scratchBufferOffset:0]; + } + if (use_fast_trace_bvh) { + [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed + toBuffer:sizeBuf + offset:0 + sizeDataType:MTLDataTypeULong]; + } + [accelEnc endEncoding]; + [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) { + /* free temp resources */ + [scratchBuf release]; + [indexBuf release]; + [posBuf release]; + + if (use_fast_trace_bvh) { + /* Compact the accel structure */ + uint64_t compressed_size = *(uint64_t *)sizeBuf.contents; + + dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ + id<MTLCommandBuffer> accelCommands = [queue commandBuffer]; + id<MTLAccelerationStructureCommandEncoder> accelEnc = + [accelCommands accelerationStructureCommandEncoder]; + id<MTLAccelerationStructure> accel = [device + newAccelerationStructureWithSize:compressed_size]; + [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed + toAccelerationStructure:accel]; + [accelEnc endEncoding]; + [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) { + uint64_t allocated_size = [accel allocatedSize]; + stats.mem_alloc(allocated_size); + accel_struct = accel; + [accel_uncompressed release]; + accel_struct_building = false; + }]; + [accelCommands commit]; + }); + } + else { + /* set our acceleration structure to the uncompressed structure */ + accel_struct = accel_uncompressed; + + uint64_t allocated_size = [accel_struct allocatedSize]; + stats.mem_alloc(allocated_size); + accel_struct_building = false; + } + [sizeBuf release]; + }]; + + accel_struct_building = true; + [accelCommands commit]; + + return true; + } + return false; +} + +bool BVHMetal::build_BLAS_hair(Progress &progress, + id<MTLDevice> device, + id<MTLCommandQueue> queue, + Geometry *const geom, + bool refit) +{ + if (@available(macos 12.0, *)) { + /* Build BLAS for hair curves */ + Hair *hair = static_cast<Hair *>(geom); + if (hair->num_curves() == 0) { + return false; + } + + /*------------------------------------------------*/ + BVH_status( + "Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str()); + /*------------------------------------------------*/ + + const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC); + const size_t num_segments = hair->num_segments(); + + size_t num_motion_steps = 1; + Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (motion_blur && hair->get_use_motion_blur() && motion_keys) { + num_motion_steps = hair->get_motion_steps(); + } + + const size_t num_aabbs = num_segments * num_motion_steps; + + MTLResourceOptions storage_mode; + if (device.hasUnifiedMemory) { + storage_mode = MTLResourceStorageModeShared; + } + else { + storage_mode = MTLResourceStorageModeManaged; + } + + /* Allocate a GPU buffer for the AABB data and populate it */ + id<MTLBuffer> aabbBuf = [device + newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox) + options:storage_mode]; + MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents]; + + /* Get AABBs for each motion step */ + size_t center_step = (num_motion_steps - 1) / 2; + for (size_t step = 0; step < num_motion_steps; ++step) { + /* The center step for motion vertices is not stored in the attribute */ + const float3 *keys = hair->get_curve_keys().data(); + if (step != center_step) { + size_t attr_offset = (step > center_step) ? step - 1 : step; + /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4) */ + keys = motion_keys->data_float3() + attr_offset * hair->get_curve_keys().size(); + } + + for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) { + const Hair::Curve curve = hair->get_curve(j); + + for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) { + { + BoundBox bounds = BoundBox::empty; + curve.bounds_grow(segment, keys, hair->get_curve_radius().data(), bounds); + + const size_t index = step * num_segments + i; + aabb_data[index].min = (MTLPackedFloat3 &)bounds.min; + aabb_data[index].max = (MTLPackedFloat3 &)bounds.max; + } + } + } + } + + if (storage_mode == MTLResourceStorageModeManaged) { + [aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)]; + } + +# if 0 + for (size_t i=0; i<num_aabbs && i < 400; i++) { + MTLAxisAlignedBoundingBox& bb = aabb_data[i]; + printf(" %d: %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n", int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z); + } +# endif + + MTLAccelerationStructureGeometryDescriptor *geomDesc; + if (motion_blur) { + std::vector<MTLMotionKeyframeData *> aabb_ptrs; + aabb_ptrs.reserve(num_motion_steps); + for (size_t step = 0; step < num_motion_steps; ++step) { + MTLMotionKeyframeData *k = [MTLMotionKeyframeData data]; + k.buffer = aabbBuf; + k.offset = step * num_segments * sizeof(MTLAxisAlignedBoundingBox); + aabb_ptrs.push_back(k); + } + + MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion = + [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor]; + geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data() + count:aabb_ptrs.size()]; + geomDescMotion.boundingBoxCount = num_segments; + geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]); + geomDescMotion.intersectionFunctionTableOffset = 1; + + /* Force a single any-hit call, so shadow record-all behavior works correctly */ + /* (Match optix behavior: unsigned int build_flags = + * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */ + geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false; + geomDescMotion.opaque = true; + geomDesc = geomDescMotion; + } + else { + MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion = + [MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor]; + geomDescNoMotion.boundingBoxBuffer = aabbBuf; + geomDescNoMotion.boundingBoxBufferOffset = 0; + geomDescNoMotion.boundingBoxCount = int(num_aabbs); + geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]); + geomDescNoMotion.intersectionFunctionTableOffset = 1; + + /* Force a single any-hit call, so shadow record-all behavior works correctly */ + /* (Match optix behavior: unsigned int build_flags = + * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */ + geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false; + geomDescNoMotion.opaque = true; + geomDesc = geomDescNoMotion; + } + + MTLPrimitiveAccelerationStructureDescriptor *accelDesc = + [MTLPrimitiveAccelerationStructureDescriptor descriptor]; + accelDesc.geometryDescriptors = @[ geomDesc ]; + + if (motion_blur) { + accelDesc.motionStartTime = 0.0f; + accelDesc.motionEndTime = 1.0f; + accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish; + accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish; + accelDesc.motionKeyframeCount = num_motion_steps; + } + + if (!use_fast_trace_bvh) { + accelDesc.usage |= (MTLAccelerationStructureUsageRefit | + MTLAccelerationStructureUsagePreferFastBuild); + } + + MTLAccelerationStructureSizes accelSizes = [device + accelerationStructureSizesWithDescriptor:accelDesc]; + id<MTLAccelerationStructure> accel_uncompressed = [device + newAccelerationStructureWithSize:accelSizes.accelerationStructureSize]; + id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize + options:MTLResourceStorageModePrivate]; + id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared]; + id<MTLCommandBuffer> accelCommands = [queue commandBuffer]; + id<MTLAccelerationStructureCommandEncoder> accelEnc = + [accelCommands accelerationStructureCommandEncoder]; + if (refit) { + [accelEnc refitAccelerationStructure:accel_struct + descriptor:accelDesc + destination:accel_uncompressed + scratchBuffer:scratchBuf + scratchBufferOffset:0]; + } + else { + [accelEnc buildAccelerationStructure:accel_uncompressed + descriptor:accelDesc + scratchBuffer:scratchBuf + scratchBufferOffset:0]; + } + if (use_fast_trace_bvh) { + [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed + toBuffer:sizeBuf + offset:0 + sizeDataType:MTLDataTypeULong]; + } + [accelEnc endEncoding]; + [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) { + /* free temp resources */ + [scratchBuf release]; + [aabbBuf release]; + + if (use_fast_trace_bvh) { + /* Compact the accel structure */ + uint64_t compressed_size = *(uint64_t *)sizeBuf.contents; + + dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ + id<MTLCommandBuffer> accelCommands = [queue commandBuffer]; + id<MTLAccelerationStructureCommandEncoder> accelEnc = + [accelCommands accelerationStructureCommandEncoder]; + id<MTLAccelerationStructure> accel = [device + newAccelerationStructureWithSize:compressed_size]; + [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed + toAccelerationStructure:accel]; + [accelEnc endEncoding]; + [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) { + uint64_t allocated_size = [accel allocatedSize]; + stats.mem_alloc(allocated_size); + accel_struct = accel; + [accel_uncompressed release]; + accel_struct_building = false; + }]; + [accelCommands commit]; + }); + } + else { + /* set our acceleration structure to the uncompressed structure */ + accel_struct = accel_uncompressed; + + uint64_t allocated_size = [accel_struct allocatedSize]; + stats.mem_alloc(allocated_size); + accel_struct_building = false; + } + [sizeBuf release]; + }]; + + accel_struct_building = true; + [accelCommands commit]; + return true; + } + return false; +} + +bool BVHMetal::build_BLAS(Progress &progress, + id<MTLDevice> device, + id<MTLCommandQueue> queue, + bool refit) +{ + if (@available(macos 12.0, *)) { + assert(objects.size() == 1 && geometry.size() == 1); + + /* Build bottom level acceleration structures (BLAS) */ + Geometry *const geom = geometry[0]; + switch (geom->geometry_type) { + case Geometry::VOLUME: + case Geometry::MESH: + return build_BLAS_mesh(progress, device, queue, geom, refit); + case Geometry::HAIR: + return build_BLAS_hair(progress, device, queue, geom, refit); + default: + return false; + } + } + return false; +} + +bool BVHMetal::build_TLAS(Progress &progress, + id<MTLDevice> device, + id<MTLCommandQueue> queue, + bool refit) +{ + if (@available(macos 12.0, *)) { + + /* we need to sync here and ensure that all BLAS have completed async generation by both GCD + * and Metal */ + { + __block bool complete_bvh = false; + while (!complete_bvh) { + dispatch_sync(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ + complete_bvh = true; + for (Object *ob : objects) { + /* Skip non-traceable objects */ + if (!ob->is_traceable()) + continue; + + Geometry const *geom = ob->get_geometry(); + BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh); + if (blas->accel_struct_building) { + complete_bvh = false; + + /* We're likely waiting on a command buffer that's in flight to complete. + * Queue up a command buffer and wait for it complete before checking the BLAS again + */ + id<MTLCommandBuffer> command_buffer = [queue commandBuffer]; + [command_buffer commit]; + [command_buffer waitUntilCompleted]; + break; + } + } + }); + } + } + + uint32_t num_instances = 0; + uint32_t num_motion_transforms = 0; + for (Object *ob : objects) { + /* Skip non-traceable objects */ + if (!ob->is_traceable()) + continue; + num_instances++; + + if (ob->use_motion()) { + num_motion_transforms += max(1, ob->get_motion().size()); + } + else { + num_motion_transforms++; + } + } + + /*------------------------------------------------*/ + BVH_status("Building TLAS | %7d instances", (int)num_instances); + /*------------------------------------------------*/ + + const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC); + + NSMutableArray *all_blas = [NSMutableArray array]; + unordered_map<BVHMetal const *, int> instance_mapping; + + /* Lambda function to build/retrieve the BLAS index mapping */ + auto get_blas_index = [&](BVHMetal const *blas) { + auto it = instance_mapping.find(blas); + if (it != instance_mapping.end()) { + return it->second; + } + else { + int blas_index = (int)[all_blas count]; + instance_mapping[blas] = blas_index; + if (@available(macos 12.0, *)) { + [all_blas addObject:blas->accel_struct]; + } + return blas_index; + } + }; + + MTLResourceOptions storage_mode; + if (device.hasUnifiedMemory) { + storage_mode = MTLResourceStorageModeShared; + } + else { + storage_mode = MTLResourceStorageModeManaged; + } + + size_t instance_size; + if (motion_blur) { + instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor); + } + else { + instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor); + } + + /* Allocate a GPU buffer for the instance data and populate it */ + id<MTLBuffer> instanceBuf = [device newBufferWithLength:num_instances * instance_size + options:storage_mode]; + id<MTLBuffer> motion_transforms_buf = nil; + MTLPackedFloat4x3 *motion_transforms = nullptr; + if (motion_blur && num_motion_transforms) { + motion_transforms_buf = [device + newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3) + options:storage_mode]; + motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents; + } + + uint32_t instance_index = 0; + uint32_t motion_transform_index = 0; + for (Object *ob : objects) { + /* Skip non-traceable objects */ + if (!ob->is_traceable()) + continue; + + Geometry const *geom = ob->get_geometry(); + + BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh); + uint32_t accel_struct_index = get_blas_index(blas); + + /* Add some of the object visibility bits to the mask. + * __prim_visibility contains the combined visibility bits of all instances, so is not + * reliable if they differ between instances. + * + * METAL_WIP: OptiX visibility mask can only contain 8 bits, so have to trade-off here + * and select just a few important ones. + */ + uint32_t mask = ob->visibility_for_tracing() & 0xFF; + + /* Have to have at least one bit in the mask, or else instance would always be culled. */ + if (0 == mask) { + mask = 0xFF; + } + + /* Set user instance ID to object index */ + int object_index = ob->get_device_index(); + uint32_t user_id = uint32_t(object_index); + + /* Bake into the appropriate descriptor */ + if (motion_blur) { + MTLAccelerationStructureMotionInstanceDescriptor *instances = + (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents]; + MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++]; + + desc.accelerationStructureIndex = accel_struct_index; + desc.userID = user_id; + desc.mask = mask; + desc.motionStartTime = 0.0f; + desc.motionEndTime = 1.0f; + desc.motionTransformsStartIndex = motion_transform_index; + desc.motionStartBorderMode = MTLMotionBorderModeVanish; + desc.motionEndBorderMode = MTLMotionBorderModeVanish; + desc.intersectionFunctionTableOffset = 0; + + int key_count = ob->get_motion().size(); + if (key_count) { + desc.motionTransformsCount = key_count; + + Transform *keys = ob->get_motion().data(); + for (int i = 0; i < key_count; i++) { + float *t = (float *)&motion_transforms[motion_transform_index++]; + /* Transpose transform */ + auto src = (float const *)&keys[i]; + for (int i = 0; i < 12; i++) { + t[i] = src[(i / 3) + 4 * (i % 3)]; + } + } + } + else { + desc.motionTransformsCount = 1; + + float *t = (float *)&motion_transforms[motion_transform_index++]; + if (ob->get_geometry()->is_instanced()) { + /* Transpose transform */ + auto src = (float const *)&ob->get_tfm(); + for (int i = 0; i < 12; i++) { + t[i] = src[(i / 3) + 4 * (i % 3)]; + } + } + else { + /* Clear transform to identity matrix */ + t[0] = t[4] = t[8] = 1.0f; + } + } + } + else { + MTLAccelerationStructureUserIDInstanceDescriptor *instances = + (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents]; + MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++]; + + desc.accelerationStructureIndex = accel_struct_index; + desc.userID = user_id; + desc.mask = mask; + desc.intersectionFunctionTableOffset = 0; + + float *t = (float *)&desc.transformationMatrix; + if (ob->get_geometry()->is_instanced()) { + /* Transpose transform */ + auto src = (float const *)&ob->get_tfm(); + for (int i = 0; i < 12; i++) { + t[i] = src[(i / 3) + 4 * (i % 3)]; + } + } + else { + /* Clear transform to identity matrix */ + t[0] = t[4] = t[8] = 1.0f; + } + } + } + + if (storage_mode == MTLResourceStorageModeManaged) { + [instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)]; + if (motion_transforms_buf) { + [motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)]; + assert(num_motion_transforms == motion_transform_index); + } + } + + MTLInstanceAccelerationStructureDescriptor *accelDesc = + [MTLInstanceAccelerationStructureDescriptor descriptor]; + accelDesc.instanceCount = num_instances; + accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID; + accelDesc.instanceDescriptorBuffer = instanceBuf; + accelDesc.instanceDescriptorBufferOffset = 0; + accelDesc.instanceDescriptorStride = instance_size; + accelDesc.instancedAccelerationStructures = all_blas; + + if (motion_blur) { + accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion; + accelDesc.motionTransformBuffer = motion_transforms_buf; + accelDesc.motionTransformCount = num_motion_transforms; + } + + if (!use_fast_trace_bvh) { + accelDesc.usage |= (MTLAccelerationStructureUsageRefit | + MTLAccelerationStructureUsagePreferFastBuild); + } + + MTLAccelerationStructureSizes accelSizes = [device + accelerationStructureSizesWithDescriptor:accelDesc]; + id<MTLAccelerationStructure> accel = [device + newAccelerationStructureWithSize:accelSizes.accelerationStructureSize]; + id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize + options:MTLResourceStorageModePrivate]; + id<MTLCommandBuffer> accelCommands = [queue commandBuffer]; + id<MTLAccelerationStructureCommandEncoder> accelEnc = + [accelCommands accelerationStructureCommandEncoder]; + if (refit) { + [accelEnc refitAccelerationStructure:accel_struct + descriptor:accelDesc + destination:accel + scratchBuffer:scratchBuf + scratchBufferOffset:0]; + } + else { + [accelEnc buildAccelerationStructure:accel + descriptor:accelDesc + scratchBuffer:scratchBuf + scratchBufferOffset:0]; + } + [accelEnc endEncoding]; + [accelCommands commit]; + [accelCommands waitUntilCompleted]; + + if (motion_transforms_buf) { + [motion_transforms_buf release]; + } + [instanceBuf release]; + [scratchBuf release]; + + uint64_t allocated_size = [accel allocatedSize]; + stats.mem_alloc(allocated_size); + + /* Cache top and bottom-level acceleration structs */ + accel_struct = accel; + blas_array.clear(); + blas_array.reserve(all_blas.count); + for (id<MTLAccelerationStructure> blas in all_blas) { + blas_array.push_back(blas); + } + + return true; + } + return false; +} + +bool BVHMetal::build(Progress &progress, + id<MTLDevice> device, + id<MTLCommandQueue> queue, + bool refit) +{ + if (@available(macos 12.0, *)) { + if (refit && params.bvh_type != BVH_TYPE_STATIC) { + assert(accel_struct); + } + else { + if (accel_struct) { + stats.mem_free(accel_struct.allocatedSize); + [accel_struct release]; + accel_struct = nil; + } + } + } + + if (!params.top_level) { + return build_BLAS(progress, device, queue, refit); + } + else { + return build_TLAS(progress, device, queue, refit); + } +} + +CCL_NAMESPACE_END + +#endif /* WITH_METAL */ |