diff options
author | Michael Jones <michael_p_jones@apple.com> | 2021-12-07 18:11:35 +0300 |
---|---|---|
committer | Michael Jones <michael_p_jones@apple.com> | 2021-12-07 18:52:21 +0300 |
commit | 9558fa5196033390111a2348caa66ab18b8a4f89 (patch) | |
tree | acc3ed446f709390abfef5f97f82c1ed9abe0100 /intern/cycles/device/metal/device_impl.mm | |
parent | 565b33c0ad31966b860123837d2c4b5a8cbedad2 (diff) |
Cycles: Metal host-side code
This patch adds the Metal host-side code:
- Add all core host-side Metal backend files (device_impl, queue, etc)
- Add MetalRT BVH setup files
- Integrate with Cycles device enumeration code
- Revive `path_source_replace_includes` in util/path (required for MSL compilation)
This patch also includes a couple of small kernel-side fixes:
- Add an implementation of `lgammaf` for Metal [Nemes, Gergő (2010), "New asymptotic expansion for the Gamma function", Archiv der Mathematik](https://users.renyi.hu/~gergonemes/)
- Include "work_stealing.h" inside the Metal context class because it now accesses state
Ref T92212
Reviewed By: brecht
Maniphest Tasks: T92212
Differential Revision: https://developer.blender.org/D13423
Diffstat (limited to 'intern/cycles/device/metal/device_impl.mm')
-rw-r--r-- | intern/cycles/device/metal/device_impl.mm | 1008 |
1 file changed, 1008 insertions(+), 0 deletions(-)
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm new file mode 100644 index 00000000000..69a08ca07d1 --- /dev/null +++ b/intern/cycles/device/metal/device_impl.mm @@ -0,0 +1,1008 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef WITH_METAL + +# include "device/metal/device_impl.h" +# include "device/metal/device.h" + +# include "util/debug.h" +# include "util/md5.h" +# include "util/path.h" + +CCL_NAMESPACE_BEGIN + +class MetalDevice; + +BVHLayoutMask MetalDevice::get_bvh_layout_mask() const +{ + return use_metalrt ? 
BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2; +} + +void MetalDevice::set_error(const string &error) +{ + static std::mutex s_error_mutex; + std::lock_guard<std::mutex> lock(s_error_mutex); + + Device::set_error(error); + + if (first_error) { + fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n"); + fprintf(stderr, + "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n"); + first_error = false; + } +} + +MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) + : Device(info, stats, profiler), texture_info(this, "__texture_info", MEM_GLOBAL) +{ + mtlDevId = info.num; + + /* select chosen device */ + vector<MetalPlatformDevice> usable_devices; + MetalInfo::get_usable_devices(&usable_devices); + if (usable_devices.size() == 0) { + set_error("Metal: no devices found."); + return; + } + assert(mtlDevId < usable_devices.size()); + MetalPlatformDevice &platform_device = usable_devices[mtlDevId]; + mtlDevice = platform_device.device_id; + device_name = platform_device.device_name; + device_vendor = MetalInfo::get_vendor_from_device_name(device_name); + assert(device_vendor != METAL_GPU_UNKNOWN); + metal_printf("Creating new Cycles device for Metal: %s\n", device_name.c_str()); + + /* determine default storage mode based on whether UMA is supported */ + + default_storage_mode = MTLResourceStorageModeManaged; + + if (@available(macos 11.0, *)) { + if ([mtlDevice hasUnifiedMemory]) { + default_storage_mode = MTLResourceStorageModeShared; + init_host_memory(); + } + } + + texture_bindings_2d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode]; + texture_bindings_3d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode]; + + stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize); + + switch (device_vendor) { + default: + break; + case METAL_GPU_INTEL: { + use_metalrt = false; + max_threads_per_threadgroup = 64; + break; + } + case 
METAL_GPU_AMD: { + use_metalrt = false; + max_threads_per_threadgroup = 128; + break; + } + case METAL_GPU_APPLE: { + use_metalrt = true; + max_threads_per_threadgroup = 512; + break; + } + } + + if (auto metalrt = getenv("CYCLES_METALRT")) { + use_metalrt = (atoi(metalrt) != 0); + } + + MTLArgumentDescriptor *arg_desc_params = [[MTLArgumentDescriptor alloc] init]; + arg_desc_params.dataType = MTLDataTypePointer; + arg_desc_params.access = MTLArgumentAccessReadOnly; + arg_desc_params.arrayLength = sizeof(KernelParamsMetal) / sizeof(device_ptr); + mtlBufferKernelParamsEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_params ]]; + + MTLArgumentDescriptor *arg_desc_texture = [[MTLArgumentDescriptor alloc] init]; + arg_desc_texture.dataType = MTLDataTypeTexture; + arg_desc_texture.access = MTLArgumentAccessReadOnly; + mtlTextureArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_texture ]]; + + /* command queue for non-tracing work on the GPU */ + mtlGeneralCommandQueue = [mtlDevice newCommandQueue]; + + /* Acceleration structure arg encoder, if needed */ + if (@available(macos 12.0, *)) { + if (use_metalrt) { + MTLArgumentDescriptor *arg_desc_as = [[MTLArgumentDescriptor alloc] init]; + arg_desc_as.dataType = MTLDataTypeInstanceAccelerationStructure; + arg_desc_as.access = MTLArgumentAccessReadOnly; + mtlASArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_as ]]; + [arg_desc_as release]; + } + } + + /* Build the arg encoder for the ancillary bindings */ + { + NSMutableArray *ancillary_desc = [[NSMutableArray alloc] init]; + + int index = 0; + MTLArgumentDescriptor *arg_desc_tex = [[MTLArgumentDescriptor alloc] init]; + arg_desc_tex.dataType = MTLDataTypePointer; + arg_desc_tex.access = MTLArgumentAccessReadOnly; + + arg_desc_tex.index = index++; + [ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_tex_2d */ + arg_desc_tex.index = index++; + [ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_tex_3d */ + + 
[arg_desc_tex release]; + + if (@available(macos 12.0, *)) { + if (use_metalrt) { + MTLArgumentDescriptor *arg_desc_as = [[MTLArgumentDescriptor alloc] init]; + arg_desc_as.dataType = MTLDataTypeInstanceAccelerationStructure; + arg_desc_as.access = MTLArgumentAccessReadOnly; + + MTLArgumentDescriptor *arg_desc_ift = [[MTLArgumentDescriptor alloc] init]; + arg_desc_ift.dataType = MTLDataTypeIntersectionFunctionTable; + arg_desc_ift.access = MTLArgumentAccessReadOnly; + + arg_desc_as.index = index++; + [ancillary_desc addObject:[arg_desc_as copy]]; /* accel_struct */ + arg_desc_ift.index = index++; + [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_default */ + arg_desc_ift.index = index++; + [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_shadow */ + arg_desc_ift.index = index++; + [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_local */ + + [arg_desc_ift release]; + [arg_desc_as release]; + } + } + + mtlAncillaryArgEncoder = [mtlDevice newArgumentEncoderWithArguments:ancillary_desc]; + + for (int i = 0; i < ancillary_desc.count; i++) { + [ancillary_desc[i] release]; + } + [ancillary_desc release]; + } + [arg_desc_params release]; + [arg_desc_texture release]; +} + +MetalDevice::~MetalDevice() +{ + for (auto &tex : texture_slot_map) { + if (tex) { + [tex release]; + tex = nil; + } + } + flush_delayed_free_list(); + + if (texture_bindings_2d) { + stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize); + + [texture_bindings_2d release]; + [texture_bindings_3d release]; + } + [mtlTextureArgEncoder release]; + [mtlBufferKernelParamsEncoder release]; + [mtlASArgEncoder release]; + [mtlAncillaryArgEncoder release]; + [mtlGeneralCommandQueue release]; + [mtlDevice release]; + + texture_info.free(); +} + +bool MetalDevice::support_device(const uint kernel_features /*requested_features*/) +{ + return true; +} + +bool MetalDevice::check_peer_access(Device *peer_device) +{ + assert(0); + /* does peer access make sense? 
*/ + return false; +} + +bool MetalDevice::use_adaptive_compilation() +{ + return DebugFlags().metal.adaptive_compile; +} + +string MetalDevice::get_source(const uint kernel_features) +{ + string build_options; + + if (use_adaptive_compilation()) { + build_options += " -D__KERNEL_FEATURES__=" + to_string(kernel_features); + } + + if (use_metalrt) { + build_options += "-D__METALRT__ "; + if (motion_blur) { + build_options += "-D__METALRT_MOTION__ "; + } + } + +# ifdef WITH_CYCLES_DEBUG + build_options += "-D__KERNEL_DEBUG__ "; +# endif + + switch (device_vendor) { + default: + break; + case METAL_GPU_INTEL: + build_options += "-D__KERNEL_METAL_INTEL__ "; + break; + case METAL_GPU_AMD: + build_options += "-D__KERNEL_METAL_AMD__ "; + break; + case METAL_GPU_APPLE: + build_options += "-D__KERNEL_METAL_APPLE__ "; + break; + } + + /* reformat -D defines list into compilable form */ + vector<string> components; + string_replace(build_options, "-D", ""); + string_split(components, build_options, " "); + + string globalDefines; + for (const string &component : components) { + vector<string> assignments; + string_split(assignments, component, "="); + if (assignments.size() == 2) + globalDefines += string_printf( + "#define %s %s\n", assignments[0].c_str(), assignments[1].c_str()); + else + globalDefines += string_printf("#define %s\n", assignments[0].c_str()); + } + + string source = globalDefines + "\n#include \"kernel/device/metal/kernel.metal\"\n"; + source = path_source_replace_includes(source, path_get("source")); + + metal_printf("Global defines:\n%s\n", globalDefines.c_str()); + + return source; +} + +bool MetalDevice::load_kernels(const uint _kernel_features) +{ + kernel_features = _kernel_features; + + /* check if GPU is supported */ + if (!support_device(kernel_features)) + return false; + + /* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds + * This is necessary since objects may be reported to have motion if the Vector pass 
is + * active, but may still need to be rendered without motion blur if that isn't active as well. */ + motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION; + + NSError *error = NULL; + + for (int i = 0; i < PSO_NUM; i++) { + if (mtlLibrary[i]) { + [mtlLibrary[i] release]; + mtlLibrary[i] = nil; + } + } + + MTLCompileOptions *options = [[MTLCompileOptions alloc] init]; + + options.fastMathEnabled = YES; + options.languageVersion = MTLLanguageVersion2_1; + + if (@available(macOS 11.0, *)) { + options.languageVersion = MTLLanguageVersion2_3; + } + if (@available(macOS 12.0, *)) { + options.languageVersion = MTLLanguageVersion2_4; + } + + string metalsrc; + + /* local helper: dump source to disk and return filepath */ + auto dump_source = [&](int kernel_type) -> string { + string &source = source_used_for_compile[kernel_type]; + string metalsrc = path_cache_get(path_join("kernels", + string_printf("%s.%s.metal", + kernel_type_as_string(kernel_type), + util_md5_string(source).c_str()))); + path_write_text(metalsrc, source); + return metalsrc; + }; + + /* local helper: fetch the kernel source code, adjust it for specific PSO_.. 
kernel_type flavor, + * then compile it into a MTLLibrary */ + auto fetch_and_compile_source = [&](int kernel_type) { + /* record the source uesd to compile this library, for hash building later */ + string &source = source_used_for_compile[kernel_type]; + + switch (kernel_type) { + case PSO_GENERIC: { + source = get_source(kernel_features); + break; + } + case PSO_SPECIALISED: { + /* PSO_SPECIALISED derives from PSO_GENERIC */ + string &generic_source = source_used_for_compile[PSO_GENERIC]; + if (generic_source.empty()) { + generic_source = get_source(kernel_features); + } + source = "#define __KERNEL_METAL_USE_FUNCTION_SPECIALISATION__\n" + generic_source; + break; + } + default: + assert(0); + } + + /* create MTLLibrary (front-end compilation) */ + mtlLibrary[kernel_type] = [mtlDevice newLibraryWithSource:@(source.c_str()) + options:options + error:&error]; + + bool do_source_dump = (getenv("CYCLES_METAL_DUMP_SOURCE") != nullptr); + + if (!mtlLibrary[kernel_type] || do_source_dump) { + string metalsrc = dump_source(kernel_type); + + if (!mtlLibrary[kernel_type]) { + NSString *err = [error localizedDescription]; + set_error(string_printf("Failed to compile library:\n%s", [err UTF8String])); + + return false; + } + } + return true; + }; + + fetch_and_compile_source(PSO_GENERIC); + + if (use_function_specialisation) { + fetch_and_compile_source(PSO_SPECIALISED); + } + + metal_printf("Front-end compilation finished\n"); + + bool result = kernels.load(this, PSO_GENERIC); + + [options release]; + reserve_local_memory(kernel_features); + + return result; +} + +void MetalDevice::reserve_local_memory(const uint kernel_features) +{ + /* METAL_WIP - implement this */ +} + +void MetalDevice::init_host_memory() +{ + /* METAL_WIP - implement this */ +} + +void MetalDevice::load_texture_info() +{ + if (need_texture_info) { + /* Unset flag before copying. 
*/ + need_texture_info = false; + texture_info.copy_to_device(); + + int num_textures = texture_info.size(); + + for (int tex = 0; tex < num_textures; tex++) { + uint64_t offset = tex * sizeof(void *); + + id<MTLTexture> metal_texture = texture_slot_map[tex]; + if (!metal_texture) { + [mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset]; + [mtlTextureArgEncoder setTexture:nil atIndex:0]; + [mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset]; + [mtlTextureArgEncoder setTexture:nil atIndex:0]; + } + else { + MTLTextureType type = metal_texture.textureType; + [mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset]; + [mtlTextureArgEncoder setTexture:type == MTLTextureType2D ? metal_texture : nil atIndex:0]; + [mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset]; + [mtlTextureArgEncoder setTexture:type == MTLTextureType3D ? metal_texture : nil atIndex:0]; + } + } + if (default_storage_mode == MTLResourceStorageModeManaged) { + [texture_bindings_2d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))]; + [texture_bindings_3d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))]; + } + } +} + +MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem) +{ + size_t size = mem.memory_size(); + + mem.device_pointer = 0; + + id<MTLBuffer> metal_buffer = nil; + if (size > 0) { + MTLResourceOptions options = default_storage_mode; + if (mem.type == MEM_DEVICE_ONLY) { + options = MTLResourceStorageModePrivate; + } + + metal_buffer = [mtlDevice newBufferWithLength:size options:options]; + + if (!metal_buffer) { + set_error("System is out of GPU memory"); + return nullptr; + } + } + + if (mem.name) { + VLOG(2) << "Buffer allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. 
(" + << string_human_readable_size(mem.memory_size()) << ")"; + } + + mem.device_size = metal_buffer.allocatedSize; + stats.mem_alloc(mem.device_size); + + metal_buffer.label = [[NSString alloc] initWithFormat:@"%s", mem.name]; + + std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex); + + assert(metal_mem_map.count(&mem) == 0); /* assert against double-alloc */ + MetalMem *mmem = new MetalMem; + metal_mem_map[&mem] = std::unique_ptr<MetalMem>(mmem); + + mmem->mem = &mem; + mmem->mtlBuffer = metal_buffer; + mmem->offset = 0; + mmem->size = size; + if (mem.type != MEM_DEVICE_ONLY) { + mmem->hostPtr = [metal_buffer contents]; + } + else { + mmem->hostPtr = nullptr; + } + + /* encode device_pointer as (MetalMem*) in order to handle resource relocation and device pointer + * recalculation */ + mem.device_pointer = device_ptr(mmem); + + if (metal_buffer.storageMode == MTLResourceStorageModeShared) { + /* Replace host pointer with our host allocation. */ + + if (mem.host_pointer && mem.host_pointer != mmem->hostPtr) { + memcpy(mmem->hostPtr, mem.host_pointer, size); + + mem.host_free(); + mem.host_pointer = mmem->hostPtr; + } + mem.shared_pointer = mmem->hostPtr; + mem.shared_counter++; + mmem->use_UMA = true; + } + else { + mmem->use_UMA = false; + } + + return mmem; +} + +void MetalDevice::generic_copy_to(device_memory &mem) +{ + if (!mem.host_pointer || !mem.device_pointer) { + return; + } + + std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex); + if (!metal_mem_map.at(&mem)->use_UMA || mem.host_pointer != mem.shared_pointer) { + MetalMem &mmem = *metal_mem_map.at(&mem); + memcpy(mmem.hostPtr, mem.host_pointer, mem.memory_size()); + if (mmem.mtlBuffer.storageMode == MTLStorageModeManaged) { + [mmem.mtlBuffer didModifyRange:NSMakeRange(0, mem.memory_size())]; + } + } +} + +void MetalDevice::generic_free(device_memory &mem) +{ + if (mem.device_pointer) { + std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex); + MetalMem &mmem = 
*metal_mem_map.at(&mem); + size_t size = mmem.size; + + /* If mmem.use_uma is true, reference counting is used + * to safely free memory. */ + + bool free_mtlBuffer = false; + + if (mmem.use_UMA) { + assert(mem.shared_pointer); + if (mem.shared_pointer) { + assert(mem.shared_counter > 0); + if (--mem.shared_counter == 0) { + free_mtlBuffer = true; + } + } + } + else { + free_mtlBuffer = true; + } + + if (free_mtlBuffer) { + if (mem.host_pointer && mem.host_pointer == mem.shared_pointer) { + /* Safely move the device-side data back to the host before it is freed. */ + mem.host_pointer = mem.host_alloc(size); + memcpy(mem.host_pointer, mem.shared_pointer, size); + mmem.use_UMA = false; + } + + mem.shared_pointer = 0; + + /* Free device memory. */ + delayed_free_list.push_back(mmem.mtlBuffer); + mmem.mtlBuffer = nil; + } + + stats.mem_free(mem.device_size); + mem.device_pointer = 0; + mem.device_size = 0; + + metal_mem_map.erase(&mem); + } +} + +void MetalDevice::mem_alloc(device_memory &mem) +{ + if (mem.type == MEM_TEXTURE) { + assert(!"mem_alloc not supported for textures."); + } + else if (mem.type == MEM_GLOBAL) { + generic_alloc(mem); + } + else { + generic_alloc(mem); + } +} + +void MetalDevice::mem_copy_to(device_memory &mem) +{ + if (mem.type == MEM_GLOBAL) { + global_free(mem); + global_alloc(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); + tex_alloc((device_texture &)mem); + } + else { + if (!mem.device_pointer) { + generic_alloc(mem); + } + generic_copy_to(mem); + } +} + +void MetalDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) +{ + if (mem.host_pointer) { + + bool subcopy = (w >= 0 && h >= 0); + const size_t size = subcopy ? (elem * w * h) : mem.memory_size(); + const size_t offset = subcopy ? 
(elem * y * w) : 0; + + if (mem.device_pointer) { + std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex); + MetalMem &mmem = *metal_mem_map.at(&mem); + + if ([mmem.mtlBuffer storageMode] == MTLStorageModeManaged) { + + id<MTLCommandBuffer> cmdBuffer = [mtlGeneralCommandQueue commandBuffer]; + id<MTLBlitCommandEncoder> blitEncoder = [cmdBuffer blitCommandEncoder]; + [blitEncoder synchronizeResource:mmem.mtlBuffer]; + [blitEncoder endEncoding]; + [cmdBuffer commit]; + [cmdBuffer waitUntilCompleted]; + } + + if (mem.host_pointer != mmem.hostPtr) { + memcpy((uchar *)mem.host_pointer + offset, (uchar *)mmem.hostPtr + offset, size); + } + } + else { + memset((char *)mem.host_pointer + offset, 0, size); + } + } +} + +void MetalDevice::mem_zero(device_memory &mem) +{ + if (!mem.device_pointer) { + mem_alloc(mem); + } + if (!mem.device_pointer) { + return; + } + + size_t size = mem.memory_size(); + std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex); + MetalMem &mmem = *metal_mem_map.at(&mem); + memset(mmem.hostPtr, 0, size); + if ([mmem.mtlBuffer storageMode] == MTLStorageModeManaged) { + [mmem.mtlBuffer didModifyRange:NSMakeRange(0, size)]; + } +} + +void MetalDevice::mem_free(device_memory &mem) +{ + if (mem.type == MEM_GLOBAL) { + global_free(mem); + } + else if (mem.type == MEM_TEXTURE) { + tex_free((device_texture &)mem); + } + else { + generic_free(mem); + } +} + +device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) +{ + /* METAL_WIP - revive if necessary */ + assert(0); + return 0; +} + +void MetalDevice::const_copy_to(const char *name, void *host, size_t size) +{ + if (strcmp(name, "__data") == 0) { + assert(size == sizeof(KernelData)); + memcpy((uint8_t *)&launch_params + offsetof(KernelParamsMetal, data), host, size); + return; + } + + auto update_launch_pointers = + [&](size_t offset, void *data, size_t data_size, size_t pointers_size) { + memcpy((uint8_t *)&launch_params + offset, data, data_size); 
+ + MetalMem **mmem = (MetalMem **)data; + int pointer_count = pointers_size / sizeof(device_ptr); + int pointer_index = offset / sizeof(device_ptr); + for (int i = 0; i < pointer_count; i++) { + if (mmem[i]) { + mmem[i]->pointer_index = pointer_index + i; + } + } + }; + + /* Update data storage pointers in launch parameters. */ + if (strcmp(name, "__integrator_state") == 0) { + /* IntegratorStateGPU is contiguous pointers */ + const size_t pointer_block_size = sizeof(IntegratorStateGPU); + update_launch_pointers( + offsetof(KernelParamsMetal, __integrator_state), host, size, pointer_block_size); + } +# define KERNEL_TEX(data_type, tex_name) \ + else if (strcmp(name, #tex_name) == 0) \ + { \ + update_launch_pointers(offsetof(KernelParamsMetal, tex_name), host, size, size); \ + } +# include "kernel/textures.h" +# undef KERNEL_TEX +} + +void MetalDevice::global_alloc(device_memory &mem) +{ + if (mem.is_resident(this)) { + generic_alloc(mem); + generic_copy_to(mem); + } + + const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer)); +} + +void MetalDevice::global_free(device_memory &mem) +{ + if (mem.is_resident(this) && mem.device_pointer) { + generic_free(mem); + } +} + +void MetalDevice::tex_alloc_as_buffer(device_texture &mem) +{ + generic_alloc(mem); + generic_copy_to(mem); + + /* Resize once */ + const uint slot = mem.slot; + if (slot >= texture_info.size()) { + /* Allocate some slots in advance, to reduce amount + * of re-allocations. 
*/ + texture_info.resize(round_up(slot + 1, 128)); + } + + mem.info.data = (uint64_t)mem.device_pointer; + + /* Set Mapping and tag that we need to (re-)upload to device */ + texture_info[slot] = mem.info; + need_texture_info = true; +} + +void MetalDevice::tex_alloc(device_texture &mem) +{ + MTLStorageMode storage_mode = MTLStorageModeManaged; + if (@available(macos 10.15, *)) { + if ([mtlDevice hasUnifiedMemory] && + device_vendor != + METAL_GPU_INTEL) { /* Intel GPUs don't support MTLStorageModeShared for MTLTextures */ + storage_mode = MTLStorageModeShared; + } + } + + /* General variables for both architectures */ + string bind_name = mem.name; + size_t dsize = datatype_size(mem.data_type); + size_t size = mem.memory_size(); + + /* sampler_index maps into the GPU's constant 'metal_samplers' array */ + uint64_t sampler_index = mem.info.extension; + if (mem.info.interpolation != INTERPOLATION_CLOSEST) { + sampler_index += 3; + } + + /* Image Texture Storage */ + MTLPixelFormat format; + switch (mem.data_type) { + case TYPE_UCHAR: { + MTLPixelFormat formats[] = {MTLPixelFormatR8Unorm, + MTLPixelFormatRG8Unorm, + MTLPixelFormatInvalid, + MTLPixelFormatRGBA8Unorm}; + format = formats[mem.data_elements - 1]; + } break; + case TYPE_UINT16: { + MTLPixelFormat formats[] = {MTLPixelFormatR16Unorm, + MTLPixelFormatRG16Unorm, + MTLPixelFormatInvalid, + MTLPixelFormatRGBA16Unorm}; + format = formats[mem.data_elements - 1]; + } break; + case TYPE_UINT: { + MTLPixelFormat formats[] = {MTLPixelFormatR32Uint, + MTLPixelFormatRG32Uint, + MTLPixelFormatInvalid, + MTLPixelFormatRGBA32Uint}; + format = formats[mem.data_elements - 1]; + } break; + case TYPE_INT: { + MTLPixelFormat formats[] = {MTLPixelFormatR32Sint, + MTLPixelFormatRG32Sint, + MTLPixelFormatInvalid, + MTLPixelFormatRGBA32Sint}; + format = formats[mem.data_elements - 1]; + } break; + case TYPE_FLOAT: { + MTLPixelFormat formats[] = {MTLPixelFormatR32Float, + MTLPixelFormatRG32Float, + MTLPixelFormatInvalid, + 
MTLPixelFormatRGBA32Float}; + format = formats[mem.data_elements - 1]; + } break; + case TYPE_HALF: { + MTLPixelFormat formats[] = {MTLPixelFormatR16Float, + MTLPixelFormatRG16Float, + MTLPixelFormatInvalid, + MTLPixelFormatRGBA16Float}; + format = formats[mem.data_elements - 1]; + } break; + default: + assert(0); + return; + } + + assert(format != MTLPixelFormatInvalid); + + id<MTLTexture> mtlTexture = nil; + size_t src_pitch = mem.data_width * dsize * mem.data_elements; + + if (mem.data_depth > 1) { + /* 3D texture using array */ + MTLTextureDescriptor *desc; + + desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:format + width:mem.data_width + height:mem.data_height + mipmapped:NO]; + + desc.storageMode = storage_mode; + desc.usage = MTLTextureUsageShaderRead; + + desc.textureType = MTLTextureType3D; + desc.depth = mem.data_depth; + + VLOG(2) << "Texture 3D allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + + mtlTexture = [mtlDevice newTextureWithDescriptor:desc]; + assert(mtlTexture); + + if (!mtlTexture) { + return; + } + + const size_t imageBytes = src_pitch * mem.data_height; + for (size_t d = 0; d < mem.data_depth; d++) { + const size_t offset = d * imageBytes; + [mtlTexture replaceRegion:MTLRegionMake3D(0, 0, d, mem.data_width, mem.data_height, 1) + mipmapLevel:0 + slice:0 + withBytes:(uint8_t *)mem.host_pointer + offset + bytesPerRow:src_pitch + bytesPerImage:0]; + } + } + else if (mem.data_height > 0) { + /* 2D texture */ + MTLTextureDescriptor *desc; + + desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:format + width:mem.data_width + height:mem.data_height + mipmapped:NO]; + + desc.storageMode = storage_mode; + desc.usage = MTLTextureUsageShaderRead; + + VLOG(2) << "Texture 2D allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. 
(" + << string_human_readable_size(mem.memory_size()) << ")"; + + mtlTexture = [mtlDevice newTextureWithDescriptor:desc]; + assert(mtlTexture); + + [mtlTexture replaceRegion:MTLRegionMake2D(0, 0, mem.data_width, mem.data_height) + mipmapLevel:0 + withBytes:mem.host_pointer + bytesPerRow:src_pitch]; + } + else { + assert(0); + /* 1D texture, using linear memory. */ + } + + mem.device_pointer = (device_ptr)mtlTexture; + mem.device_size = size; + stats.mem_alloc(size); + + std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex); + MetalMem *mmem = new MetalMem; + metal_mem_map[&mem] = std::unique_ptr<MetalMem>(mmem); + mmem->mem = &mem; + mmem->mtlTexture = mtlTexture; + + /* Resize once */ + const uint slot = mem.slot; + if (slot >= texture_info.size()) { + /* Allocate some slots in advance, to reduce amount + * of re-allocations. */ + texture_info.resize(slot + 128); + texture_slot_map.resize(slot + 128); + + ssize_t min_buffer_length = sizeof(void *) * texture_info.size(); + if (!texture_bindings_2d || (texture_bindings_2d.length < min_buffer_length)) { + if (texture_bindings_2d) { + delayed_free_list.push_back(texture_bindings_2d); + delayed_free_list.push_back(texture_bindings_3d); + + stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize); + } + texture_bindings_2d = [mtlDevice newBufferWithLength:min_buffer_length + options:default_storage_mode]; + texture_bindings_3d = [mtlDevice newBufferWithLength:min_buffer_length + options:default_storage_mode]; + + stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize); + } + } + + /* optimise the texture for GPU access */ + id<MTLCommandBuffer> commandBuffer = [mtlGeneralCommandQueue commandBuffer]; + id<MTLBlitCommandEncoder> blitCommandEncoder = [commandBuffer blitCommandEncoder]; + [blitCommandEncoder optimizeContentsForGPUAccess:mtlTexture]; + [blitCommandEncoder endEncoding]; + [commandBuffer commit]; + + /* Set Mapping and tag that we need to 
(re-)upload to device */ + texture_slot_map[slot] = mtlTexture; + texture_info[slot] = mem.info; + need_texture_info = true; + + texture_info[slot].data = uint64_t(slot) | (sampler_index << 32); +} + +void MetalDevice::tex_free(device_texture &mem) +{ + if (metal_mem_map.count(&mem)) { + std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex); + MetalMem &mmem = *metal_mem_map.at(&mem); + + assert(texture_slot_map[mem.slot] == mmem.mtlTexture); + texture_slot_map[mem.slot] = nil; + + if (mmem.mtlTexture) { + /* Free bindless texture. */ + delayed_free_list.push_back(mmem.mtlTexture); + mmem.mtlTexture = nil; + } + stats.mem_free(mem.device_size); + mem.device_pointer = 0; + mem.device_size = 0; + metal_mem_map.erase(&mem); + } +} + +unique_ptr<DeviceQueue> MetalDevice::gpu_queue_create() +{ + return make_unique<MetalDeviceQueue>(this); +} + +bool MetalDevice::should_use_graphics_interop() +{ + /* METAL_WIP - provide fast interop */ + return false; +} + +void MetalDevice::flush_delayed_free_list() +{ + /* free any Metal buffers that may have been freed by host while a command + * buffer was being generated. This function should be called after each + * completion of a command buffer */ + std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex); + for (auto &it : delayed_free_list) { + [it release]; + } + delayed_free_list.clear(); +} + +void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) +{ + if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2) { + Device::build_bvh(bvh, progress, refit); + return; + } + + BVHMetal *bvh_metal = static_cast<BVHMetal *>(bvh); + bvh_metal->motion_blur = motion_blur; + if (bvh_metal->build(progress, mtlDevice, mtlGeneralCommandQueue, refit)) { + + if (@available(macos 11.0, *)) { + if (bvh->params.top_level) { + bvhMetalRT = bvh_metal; + } + } + } +} + +CCL_NAMESPACE_END + +#endif |