diff options
author | Charles Giessen <charles@lunarg.com> | 2022-05-28 01:36:35 +0300 |
---|---|---|
committer | Charles Giessen <46324611+charles-lunarg@users.noreply.github.com> | 2022-05-28 02:25:52 +0300 |
commit | 9308d2f8f1d483555c8f9c4012c7d8a499e878a6 (patch) | |
tree | f082ec2b15579106322c68b853be9d23e9090433 | |
parent | e52b98e356590627fd87f167d767bdbd5c962fdb (diff) |
Fix crashes from OOM in vkDestroyInstance
Various situations could cause an OOM to turn into a hard crash due to double freeing
of memory that was improperly cleaned up. Also fixed memory leaks when layers would
report OOM.
-rw-r--r-- | loader/loader.c | 44 | ||||
-rw-r--r-- | loader/loader.h | 4 | ||||
-rw-r--r-- | loader/trampoline.c | 10 | ||||
-rw-r--r-- | tests/framework/layer/test_layer.cpp | 34 | ||||
-rw-r--r-- | tests/framework/layer/test_layer.h | 11 | ||||
-rw-r--r-- | tests/loader_alloc_callback_tests.cpp | 91 |
6 files changed, 175 insertions, 19 deletions
diff --git a/loader/loader.c b/loader/loader.c index 6dea865fe..07f760ed5 100644 --- a/loader/loader.c +++ b/loader/loader.c @@ -806,6 +806,7 @@ void loader_destroy_layer_list(const struct loader_instance *inst, struct loader } layer_list->count = 0; layer_list->capacity = 0; + layer_list->list = NULL; } // Append layer properties defined in prop_list to the given layer_info list @@ -1253,8 +1254,8 @@ void loader_remove_logical_device(const struct loader_instance *inst, struct loa loader_destroy_logical_device(inst, found_dev, pAllocator); } -static void loader_icd_destroy(struct loader_instance *ptr_inst, struct loader_icd_term *icd_term, - const VkAllocationCallbacks *pAllocator) { +void loader_icd_destroy(struct loader_instance *ptr_inst, struct loader_icd_term *icd_term, + const VkAllocationCallbacks *pAllocator) { ptr_inst->total_icd_count--; for (struct loader_device *dev = icd_term->logical_device_list; dev;) { struct loader_device *next_dev = dev->next; @@ -1265,21 +1266,10 @@ static void loader_icd_destroy(struct loader_instance *ptr_inst, struct loader_i loader_instance_heap_free(ptr_inst, icd_term); } -static struct loader_icd_term *loader_icd_create(const struct loader_instance *inst) { - struct loader_icd_term *icd_term; - - icd_term = loader_instance_heap_calloc(inst, sizeof(struct loader_icd_term), VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!icd_term) { - return NULL; - } - - return icd_term; -} - static struct loader_icd_term *loader_icd_add(struct loader_instance *ptr_inst, const struct loader_scanned_icd *scanned_icd) { struct loader_icd_term *icd_term; - icd_term = loader_icd_create(ptr_inst); + icd_term = loader_instance_heap_calloc(ptr_inst, sizeof(struct loader_icd_term), VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (!icd_term) { return NULL; } @@ -4328,6 +4318,32 @@ out: // Failure cleanup if (VK_SUCCESS != res) { if (NULL != dev) { + // Find the icd_term this device belongs to then remove it from that icd_term. + // Need to iterate the linked lists and remove the device from it. Don't delete + // the device here since it may not have been added to the icd_term and there + // are other allocations attached to it. + struct loader_icd_term *icd_term = inst->icd_terms; + bool found = false; + while (!found && NULL != icd_term) { + struct loader_device *cur_dev = icd_term->logical_device_list; + struct loader_device *prev_dev = NULL; + while (NULL != cur_dev) { + if (cur_dev == dev) { + if (cur_dev == icd_term->logical_device_list) { + icd_term->logical_device_list = cur_dev->next; + } else if (prev_dev) { + prev_dev->next = cur_dev->next; + } + + found = true; + break; + } + prev_dev = cur_dev; + cur_dev = cur_dev->next; + } + icd_term = icd_term->next; + } + // Now destroy the device and the allocations associated with it. loader_destroy_logical_device(inst, dev, pAllocator); } } diff --git a/loader/loader.h b/loader/loader.h index 35765a3c2..10fa14acd 100644 --- a/loader/loader.h +++ b/loader/loader.h @@ -114,6 +114,8 @@ void loader_delete_layer_list_and_properties(const struct loader_instance *inst, VkResult loader_add_layer_name_to_list(const struct loader_instance *inst, const char *name, const enum layer_type_flags type_flags, const struct loader_layer_list *source_list, struct loader_layer_list *target_list, struct loader_layer_list *expanded_target_list); +void loader_icd_destroy(struct loader_instance *ptr_inst, struct loader_icd_term *icd_term, + const VkAllocationCallbacks *pAllocator); void loader_scanned_icd_clear(const struct loader_instance *inst, struct loader_icd_tramp_list *icd_tramp_list); VkResult loader_icd_scan(const struct loader_instance *inst, struct loader_icd_tramp_list *icd_tramp_list, bool *skipped_portability_drivers); @@ -181,4 +183,4 @@ bool loader_check_version_meets_required(loader_api_version required, loader_api #ifndef LOADER_VERSION_1_1_0 #define LOADER_VERSION_1_1_0 loader_combine_version(1, 1, 0) -#endif
\ No newline at end of file +#endif diff --git a/loader/trampoline.c b/loader/trampoline.c index 7c243ed0c..274b16696 100644 --- a/loader/trampoline.c +++ b/loader/trampoline.c @@ -632,6 +632,16 @@ out: loader_scanned_icd_clear(ptr_instance, &ptr_instance->icd_tramp_list); loader_destroy_generic_list(ptr_instance, (struct loader_generic_list *)&ptr_instance->ext_list); + // Free any icd_terms that were created. + // If an OOM occurs from a layer, terminator_CreateInstance won't be reached where this kind of + // cleanup normally occurs + struct loader_icd_term *icd_term = NULL; + while (NULL != ptr_instance->icd_terms) { + icd_term = ptr_instance->icd_terms; + ptr_instance->icd_terms = icd_term->next; + loader_icd_destroy(ptr_instance, icd_term, pAllocator); + } + loader_instance_heap_free(ptr_instance, ptr_instance); } else { // success path, swap out created debug callbacks out so they aren't used until instance destruction diff --git a/tests/framework/layer/test_layer.cpp b/tests/framework/layer/test_layer.cpp index 5be8d8ddc..c5c333a3c 100644 --- a/tests/framework/layer/test_layer.cpp +++ b/tests/framework/layer/test_layer.cpp @@ -170,6 +170,14 @@ VKAPI_ATTR VkResult VKAPI_CALL test_vkCreateInstance(const VkInstanceCreateInfo* if (layer.create_instance_callback) result = layer.create_instance_callback(layer); + if (layer.do_spurious_allocations_in_create_instance && pAllocator && pAllocator->pfnAllocation) { + layer.spurious_instance_memory_allocation = + pAllocator->pfnAllocation(pAllocator->pUserData, 100, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (layer.spurious_instance_memory_allocation == nullptr) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + return result; } @@ -179,6 +187,11 @@ VKAPI_ATTR VkResult VKAPI_CALL test_override_vkCreateInstance(const VkInstanceCr } VKAPI_ATTR void VKAPI_CALL test_vkDestroyInstance(VkInstance instance, const VkAllocationCallbacks* pAllocator) { + if (layer.spurious_instance_memory_allocation && pAllocator && pAllocator->pfnFree) { + pAllocator->pfnFree(pAllocator->pUserData, layer.spurious_instance_memory_allocation); + layer.spurious_instance_memory_allocation = nullptr; + } + layer.instance_dispatch_table.DestroyInstance(instance, pAllocator); } @@ -215,6 +228,15 @@ VKAPI_ATTR VkResult VKAPI_CALL test_vkCreateDevice(VkPhysicalDevice physicalDevi // Need to add the created devices to the list so it can be freed layer.created_devices.push_back(device); + if (layer.do_spurious_allocations_in_create_device && pAllocator && pAllocator->pfnAllocation) { + void* allocation = pAllocator->pfnAllocation(pAllocator->pUserData, 110, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (allocation == nullptr) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } else { + layer.spurious_device_memory_allocations.push_back({allocation, device.device_handle}); + } + } + return result; } @@ -406,6 +428,16 @@ VKAPI_ATTR VkResult VKAPI_CALL test_vkEnumeratePhysicalDeviceGroups( // device functions VKAPI_ATTR void VKAPI_CALL test_vkDestroyDevice(VkDevice device, const VkAllocationCallbacks* pAllocator) { + for (uint32_t i = 0; i < layer.spurious_device_memory_allocations.size();) { + auto& allocation = layer.spurious_device_memory_allocations[i]; + if (allocation.device == device && pAllocator && pAllocator->pfnFree) { + pAllocator->pfnFree(pAllocator->pUserData, allocation.allocation); + layer.spurious_device_memory_allocations.erase(layer.spurious_device_memory_allocations.begin() + i); + } else { + i++; + } + } + for (auto& created_device : layer.created_devices) { if (created_device.device_handle == device) { created_device.dispatch_table.DestroyDevice(device, pAllocator); @@ -612,4 +644,4 @@ vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface* pVersionStruct return VK_ERROR_INITIALIZATION_FAILED; } #endif -} // extern "C"
\ No newline at end of file +} // extern "C" diff --git a/tests/framework/layer/test_layer.h b/tests/framework/layer/test_layer.h index 32001378a..a99a3bb3b 100644 --- a/tests/framework/layer/test_layer.h +++ b/tests/framework/layer/test_layer.h @@ -133,6 +133,15 @@ struct TestLayer { BUILDER_VECTOR(TestLayer, VulkanFunction, custom_physical_device_functions, custom_physical_device_function) + BUILDER_VALUE(TestLayer, bool, do_spurious_allocations_in_create_instance, false) + void* spurious_instance_memory_allocation = nullptr; + BUILDER_VALUE(TestLayer, bool, do_spurious_allocations_in_create_device, false) + struct DeviceMemAlloc { + void* allocation; + VkDevice device; + }; + std::vector<DeviceMemAlloc> spurious_device_memory_allocations; + PFN_vkGetInstanceProcAddr next_vkGetInstanceProcAddr = VK_NULL_HANDLE; PFN_GetPhysicalDeviceProcAddr next_GetPhysicalDeviceProcAddr = VK_NULL_HANDLE; PFN_vkGetDeviceProcAddr next_vkGetDeviceProcAddr = VK_NULL_HANDLE; @@ -151,4 +160,4 @@ using GetTestLayerFunc = TestLayer* (*)(); #define GET_TEST_LAYER_FUNC_STR "get_test_layer_func" using GetNewTestLayerFunc = TestLayer* (*)(); -#define RESET_LAYER_FUNC_STR "reset_layer_func"
\ No newline at end of file +#define RESET_LAYER_FUNC_STR "reset_layer_func" diff --git a/tests/loader_alloc_callback_tests.cpp b/tests/loader_alloc_callback_tests.cpp index 7daf317af..851b876a8 100644 --- a/tests/loader_alloc_callback_tests.cpp +++ b/tests/loader_alloc_callback_tests.cpp @@ -355,6 +355,14 @@ TEST(Allocation, DeviceButNotInstance) { FrameworkEnvironment env{}; env.add_icd(TestICDDetails(TEST_ICD_PATH_VERSION_2)); + const char* layer_name = "VkLayerImplicit0"; + env.add_implicit_layer(ManifestLayer{}.add_layer(ManifestLayer::LayerDescription{} + .set_name(layer_name) + .set_lib_path(TEST_LAYER_PATH_EXPORT_VERSION_2) + .set_disable_environment("DISABLE_ENV")), + "test_layer.json"); + env.get_test_layer().set_do_spurious_allocations_in_create_instance(true).set_do_spurious_allocations_in_create_device(true); + MemoryTracker tracker; { auto& driver = env.get_test_icd(); @@ -403,6 +411,14 @@ TEST(Allocation, CreateInstanceIntentionalAllocFail) { FrameworkEnvironment env{}; env.add_icd(TestICDDetails(TEST_ICD_PATH_VERSION_2)); + const char* layer_name = "VkLayerImplicit0"; + env.add_implicit_layer(ManifestLayer{}.add_layer(ManifestLayer::LayerDescription{} + .set_name(layer_name) + .set_lib_path(TEST_LAYER_PATH_EXPORT_VERSION_2) + .set_disable_environment("DISABLE_ENV")), + "test_layer.json"); + env.get_test_layer().set_do_spurious_allocations_in_create_instance(true).set_do_spurious_allocations_in_create_device(true); + size_t fail_index = 0; VkResult result = VK_ERROR_OUT_OF_HOST_MEMORY; while (result == VK_ERROR_OUT_OF_HOST_MEMORY && fail_index <= 10000) { @@ -433,6 +449,14 @@ TEST(Allocation, CreateDeviceIntentionalAllocFail) { driver.physical_devices.emplace_back("physical_device_1"); driver.physical_devices[1].add_queue_family_properties({{VK_QUEUE_GRAPHICS_BIT, 1, 0, {1, 1, 1}}, false}); + const char* layer_name = "VkLayerImplicit0"; + env.add_implicit_layer(ManifestLayer{}.add_layer(ManifestLayer::LayerDescription{} + .set_name(layer_name) + .set_lib_path(TEST_LAYER_PATH_EXPORT_VERSION_2) + .set_disable_environment("DISABLE_ENV")), + "test_layer.json"); + env.get_test_layer().set_do_spurious_allocations_in_create_instance(true).set_do_spurious_allocations_in_create_device(true); + InstWrapper inst{env.vulkan_functions}; inst.CheckCreate(); @@ -488,6 +512,14 @@ TEST(Allocation, CreateInstanceDeviceIntentionalAllocFail) { driver.physical_devices.emplace_back("physical_device_0"); driver.physical_devices[0].add_queue_family_properties({{VK_QUEUE_GRAPHICS_BIT, 1, 0, {1, 1, 1}}, false}); + const char* layer_name = "VkLayerImplicit0"; + env.add_implicit_layer(ManifestLayer{}.add_layer(ManifestLayer::LayerDescription{} + .set_name(layer_name) + .set_lib_path(TEST_LAYER_PATH_EXPORT_VERSION_2) + .set_disable_environment("DISABLE_ENV")), + "test_layer.json"); + env.get_test_layer().set_do_spurious_allocations_in_create_instance(true).set_do_spurious_allocations_in_create_device(true); + size_t fail_index = 0; VkResult result = VK_ERROR_OUT_OF_HOST_MEMORY; while (result == VK_ERROR_OUT_OF_HOST_MEMORY && fail_index <= 10000) { @@ -556,6 +588,15 @@ TEST(TryLoadWrongBinaries, CreateInstanceIntentionalAllocFail) { FrameworkEnvironment env{}; env.add_icd(TestICDDetails(TEST_ICD_PATH_VERSION_2)); env.add_icd(TestICDDetails(CURRENT_PLATFORM_DUMMY_BINARY_WRONG_TYPE).set_is_fake(true)); + + const char* layer_name = "VkLayerImplicit0"; + env.add_implicit_layer(ManifestLayer{}.add_layer(ManifestLayer::LayerDescription{} + .set_name(layer_name) + .set_lib_path(TEST_LAYER_PATH_EXPORT_VERSION_2) + .set_disable_environment("DISABLE_ENV")), + "test_layer.json"); + env.get_test_layer().set_do_spurious_allocations_in_create_instance(true).set_do_spurious_allocations_in_create_device(true); + size_t fail_index = 0; VkResult result = VK_ERROR_OUT_OF_HOST_MEMORY; while (result == VK_ERROR_OUT_OF_HOST_MEMORY && fail_index <= 10000) { @@ -579,6 +620,14 @@ TEST(Allocation, EnumeratePhysicalDevicesIntentionalAllocFail) { FrameworkEnvironment env{}; env.add_icd(TestICDDetails(TEST_ICD_PATH_VERSION_2)); + const char* layer_name = "VkLayerImplicit0"; + env.add_implicit_layer(ManifestLayer{}.add_layer(ManifestLayer::LayerDescription{} + .set_name(layer_name) + .set_lib_path(TEST_LAYER_PATH_EXPORT_VERSION_2) + .set_disable_environment("DISABLE_ENV")), + "test_layer.json"); + env.get_test_layer().set_do_spurious_allocations_in_create_instance(true).set_do_spurious_allocations_in_create_device(true); + size_t fail_index = 0; bool reached_the_end = false; uint32_t starting_physical_dev_count = 3; @@ -612,6 +661,7 @@ TEST(Allocation, EnumeratePhysicalDevicesIntentionalAllocFail) { for (uint32_t i = 0; i < 2; i++) { driver.physical_devices.emplace_back(std::string("physical_device_") + std::to_string(physical_dev_count)); + driver.physical_devices.back().add_queue_family_properties({{VK_QUEUE_GRAPHICS_BIT, 1, 0, {1, 1, 1}}, false}); physical_dev_count += 1; } @@ -639,7 +689,33 @@ TEST(Allocation, EnumeratePhysicalDevicesIntentionalAllocFail) { } ASSERT_EQ(physical_dev_count, returned_physical_count); - std::cout << "fail count " << fail_index << "\n"; + std::array<VkDevice, 3> devices; + for (uint32_t i = 0; i < returned_physical_count; i++) { + uint32_t family_count = 1; + uint32_t returned_family_count = 0; + env.vulkan_functions.vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[i], &returned_family_count, nullptr); + ASSERT_EQ(returned_family_count, family_count); + + VkQueueFamilyProperties family; + env.vulkan_functions.vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[i], &returned_family_count, &family); + ASSERT_EQ(returned_family_count, family_count); + ASSERT_EQ(family.queueFlags, static_cast<VkQueueFlags>(VK_QUEUE_GRAPHICS_BIT)); + ASSERT_EQ(family.queueCount, family_count); + ASSERT_EQ(family.timestampValidBits, 0U); + + DeviceCreateInfo dev_create_info; + DeviceQueueCreateInfo queue_info; + queue_info.add_priority(0.0f); + dev_create_info.add_device_queue(queue_info); + + result = env.vulkan_functions.vkCreateDevice(physical_devices[i], dev_create_info.get(), tracker.get(), &devices[i]); + } + for (uint32_t i = 0; i < returned_physical_count; i++) { + if (result == VK_SUCCESS) { + env.vulkan_functions.vkDestroyDevice(devices[i], tracker.get()); + } + } + env.vulkan_functions.vkDestroyInstance(instance, tracker.get()); ASSERT_TRUE(tracker.empty()); reached_the_end = true; @@ -659,6 +735,14 @@ TEST(Allocation, CreateInstanceDeviceWithDXGIDriverIntentionalAllocFail) { driver.physical_devices[0].add_queue_family_properties({{VK_QUEUE_GRAPHICS_BIT, 1, 0, {1, 1, 1}}, false}); } + const char* layer_name = "VkLayerImplicit0"; + env.add_implicit_layer(ManifestLayer{}.add_layer(ManifestLayer::LayerDescription{} + .set_name(layer_name) + .set_lib_path(TEST_LAYER_PATH_EXPORT_VERSION_2) + .set_disable_environment("DISABLE_ENV")), + "test_layer.json"); + env.get_test_layer().set_do_spurious_allocations_in_create_instance(true).set_do_spurious_allocations_in_create_device(true); + auto& known_driver = known_driver_list.at(2); // which drive this test pretends to be DXGI_ADAPTER_DESC1 desc1{}; desc1.VendorId = known_driver.vendor_id; @@ -721,9 +805,12 @@ TEST(Allocation, CreateInstanceDeviceWithDXGIDriverIntentionalAllocFail) { dev_create_info.add_device_queue(queue_info); result = env.vulkan_functions.vkCreateDevice(physical_devices[i], dev_create_info.get(), tracker.get(), &devices[i]); + if (result == VK_ERROR_OUT_OF_HOST_MEMORY) { + devices[i] = VK_NULL_HANDLE; + } } for (uint32_t i = 0; i < returned_physical_count; i++) { - if (result == VK_SUCCESS) { + if (devices[i] != VK_NULL_HANDLE) { env.vulkan_functions.vkDestroyDevice(devices[i], tracker.get()); } } |