diff options
-rw-r--r-- | cmake/QuickCppLibBootstrap.cmake | 10 | ||||
-rw-r--r-- | cmake/tests.cmake | 1 | ||||
-rw-r--r-- | include/llfio/revision.hpp | 6 | ||||
-rw-r--r-- | include/llfio/v2.0/detail/impl/windows/map_handle.ipp | 63 | ||||
-rw-r--r-- | test/tests/issue0009.cpp | 93 |
5 files changed, 151 insertions, 22 deletions
diff --git a/cmake/QuickCppLibBootstrap.cmake b/cmake/QuickCppLibBootstrap.cmake index 29a0a830..8efa0b26 100644 --- a/cmake/QuickCppLibBootstrap.cmake +++ b/cmake/QuickCppLibBootstrap.cmake @@ -28,12 +28,20 @@ foreach(item ${CMAKE_MODULE_PATH}) set(quickcpplib_done ON) endif() endforeach() +if(DEFINED quickcpplib_DIR) + find_package(quickcpplib QUIET CONFIG) + if(quickcpplib_FOUND) + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${quickcpplib_DIR}/cmakelib") + set(CTEST_QUICKCPPLIB_SCRIPTS "${quickcpplib_DIR}/scripts") + set(quickcpplib_done ON) + endif() +endif() if(NOT quickcpplib_done) # CMAKE_SOURCE_DIR is the very topmost parent cmake project # CMAKE_CURRENT_SOURCE_DIR is the current cmake subproject if(NOT DEFINED CTEST_QUICKCPPLIB_CLONE_DIR) - # If there is a magic .quickcpplib_use_siblings directory above the topmost project, use sibling edition if(EXISTS "${CMAKE_SOURCE_DIR}/../.quickcpplib_use_siblings" AND NOT QUICKCPPLIB_DISABLE_SIBLINGS) + # If there is a magic .quickcpplib_use_siblings directory above the topmost project, use sibling edition set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/../quickcpplib/cmakelib") set(CTEST_QUICKCPPLIB_SCRIPTS "${CMAKE_SOURCE_DIR}/../quickcpplib/scripts") # Copy latest version of myself into end user diff --git a/cmake/tests.cmake b/cmake/tests.cmake index 0bf01f1f..8448643e 100644 --- a/cmake/tests.cmake +++ b/cmake/tests.cmake @@ -12,6 +12,7 @@ set(llfio_TESTS "test/tests/file_handle_create_close/runner.cpp" "test/tests/file_handle_lock_unlock.cpp" "test/tests/handle_adapter_xor.cpp" + "test/tests/issue0009.cpp" "test/tests/issue0027.cpp" "test/tests/issue0028.cpp" "test/tests/large_pages.cpp" diff --git a/include/llfio/revision.hpp b/include/llfio/revision.hpp index 91eeb3c5..6f0c2825 100644 --- a/include/llfio/revision.hpp +++ b/include/llfio/revision.hpp @@ -1,4 +1,4 @@ // Note the second line of this file must ALWAYS be the git SHA, third line ALWAYS the git SHA update time -#define LLFIO_PREVIOUS_COMMIT_REF 21883faeb87fa31e209f9ed1de44b3aca89edce2 -#define LLFIO_PREVIOUS_COMMIT_DATE "2020-07-24 12:31:12 +00:00" -#define LLFIO_PREVIOUS_COMMIT_UNIQUE 21883fae +#define LLFIO_PREVIOUS_COMMIT_REF 4f4dbe1d95b7afc2b8e6d713611d391598546c8b +#define LLFIO_PREVIOUS_COMMIT_DATE "2020-09-04 17:40:59 +00:00" +#define LLFIO_PREVIOUS_COMMIT_UNIQUE 4f4dbe1d diff --git a/include/llfio/v2.0/detail/impl/windows/map_handle.ipp b/include/llfio/v2.0/detail/impl/windows/map_handle.ipp index 01a0897d..e40c480b 100644 --- a/include/llfio/v2.0/detail/impl/windows/map_handle.ipp +++ b/include/llfio/v2.0/detail/impl/windows/map_handle.ipp @@ -1,5 +1,5 @@ /* A handle to a source of mapped memory -(C) 2016-2017 Niall Douglas <http://www.nedproductions.biz/> (17 commits) +(C) 2016-2020 Niall Douglas <http://www.nedproductions.biz/> (17 commits) File Created: August 2016 @@ -377,19 +377,45 @@ QUICKCPPLIB_BITFIELD_BEGIN(win32_map_sought){committed = 1U << 0U, freed = 1U << template <class F> static inline result<void> win32_maps_apply(byte *addr, size_t bytes, win32_map_sought sought, F &&f) { - /* Ok, so we have to be super careful here, because calling VirtualQuery() - somewhere in the middle of a region causes a linear scan until the beginning - or end of the region is found. For a reservation of say 2^42, that takes a - LONG time. - - What we do instead is to query the current block, perform F on it, then query - it again to get the next point of change, query that, perhaps perform F on it, - and so on. - - This causes twice the number of VirtualQuery() calls as ought to be necessary, - but such is this API's design. + /* Ok, so we have to be super careful here, indeed this routine has been + completely rewritten three times now. Do NOT change it unless you are aware + of the following: + + Version 1: + ---------- + We would call VirtualQuery() to find a matching region, perform + F on it, and then call VirtualQuery() on the address where the matching region + ended. Unfortunately, F might cause coalescing of the regions, and it turns out + that calling VirtualQuery() somewhere in the middle of a region + causes a linear scan until the beginning or end of the region is found. If the + region just unmapped was within say a reservation of 2^42, that takes a LONG time. + Users rightly complained about crap performance. + + Version 2: + ---------- + We would call VirtualQuery() to find a matching region, perform F on it, then query + the *same* block again to get the next point of change, query that, perhaps perform F on it, + and so on. This introduced twice the number of VirtualQuery() calls as ought to be necessary, + but this version worked well for nearly two years before the now-obvious race + condition was discovered: if the region was freed, and another thread performs + an allocation whose address just happens to land within the region just freed, the + second VirtualQuery() will now free another thread's allocation! + + Version 3: + ---------- + We iterate VirtualQuery() over the input byte range, storing all the matching results + into a vector. We then iterate the vector, calling the function. The dynamic memory + allocation is unfortunate, but we cache the vector storage thread locally. And, + generally speaking, a TLB shootdown is a lot more expensive than malloc. */ MEMORY_BASIC_INFORMATION mbi; + static thread_local std::vector<std::pair<byte *, size_t>> toinvoke([] { + std::vector<std::pair<byte *, size_t>> ret; + ret.reserve(64); + return ret; + }()); + toinvoke.clear(); + //std::cout << "win32_maps_apply addr=" << addr << " size=" << bytes << std::endl; while(bytes > 0) { if(!VirtualQuery(addr, &mbi, sizeof(mbi))) @@ -397,6 +423,8 @@ static inline result<void> win32_maps_apply(byte *addr, size_t bytes, win32_map_ LLFIO_LOG_FATAL(nullptr, "map_handle::win32_maps_apply VirtualQuery() 1 failed"); abort(); } + //std::cout << "VQ1 baseaddr=" << mbi.BaseAddress << " size=" << mbi.RegionSize << " allocbase=" << mbi.AllocationBase << " state=" << mbi.State + // << " type=" << mbi.Type << std::endl; bool doit = false; doit = doit || ((MEM_COMMIT == mbi.State) && (sought & win32_map_sought::committed)); doit = doit || ((MEM_FREE == mbi.State) && (sought & win32_map_sought::freed)); @@ -406,12 +434,7 @@ static inline result<void> win32_maps_apply(byte *addr, size_t bytes, win32_map_ /* mbi.RegionSize will be that from mbi.AllocationBase to end of a reserved region, so clamp to region size originally requested. */ - OUTCOME_TRYV(f(reinterpret_cast<byte *>(mbi.BaseAddress), std::min(mbi.RegionSize, bytes))); - } - if(!VirtualQuery(addr, &mbi, sizeof(mbi))) - { - LLFIO_LOG_FATAL(nullptr, "map_handle::win32_maps_apply VirtualQuery() 2 failed"); - abort(); + toinvoke.emplace_back(reinterpret_cast<byte *>(mbi.BaseAddress), std::min(mbi.RegionSize, bytes)); } addr += mbi.RegionSize; if(mbi.RegionSize < bytes) @@ -423,6 +446,10 @@ static inline result<void> win32_maps_apply(byte *addr, size_t bytes, win32_map_ bytes = 0; } } + for(auto &i : toinvoke) + { + OUTCOME_TRY(f(i.first, i.second)); + } return success(); } // Only for memory allocated with VirtualAlloc. We can special case decommitting or releasing diff --git a/test/tests/issue0009.cpp b/test/tests/issue0009.cpp new file mode 100644 index 00000000..ca15cf43 --- /dev/null +++ b/test/tests/issue0009.cpp @@ -0,0 +1,93 @@ +/* Integration test kernel for issue #9 map_handle and mapped_file_handle are very slow with large address reservations on Windows +(C) 2020 Niall Douglas <http://www.nedproductions.biz/> (2 commits) +File Created: Sept 2020 + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License in the accompanying file +Licence.txt or at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +Distributed under the Boost Software License, Version 1.0. + (See accompanying file Licence.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../test_kernel_decl.hpp" + +static inline void TestIssue09a() +{ + namespace llfio = LLFIO_V2_NAMESPACE; + auto fh = llfio::file_handle::temp_file().value(); + fh.truncate(1).value(); + auto sh = llfio::section_handle::section(fh).value(); + sh.truncate(1ULL << 40ULL).value(); + auto mh = llfio::map_handle::map(sh, 1ULL << 35ULL).value(); + auto *addr1 = mh.address(); + mh.truncate(1ULL << 36ULL).value(); + { + auto *addr2 = mh.address(); + BOOST_CHECK(addr1 == addr2); + } + mh.truncate(1ULL << 37ULL).value(); + { + auto *addr2 = mh.address(); + BOOST_CHECK(addr1 == addr2); + } + mh.truncate(1ULL << 38ULL).value(); + { + auto *addr2 = mh.address(); + BOOST_CHECK(addr1 == addr2); + } + mh.truncate(1ULL << 39ULL).value(); + { + auto *addr2 = mh.address(); + BOOST_CHECK(addr1 == addr2); + } + mh.truncate(1ULL << 40ULL).value(); + { + auto *addr2 = mh.address(); + BOOST_CHECK(addr1 == addr2); + } + auto begin = std::chrono::steady_clock::now(); + mh.close().value(); + sh.close().value(); + fh.close().value(); + auto end = std::chrono::steady_clock::now(); + auto diff = std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count(); + std::cout << "Closing a map_handle (file) with six, appended, very large reservations up to 2^40 took " << diff << "ms." << std::endl; + BOOST_CHECK(diff < 250); +} + +static inline void TestIssue09b() +{ + namespace llfio = LLFIO_V2_NAMESPACE; + auto mh = llfio::map_handle::reserve(1ULL << 43ULL).value(); + const auto sixth = mh.length() / 6; + mh.commit({mh.address() + sixth * 0, 1}).value(); + mh.commit({mh.address() + sixth * 1, 1}).value(); + mh.commit({mh.address() + sixth * 2, 1}).value(); + mh.commit({mh.address() + sixth * 3, 1}).value(); + mh.commit({mh.address() + sixth * 4, 1}).value(); + mh.commit({mh.address() + sixth * 5, 1}).value(); + auto begin = std::chrono::steady_clock::now(); + mh.do_not_store({mh.address(), mh.length()}).value(); + auto end = std::chrono::steady_clock::now(); + auto diff = std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count(); + std::cout << "Discard a map_handle (reservation) with six small commits within a reservation of 2^43 took " << diff << "ms." << std::endl; + BOOST_CHECK(diff < 250); +} + +KERNELTEST_TEST_KERNEL(regression, llfio, issues, 9a, + "Tests issue #9 map_handle and mapped_file_handle are very slow with large address reservations on Windows", TestIssue09a()) +KERNELTEST_TEST_KERNEL(regression, llfio, issues, 9b, + "Tests issue #9 map_handle and mapped_file_handle are very slow with large address reservations on Windows", TestIssue09b()) |