Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/windirstat/llfio.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiall Douglas (s [underscore] sourceforge {at} nedprod [dot] com) <spamtrap@nedprod.com>2020-09-04 22:20:26 +0300
committerNiall Douglas (s [underscore] sourceforge {at} nedprod [dot] com) <spamtrap@nedprod.com>2020-09-04 22:20:26 +0300
commit99170750a9f9f6fd08359d1924ee605c1d03b07d (patch)
tree2383d7480972c11e2ddd77f08f9178440ac0088b
parent4f4dbe1d95b7afc2b8e6d713611d391598546c8b (diff)
Fix a subtle race condition in map_handle on Windows where if many threads are currently allocating and freeing maps, it could rarely occur that one thread would free the map of another thread. The newly rewritten v3 of win32_maps_apply() we really, really hope is the last time!
-rw-r--r--cmake/QuickCppLibBootstrap.cmake10
-rw-r--r--cmake/tests.cmake1
-rw-r--r--include/llfio/revision.hpp6
-rw-r--r--include/llfio/v2.0/detail/impl/windows/map_handle.ipp63
-rw-r--r--test/tests/issue0009.cpp93
5 files changed, 151 insertions, 22 deletions
diff --git a/cmake/QuickCppLibBootstrap.cmake b/cmake/QuickCppLibBootstrap.cmake
index 29a0a830..8efa0b26 100644
--- a/cmake/QuickCppLibBootstrap.cmake
+++ b/cmake/QuickCppLibBootstrap.cmake
@@ -28,12 +28,20 @@ foreach(item ${CMAKE_MODULE_PATH})
set(quickcpplib_done ON)
endif()
endforeach()
+if(DEFINED quickcpplib_DIR)
+ find_package(quickcpplib QUIET CONFIG)
+ if(quickcpplib_FOUND)
+ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${quickcpplib_DIR}/cmakelib")
+ set(CTEST_QUICKCPPLIB_SCRIPTS "${quickcpplib_DIR}/scripts")
+ set(quickcpplib_done ON)
+ endif()
+endif()
if(NOT quickcpplib_done)
# CMAKE_SOURCE_DIR is the very topmost parent cmake project
# CMAKE_CURRENT_SOURCE_DIR is the current cmake subproject
if(NOT DEFINED CTEST_QUICKCPPLIB_CLONE_DIR)
- # If there is a magic .quickcpplib_use_siblings directory above the topmost project, use sibling edition
if(EXISTS "${CMAKE_SOURCE_DIR}/../.quickcpplib_use_siblings" AND NOT QUICKCPPLIB_DISABLE_SIBLINGS)
+ # If there is a magic .quickcpplib_use_siblings directory above the topmost project, use sibling edition
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/../quickcpplib/cmakelib")
set(CTEST_QUICKCPPLIB_SCRIPTS "${CMAKE_SOURCE_DIR}/../quickcpplib/scripts")
# Copy latest version of myself into end user
diff --git a/cmake/tests.cmake b/cmake/tests.cmake
index 0bf01f1f..8448643e 100644
--- a/cmake/tests.cmake
+++ b/cmake/tests.cmake
@@ -12,6 +12,7 @@ set(llfio_TESTS
"test/tests/file_handle_create_close/runner.cpp"
"test/tests/file_handle_lock_unlock.cpp"
"test/tests/handle_adapter_xor.cpp"
+ "test/tests/issue0009.cpp"
"test/tests/issue0027.cpp"
"test/tests/issue0028.cpp"
"test/tests/large_pages.cpp"
diff --git a/include/llfio/revision.hpp b/include/llfio/revision.hpp
index 91eeb3c5..6f0c2825 100644
--- a/include/llfio/revision.hpp
+++ b/include/llfio/revision.hpp
@@ -1,4 +1,4 @@
// Note the second line of this file must ALWAYS be the git SHA, third line ALWAYS the git SHA update time
-#define LLFIO_PREVIOUS_COMMIT_REF 21883faeb87fa31e209f9ed1de44b3aca89edce2
-#define LLFIO_PREVIOUS_COMMIT_DATE "2020-07-24 12:31:12 +00:00"
-#define LLFIO_PREVIOUS_COMMIT_UNIQUE 21883fae
+#define LLFIO_PREVIOUS_COMMIT_REF 4f4dbe1d95b7afc2b8e6d713611d391598546c8b
+#define LLFIO_PREVIOUS_COMMIT_DATE "2020-09-04 17:40:59 +00:00"
+#define LLFIO_PREVIOUS_COMMIT_UNIQUE 4f4dbe1d
diff --git a/include/llfio/v2.0/detail/impl/windows/map_handle.ipp b/include/llfio/v2.0/detail/impl/windows/map_handle.ipp
index 01a0897d..e40c480b 100644
--- a/include/llfio/v2.0/detail/impl/windows/map_handle.ipp
+++ b/include/llfio/v2.0/detail/impl/windows/map_handle.ipp
@@ -1,5 +1,5 @@
/* A handle to a source of mapped memory
-(C) 2016-2017 Niall Douglas <http://www.nedproductions.biz/> (17 commits)
+(C) 2016-2020 Niall Douglas <http://www.nedproductions.biz/> (17 commits)
File Created: August 2016
@@ -377,19 +377,45 @@ QUICKCPPLIB_BITFIELD_BEGIN(win32_map_sought){committed = 1U << 0U, freed = 1U <<
template <class F>
static inline result<void> win32_maps_apply(byte *addr, size_t bytes, win32_map_sought sought, F &&f)
{
- /* Ok, so we have to be super careful here, because calling VirtualQuery()
- somewhere in the middle of a region causes a linear scan until the beginning
- or end of the region is found. For a reservation of say 2^42, that takes a
- LONG time.
-
- What we do instead is to query the current block, perform F on it, then query
- it again to get the next point of change, query that, perhaps perform F on it,
- and so on.
-
- This causes twice the number of VirtualQuery() calls as ought to be necessary,
- but such is this API's design.
+ /* Ok, so we have to be super careful here, indeed this routine has been
+ completely rewritten three times now. Do NOT change it unless you are aware
+ of the following:
+
+ Version 1:
+ ----------
+ We would call VirtualQuery() to find a matching region, perform
+ F on it, and then call VirtualQuery() on the address where the matching region
+ ended. Unfortunately, F might cause coalescing of the regions, and it turns out
+ that calling VirtualQuery() somewhere in the middle of a region
+ causes a linear scan until the beginning or end of the region is found. If the
+ region just unmapped was within say a reservation of 2^42, that takes a LONG time.
+ Users rightly complained about crap performance.
+
+ Version 2:
+ ----------
+ We would call VirtualQuery() to find a matching region, perform F on it, then query
+ the *same* block again to get the next point of change, query that, perhaps perform F on it,
+ and so on. This introduced twice the number of VirtualQuery() calls as ought to be necessary,
+ but this version worked well for nearly two years before the now-obvious race
+ condition was discovered: if the region was freed, and another thread performs
+ an allocation whose address just happens to land within the region just freed, the
+ second VirtualQuery() will now free another thread's allocation!
+
+ Version 3:
+ ----------
+ We iterate VirtualQuery() over the input byte range, storing all the matching results
+ into a vector. We then iterate the vector, calling the function. The dynamic memory
+ allocation is unfortunate, but we cache the vector storage thread locally. And,
+ generally speaking, a TLB shootdown is a lot more expensive than malloc.
*/
MEMORY_BASIC_INFORMATION mbi;
+ static thread_local std::vector<std::pair<byte *, size_t>> toinvoke([] {
+ std::vector<std::pair<byte *, size_t>> ret;
+ ret.reserve(64);
+ return ret;
+ }());
+ toinvoke.clear();
+ //std::cout << "win32_maps_apply addr=" << addr << " size=" << bytes << std::endl;
while(bytes > 0)
{
if(!VirtualQuery(addr, &mbi, sizeof(mbi)))
@@ -397,6 +423,8 @@ static inline result<void> win32_maps_apply(byte *addr, size_t bytes, win32_map_
LLFIO_LOG_FATAL(nullptr, "map_handle::win32_maps_apply VirtualQuery() 1 failed");
abort();
}
+ //std::cout << "VQ1 baseaddr=" << mbi.BaseAddress << " size=" << mbi.RegionSize << " allocbase=" << mbi.AllocationBase << " state=" << mbi.State
+ // << " type=" << mbi.Type << std::endl;
bool doit = false;
doit = doit || ((MEM_COMMIT == mbi.State) && (sought & win32_map_sought::committed));
doit = doit || ((MEM_FREE == mbi.State) && (sought & win32_map_sought::freed));
@@ -406,12 +434,7 @@ static inline result<void> win32_maps_apply(byte *addr, size_t bytes, win32_map_
/* mbi.RegionSize will be that from mbi.AllocationBase to end of a reserved region,
so clamp to region size originally requested.
*/
- OUTCOME_TRYV(f(reinterpret_cast<byte *>(mbi.BaseAddress), std::min(mbi.RegionSize, bytes)));
- }
- if(!VirtualQuery(addr, &mbi, sizeof(mbi)))
- {
- LLFIO_LOG_FATAL(nullptr, "map_handle::win32_maps_apply VirtualQuery() 2 failed");
- abort();
+ toinvoke.emplace_back(reinterpret_cast<byte *>(mbi.BaseAddress), std::min(mbi.RegionSize, bytes));
}
addr += mbi.RegionSize;
if(mbi.RegionSize < bytes)
@@ -423,6 +446,10 @@ static inline result<void> win32_maps_apply(byte *addr, size_t bytes, win32_map_
bytes = 0;
}
}
+ for(auto &i : toinvoke)
+ {
+ OUTCOME_TRY(f(i.first, i.second));
+ }
return success();
}
// Only for memory allocated with VirtualAlloc. We can special case decommitting or releasing
diff --git a/test/tests/issue0009.cpp b/test/tests/issue0009.cpp
new file mode 100644
index 00000000..ca15cf43
--- /dev/null
+++ b/test/tests/issue0009.cpp
@@ -0,0 +1,93 @@
+/* Integration test kernel for issue #9 map_handle and mapped_file_handle are very slow with large address reservations on Windows
+(C) 2020 Niall Douglas <http://www.nedproductions.biz/> (2 commits)
+File Created: Sept 2020
+
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License in the accompanying file
+Licence.txt or at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+
+Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file Licence.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#include "../test_kernel_decl.hpp"
+
+static inline void TestIssue09a()
+{
+ namespace llfio = LLFIO_V2_NAMESPACE;
+ auto fh = llfio::file_handle::temp_file().value();
+ fh.truncate(1).value();
+ auto sh = llfio::section_handle::section(fh).value();
+ sh.truncate(1ULL << 40ULL).value();
+ auto mh = llfio::map_handle::map(sh, 1ULL << 35ULL).value();
+ auto *addr1 = mh.address();
+ mh.truncate(1ULL << 36ULL).value();
+ {
+ auto *addr2 = mh.address();
+ BOOST_CHECK(addr1 == addr2);
+ }
+ mh.truncate(1ULL << 37ULL).value();
+ {
+ auto *addr2 = mh.address();
+ BOOST_CHECK(addr1 == addr2);
+ }
+ mh.truncate(1ULL << 38ULL).value();
+ {
+ auto *addr2 = mh.address();
+ BOOST_CHECK(addr1 == addr2);
+ }
+ mh.truncate(1ULL << 39ULL).value();
+ {
+ auto *addr2 = mh.address();
+ BOOST_CHECK(addr1 == addr2);
+ }
+ mh.truncate(1ULL << 40ULL).value();
+ {
+ auto *addr2 = mh.address();
+ BOOST_CHECK(addr1 == addr2);
+ }
+ auto begin = std::chrono::steady_clock::now();
+ mh.close().value();
+ sh.close().value();
+ fh.close().value();
+ auto end = std::chrono::steady_clock::now();
+ auto diff = std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
+ std::cout << "Closing a map_handle (file) with six, appended, very large reservations up to 2^40 took " << diff << "ms." << std::endl;
+ BOOST_CHECK(diff < 250);
+}
+
+static inline void TestIssue09b()
+{
+ namespace llfio = LLFIO_V2_NAMESPACE;
+ auto mh = llfio::map_handle::reserve(1ULL << 43ULL).value();
+ const auto sixth = mh.length() / 6;
+ mh.commit({mh.address() + sixth * 0, 1}).value();
+ mh.commit({mh.address() + sixth * 1, 1}).value();
+ mh.commit({mh.address() + sixth * 2, 1}).value();
+ mh.commit({mh.address() + sixth * 3, 1}).value();
+ mh.commit({mh.address() + sixth * 4, 1}).value();
+ mh.commit({mh.address() + sixth * 5, 1}).value();
+ auto begin = std::chrono::steady_clock::now();
+ mh.do_not_store({mh.address(), mh.length()}).value();
+ auto end = std::chrono::steady_clock::now();
+ auto diff = std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
+ std::cout << "Discard a map_handle (reservation) with six small commits within a reservation of 2^43 took " << diff << "ms." << std::endl;
+ BOOST_CHECK(diff < 250);
+}
+
+KERNELTEST_TEST_KERNEL(regression, llfio, issues, 9a,
+ "Tests issue #9 map_handle and mapped_file_handle are very slow with large address reservations on Windows", TestIssue09a())
+KERNELTEST_TEST_KERNEL(regression, llfio, issues, 9b,
+ "Tests issue #9 map_handle and mapped_file_handle are very slow with large address reservations on Windows", TestIssue09b())