Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/windirstat/llfio.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
m---------doc/html8
-rw-r--r--include/afio/revision.hpp6
-rw-r--r--include/afio/v2.0/async_file_handle.hpp24
-rw-r--r--include/afio/v2.0/file_handle.hpp8
-rw-r--r--include/afio/v2.0/mapped_file_handle.hpp7
-rw-r--r--programs/key-value-store/Readme.md33
-rw-r--r--programs/key-value-store/include/key_value_store.hpp71
-rw-r--r--programs/key-value-store/main.cpp7
8 files changed, 94 insertions, 70 deletions
diff --git a/doc/html b/doc/html
-Subproject 1d80781cac7cb087435fb798c636896045b2daa
+Subproject 72361cf20e98412c97606335ad8ced81995c9d0
diff --git a/include/afio/revision.hpp b/include/afio/revision.hpp
index 7be09270..27af35b6 100644
--- a/include/afio/revision.hpp
+++ b/include/afio/revision.hpp
@@ -1,4 +1,4 @@
// Note the second line of this file must ALWAYS be the git SHA, third line ALWAYS the git SHA update time
-#define AFIO_PREVIOUS_COMMIT_REF 3afebec0fd8750ff8a83eebc4f8de0480dd67e09
-#define AFIO_PREVIOUS_COMMIT_DATE "2017-09-10 01:53:53 +00:00"
-#define AFIO_PREVIOUS_COMMIT_UNIQUE 3afebec0
+#define AFIO_PREVIOUS_COMMIT_REF 5145f65004a7834cb1fc0a57c4e8c53cf97e78ec
+#define AFIO_PREVIOUS_COMMIT_DATE "2017-09-11 00:32:11 +00:00"
+#define AFIO_PREVIOUS_COMMIT_UNIQUE 5145f650
diff --git a/include/afio/v2.0/async_file_handle.hpp b/include/afio/v2.0/async_file_handle.hpp
index d459d501..3a39d24c 100644
--- a/include/afio/v2.0/async_file_handle.hpp
+++ b/include/afio/v2.0/async_file_handle.hpp
@@ -35,6 +35,16 @@ AFIO_V2_NAMESPACE_EXPORT_BEGIN
/*! \class async_file_handle
\brief An asynchronous handle to an open something
+\note Unlike the others, `async_file_handle` defaults to `only_metadata` caching as that is the
+only use case where using async i/o makes sense given the other options below.
+
+<table>
+<tr><th></th><th>Cost of opening</th><th>Cost of i/o</th><th>Concurrency and Atomicity</th><th>Other remarks</th></tr>
+<tr><td>`file_handle`</td><td>Least</td><td>Syscall</td><td>POSIX guarantees (usually)</td><td>Least gotcha</td></tr>
+<tr><td>`async_file_handle`</td><td>More</td><td>Most (syscall + malloc/free + reactor)</td><td>POSIX guarantees (usually)</td><td>Makes no sense to use with cached i/o as it's a very expensive way to call `memcpy()`</td></tr>
+<tr><td>`mapped_file_handle`</td><td>Most</td><td>Least</td><td>None</td><td>Cannot be used with uncached i/o</td></tr>
+</table>
+
\todo Direct use of `calloc()` ought to be replaced with a user supplied STL allocator instance.
*/
class AFIO_DECL async_file_handle : public file_handle
@@ -107,7 +117,7 @@ public:
\errors Any of the values POSIX open() or CreateFile() can return.
*/
AFIO_MAKE_FREE_FUNCTION
- static AFIO_HEADERS_ONLY_MEMFUNC_SPEC result<async_file_handle> async_file(io_service &service, const path_handle &base, path_view_type _path, mode _mode = mode::read, creation _creation = creation::open_existing, caching _caching = caching::all, flag flags = flag::none) noexcept
+ static AFIO_HEADERS_ONLY_MEMFUNC_SPEC result<async_file_handle> async_file(io_service &service, const path_handle &base, path_view_type _path, mode _mode = mode::read, creation _creation = creation::open_existing, caching _caching = caching::only_metadata, flag flags = flag::none) noexcept
{
// Open it overlapped, otherwise no difference.
OUTCOME_TRY(v, file_handle::file(std::move(base), std::move(_path), std::move(_mode), std::move(_creation), std::move(_caching), flags | flag::overlapped));
@@ -118,14 +128,12 @@ public:
/*! Create an async file handle creating a randomly named file on a path.
The file is opened exclusively with `creation::only_if_not_exist` so it
- will never collide with nor overwrite any existing file. Note also
- that caching defaults to temporary which hints to the OS to only
- flush changes to physical storage as lately as possible.
+ will never collide with nor overwrite any existing file.
\errors Any of the values POSIX open() or CreateFile() can return.
*/
AFIO_MAKE_FREE_FUNCTION
- static inline result<async_file_handle> async_random_file(io_service &service, const path_handle &dirpath, mode _mode = mode::write, caching _caching = caching::temporary, flag flags = flag::none) noexcept
+ static inline result<async_file_handle> async_random_file(io_service &service, const path_handle &dirpath, mode _mode = mode::write, caching _caching = caching::only_metadata, flag flags = flag::none) noexcept
{
try
{
@@ -159,7 +167,7 @@ public:
\errors Any of the values POSIX open() or CreateFile() can return.
*/
AFIO_MAKE_FREE_FUNCTION
- static inline result<async_file_handle> async_temp_file(io_service &service, path_view_type name = path_view_type(), mode _mode = mode::write, creation _creation = creation::if_needed, caching _caching = caching::temporary, flag flags = flag::unlink_on_close) noexcept
+ static inline result<async_file_handle> async_temp_file(io_service &service, path_view_type name = path_view_type(), mode _mode = mode::write, creation _creation = creation::if_needed, caching _caching = caching::only_metadata, flag flags = flag::unlink_on_close) noexcept
{
OUTCOME_TRY(tempdirh, path_handle::path(temporary_files_directory()));
return name.empty() ? async_random_file(service, tempdirh, _mode, _caching, flags) : async_file(service, tempdirh, name, _mode, _creation, _caching, flags);
@@ -313,7 +321,7 @@ public:
AFIO_HEADERS_ONLY_VIRTUAL_SPEC io_result<buffers_type> read(io_request<buffers_type> reqs, deadline d = deadline()) noexcept override;
AFIO_HEADERS_ONLY_VIRTUAL_SPEC io_result<const_buffers_type> write(io_request<const_buffers_type> reqs, deadline d = deadline()) noexcept override;
-#if 0//def __cpp_coroutines
+#if 0 // def __cpp_coroutines
//! An
template<class BuffersType> struct awaitable
{
@@ -323,7 +331,7 @@ public:
#endif
};
-#if 0//def __cpp_coroutines
+#if 0 // def __cpp_coroutines
auto operator co_await(async_file_handle::awaitable &&a)
{
struct Awaiter {
diff --git a/include/afio/v2.0/file_handle.hpp b/include/afio/v2.0/file_handle.hpp
index b289dc7f..02f35d1f 100644
--- a/include/afio/v2.0/file_handle.hpp
+++ b/include/afio/v2.0/file_handle.hpp
@@ -56,6 +56,14 @@ class io_service;
/*! \class file_handle
\brief A handle to a regular file or device, kept data layout compatible with
async_file_handle.
+
+<table>
+<tr><th></th><th>Cost of opening</th><th>Cost of i/o</th><th>Concurrency and Atomicity</th><th>Other remarks</th></tr>
+<tr><td>`file_handle`</td><td>Least</td><td>Syscall</td><td>POSIX guarantees (usually)</td><td>Least gotcha</td></tr>
+<tr><td>`async_file_handle`</td><td>More</td><td>Most (syscall + malloc/free + reactor)</td><td>POSIX guarantees (usually)</td><td>Makes no sense to use with cached i/o as it's a very expensive way to call `memcpy()`</td></tr>
+<tr><td>`mapped_file_handle`</td><td>Most</td><td>Least</td><td>None</td><td>Cannot be used with uncached i/o</td></tr>
+</table>
+
*/
class AFIO_DECL file_handle : public io_handle, public fs_handle
{
diff --git a/include/afio/v2.0/mapped_file_handle.hpp b/include/afio/v2.0/mapped_file_handle.hpp
index ff1b8b5c..bd858580 100644
--- a/include/afio/v2.0/mapped_file_handle.hpp
+++ b/include/afio/v2.0/mapped_file_handle.hpp
@@ -34,6 +34,13 @@ AFIO_V2_NAMESPACE_EXPORT_BEGIN
/*! \class mapped_file_handle
\brief A memory mapped regular file or device
+<table>
+<tr><th></th><th>Cost of opening</th><th>Cost of i/o</th><th>Concurrency and Atomicity</th><th>Other remarks</th></tr>
+<tr><td>`file_handle`</td><td>Least</td><td>Syscall</td><td>POSIX guarantees (usually)</td><td>Least gotcha</td></tr>
+<tr><td>`async_file_handle`</td><td>More</td><td>Most (syscall + malloc/free + reactor)</td><td>POSIX guarantees (usually)</td><td>Makes no sense to use with cached i/o as it's a very expensive way to call `memcpy()`</td></tr>
+<tr><td>`mapped_file_handle`</td><td>Most</td><td>Least</td><td>None</td><td>Cannot be used with uncached i/o</td></tr>
+</table>
+
All the major OSs on all the major 64-bit CPU architectures now offer at least 127 TB of address
space to user mode processes. This makes it feasible to map multi-TB files directly into
memory, thus avoiding the syscall overhead involved when reading and writing. This
diff --git a/programs/key-value-store/Readme.md b/programs/key-value-store/Readme.md
index e46020d4..4f84c3df 100644
--- a/programs/key-value-store/Readme.md
+++ b/programs/key-value-store/Readme.md
@@ -13,7 +13,7 @@ benchmarks fare.
- [x] Atomic append should issue gather buffers of `IOV_MAX`
- [x] Optionally use mmaps to extend smallfile instead of atomic appends.
Likely highly racy on Linux due to kernel bugs :)
-- [ ] Use mmaps for all smallfiles
+- [x] Use mmaps for all smallfiles
- Windows x64 provides 128 TB of address space
- Linux x64 provides 128 TB of address space
- On adoption of smallfile, would need to parse backwards from end to
@@ -25,27 +25,48 @@ into new small file, update index to use new small file)
index update.
## Benchmarks:
-- 1Kb values Windows with NTFS, no integrity, no durability:
+- 1Kb values Windows with NTFS, no integrity, no durability, commit appends, fetch read:
```
Inserting 1M key-value pairs ...
- Inserted at 157183 items per sec
+ Inserted at 195312 items per sec
Retrieving 1M key-value pairs ...
Fetched at 612745 items per sec
```
-- 1Kb values Windows with NTFS, integrity, no durability:
+- 1Kb values Windows with NTFS, integrity, no durability, commit appends, fetch read:
```
Inserting 1M key-value pairs ...
- Inserted at 145053 items per sec
+ Inserted at 188572 items per sec
Retrieving 1M key-value pairs ...
Fetched at 542005 items per sec
```
-- 1Kb values Windows with NTFS, integrity, durability:
+- 1Kb values Windows with NTFS, no integrity, no durability, commit appends, fetch mmaps:
+ ```
+ Inserting 1M key-value pairs ...
+ Inserted at 193012 items per sec
+ Retrieving 1M key-value pairs ...
+ Fetched at 2207505 items per sec
+ ```
+- 1Kb values Windows with NTFS, integrity, no durability, commit appends, fetch mmaps:
+ ```
+ Inserting 1M key-value pairs ...
+ Inserted at 185666 items per sec
+ Retrieving 1M key-value pairs ...
+ Fetched at 1438848 items per sec
+ ```
+- 1Kb values Windows with NTFS, integrity, durability, commit appends, fetch read:
```
Inserting 1M key-value pairs ...
Inserted at 32379 items per sec
Retrieving 1M key-value pairs ...
Fetched at 549752 items per sec
```
+- 1Kb values Windows with NTFS, integrity, durability, commit mmaps, fetch read:
+ ```
+ Inserting 1M key-value pairs ...
+ Inserted at 87282 items per sec
+ Retrieving 1M key-value pairs ...
+ Fetched at 549752 items per sec
+ ```
- 1Kb values Linux with ext4, no integrity, no durability:
```
diff --git a/programs/key-value-store/include/key_value_store.hpp b/programs/key-value-store/include/key_value_store.hpp
index 0150bab3..20c54635 100644
--- a/programs/key-value-store/include/key_value_store.hpp
+++ b/programs/key-value-store/include/key_value_store.hpp
@@ -161,14 +161,12 @@ namespace key_value_store
friend class transaction;
afio::file_handle _indexfile;
afio::file_handle _mysmallfile; // append only
- afio::map_handle _mysmallfilemapped;
afio::file_handle::extent_guard _indexfileguard, _smallfileguard;
size_t _mysmallfileidx{(size_t) -1};
struct
{
- std::vector<afio::file_handle> read;
- std::vector<afio::section_handle> section;
- std::vector<afio::map_handle> map;
+ std::vector<afio::file_handle> blocking;
+ std::vector<afio::mapped_file_handle> mapped;
} _smallfiles;
optional<index::open_hash_index> _index;
index::index *_indexheader{nullptr};
@@ -195,11 +193,11 @@ namespace key_value_store
mode
#endif
;
- if(_smallfiles.read.empty())
+ if(_smallfiles.blocking.empty())
{
// Open the small files, choosing the first unclaimed small file as "mine"
std::string name;
- _smallfiles.read.reserve(48);
+ _smallfiles.blocking.reserve(48);
for(size_t n = 0; n < 48; n++)
{
name = std::to_string(n);
@@ -218,8 +216,8 @@ namespace key_value_store
_mysmallfile.set_append_only(true).value();
_smallfileguard = std::move(smallfileclaimed).value();
_mysmallfileidx = n;
- _smallfiles.read.push_back(std::move(fh).value());
- _smallfileguard.set_handle(&_smallfiles.read.back());
+ _smallfiles.blocking.push_back(std::move(fh).value());
+ _smallfileguard.set_handle(&_smallfiles.blocking.back());
claimed = true;
}
}
@@ -229,7 +227,7 @@ namespace key_value_store
// We really need this to only have read only perms, otherwise any mmaps will extend the file ludicrously
fh = afio::file_handle::file(dir, name, afio::file_handle::mode::read, afio::file_handle::creation::open_existing, afio::file_handle::caching::all, afio::file_handle::flag::disable_prefetching);
#endif
- _smallfiles.read.push_back(std::move(fh).value());
+ _smallfiles.blocking.push_back(std::move(fh).value());
}
continue;
}
@@ -253,7 +251,7 @@ namespace key_value_store
// Set up the index, either r/w or read only with copy on write
afio::section_handle::flag mapflags = (mode == afio::file_handle::mode::write) ? afio::section_handle::flag::readwrite : (afio::section_handle::flag::read | afio::section_handle::flag::cow);
afio::section_handle sh = afio::section_handle::section(_indexfile, 0, mapflags).value();
- afio::file_handle::extent_type len = sh.length();
+ afio::file_handle::extent_type len = sh.length().value();
len -= sizeof(index::index);
len /= sizeof(index::open_hash_index::value_type);
size_t offset = sizeof(index::index);
@@ -375,11 +373,6 @@ namespace key_value_store
these circumstances, one can instead use a memory map of the end of the smallfile to append
the small objects. This can cause the kernel to not flush the map to storage until the map is
destroyed, but it also avoids the read-modify-write cycle with synchronous i/o.
-
- Be aware that memory mapping off the end of a file being modified has historically been
- full of quirks, race conditions and bugs in major OS kernels. Everything from data loss,
- data corruption, denial of service and root privilege exploits have been found from the
- unexpected interactions between memory maps and a moving end of file.
*/
void use_mmaps_for_commit(bool v)
{
@@ -408,24 +401,14 @@ namespace key_value_store
{
if(_mmap_over_extension != 0)
return;
- _smallfiles.section.reserve(_smallfiles.read.size());
- _smallfiles.map.reserve(_smallfiles.read.size());
- for(size_t n = 0; n < _smallfiles.read.size(); n++)
+ _smallfiles.mapped.reserve(_smallfiles.blocking.size());
+ for(size_t n = 0; n < _smallfiles.blocking.size(); n++)
{
- auto currentlength = _smallfiles.read[n].length().value();
- _smallfiles.section.push_back(afio::section_handle::section(_smallfiles.read[n], currentlength,
-#ifdef _WIN32
- // Yes this is confusing. But for some reason, Windows won't permit overextended views on read only sections.
- // And somehow or other, Windows permits read/write sections on read only files. Which makes zero sense.
- afio::section_handle::flag::readwrite
-#else
- afio::section_handle::flag::read
-#endif
- )
- .value());
- // The nocommit allows us to reserve all the address space now, and to fill in mapped data later as the file extends
- _smallfiles.map.push_back(afio::map_handle::map(_smallfiles.section.back(), currentlength + overextension, 0, afio::section_handle::flag::nocommit | afio::section_handle::flag::read).value());
+ auto currentlength = _smallfiles.blocking[n].length().value();
+ _smallfiles.mapped.push_back(afio::mapped_file_handle(std::move(_smallfiles.blocking[n]), currentlength + overextension));
}
+ _smallfileguard.set_handle(&_smallfiles.mapped[_mysmallfileidx]);
+ _smallfiles.blocking.clear();
_mmap_over_extension = overextension;
}
@@ -519,27 +502,27 @@ namespace key_value_store
return keyvalue_info(key);
}
size_t length = item.length, smallfilelength = _pad_length(length);
- if(item.value_identifier >= _smallfiles.read.size())
+ if(item.value_identifier >= _smallfiles.blocking.size() && item.value_identifier >= _smallfiles.mapped.size())
{
// TODO: Open newly created smallfiles
abort();
}
char *buffer;
- bool free_on_destruct = _smallfiles.map.empty() || !_smallfiles.map[item.value_identifier].is_valid();
+ bool free_on_destruct = _smallfiles.mapped.empty();
if(!free_on_destruct)
{
- if(item.value_offset * 64 > _smallfiles.section[item.value_identifier].length())
+ auto mappedlength = _smallfiles.mapped[item.value_identifier].length().value();
+ if(item.value_offset * 64 > mappedlength)
{
- auto oldsize = _smallfiles.section[item.value_identifier].length();
- auto newsize = _smallfiles.read[item.value_identifier].length().value();
- // Resize the memory section to the current size of the file
- _smallfiles.section[item.value_identifier].truncate(newsize).value();
- // Commit the newly mapped pages
- afio::map_handle::buffer_type bt{_smallfiles.map[item.value_identifier].address() + oldsize, newsize - oldsize};
- bt.data = afio::utils::round_up_to_page_size(bt.data);
- _smallfiles.map[item.value_identifier].commit(bt, afio::section_handle::flag::read).value();
+ // Update mapping to match the underlying file
+ mappedlength = _smallfiles.mapped[item.value_identifier].update_map().value();
+ if(mappedlength > _smallfiles.mapped[item.value_identifier].capacity())
+ {
+ // Need to remap into a new space
+ mappedlength = _smallfiles.mapped[item.value_identifier].reserve(mappedlength + _mmap_over_extension).value();
+ }
}
- buffer = _smallfiles.map[item.value_identifier].address() + item.value_offset * 64 - smallfilelength;
+ buffer = _smallfiles.mapped[item.value_identifier].address() + item.value_offset * 64 - smallfilelength;
}
else
{
@@ -548,7 +531,7 @@ namespace key_value_store
{
throw std::bad_alloc();
}
- _smallfiles.read[item.value_identifier].read(item.value_offset * 64 - smallfilelength, buffer, smallfilelength).value();
+ _smallfiles.blocking[item.value_identifier].read(item.value_offset * 64 - smallfilelength, buffer, smallfilelength).value();
}
index::value_tail *vt = reinterpret_cast<index::value_tail *>(buffer + smallfilelength - sizeof(index::value_tail));
if(_indexheader->contents_hashed || _indexheader->key_is_hash_of_value)
diff --git a/programs/key-value-store/main.cpp b/programs/key-value-store/main.cpp
index 26eec4bc..16bd73b0 100644
--- a/programs/key-value-store/main.cpp
+++ b/programs/key-value-store/main.cpp
@@ -227,7 +227,6 @@ int main()
}
{
key_value_store::basic_key_value_store store("teststore", 2000000);
- store.use_mmaps_for_fetch();
benchmark(store, "no integrity, no durability, commit appends");
}
{
@@ -262,9 +261,8 @@ int main()
}
{
key_value_store::basic_key_value_store store("teststore", 2000000);
- store.use_mmaps_for_commit(true);
store.use_mmaps_for_fetch();
- benchmark(store, "no integrity, no durability, commit mmaps, fetch mmaps");
+ benchmark(store, "no integrity, no durability, commit appends, fetch mmaps");
}
{
std::error_code ec;
@@ -272,9 +270,8 @@ int main()
}
{
key_value_store::basic_key_value_store store("teststore", 2000000, true);
- store.use_mmaps_for_commit(true);
store.use_mmaps_for_fetch();
- benchmark(store, "integrity, no durability, commit mmaps, fetch mmaps");
+ benchmark(store, "integrity, no durability, commit appends, fetch mmaps");
}
{
std::error_code ec;