diff options
author | Niall Douglas (s [underscore] sourceforge {at} nedprod [dot] com) <spamtrap@nedprod.com> | 2017-09-12 02:54:04 +0300 |
---|---|---|
committer | Niall Douglas (s [underscore] sourceforge {at} nedprod [dot] com) <spamtrap@nedprod.com> | 2017-09-12 02:54:04 +0300 |
commit | 5d87110ffb26e809d14d0caed10c15b12959179f (patch) | |
tree | 2a708cca85505629af60fbe45d7f50474595479c /programs | |
parent | 97dcffd3b62dc03d9e7ad2976a83d4468e21c154 (diff) |
Toy key value store now used mapped_file_handle for inserts too. Performance is amazing.
Diffstat (limited to 'programs')
-rw-r--r-- | programs/key-value-store/Readme.md | 50 | ||||
-rw-r--r-- | programs/key-value-store/include/key_value_store.hpp | 127 | ||||
-rw-r--r-- | programs/key-value-store/main.cpp | 36 |
3 files changed, 75 insertions, 138 deletions
diff --git a/programs/key-value-store/Readme.md b/programs/key-value-store/Readme.md index 4f84c3df..8fb34736 100644 --- a/programs/key-value-store/Readme.md +++ b/programs/key-value-store/Readme.md @@ -14,10 +14,7 @@ benchmarks fare. - [x] Optionally use mmaps to extend smallfile instead of atomic appends. Likely highly racy on Linux due to kernel bugs :) - [x] Use mmaps for all smallfiles - - Windows x64 provides 128Tb of address space - - Linux x64 provides 128Tb of address space - - On adoption of smallfile, would need to parse backwards from end to - find last used record. +- [ ] Does this toy store actually work with multiple concurrent users? - [ ] Online free space consolidation (copy early still in use records into new small file, update index to use new small file) - [ ] Per 1Mb free space consolidated, punch hole @@ -25,47 +22,33 @@ into new small file, update index to use new small file) index update. ## Benchmarks: -- 1Kb values Windows with NTFS, no integrity, no durability, commit appends, fetch read: +- 1Kb values Windows with NTFS, no integrity, no durability, read + append: ``` Inserting 1M key-value pairs ... Inserted at 195312 items per sec Retrieving 1M key-value pairs ... Fetched at 612745 items per sec ``` -- 1Kb values Windows with NTFS, integrity, no durability, commit appends, fetch read: +- 1Kb values Windows with NTFS, integrity, no durability, read + append: ``` Inserting 1M key-value pairs ... Inserted at 188572 items per sec Retrieving 1M key-value pairs ... Fetched at 542005 items per sec ``` -- 1Kb values Windows with NTFS, no integrity, no durability, commit appends, fetch mmaps: +- 1Kb values Windows with NTFS, no integrity, no durability, mmaps: ``` Inserting 1M key-value pairs ... - Inserted at 193012 items per sec + Inserted at 518403 items per sec Retrieving 1M key-value pairs ... - Fetched at 2207505 items per sec + Fetched at 2192982 items per sec ``` -- 1Kb values Windows with NTFS, integrity, no durability, commit appends, fetch mmaps: +- 1Kb values Windows with NTFS, integrity, no durability, mmaps: ``` Inserting 1M key-value pairs ... - Inserted at 185666 items per sec + Inserted at 455996 items per sec Retrieving 1M key-value pairs ... - Fetched at 1438848 items per sec - ``` -- 1Kb values Windows with NTFS, integrity, durability, commit appends, fetch read: - ``` - Inserting 1M key-value pairs ... - Inserted at 32379 items per sec - Retrieving 1M key-value pairs ... - Fetched at 549752 items per sec - ``` -- 1Kb values Windows with NTFS, integrity, durability, commit mmaps, fetch read: - ``` - Inserting 1M key-value pairs ... - Inserted at 87282 items per sec - Retrieving 1M key-value pairs ... - Fetched at 549752 items per sec + Fetched at 1144164 items per sec ``` - 1Kb values Linux with ext4, no integrity, no durability: @@ -83,14 +66,21 @@ index update. Fetched at 1519756 items per sec ``` -- 16 byte values Windows with NTFS, no integrity, no durability: +- 16 byte values Windows with NTFS, no integrity, no durability, read + append: + ``` + Inserting 1M key-value pairs ... + Inserted at 214178 items per sec + Retrieving 1M key-value pairs ... + Fetched at 660066 items per sec + ``` +- 16 byte values Windows with NTFS, no integrity, no durability, mmaps: ``` Inserting 1M key-value pairs ... - Inserted at 259201 items per sec + Inserted at 938967 items per sec Retrieving 1M key-value pairs ... - Fetched at 700770 items per sec + Fetched at 4739336 items per sec ``` -- 16 byte values Linux with ext4, no integrity, no durability: +- 16 byte values Linux with ext4, no integrity, no durability, read + append: ``` Inserting 1M key-value pairs ... Inserted at 1118568 items per sec diff --git a/programs/key-value-store/include/key_value_store.hpp b/programs/key-value-store/include/key_value_store.hpp index 20c54635..2035fd48 100644 --- a/programs/key-value-store/include/key_value_store.hpp +++ b/programs/key-value-store/include/key_value_store.hpp @@ -171,7 +171,6 @@ namespace key_value_store optional<index::open_hash_index> _index; index::index *_indexheader{nullptr}; std::mutex _commitlock; - bool _use_mmaps_for_commit{false}; size_t _mmap_over_extension{0}; static constexpr afio::file_handle::extent_type _indexinuseoffset = INT64_MAX; @@ -362,42 +361,14 @@ namespace key_value_store } } - /*! \brief Sets whether to use mmaps for writing small objects during commit. - - Normally, this implementation atomic-appends small objects to the smallfile via gather write. - This is simple, safe, and with reasonable performance most of the time. - - However in certain circumstances e.g. with synchronous i/o enabled, atomic-append can cause lots of - read-modify-write cycles on the storage device. This can become unusably slow if the values you - write are small, or your OS doesn't implement gather writes for file i/o (e.g. Windows). For - these circumstances, one can instead use a memory map of the end of the smallfile to append - the small objects. This can cause the kernel to not flush the map to storage until the map is - destroyed, but it also avoids the read-modify-write cycle with synchronous i/o. - */ - void use_mmaps_for_commit(bool v) - { - if(_use_mmaps_for_commit == v) - { - return; - } - if(v && !_use_mmaps_for_commit) - { - _mysmallfile.set_append_only(false).value(); - } - else - { - _mysmallfile.set_append_only(true).value(); - } - _use_mmaps_for_commit = v; - } - /*! \brief Sets whether to use mmaps for fetches. + /*! \brief Sets whether to use mmaps for fetches and appends. Requires lots of virtual address space as the entire of all the small files is mapped into memory with additional `overextension`. Also requires a kernel page cache implementation which correctly updates appends to the smallfile into the mapped view without `msync(MS_INVALIDATE)`. */ - void use_mmaps_for_fetch(size_t overextension = 1024ULL * 1024 * 1024) + void use_mmaps(size_t overextension = 1024ULL * 1024 * 1024) { if(_mmap_over_extension != 0) return; @@ -766,7 +737,7 @@ namespace key_value_store } bool items_written = false; - if(_parent->_use_mmaps_for_commit) + if(!_parent->_smallfiles.mapped.empty()) { afio::file_handle::extent_type original_length = _parent->_mysmallfile.length().value(); // How big does this map need to be? @@ -779,59 +750,53 @@ namespace key_value_store } if(totalcommitsize >= 4096) { + auto &mfh = _parent->_smallfiles.mapped[_parent->_mysmallfileidx]; + afio::file_handle::extent_type new_length = original_length + totalcommitsize; + if(new_length > mfh.capacity()) { -#ifdef _WIN32 - afio::file_handle::extent_type mapbegin = original_length & ~65535; -#else - afio::file_handle::extent_type mapbegin = afio::utils::round_down_to_page_size(original_length); -#endif - afio::file_handle::extent_type mapend = afio::utils::round_up_to_page_size(original_length + totalcommitsize); - _parent->_mysmallfile.truncate(mapend).value(); - auto sh = afio::section_handle::section(_parent->_mysmallfile, mapend).value(); - auto mh = afio::map_handle::map(sh, mapend - mapbegin, mapbegin).value(); - char *value = mh.address() + (original_length - mapbegin); - afio::file_handle::extent_type value_offset = original_length; - for(size_t n = 0; n < _items.size(); n++) + mfh.reserve(new_length + _parent->_mmap_over_extension).value(); + } + mfh.truncate(new_length).value(); + char *value = mfh.address() + original_length; + afio::file_handle::extent_type value_offset = original_length; + for(size_t n = 0; n < _items.size(); n++) + { + toupdate_type &thisupdate = toupdate[n]; + const transaction::_item &item = _items[n]; + size_t totalwrite = 0; + if(thisupdate.removal) { - toupdate_type &thisupdate = toupdate[n]; - const transaction::_item &item = _items[n]; - size_t totalwrite = 0; - if(thisupdate.removal) - { - totalwrite = 64; - } - else - { - memcpy(value, item.towrite->data(), item.towrite->size()); - totalwrite = _parent->_pad_length(item.towrite->size()); - } - index::value_tail *vt = reinterpret_cast<index::value_tail *>(value + totalwrite - sizeof(index::value_tail)); - vt->key = thisupdate.key; - vt->transaction_counter = this_transaction_counter; - if(thisupdate.removal) - { - vt->length = (uint64_t) -1; // this key is being deleted - memset(&thisupdate.history_item, 0, sizeof(thisupdate.history_item)); - } - else - { - vt->length = item.towrite->size(); - index::value_history::item &history_item = thisupdate.history_item; - history_item.transaction_counter = this_transaction_counter; - history_item.value_offset = (value_offset + totalwrite) / 64; - history_item.value_identifier = _parent->_mysmallfileidx; - history_item.length = vt->length; - } - if(_parent->_indexheader->contents_hashed) - { - vt->hash = QUICKCPPLIB_NAMESPACE::algorithm::hash::fast_hash::hash(value, totalwrite); - } - value += totalwrite; - value_offset += totalwrite; + totalwrite = 64; + } + else + { + memcpy(value, item.towrite->data(), item.towrite->size()); + totalwrite = _parent->_pad_length(item.towrite->size()); + } + index::value_tail *vt = reinterpret_cast<index::value_tail *>(value + totalwrite - sizeof(index::value_tail)); + vt->key = thisupdate.key; + vt->transaction_counter = this_transaction_counter; + if(thisupdate.removal) + { + vt->length = (uint64_t) -1; // this key is being deleted + memset(&thisupdate.history_item, 0, sizeof(thisupdate.history_item)); + } + else + { + vt->length = item.towrite->size(); + index::value_history::item &history_item = thisupdate.history_item; + history_item.transaction_counter = this_transaction_counter; + history_item.value_offset = (value_offset + totalwrite) / 64; + history_item.value_identifier = _parent->_mysmallfileidx; + history_item.length = vt->length; + } + if(_parent->_indexheader->contents_hashed) + { + vt->hash = QUICKCPPLIB_NAMESPACE::algorithm::hash::fast_hash::hash(value, totalwrite); } + value += totalwrite; + value_offset += totalwrite; } - // Place maximum extent back where it is supposed to be - _parent->_mysmallfile.truncate(original_length + totalcommitsize).value(); items_written = true; } } diff --git a/programs/key-value-store/main.cpp b/programs/key-value-store/main.cpp index 16bd73b0..2ed4d1c7 100644 --- a/programs/key-value-store/main.cpp +++ b/programs/key-value-store/main.cpp @@ -96,7 +96,7 @@ void benchmark(key_value_store::basic_key_value_store &store, const char *desc) std::cout << " Generating 1M key-value pairs ..." << std::endl; for(size_t n = 0; n < 1000000; n++) { - std::string randomvalue = AFIO_V2_NAMESPACE::utils::random_string(1024 / 2); + std::string randomvalue = AFIO_V2_NAMESPACE::utils::random_string(64 / 2); values.push_back({100 + n, randomvalue}); } } @@ -227,7 +227,7 @@ int main() } { key_value_store::basic_key_value_store store("teststore", 2000000); - benchmark(store, "no integrity, no durability, commit appends"); + benchmark(store, "no integrity, no durability, read + append"); } { std::error_code ec; @@ -235,7 +235,7 @@ int main() } { key_value_store::basic_key_value_store store("teststore", 2000000, true); - benchmark(store, "integrity, no durability, commit appends"); + benchmark(store, "integrity, no durability, read + append"); } { std::error_code ec; @@ -243,8 +243,8 @@ int main() } { key_value_store::basic_key_value_store store("teststore", 2000000); - store.use_mmaps_for_commit(true); - benchmark(store, "no integrity, no durability, commit mmaps"); + store.use_mmaps(); + benchmark(store, "no integrity, no durability, mmaps"); } { std::error_code ec; @@ -252,26 +252,8 @@ int main() } { key_value_store::basic_key_value_store store("teststore", 2000000, true); - store.use_mmaps_for_commit(true); - benchmark(store, "integrity, no durability, commit mmaps"); - } - { - std::error_code ec; - AFIO_V2_NAMESPACE::filesystem::remove_all("teststore", ec); - } - { - key_value_store::basic_key_value_store store("teststore", 2000000); - store.use_mmaps_for_fetch(); - benchmark(store, "no integrity, no durability, commit appends, fetch mmaps"); - } - { - std::error_code ec; - AFIO_V2_NAMESPACE::filesystem::remove_all("teststore", ec); - } - { - key_value_store::basic_key_value_store store("teststore", 2000000, true); - store.use_mmaps_for_fetch(); - benchmark(store, "integrity, no durability, commit appends, fetch mmaps"); + store.use_mmaps(); + benchmark(store, "integrity, no durability, mmaps"); } { std::error_code ec; @@ -279,8 +261,8 @@ int main() } { key_value_store::basic_key_value_store store("teststore", 2000000, true, AFIO_V2_NAMESPACE::file_handle::mode::write, AFIO_V2_NAMESPACE::file_handle::caching::reads); - store.use_mmaps_for_commit(true); - benchmark(store, "integrity, durability, commit mmaps"); + store.use_mmaps(); + benchmark(store, "integrity, durability, mmaps"); } } catch(const std::exception &e) |