Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/windirstat/llfio.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llfio/revision.hpp6
-rw-r--r--include/llfio/v2.0/detail/impl/fast_random_file_handle.ipp88
-rw-r--r--include/llfio/v2.0/fast_random_file_handle.hpp71
-rw-r--r--test/tests/fast_random_file_handle.cpp39
4 files changed, 161 insertions, 43 deletions
diff --git a/include/llfio/revision.hpp b/include/llfio/revision.hpp
index ee51dfb3..c6402746 100644
--- a/include/llfio/revision.hpp
+++ b/include/llfio/revision.hpp
@@ -1,4 +1,4 @@
// Note the second line of this file must ALWAYS be the git SHA, third line ALWAYS the git SHA update time
-#define LLFIO_PREVIOUS_COMMIT_REF 5f576c797bfbadfbcc9092ce7e1b98089156eebc
-#define LLFIO_PREVIOUS_COMMIT_DATE "2018-10-18 08:50:15 +00:00"
-#define LLFIO_PREVIOUS_COMMIT_UNIQUE 5f576c79
+#define LLFIO_PREVIOUS_COMMIT_REF d5aa6ed95238f51a9184308f55b552fba836b50e
+#define LLFIO_PREVIOUS_COMMIT_DATE "2018-10-19 15:59:03 +00:00"
+#define LLFIO_PREVIOUS_COMMIT_UNIQUE d5aa6ed9
diff --git a/include/llfio/v2.0/detail/impl/fast_random_file_handle.ipp b/include/llfio/v2.0/detail/impl/fast_random_file_handle.ipp
index 489e2c04..ca042427 100644
--- a/include/llfio/v2.0/detail/impl/fast_random_file_handle.ipp
+++ b/include/llfio/v2.0/detail/impl/fast_random_file_handle.ipp
@@ -24,19 +24,6 @@ Distributed under the Boost Software License, Version 1.0.
#include "../../fast_random_file_handle.hpp"
-#ifdef __has_include
-#if __has_include("../../quickcpplib/include/algorithm/hash.hpp")
-#include "../../quickcpplib/include/algorithm/hash.hpp"
-#else
-#include "quickcpplib/include/algorithm/hash.hpp"
-#endif
-#elif __PCPP_ALWAYS_TRUE__
-#include "quickcpplib/include/algorithm/hash.hpp"
-#else
-#include "../../quickcpplib/include/algorithm/hash.hpp"
-#endif
-
-
LLFIO_V2_NAMESPACE_EXPORT_BEGIN
fast_random_file_handle::io_result<fast_random_file_handle::buffers_type> fast_random_file_handle::read(io_request<buffers_type> reqs, deadline /* unused */) noexcept
@@ -50,7 +37,7 @@ fast_random_file_handle::io_result<fast_random_file_handle::buffers_type> fast_r
}
return std::move(reqs.buffers);
}
- extent_type hashoffset, togo = _length - reqs.offset;
+ extent_type togo = _length - reqs.offset;
// Fill the scatter buffers
for(auto &buffer : reqs.buffers)
{
@@ -62,10 +49,10 @@ fast_random_file_handle::io_result<fast_random_file_handle::buffers_type> fast_r
{
for(extent_type i = 0; i < buffer.size();)
{
- hashoffset = reqs.offset >> 4; // add this offset to the state
- _iv[0] = hashoffset;
- auto hash = QUICKCPPLIB_NAMESPACE::algorithm::hash::fast_hash::hash((const char *) _iv, 16);
- extent_type thisblockoffset = reqs.offset - (hashoffset << 4), thisblocklen = 16 - thisblockoffset;
+ // How much can we do at once?
+ auto hashoffset = reqs.offset >> 2; // place this offset into the state
+ auto thisblockoffset = reqs.offset - (hashoffset << 2); // offset into our buffer due to request offset misalignment
+ extent_type thisblocklen = 64; // maximum possible block this iteration
if(thisblocklen > buffer.size() - i)
{
thisblocklen = buffer.size() - i;
@@ -74,7 +61,70 @@ fast_random_file_handle::io_result<fast_random_file_handle::buffers_type> fast_r
{
thisblocklen = togo;
}
- memcpy(buffer.data() + i, hash.as_bytes + thisblockoffset, thisblocklen);
+
+ struct _p
+ {
+ uint32_t hash[16];
+ } blk, *p = &blk;
+ // If not copying partial buffers
+ if(thisblockoffset == 0)
+ {
+ // If destination is on 4 byte multiple, we can write direct
+ if((((uintptr_t) buffer.data() + i) & 3) == 0)
+ p = (_p *) (buffer.data() + i);
+ }
+ if(thisblocklen == 64)
+ {
+ auto &hash = p->hash;
+ prng __prngs[16] = {_prng, _prng, _prng, _prng, _prng, _prng, _prng, _prng, _prng, _prng, _prng, _prng, _prng, _prng, _prng, _prng};
+ hash[0] = __prngs[0](hashoffset + 0);
+ hash[1] = __prngs[1](hashoffset + 1);
+ hash[2] = __prngs[2](hashoffset + 2);
+ hash[3] = __prngs[3](hashoffset + 3);
+ hash[4] = __prngs[4](hashoffset + 4);
+ hash[5] = __prngs[5](hashoffset + 5);
+ hash[6] = __prngs[6](hashoffset + 6);
+ hash[7] = __prngs[7](hashoffset + 7);
+ hash[8] = __prngs[8](hashoffset + 8);
+ hash[9] = __prngs[9](hashoffset + 9);
+ hash[10] = __prngs[10](hashoffset + 10);
+ hash[11] = __prngs[11](hashoffset + 11);
+ hash[12] = __prngs[12](hashoffset + 12);
+ hash[13] = __prngs[13](hashoffset + 13);
+ hash[14] = __prngs[14](hashoffset + 14);
+ hash[15] = __prngs[15](hashoffset + 15);
+ if(thisblocklen > 64 - thisblockoffset)
+ {
+ thisblocklen = 64 - thisblockoffset;
+ }
+ }
+ else if(thisblocklen == 16)
+ {
+ auto &hash = p->hash;
+ prng __prngs[4] = {_prng, _prng, _prng, _prng};
+ hash[0] = __prngs[0](hashoffset + 0);
+ hash[1] = __prngs[1](hashoffset + 1);
+ hash[2] = __prngs[2](hashoffset + 2);
+ hash[3] = __prngs[3](hashoffset + 3);
+ if(thisblocklen > 16 - thisblockoffset)
+ {
+ thisblocklen = 16 - thisblockoffset;
+ }
+ }
+ else
+ {
+ auto &hash = p->hash;
+ auto __prng(_prng);
+ hash[0] = __prng(hashoffset);
+ if(thisblocklen > 4 - thisblockoffset)
+ {
+ thisblocklen = 4 - thisblockoffset;
+ }
+ }
+ if(p == &blk)
+ {
+ memcpy(buffer.data() + i, ((const char *) p) + thisblockoffset, thisblocklen);
+ }
reqs.offset += thisblocklen;
i += thisblocklen;
togo -= thisblocklen;
diff --git a/include/llfio/v2.0/fast_random_file_handle.hpp b/include/llfio/v2.0/fast_random_file_handle.hpp
index 9467f80d..fb51024f 100644
--- a/include/llfio/v2.0/fast_random_file_handle.hpp
+++ b/include/llfio/v2.0/fast_random_file_handle.hpp
@@ -27,6 +27,8 @@ Distributed under the Boost Software License, Version 1.0.
#include "file_handle.hpp"
+#include "quickcpplib/include/algorithm/small_prng.hpp"
+
//! \file fast_random_file_handle.hpp Provides `fast_random_file_handle`.
LLFIO_V2_NAMESPACE_EXPORT_BEGIN
@@ -35,18 +37,18 @@ LLFIO_V2_NAMESPACE_EXPORT_BEGIN
\brief A handle to synthesised, non-cryptographic, pseudo-random data.
This implementation of file handle provides read-only file data of random bits
-based on Bob Jenkin's 128-bit SpookyHash (http://burtleburtle.net/bob/hash/spooky.html).
-It works by initialising the hash state from the 128-bit seed you provide at construction,
-and then for each 16 byte block it copies the 304 bytes of hash state, adds the scatter buffer
-offset divided by 16 to the hash, generating a unique 128-bit hash for that seed and
-file offset. SpookyHash is not a cryptographically secure hash, so do not use this
-class for anything secure. However it certainly ought to produce very random data
-indeed, as SpookyHash is an excellent quality fast hash.
+based on Bob Jenkins' 32-bit JSF PRNG (http://burtleburtle.net/bob/rand/smallprng.html).
+It works by initialising the prng state from the seed you provide at construction,
+and then for each 4 byte block it copies the 16 bytes of prng state, overwrites the
+first eight bytes with the offset divided by 4, and performs a PRNG round to generate
+a fairly unique number. As there are eight bytes of randomness being mixed with eight
+bytes of counter, this will not be a particularly random stream, but it's probably not
+awful either.
Note that writes to this handle are permitted if it was opened with write permission,
but writes have no effect.
-The use for a file handle full of very random data may not be obvious. The first is
+The use for a file handle full of random data may not be obvious. The first is
to obfuscate another file's data using `algorithm::xor_file_handle`. The second is
for mock ups in testing, where this file handle stands in for some other (large) file,
and you are testing throughput or latency in processing code.
@@ -55,9 +57,22 @@ The third is for unit testing randomly corrupted file data. `algorithm::mix_file
can randomly mix scatter gather buffers from this file handle into another file handle
in order to test how well handling code copes with random data corruption.
-VS2017:
-- 416Mb/sec static function
-- 539Mb/sec preinit, add, finalise
+## Benchmarks:
+
+On a 3.1Ghz Intel Skylake CPU where `memcpy()` can do ~12Gb/sec:
+
+- GCC7: 4659 Mb/sec
+- VS2017: 3653 Mb/sec
+
+The current implementation spots when it can do 16x simultaneous PRNG rounds, and thus
+can fill a cache line at a time. The Skylake CPU used to benchmark the code dispatches
+around four times the throughput with this, however there is likely still performance
+left on the table.
+
+If someone were bothered to rewrite the JSF PRNG into SIMD, it is possible one could
+approach `memcpy()` in performance. One would probably need to use AVX-512 however,
+as the JSF PRNG makes heavy use of bit rotation, which is slow before AVX-512 as it
+must be emulated with copious bit shifting and masking.
*/
class fast_random_file_handle : public file_handle
{
@@ -80,7 +95,27 @@ public:
template <class T> using io_result = io_handle::io_result<T>;
protected:
- uint64_t _iv[12]{}; // sc_blockSize from SpookyHash
+ struct prng : public QUICKCPPLIB_NAMESPACE::algorithm::small_prng::small_prng
+ {
+ using _base = QUICKCPPLIB_NAMESPACE::algorithm::small_prng::small_prng;
+ prng() = default;
+ // Construct an instance with `seed`
+ explicit prng(span<const byte> seed) noexcept
+ {
+ a = 0xf1ea5eed;
+ b = c = d = 0xdeadbeef;
+ memcpy(&a, seed.data(), std::min(seed.size(), sizeof(*this)));
+ for(size_t i = 0; i < 20; ++i)
+ _base::operator()();
+ }
+ // Return randomness for a given extent
+ _base::value_type operator()(extent_type offset) noexcept
+ {
+ a = offset & 0xffffffff;
+ b = (offset >> 32) & 0xffffffff;
+ return _base::operator()();
+ }
+ } _prng;
extent_type _length{0};
result<void> _perms_check() const noexcept
@@ -94,16 +129,16 @@ protected:
public:
//! Default constructor
- constexpr fast_random_file_handle() {} // NOLINT
- //! Constructor. Seed can be of up to 88 bytes.
+ fast_random_file_handle() = default;
+ //! Constructor. Seed is not much use past sixteen bytes.
fast_random_file_handle(extent_type length, span<const byte> seed)
- : _length(length)
+ : _prng(seed)
+ , _length(length)
{
- memcpy(&_iv[1], seed.begin(), (seed.size() < (sizeof(_iv) - sizeof(_iv[0]))) ? seed.size() : (sizeof(_iv) - sizeof(_iv[0])));
}
//! Implicit move construction of fast_random_file_handle permitted
- fast_random_file_handle(fast_random_file_handle &&o) noexcept : _length(o._length) { memcpy(_iv, o._iv, sizeof(_iv)); }
+ fast_random_file_handle(fast_random_file_handle &&o) noexcept : _prng(o._prng), _length(o._length) {}
//! No copy construction (use `clone()`)
fast_random_file_handle(const fast_random_file_handle &) = delete;
//! Move assignment of fast_random_file_handle permitted
@@ -136,7 +171,7 @@ public:
{
return errc::invalid_argument;
}
- byte _seed[88];
+ byte _seed[16];
if(seed.empty())
{
utils::random_fill((char *) _seed, sizeof(_seed));
diff --git a/test/tests/fast_random_file_handle.cpp b/test/tests/fast_random_file_handle.cpp
index acd8ceb0..d528c44f 100644
--- a/test/tests/fast_random_file_handle.cpp
+++ b/test/tests/fast_random_file_handle.cpp
@@ -74,10 +74,43 @@ static inline void TestFastRandomFileHandlePerformance()
memset(store.begin(), 1, store.size());
fast_random_file_handle h = fast_random_file_handle::fast_random_file(testbytes).value();
auto begin = std::chrono::high_resolution_clock::now();
- h.read(0, {{store.begin(), store.size()}}).value();
+ while(std::chrono::duration_cast<std::chrono::seconds>(std::chrono::high_resolution_clock::now() - begin).count() < 1)
+ ;
+ QUICKCPPLIB_NAMESPACE::algorithm::small_prng::small_prng prng;
+ begin = std::chrono::high_resolution_clock::now();
+ for(size_t n = 0; n < 10; n++)
+ {
+ for(size_t i = 0; i < store.size(); i += 64)
+ {
+ *(uint32_t *) (&store[i + 0]) = prng();
+ *(uint32_t *) (&store[i + 4]) = prng();
+ *(uint32_t *) (&store[i + 8]) = prng();
+ *(uint32_t *) (&store[i + 12]) = prng();
+ *(uint32_t *) (&store[i + 16]) = prng();
+ *(uint32_t *) (&store[i + 20]) = prng();
+ *(uint32_t *) (&store[i + 24]) = prng();
+ *(uint32_t *) (&store[i + 28]) = prng();
+ *(uint32_t *) (&store[i + 32]) = prng();
+ *(uint32_t *) (&store[i + 36]) = prng();
+ *(uint32_t *) (&store[i + 40]) = prng();
+ *(uint32_t *) (&store[i + 44]) = prng();
+ *(uint32_t *) (&store[i + 48]) = prng();
+ *(uint32_t *) (&store[i + 52]) = prng();
+ *(uint32_t *) (&store[i + 56]) = prng();
+ *(uint32_t *) (&store[i + 60]) = prng();
+ }
+ }
auto end = std::chrono::high_resolution_clock::now();
- auto diff = std::chrono::duration_cast<std::chrono::microseconds>(end - begin);
- std::cout << "fast_random_file_handle produces randomness at " << ((testbytes / 1024.0 / 1024.0) / (diff.count() / 1000000.0)) << " Mb/sec" << std::endl;
+ auto diff1 = std::chrono::duration_cast<std::chrono::microseconds>(end - begin);
+ begin = std::chrono::high_resolution_clock::now();
+ for(size_t n = 0; n < 10; n++)
+ {
+ h.read(0, {{store.begin(), store.size()}}).value();
+ }
+ end = std::chrono::high_resolution_clock::now();
+ auto diff2 = std::chrono::duration_cast<std::chrono::microseconds>(end - begin);
+ std::cout << "small_prng produces randomness at " << ((testbytes / 1024.0 / 1024.0) / (diff1.count() / 10000000.0)) << " Mb/sec" << std::endl;
+ std::cout << "fast_random_file_handle produces randomness at " << ((testbytes / 1024.0 / 1024.0) / (diff2.count() / 10000000.0)) << " Mb/sec" << std::endl;
}
KERNELTEST_TEST_KERNEL(integration, llfio, fast_random_file_handle, works, "Tests that fast random file handle works as expected", TestFastRandomFileHandleWorks())