diff options
author | Maxim Pimenov <m@maps.me> | 2019-01-24 16:27:28 +0300 |
---|---|---|
committer | Maxim Pimenov <m@maps.me> | 2019-01-30 12:48:07 +0300 |
commit | 025fe3ad7774d33a5179810475bdf1737cbf6de0 (patch) | |
tree | 8ce5484b30ff0ffcfa34c948d487050d187e1d70 | |
parent | a6128cc5255f35c4076bea5a6f42c1bfacb006aa (diff) |
base::Skew instead of divsufsort.diffs-slower-exp
Slowdown is around 20x but the code is much simpler.
4 files changed, 26 insertions, 10 deletions
diff --git a/3party/bsdiff-courgette/bsdiff/bsdiff.h b/3party/bsdiff-courgette/bsdiff/bsdiff.h index c82659bc7f..88e29ebb19 100644 --- a/3party/bsdiff-courgette/bsdiff/bsdiff.h +++ b/3party/bsdiff-courgette/bsdiff/bsdiff.h @@ -83,6 +83,7 @@ #include "base/checked_cast.hpp" #include "base/logging.hpp" #include "base/string_utils.hpp" +#include "base/suffix_array.hpp" #include "base/timer.hpp" #include <array> @@ -110,6 +111,17 @@ private: MemWriter<std::vector<uint8_t>> m_writer; }; +//inline divsuf::saint_t BuildSuffixArray(const divsuf::sauchar_t *T, divsuf::saidx_t *SA, divsuf::saidx_t n) +inline int32_t BuildSuffixArray(const uint8_t *T, size_t *SA, int32_t n) +{ + SA[0] = n; // Manually add the empty string suffix. +// return divsuf::divsufsort(T, SA + 1, n); + + base::Skew(n, T, SA + 1); + + return 0; +} + inline uint32_t CalculateCrc(const uint8_t* buffer, size_t size) { // Calculate Crc by calling CRC method in zlib const auto size32 = base::checked_cast<uint32_t>(size); @@ -143,11 +155,12 @@ BSDiffStatus CreateBinaryPatch(OldReader & old_reader, old_source.Read(old_buf.data(), old_buf.size()); const uint8_t * old = old_buf.data(); - - std::vector<divsuf::saidx_t> suffix_array(old_size + 1); + std::vector<size_t> suffix_array(old_size + 1); base::Timer suf_sort_timer; - divsuf::saint_t result = divsuf::divsufsort_include_empty(old, suffix_array.data(), old_size); + divsuf::saint_t result = BuildSuffixArray( + old, suffix_array.data(), old_size); + LOG(LINFO, ("Done divsufsort", suf_sort_timer.ElapsedSeconds())); if (result != 0) return UNEXPECTED_ERROR; diff --git a/3party/bsdiff-courgette/bsdiff/bsdiff_common.h b/3party/bsdiff-courgette/bsdiff/bsdiff_common.h index a1aeecbf07..4392118b34 100644 --- a/3party/bsdiff-courgette/bsdiff/bsdiff_common.h +++ b/3party/bsdiff-courgette/bsdiff/bsdiff_common.h @@ -57,7 +57,7 @@ BSDiffStatus MBS_ReadHeader(Source & src, MBSPatchHeader* header) { return OK; } -std::string DebugPrint(BSDiffStatus status) { +inline std::string DebugPrint(BSDiffStatus status) { switch (status) { case OK: return "OK"; case MEM_ERROR: return "MEM_ERROR"; diff --git a/3party/bsdiff-courgette/bsdiff/bsdiff_search.h b/3party/bsdiff-courgette/bsdiff/bsdiff_search.h index 6ca9350163..6902b58e4e 100644 --- a/3party/bsdiff-courgette/bsdiff/bsdiff_search.h +++ b/3party/bsdiff-courgette/bsdiff/bsdiff_search.h @@ -73,9 +73,9 @@ inline int matchlen(const unsigned char* buf1, template <class T> SearchResult search(const T & sa, const unsigned char* srcbuf, - int srcsize, + size_t srcsize, const unsigned char* keybuf, - int keysize) { + size_t keysize) { int lo = 0; int hi = srcsize; while (hi - lo > 1) { diff --git a/3party/bsdiff-courgette/bsdiff/bsdiff_tests/bsdiff_search_tests.cpp b/3party/bsdiff-courgette/bsdiff/bsdiff_tests/bsdiff_search_tests.cpp index dea1fb6fea..15ef2f285a 100644 --- a/3party/bsdiff-courgette/bsdiff/bsdiff_tests/bsdiff_search_tests.cpp +++ b/3party/bsdiff-courgette/bsdiff/bsdiff_tests/bsdiff_search_tests.cpp @@ -6,6 +6,7 @@ #include <string> #include <vector> +#include "3party/bsdiff-courgette/bsdiff/bsdiff.h" #include "3party/bsdiff-courgette/bsdiff/bsdiff_search.h" #include "3party/bsdiff-courgette/divsufsort/divsufsort.h" @@ -20,8 +21,10 @@ UNIT_TEST(BSDiffSearchTest_Search) string const str = "the quick brown fox jumps over the lazy dog."; int const size = static_cast<int>(str.size()); auto buf = reinterpret_cast<unsigned char const * const>(str.data()); - vector<divsuf::saidx_t> suffix_array(size + 1); - divsuf::divsufsort_include_empty(buf, suffix_array.data(), size); + + vector<size_t> suffix_array(size + 1); + + bsdiff::BuildSuffixArray(buf, suffix_array.data(), size); // Specific queries. struct @@ -108,8 +111,8 @@ UNIT_TEST(BSDiffSearchTest_SearchExact) unsigned char const * const buf = reinterpret_cast<unsigned char const * const>(testCases[idx].data()); - vector<divsuf::saidx_t> suffix_array(size + 1); - divsuf::divsufsort_include_empty(buf, suffix_array.data(), size); + vector<size_t> suffix_array(size + 1); + bsdiff::BuildSuffixArray(buf, suffix_array.data(), size); // Test exact matches for every non-empty substring. for (int lo = 0; lo < size; ++lo) |