diff options
author | Alex Zolotarev <deathbaba@gmail.com> | 2010-12-05 19:24:16 +0300 |
---|---|---|
committer | Alex Zolotarev <alex@maps.me> | 2015-09-22 22:33:57 +0300 |
commit | d6e12b7ce4bcbf0ccd1c07eb25de143422913c34 (patch) | |
tree | a7e910c330ce4da9b4f2d8be76067adece2561c4 /coding/diff.hpp |
One Month In Minsk. Made in Belarus.
Diffstat (limited to 'coding/diff.hpp')
-rw-r--r-- | coding/diff.hpp | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/coding/diff.hpp b/coding/diff.hpp new file mode 100644 index 0000000000..e852a62add --- /dev/null +++ b/coding/diff.hpp @@ -0,0 +1,210 @@ +#pragma once +#include "diff_patch_common.hpp" +#include "../base/assert.hpp" +#include "../base/base.hpp" +#include "../std/algorithm.hpp" +#include "../std/unordered_map.hpp" +#include "../std/vector.hpp" + +namespace diff +{ + +template <class PatchWriterT, typename SizeT = uint64_t> class PatchCoder +{ +public: + typedef SizeT size_type; + + explicit PatchCoder(PatchWriterT & patchWriter) + : m_LastOperation(COPY), m_LastOpCode(0), m_PatchWriter(patchWriter) + { + } + + void Delete(size_type n) + { + if (n != 0) + Op(DELETE, n); + } + + void Copy(size_type n) + { + if (n != 0) + Op(COPY, n); + } + + template <typename TIter> void Insert(TIter it, size_type n) + { + if (n != 0) + { + Op(INSERT, n); + m_PatchWriter.WriteData(it, n); + } + } + + void Finalize() + { + WriteLasOp(); + } + +private: + void Op(Operation op, size_type n) + { + if (m_LastOperation == op) + { + m_LastOpCode += (n << 1); + return; + } + WriteLasOp(); + m_LastOpCode = (n << 1) | ((m_LastOperation + 1) % 3 == op ? 0 : 1); + m_LastOperation = op; + } + + void WriteLasOp() + { + if (m_LastOpCode != 0) + m_PatchWriter.WriteOperation(m_LastOpCode); + else + CHECK_EQUAL(m_LastOperation, COPY, ()); // "We were just initialized." + } + + Operation m_LastOperation; + size_type m_LastOpCode; + PatchWriterT & m_PatchWriter; +}; + +// Find minimal patch, with no more than maxPatchSize edited values, that transforms A into B. +// Returns the length of the minimal patch, or -1 if no such patch found. +// Intermediate information is saved into tmpSink and can be used later to restore +// the resulting patch. +template < + typename TSignedWord, // Signed word, capable of storing position in text. + class TSrcVector, // Source data (A). + class TDstVector, // Destination data (B). + class TTmpFileSink // Sink to store temporary information. + > +TSignedWord DiffMyersSimple(TSrcVector const & A, + TDstVector const & B, + TSignedWord maxPatchSize, + TTmpFileSink & tmpSink) +{ + ASSERT_GREATER(maxPatchSize, 0, ()); + vector<TSignedWord> V(2 * maxPatchSize + 1); + for (TSignedWord d = 0; d <= maxPatchSize; ++d) + { + for (TSignedWord k = -d; k <= d; k += 2) + { + TSignedWord x; + if (k == -d || (k != d && V[maxPatchSize + k - 1] < V[maxPatchSize + k + 1])) + x = V[maxPatchSize + k + 1]; + else + x = V[maxPatchSize + k - 1] + 1; + while (x < static_cast<TSignedWord>(A.size()) && + x - k < static_cast<TSignedWord>(B.size()) && + A[x] == B[x - k]) + ++x; + V[maxPatchSize + k] = x; + if (x == static_cast<TSignedWord>(A.size()) && x - k == static_cast<TSignedWord>(B.size())) + return d; + } + tmpSink.Write(&V[maxPatchSize + d], (2 * d + 1) * sizeof(TSignedWord)); + } + return -1; +} + +// Differ that just replaces old with new, with the only optimization of skipping equal values +// at the beginning and at the end. +class SimpleReplaceDiffer +{ +public: + template <typename SrcIterT, typename DstIterT, class PatchCoderT> + void Diff(SrcIterT srcBeg, SrcIterT srcEnd, + DstIterT dstBeg, DstIterT dstEnd, + PatchCoderT & patchCoder) + { + typename PatchCoderT::size_type begCopy = 0; + for (; srcBeg != srcEnd && dstBeg != dstEnd && *srcBeg == *dstBeg; ++srcBeg, ++dstBeg) + ++begCopy; + patchCoder.Copy(begCopy); + typename PatchCoderT::size_type endCopy = 0; + for (; srcBeg != srcEnd && dstBeg != dstEnd && *(srcEnd-1) == *(dstEnd-1); --srcEnd, --dstEnd) + ++endCopy; + patchCoder.Delete(srcEnd - srcBeg); + patchCoder.Insert(dstBeg, dstEnd - dstBeg); + patchCoder.Copy(endCopy); + } +}; + +// Given FineGrainedDiff and rolling Hasher, DiffWithRollingHash splits the source sequence +// into chunks of size m_BlockSize, finds equal chunks in the destination sequence, using rolling +// hash to find good candidates, writes info about equal chunks into patchCoder and for everything +// between equal chunks, calls FineGrainedDiff::Diff(). +template <class FineGrainedDiffT, + class HasherT, + class HashPosMultiMapT = unordered_multimap<typename HasherT::hash_type, uint64_t> > +class RollingHashDiffer +{ +public: + explicit RollingHashDiffer(size_t blockSize, + FineGrainedDiffT const & fineGrainedDiff = FineGrainedDiffT()) + : m_FineGrainedDiff(fineGrainedDiff), m_BlockSize(blockSize) {} + + template <typename SrcIterT, typename DstIterT, class PatchCoderT> + void Diff(SrcIterT const srcBeg, SrcIterT const srcEnd, + DstIterT const dstBeg, DstIterT const dstEnd, + PatchCoderT & patchCoder) + { + if (srcEnd - srcBeg < m_BlockSize || dstEnd - dstBeg < m_BlockSize) + { + m_FineGrainedDiff.Diff(srcBeg, srcEnd, dstBeg, dstEnd, patchCoder); + return; + } + HasherT hasher; + HashPosMultiMapT srcHashes; + for (SrcIterT src = srcBeg; srcEnd - src >= m_BlockSize; src += m_BlockSize) + srcHashes.insert(HashPosMultiMapValue(hasher.Init(src, m_BlockSize), src - srcBeg)); + SrcIterT srcLastDiff = srcBeg; + DstIterT dst = dstBeg, dstNext = dstBeg + m_BlockSize, dstLastDiff = dstBeg; + hash_type h = hasher.Init(dst, m_BlockSize); + while (dstNext != dstEnd) + { + pair<HashPosMultiMapIterator, HashPosMultiMapIterator> iters = srcHashes.equal_range(h); + if (iters.first != iters.second) + { + pos_type const srcLastDiffPos = srcLastDiff - srcBeg; + HashPosMultiMapIterator it = srcHashes.end(); + for (HashPosMultiMapIterator i = iters.first; i != iters.second; ++i) + if (i->second >= srcLastDiffPos && + (it == srcHashes.end() || i->second < it->second)) + it = i; + if (it != srcHashes.end() && + equal(srcBeg + it->second, srcBeg + it->second + m_BlockSize, dst)) + { + pos_type srcBlockEqualPos = it->second; + m_FineGrainedDiff.Diff(srcLastDiff, srcBeg + srcBlockEqualPos, + dstLastDiff, dst, patchCoder); + patchCoder.Copy(m_BlockSize); + srcLastDiff = srcBeg + srcBlockEqualPos + m_BlockSize; + dst = dstLastDiff = dstNext; + if (dstEnd - dstNext < m_BlockSize) + break; + dstNext = dst + m_BlockSize; + h = hasher.Init(dst, m_BlockSize); + continue; + } + } + h = hasher.Scroll(*(dst++), *(dstNext++)); + } + if (srcLastDiff != srcEnd || dstLastDiff != dstEnd) + m_FineGrainedDiff.Diff(srcLastDiff, srcEnd, dstLastDiff, dstEnd, patchCoder); + } +private: + typedef typename HasherT::hash_type hash_type; + typedef typename HashPosMultiMapT::value_type::second_type pos_type; + typedef typename HashPosMultiMapT::const_iterator HashPosMultiMapIterator; + typedef typename HashPosMultiMapT::value_type HashPosMultiMapValue; + + FineGrainedDiffT m_FineGrainedDiff; + HasherT m_Hasher; + size_t m_BlockSize; +}; + +} |