// // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See License.txt in the project root for license information. // // This file contain implementations details that are subject to change without notice. // Use at your own risk. // using System; using System.Collections; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.IO; using System.Text; //************************************************************************* // The code from this point on is a soure-port of the TFS diff algorithm, to be available // in cases where we don't have access to the TFS diff assembly (i.e. outside of Visual Studio). //************************************************************************* namespace Microsoft.TeamFoundation.Diff.Copy { //************************************************************************* /// /// A predicate used by Microsoft.TeamFoundation.Diff.DiffFinder /// that allows callers to stop differencing prematurely. /// /// The current index in the original sequence being differenced. /// The original sequence being differenced. /// The length of the longest match so far. /// true if the algorithm should continue processing, false to stop the algorithm. /// /// When false is returned, the algorithm stops searching for matches and uses /// the information it has computed so far to create the Microsoft.TeamFoundation.Diff.IDiffChange[] array /// that will be returned. /// //************************************************************************* public delegate bool ContinueDifferencePredicate(int originalIndex, IList originalSequence, int longestMatchSoFar); //************************************************************************* /// /// An enumeration of the possible change types for a difference operation. /// //************************************************************************* public enum DiffChangeType { //********************************************************************* /// /// Content was inserted into the modified sequence. /// //********************************************************************* Insert, //********************************************************************* /// /// Content was deleted from the original sequence. /// //********************************************************************* Delete, //********************************************************************* /// /// Content from the original sequence has changed in the modified /// sequence. /// //********************************************************************* Change } //************************************************************************* /// /// Represents information about a specific difference between two sequences. /// //************************************************************************* public interface IDiffChange { //********************************************************************* /// /// The type of difference. /// //********************************************************************* DiffChangeType ChangeType { get; } //********************************************************************* /// /// The position of the first element in the original sequence which /// this change affects. /// //********************************************************************* int OriginalStart { get; } //********************************************************************* /// /// The number of elements from the original sequence which were /// affected (deleted). /// //********************************************************************* int OriginalLength { get; } //********************************************************************* /// /// The position of the last element in the original sequence which /// this change affects. /// //********************************************************************* int OriginalEnd { get; } //********************************************************************* /// /// The position of the first element in the modified sequence which /// this change affects. /// //********************************************************************* int ModifiedStart { get; } //********************************************************************* /// /// The number of elements from the modified sequence which were /// affected (added). /// //********************************************************************* int ModifiedLength { get; } //********************************************************************* /// /// The position of the last element in the modified sequence which /// this change affects. /// //********************************************************************* int ModifiedEnd { get; } /// /// This methods combines two IDiffChange objects into one /// /// The diff change to add /// An IDiffChange that represnets this + diffChange IDiffChange Add(IDiffChange diffChange); } //************************************************************************* /// /// Represents information about a specific difference between two sequences. /// //************************************************************************* internal class DiffChange : IDiffChange { //********************************************************************* /// /// Constructs a new DiffChange with the given sequence information /// and content. /// /// The start position of the difference /// in the original sequence. /// The number of elements of the difference /// from the original sequence. /// The start position of the difference /// in the modified sequence. /// The number of elements of the difference /// from the modified sequence. //********************************************************************* internal DiffChange(int originalStart, int originalLength, int modifiedStart, int modifiedLength) { Debug.Assert(originalLength > 0 || modifiedLength > 0, "originalLength and modifiedLength cannot both be <= 0"); m_originalStart = originalStart; m_originalLength = originalLength; m_modifiedStart = modifiedStart; m_modifiedLength = modifiedLength; UpdateChangeType(); } //********************************************************************* /// /// Determines the change type from the ranges and updates the value. /// //********************************************************************* private void UpdateChangeType() { // Figure out what change type this is if (m_originalLength > 0) { if (m_modifiedLength > 0) { m_changeType = DiffChangeType.Change; } else { m_changeType = DiffChangeType.Delete; } } else if (m_modifiedLength > 0) { m_changeType = DiffChangeType.Insert; } m_updateChangeType = false; } //********************************************************************* /// /// The type of difference. /// //********************************************************************* public DiffChangeType ChangeType { get { if (m_updateChangeType) { UpdateChangeType(); } return m_changeType; } } private DiffChangeType m_changeType; //********************************************************************* /// /// The position of the first element in the original sequence which /// this change affects. /// //********************************************************************* public int OriginalStart { get { return m_originalStart; } set { m_originalStart = value; m_updateChangeType = true; } } private int m_originalStart; //********************************************************************* /// /// The number of elements from the original sequence which were /// affected. /// //********************************************************************* public int OriginalLength { get { return m_originalLength; } set { m_originalLength = value; m_updateChangeType = true; } } private int m_originalLength; //********************************************************************* /// /// The position of the last element in the original sequence which /// this change affects. /// //********************************************************************* public int OriginalEnd { get { return OriginalStart + OriginalLength; } } //********************************************************************* /// /// The position of the first element in the modified sequence which /// this change affects. /// //********************************************************************* public int ModifiedStart { get { return m_modifiedStart; } set { m_modifiedStart = value; m_updateChangeType = true; } } private int m_modifiedStart; //********************************************************************* /// /// The number of elements from the modified sequence which were /// affected (added). /// //********************************************************************* public int ModifiedLength { get { return m_modifiedLength; } set { m_modifiedLength = value; m_updateChangeType = true; } } private int m_modifiedLength; //********************************************************************* /// /// The position of the last element in the modified sequence which /// this change affects. /// //********************************************************************* public int ModifiedEnd { get { return ModifiedStart + ModifiedLength; } } /// /// This methods combines two DiffChange objects into one /// /// The diff change to add /// A IDiffChange object that represnets this + diffChange public IDiffChange Add(IDiffChange diffChange) { //If the diff change is null then just return this if (diffChange == null) { return this; } int originalStart = Math.Min(this.OriginalStart, diffChange.OriginalStart); int originalEnd = Math.Max(this.OriginalEnd, diffChange.OriginalEnd); int modifiedStart = Math.Min(this.ModifiedStart, diffChange.ModifiedStart); int modifiedEnd = Math.Max(this.ModifiedEnd, diffChange.ModifiedEnd); return new DiffChange(originalStart, originalEnd - originalStart, modifiedStart, modifiedEnd - modifiedStart); } // Member variables private bool m_updateChangeType; } //************************************************************************* /// /// The types of End of Line terminators. /// //************************************************************************* public enum EndOfLineTerminator { None, // EndOfFile LineFeed, // \n (UNIX) CarriageReturn, // \r (Mac) [SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "LineFeed")] [SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "LineFeed")] CarriageReturnLineFeed, // \r\n (DOS) LineSeparator, // \u2028 (Unicode character) ParagraphSeparator, // \u2029 (Unicode character) NextLine // \u0085 (Unicode character) } //************************************************************************* /// /// Utility class which tokenizes the given stream into string tokens. Each /// token represents a line from the file as delimited by common EOL sequences. /// (\n, \r\n, \r, \u2028, \u2029, \u85) /// //************************************************************************* [SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "Tokenizer")] public class DiffLineTokenizer { //********************************************************************* /// /// Constructs a new DiffLineTokenizer for the given stream and encoding. /// /// The stream to tokenize. /// The character encoding of the bytes /// in the stream. If null, this will be automatically detected. //********************************************************************* public DiffLineTokenizer(Stream stream, Encoding encoding) { if (encoding == null) { m_streamReader = new StreamReader(stream, true); } else { m_streamReader = new StreamReader(stream, encoding, true); } // Bug 70126: Diff calculated code churn incorrectly for file // We don't want to parse a Unicode EOL character if its not Unicode int codePage = m_streamReader.CurrentEncoding.CodePage; if (codePage == Encoding.Unicode.CodePage || codePage == Encoding.BigEndianUnicode.CodePage || codePage == Encoding.UTF32.CodePage || codePage == Encoding.UTF7.CodePage || codePage == Encoding.UTF8.CodePage) { m_isUnicodeEncoding = true; } // StreamReader has a perf optimization if we ask for at least as many chars // as the internal byte buffer could produce if it were fully filled. // Their internal byte buffer is 1024 bytes. m_bufferSize = m_streamReader.CurrentEncoding.GetMaxCharCount(1024); m_charBuffer = new char[m_bufferSize]; m_stringBuilder = new StringBuilder(80); } //********************************************************************* /// /// Fills up the internal buffer with characters from the stream. /// Returns the number of characters in the buffer and resets the /// current buffer position. /// Returns 0 when EOF. /// //********************************************************************* private int FillBuffer() { m_currBufferPos = 0; m_numCharsInBuffer = m_streamReader.Read(m_charBuffer, 0, m_bufferSize); return m_numCharsInBuffer; } //********************************************************************* /// /// Gets the next line token as a string from the stream. /// /// The next token. Returns null when the end of stream has /// been reached. //********************************************************************* public string NextLineToken(out EndOfLineTerminator endOfLine) { // The structure of this code is borrowed from StreamReader.ReadLine() // Except that it has been extended out to support Unicode EOL characters. m_stringBuilder.Length = 0; endOfLine = EndOfLineTerminator.None; if (m_currBufferPos == m_numCharsInBuffer && FillBuffer() == 0) { // If the buffer was empty, and we couldn't refill it, we're done. return null; } do { int index = m_currBufferPos; do { char ch = m_charBuffer[index]; if (ch == '\r' || ch == '\n' || (m_isUnicodeEncoding && (ch == LineSeparator || ch == ParagraphSeparator || ch == NextLine))) { switch ((int)ch) { case '\r': endOfLine = EndOfLineTerminator.CarriageReturn; break; case '\n': endOfLine = EndOfLineTerminator.LineFeed; break; case LineSeparator: endOfLine = EndOfLineTerminator.LineSeparator; break; case ParagraphSeparator: endOfLine = EndOfLineTerminator.ParagraphSeparator; break; case NextLine: endOfLine = EndOfLineTerminator.NextLine; break; } String line; if (m_stringBuilder.Length > 0) { m_stringBuilder.Append(m_charBuffer, m_currBufferPos, index - m_currBufferPos); line = m_stringBuilder.ToString(); } else { line = new String(m_charBuffer, m_currBufferPos, index - m_currBufferPos); } m_currBufferPos = index + 1; if (ch == '\r' && (m_currBufferPos < m_numCharsInBuffer || FillBuffer() > 0)) { if (m_charBuffer[m_currBufferPos] == '\n') { endOfLine = EndOfLineTerminator.CarriageReturnLineFeed; m_currBufferPos++; } } return line; } index++; } while (index < m_numCharsInBuffer); m_stringBuilder.Append(m_charBuffer, m_currBufferPos, m_numCharsInBuffer - m_currBufferPos); } while (FillBuffer() > 0); return m_stringBuilder.ToString(); } // Misc Unicode EOL characters. private const int LineSeparator = 0x2028; private const int ParagraphSeparator = 0x2029; private const int NextLine = 0x85; // Member variables private bool m_isUnicodeEncoding; private int m_bufferSize; private int m_numCharsInBuffer; private int m_currBufferPos; private char[] m_charBuffer; private StringBuilder m_stringBuilder; private StreamReader m_streamReader; } //************************************************************************* /// /// A utility class which helps to create the set of DiffChanges from /// a difference operation. This class accepts original DiffElements and /// modified DiffElements that are involved in a particular change. The /// MarktNextChange() method can be called to mark the seration between /// distinct changes. At the end, the Changes property can be called to retrieve /// the constructed changes. /// //************************************************************************* internal sealed class DiffChangeHelper : IDisposable { //********************************************************************* /// /// Constructs a new DiffChangeHelper for the given DiffSequences. /// /// The original sequence. /// The modified sequence. //********************************************************************* public DiffChangeHelper() { m_changes = new List(); m_originalStart = Int32.MaxValue; m_modifiedStart = Int32.MaxValue; } //********************************************************************* /// /// Disposes of the resources used by this helper class. /// //********************************************************************* public void Dispose() { if (m_changes != null) { m_changes = null; } GC.SuppressFinalize(this); } //********************************************************************* /// /// Marks the beginning of the next change in the set of differences. /// //********************************************************************* public void MarkNextChange() { // Only add to the list if there is something to add if (m_originalCount > 0 || m_modifiedCount > 0) { // Add the new change to our list m_changes.Add(new DiffChange(m_originalStart, m_originalCount, m_modifiedStart, m_modifiedCount)); } // Reset for the next change m_originalCount = 0; m_modifiedCount = 0; m_originalStart = Int32.MaxValue; m_modifiedStart = Int32.MaxValue; } //********************************************************************* /// /// Adds the original element at the given position to the elements /// affected by the current change. The modified index gives context /// to the change position with respect to the original sequence. /// /// The index of the original element to add. /// The index of the modified element that /// provides corresponding position in the modified sequence. //********************************************************************* public void AddOriginalElement(int originalIndex, int modifiedIndex) { // The 'true' start index is the smallest of the ones we've seen m_originalStart = Math.Min(m_originalStart, originalIndex); m_modifiedStart = Math.Min(m_modifiedStart, modifiedIndex); m_originalCount++; } //********************************************************************* /// /// Adds the modified element at the given position to the elements /// affected by the current change. The original index gives context /// to the change position with respect to the modified sequence. /// /// The index of the original element that /// provides corresponding position in the original sequence. /// The index of the modified element to add. //********************************************************************* public void AddModifiedElement(int originalIndex, int modifiedIndex) { // The 'true' start index is the smallest of the ones we've seen m_originalStart = Math.Min(m_originalStart, originalIndex); m_modifiedStart = Math.Min(m_modifiedStart, modifiedIndex); m_modifiedCount++; } //********************************************************************* /// /// Retrieves all of the changes marked by the class. /// //********************************************************************* public IDiffChange[] Changes { get { if (m_originalCount > 0 || m_modifiedCount > 0) { // Finish up on whatever is left MarkNextChange(); } return m_changes.ToArray(); } } //********************************************************************* /// /// Retrieves all of the changes marked by the class in the reverse order /// //********************************************************************* public IDiffChange[] ReverseChanges { get { if (m_originalCount > 0 || m_modifiedCount > 0) { // Finish up on whatever is left MarkNextChange(); } m_changes.Reverse(); return m_changes.ToArray(); } } // Member variables private int m_originalCount; private int m_modifiedCount; private List m_changes; private int m_originalStart; private int m_modifiedStart; } //************************************************************************* /// /// A base for classes which compute the differences between two input sequences. /// //************************************************************************* #pragma warning disable CA1063 // Implement IDisposable Correctly public abstract class DiffFinder : IDisposable { //************************************************************************* /// /// The original sequence /// //************************************************************************* protected IList OriginalSequence { get { return m_original; } } //************************************************************************* /// /// The modified sequence /// //************************************************************************* protected IList ModifiedSequence { get { return m_modified; } } //************************************************************************* /// /// The element comparer /// //************************************************************************* protected IEqualityComparer ElementComparer { get { return m_elementComparer; } } //************************************************************************* /// /// Disposes resources used by this DiffFinder /// //************************************************************************* public virtual void Dispose() #pragma warning restore CA1063 // Implement IDisposable Correctly { if (m_originalIds != null) { m_originalIds = null; } if (m_modifiedIds != null) { m_modifiedIds = null; } GC.SuppressFinalize(this); } //********************************************************************* /// /// Returns true if the specified original and modified elements are equal. /// /// The index of the original element /// The index of the modified element /// True if the specified elements are equal //********************************************************************* protected bool ElementsAreEqual(int originalIndex, int modifiedIndex) { return ElementsAreEqual(originalIndex, true, modifiedIndex, false); } //********************************************************************* /// /// Returns true if the two specified original elements are equal. /// /// The index of the first original element /// The index of the second original element /// True if the specified elements are equal //********************************************************************* protected bool OriginalElementsAreEqual(int firstIndex, int secondIndex) { return ElementsAreEqual(firstIndex, true, secondIndex, true); } //********************************************************************* /// /// Returns true if the two specified modified elements are equal. /// /// The index of the first modified element /// The index of the second modified element /// True if the specified elements are equal //********************************************************************* protected bool ModifiedElementsAreEqual(int firstIndex, int secondIndex) { return ElementsAreEqual(firstIndex, false, secondIndex, false); } //********************************************************************* /// /// Returns true if the specified elements are equal. /// /// The index of the first element /// True if the first element is an original /// element, false if modified element /// The index of the second element /// True if the second element is an original /// element, false if modified element /// True if the specified elements are equal //********************************************************************* private bool ElementsAreEqual(int firstIndex, bool firstIsOriginal, int secondIndex, bool secondIsOriginal) { int firstId = firstIsOriginal ? m_originalIds[firstIndex] : m_modifiedIds[firstIndex]; int secondId = secondIsOriginal ? m_originalIds[secondIndex] : m_modifiedIds[secondIndex]; if (firstId != 0 && secondId != 0) { return firstId == secondId; } else { T firstElement = firstIsOriginal ? OriginalSequence[firstIndex] : ModifiedSequence[firstIndex]; T secondElement = secondIsOriginal ? OriginalSequence[secondIndex] : ModifiedSequence[secondIndex]; return ElementComparer.Equals(firstElement, secondElement); } } //************************************************************************* /// /// This method hashes element groups within the specified range /// and assigns unique identifiers to identical elements. /// /// This greatly speeds up element comparison as we may compare integers /// instead of having to look at element content. /// //************************************************************************* private void ComputeUniqueIdentifiers(int originalStart, int originalEnd, int modifiedStart, int modifiedEnd) { Debug.Assert(originalStart >= 0 && originalEnd < OriginalSequence.Count, "Original range is invalid"); Debug.Assert(modifiedStart >= 0 && modifiedEnd < ModifiedSequence.Count, "Modified range is invalid"); // Create a new hash table for unique elements from the original // sequence. Dictionary hashTable = new Dictionary(OriginalSequence.Count + ModifiedSequence.Count, ElementComparer); int currentUniqueId = 1; // Fill up the hash table for unique elements for (int i = originalStart; i <= originalEnd; i++) { T originalElement = OriginalSequence[i]; if (!hashTable.TryGetValue(originalElement, out m_originalIds[i])) { // No entry in the hashtable so this is a new unique element. // Assign the element a new unique identifier and add it to the // hash table m_originalIds[i] = currentUniqueId++; hashTable.Add(originalElement, m_originalIds[i]); } } // Now match up modified elements for (int i = modifiedStart; i <= modifiedEnd; i++) { T modifiedElement = ModifiedSequence[i]; if (!hashTable.TryGetValue(modifiedElement, out m_modifiedIds[i])) { m_modifiedIds[i] = currentUniqueId++; hashTable.Add(modifiedElement, m_modifiedIds[i]); } } } //************************************************************************* /// /// Computes the differences between the given original and modified sequences. /// /// The original sequence. /// The modified sequence. /// The diff element comparer. /// The set of differences between the sequences. //************************************************************************* public IDiffChange[] Diff(IList original, IList modified, IEqualityComparer elementComparer) { return Diff(original, modified, elementComparer, null); } //************************************************************************* /// /// Computes the differences between the given original and modified sequences. /// /// The original sequence. /// The modified sequence. /// The diff element comparer. /// The set of differences between the sequences. //************************************************************************* public IDiffChange[] Diff(IList original, IList modified, IEqualityComparer elementComparer, ContinueDifferencePredicate predicate) { Debug.Assert(original != null, "original is null"); Debug.Assert(modified != null, "modified is null"); Debug.Assert(elementComparer != null, "elementComparer is null"); m_original = original; m_modified = modified; m_elementComparer = elementComparer; m_predicate = predicate; m_originalIds = new int[OriginalSequence.Count]; m_modifiedIds = new int[ModifiedSequence.Count]; int originalStart = 0; int originalEnd = OriginalSequence.Count - 1; int modifiedStart = 0; int modifiedEnd = ModifiedSequence.Count - 1; // Find the start of the differences while (originalStart <= originalEnd && modifiedStart <= modifiedEnd && ElementsAreEqual(originalStart, modifiedStart)) { originalStart++; modifiedStart++; } // Find the end of the differences while (originalEnd >= originalStart && modifiedEnd >= modifiedStart && ElementsAreEqual(originalEnd, modifiedEnd)) { originalEnd--; modifiedEnd--; } // In the very special case where one of the ranges is negative // Either the sequences are identical, or there is exactly one insertion // or there is exactly one deletion. We have no need to compute the diffs. if (originalStart > originalEnd || modifiedStart > modifiedEnd) { IDiffChange[] changes; if (modifiedStart <= modifiedEnd) { Debug.Assert(originalStart == originalEnd + 1, "originalStart should only be one more than originalEnd"); // All insertions changes = new IDiffChange[1]; changes[0] = new DiffChange(originalStart, 0, modifiedStart, modifiedEnd - modifiedStart + 1); } else if (originalStart <= originalEnd) { Debug.Assert(modifiedStart == modifiedEnd + 1, "modifiedStart should only be one more than modifiedEnd"); // All deletions changes = new IDiffChange[1]; changes[0] = new DiffChange(originalStart, originalEnd - originalStart + 1, modifiedStart, 0); } else { Debug.Assert(originalStart == originalEnd + 1, "originalStart should only be one more than originalEnd"); Debug.Assert(modifiedStart == modifiedEnd + 1, "modifiedStart should only be one more than modifiedEnd"); // Identical sequences - No differences changes = Array.Empty(); } return changes; } // Now that we have our bounds, calculate unique ids for all // IDiffElements in the bounded range. That way, we speed up // element comparisons during the diff computation. ComputeUniqueIdentifiers(originalStart, originalEnd, modifiedStart, modifiedEnd); // Compute the diffences on the bounded range return ComputeDiff(originalStart, originalEnd, modifiedStart, modifiedEnd); } //************************************************************************* /// /// Computes the differences between the original and modified input /// sequences on the bounded range. /// /// An array of the differences between the two input /// sequences. //************************************************************************* protected abstract IDiffChange[] ComputeDiff(int originalStart, int originalEnd, int modifiedStart, int modifiedEnd); //************************************************************************* /// /// Gets the LCS Diff implementation. /// //************************************************************************* [SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "Lcs")] #pragma warning disable CA1000 // Do not declare static members on generic types public static DiffFinder LcsDiff #pragma warning restore CA1000 // Do not declare static members on generic types { get { return new LcsDiff(); } } protected ContinueDifferencePredicate ContinueDifferencePredicate { get { return m_predicate; } } // Member variables private IList m_original; private IList m_modified; private int[] m_originalIds; private int[] m_modifiedIds; private IEqualityComparer m_elementComparer; //Early termination predicate private ContinueDifferencePredicate m_predicate; } }