#region Disclaimer / License // Copyright (C) 2015, The Duplicati Team // http://www.duplicati.com, info@duplicati.com // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // #endregion using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Threading; using Duplicati.Library.Interface; using Duplicati.Library.Common.IO; using Duplicati.Library.Utility; namespace Duplicati.Library.Snapshots { public class UsnJournalService { private readonly ISnapshotService m_snapshot; private readonly IEnumerable m_sources; private readonly Dictionary m_volumeDataDict; private readonly CancellationToken m_token; /// /// Constructor. /// /// Sources to filter /// /// Emit filter /// /// /// Journal-data of previous fileset /// public UsnJournalService(IEnumerable sources, ISnapshotService snapshot, IFilter emitFilter, FileAttributes fileAttributeFilter, long skipFilesLargerThan, IEnumerable prevJournalData, CancellationToken token) { m_sources = sources; m_snapshot = snapshot; m_volumeDataDict = Initialize(emitFilter, fileAttributeFilter, skipFilesLargerThan, prevJournalData); m_token = token; } public IEnumerable VolumeDataList => m_volumeDataDict.Select(e => e.Value); /// /// Initialize list of modified files / folder for each volume /// /// /// /// /// /// private Dictionary Initialize(IFilter emitFilter, FileAttributes fileAttributeFilter, long skipFilesLargerThan, IEnumerable prevJournalData) { if (prevJournalData == null) throw new UsnJournalSoftFailureException(Strings.USNHelper.PreviousBackupNoInfo); var result = new Dictionary(); // get hash identifying current source filter / sources configuration var configHash = Utility.Utility.ByteArrayAsHexString(MD5HashHelper.GetHash(new string[] { emitFilter == null ? string.Empty : emitFilter.ToString(), string.Join("; ", m_sources), fileAttributeFilter.ToString(), skipFilesLargerThan.ToString() })); // create lookup for journal data var journalDataDict = prevJournalData.ToDictionary(data => data.Volume); // iterate over volumes foreach (var sourcesPerVolume in SortByVolume(m_sources)) { if (m_token.IsCancellationRequested) break; var volume = sourcesPerVolume.Key; var volumeSources = sourcesPerVolume.Value; var volumeData = new VolumeData { Volume = volume, JournalData = null }; result[volume] = volumeData; try { // prepare journal data entry to store with current fileset var journal = new USNJournal(volume); var nextData = new USNJournalDataEntry { Volume = volume, JournalId = journal.JournalId, NextUsn = journal.NextUsn, ConfigHash = configHash }; // add new data to result set volumeData.JournalData = nextData; // only use change journal if: // - journal ID hasn't changed // - nextUsn isn't zero (we use this as magic value to force a rescan) // - the configuration (sources or filters) hasn't changed if (!journalDataDict.TryGetValue(volume, out var prevData)) throw new UsnJournalSoftFailureException(Strings.USNHelper.PreviousBackupNoInfo); if (prevData.JournalId != nextData.JournalId) throw new UsnJournalSoftFailureException(Strings.USNHelper.JournalIdChanged); if (prevData.NextUsn == 0) throw new UsnJournalSoftFailureException(Strings.USNHelper.NextUsnZero); if (prevData.ConfigHash != nextData.ConfigHash) throw new UsnJournalSoftFailureException(Strings.USNHelper.ConfigHashChanged); var changedFiles = new HashSet(Utility.Utility.ClientFilenameStringComparer); var changedFolders = new HashSet(Utility.Utility.ClientFilenameStringComparer); // obtain changed files and folders, per volume foreach (var source in volumeSources) { if (m_token.IsCancellationRequested) break; foreach (var entry in journal.GetChangedFileSystemEntries(source, prevData.NextUsn)) { if (m_token.IsCancellationRequested) break; if (entry.Item2.HasFlag(USNJournal.EntryType.File)) { changedFiles.Add(entry.Item1); } else { changedFolders.Add(Util.AppendDirSeparator(entry.Item1)); } } } // At this point we have: // - a list of folders (changedFolders) that were possibly modified // - a list of files (changedFiles) that were possibly modified // // With this, we need still need to do the following: // // 1. Simplify the folder list, such that it only contains the parent-most entries // (eg. { "C:\A\B\", "C:\A\B\C\", "C:\A\B\D\E\" } => { "C:\A\B\" } volumeData.Folders = Utility.Utility.SimplifyFolderList(changedFolders).ToList(); // 2. Our list of files may contain entries inside one of the simplified folders (from step 1., above). // Since that folder is going to be fully scanned, those files can be removed. // Note: it would be wrong to use the result from step 2. as the folder list! The entries removed // between 1. and 2. are *excluded* folders, and files below them are to be *excluded*, too. volumeData.Files = new HashSet(Utility.Utility.GetFilesNotInFolders(changedFiles, volumeData.Folders)); // Record success for volume volumeData.IsFullScan = false; } catch (Exception e) { // full scan is required this time (eg. due to missing journal entries) volumeData.Exception = e; volumeData.IsFullScan = true; volumeData.Folders = new List(); volumeData.Files = new HashSet(); // use original sources foreach (var path in volumeSources) { var isFolder = path.EndsWith(Util.DirectorySeparatorString, StringComparison.Ordinal); if (isFolder) { volumeData.Folders.Add(path); } else { volumeData.Files.Add(path); } } } } return result; } /// /// Filters sources, returning sub-set having been modified since last /// change, as specified by journalData. /// /// Filter callback to exclude filtered items /// Filtered sources public IEnumerable GetModifiedSources(Utility.Utility.EnumerationFilterDelegate filter) { // iterate over volumes foreach (var volumeData in m_volumeDataDict) { // prepare cache for includes (value = true) and excludes (value = false, will be populated // on-demand) var cache = new Dictionary(); foreach (var source in m_sources) { if (m_token.IsCancellationRequested) { break; } cache[source] = true; } // Check the simplified folders, and their parent folders against the exclusion filter. // This is needed because the filter may exclude "C:\A\", but this won't match the more // specific "C:\A\B\" in our list, even though it's meant to be excluded. // The reason why the filter doesn't exclude it is because during a regular (non-USN) full scan, // FilterHandler.EnumerateFilesAndFolders() works top-down, and won't even enumerate child // folders. // The sources are needed to stop evaluating parent folders above the specified source folders if (volumeData.Value.Folders != null) { foreach (var folder in FilterExcludedFolders(volumeData.Value.Folders, filter, cache).Where(m_snapshot.DirectoryExists)) { if (m_token.IsCancellationRequested) { break; } yield return folder; } } // The simplified file list also needs to be checked against the exclusion filter, as it // may contain entries excluded due to attributes, but also because they are below excluded // folders, which themselves aren't in the folder list from step 1. // Note that the simplified file list may contain entries that have been deleted! They need to // be kept in the list (unless excluded by the filter) in order for the backup handler to record their // deletion. if (volumeData.Value.Files != null) { foreach (var files in FilterExcludedFiles(volumeData.Value.Files, filter, cache).Where(m_snapshot.FileExists)) { if (m_token.IsCancellationRequested) { break; } yield return files; } } } } /// /// Filter supplied files, removing any files which itself, or one /// of its parent folders, is excluded by the filter. /// /// Files to filter /// Exclusion filter /// Cache of included and excluded files / folders /// /// Filtered files private IEnumerable FilterExcludedFiles(IEnumerable files, Utility.Utility.EnumerationFilterDelegate filter, IDictionary cache, Utility.Utility.ReportAccessError errorCallback = null) { var result = new List(); foreach (var file in files) { if (m_token.IsCancellationRequested) { break; } var attr = m_snapshot.FileExists(file) ? m_snapshot.GetAttributes(file) : FileAttributes.Normal; try { if (!filter(file, file, attr)) continue; if (!IsFolderOrAncestorsExcluded(Utility.Utility.GetParent(file, true), filter, cache)) { result.Add(file); } } catch (System.Threading.ThreadAbortException) { throw; } catch (Exception ex) { errorCallback?.Invoke(file, file, ex); filter(file, file, attr | Utility.Utility.ATTRIBUTE_ERROR); } } return result; } /// /// Filter supplied folders, removing any folder which itself, or one /// of its ancestors, is excluded by the filter. /// /// Folder to filter /// Exclusion filter /// Cache of excluded folders (optional) /// /// Filtered folders private IEnumerable FilterExcludedFolders(IEnumerable folders, Utility.Utility.EnumerationFilterDelegate filter, IDictionary cache, Utility.Utility.ReportAccessError errorCallback = null) { var result = new List(); foreach (var folder in folders) { if (m_token.IsCancellationRequested) { break; } try { if (!IsFolderOrAncestorsExcluded(folder, filter, cache)) { result.Add(folder); } } catch (System.Threading.ThreadAbortException) { throw; } catch (Exception ex) { errorCallback?.Invoke(folder, folder, ex); filter(folder, folder, FileAttributes.Directory | Utility.Utility.ATTRIBUTE_ERROR); } } return result; } /// /// Tests if specified folder, or any of its ancestors, is excluded by the filter /// /// Folder to test /// Filter /// Cache of excluded folders (optional) /// True if excluded, false otherwise private bool IsFolderOrAncestorsExcluded(string folder, Utility.Utility.EnumerationFilterDelegate filter, IDictionary cache) { List parents = null; while (folder != null) { if (m_token.IsCancellationRequested) { break; } // first check cache if (cache.TryGetValue(folder, out var include)) { if (include) return false; break; // hit! } // remember folder for cache if (parents == null) { parents = new List(); // create on-demand } parents.Add(folder); var attr = m_snapshot.DirectoryExists(folder) ? m_snapshot.GetAttributes(folder) : FileAttributes.Directory; if (!filter(folder, folder, attr)) break; // excluded folder = Utility.Utility.GetParent(folder, true); } if (folder != null) { // update cache parents?.ForEach(p => cache[p] = false); } return folder != null; } /// /// Sort sources by root volume /// /// List of sources /// Dictionary of volumes, with list of sources as values private static Dictionary> SortByVolume(IEnumerable sources) { var sourcesByVolume = new Dictionary>(); foreach (var path in sources) { // get NTFS volume root var volumeRoot = USNJournal.GetVolumeRootFromPath(path); if (!sourcesByVolume.TryGetValue(volumeRoot, out var list)) { list = new List(); sourcesByVolume.Add(volumeRoot, list); } list.Add(path); } return sourcesByVolume; } /// /// Returns true if path was enumerated by journal service /// /// /// public bool IsPathEnumerated(string path) { // get NTFS volume root var volumeRoot = USNJournal.GetVolumeRootFromPath(path); // get volume data if (!m_volumeDataDict.TryGetValue(volumeRoot, out var volumeData)) return false; if (volumeData.IsFullScan) return true; // do not append from previous set, already scanned if (volumeData.Files.Contains(path)) return true; // do not append from previous set, already scanned foreach (var folder in volumeData.Folders) { if (m_token.IsCancellationRequested) { break; } if (path.Equals(folder, Utility.Utility.ClientFilenameStringComparison)) return true; // do not append from previous set, already scanned if (Utility.Utility.IsPathBelowFolder(path, folder)) return true; // do not append from previous set, already scanned } return false; // append from previous set } } /// /// Filtered sources /// public class VolumeData { /// /// Volume /// public string Volume { get; set; } /// /// Set of potentially modified files /// public HashSet Files { get; internal set; } /// /// Set of folders that are potentially modified, or whose children /// are potentially modified /// public List Folders { get; internal set; } /// /// Journal data to use for next backup /// public USNJournalDataEntry JournalData { get; internal set; } /// /// If true, a full scan for this volume was required /// public bool IsFullScan { get; internal set; } /// /// Optional exception message for volume /// public Exception Exception { get; internal set; } } }