From b1ce24d504d19a88159aa82d84e8da2746ee96b0 Mon Sep 17 00:00:00 2001 From: Kenneth Skovhede Date: Wed, 2 Apr 2014 11:02:06 +0200 Subject: Added hardlink detection on Linux/OSX --- Duplicati/Library/Main/Operation/BackupHandler.cs | 49 ++++++++++++++++++-- .../Library/Main/Operation/TestFilterHandler.cs | 2 +- Duplicati/Library/Main/Options.cs | 54 +++++++++++++++++++--- Duplicati/Library/Main/Strings/Options.Designer.cs | 12 +++++ Duplicati/Library/Main/Strings/Options.resx | 7 +++ Duplicati/Library/Snapshots/ISnapshotService.cs | 7 +++ Duplicati/Library/Snapshots/LinuxSnapshot.cs | 16 +++++++ Duplicati/Library/Snapshots/NoSnapshot.cs | 7 +++ Duplicati/Library/Snapshots/NoSnapshotLinux.cs | 16 ++++++- Duplicati/Library/Snapshots/NoSnapshotWindows.cs | 10 ++++ Duplicati/Library/Snapshots/WindowsSnapshot.cs | 10 ++++ 11 files changed, 178 insertions(+), 12 deletions(-) (limited to 'Duplicati/Library') diff --git a/Duplicati/Library/Main/Operation/BackupHandler.cs b/Duplicati/Library/Main/Operation/BackupHandler.cs index 778b81253..f7126c16b 100644 --- a/Duplicati/Library/Main/Operation/BackupHandler.cs +++ b/Duplicati/Library/Main/Operation/BackupHandler.cs @@ -84,15 +84,19 @@ namespace Duplicati.Library.Main.Operation private FileAttributes m_attributeFilter; private Duplicati.Library.Utility.IFilter m_filter; private Options.SymlinkStrategy m_symlinkPolicy; + private Options.HardlinkStrategy m_hardlinkPolicy; private ILogWriter m_logWriter; + private Dictionary m_hardlinkmap; - public FilterHandler(Snapshots.ISnapshotService snapshot,FileAttributes attributeFilter, Duplicati.Library.Utility.IFilter filter, Options.SymlinkStrategy symlinkPolicy, ILogWriter logWriter) + public FilterHandler(Snapshots.ISnapshotService snapshot,FileAttributes attributeFilter, Duplicati.Library.Utility.IFilter filter, Options.SymlinkStrategy symlinkPolicy, Options.HardlinkStrategy hardlinkPolicy, ILogWriter logWriter) { m_snapshot = snapshot; m_attributeFilter = attributeFilter; m_filter = filter; m_symlinkPolicy = symlinkPolicy; + m_hardlinkPolicy = hardlinkPolicy; m_logWriter = logWriter; + m_hardlinkmap = new Dictionary(); } public bool AttributeFilter(string rootpath, string path, FileAttributes attributes) @@ -111,7 +115,44 @@ namespace Duplicati.Library.Main.Operation if (m_logWriter != null) m_logWriter.AddWarning(string.Format("Failed to process path: {0}", path), ex); return false; - } + } + + if (m_hardlinkPolicy != Options.HardlinkStrategy.All) + { + try + { + var id = m_snapshot.HardlinkTargetID(path); + if (id != null) + { + if (m_hardlinkPolicy == Options.HardlinkStrategy.None) + { + if (m_logWriter != null) + m_logWriter.AddVerboseMessage("Excluding hardlink: {0} ({1})", path, id); + return false; + } + else if (m_hardlinkPolicy == Options.HardlinkStrategy.First) + { + string prevPath; + if (m_hardlinkmap.TryGetValue(id, out prevPath)) + { + if (m_logWriter != null) + m_logWriter.AddVerboseMessage("Excluding hardlink ({1}) for: {0}, previous hardlink: {2}", path, id, prevPath); + return false; + } + else + { + m_hardlinkmap.Add(id, path); + } + } + } + } + catch (Exception ex) + { + if (m_logWriter != null) + m_logWriter.AddWarning(string.Format("Failed to process path: {0}", path), ex); + return false; + } + } if ((m_attributeFilter & attributes) != 0) { @@ -174,7 +215,7 @@ namespace Duplicati.Library.Main.Operation var size = 0L; var followSymlinks = m_options.SymlinkPolicy != Duplicati.Library.Main.Options.SymlinkStrategy.Follow; - foreach(var path in m_snapshot.EnumerateFilesAndFolders(new FilterHandler(m_snapshot, m_attributeFilter, m_filter, m_symlinkPolicy, null).AttributeFilter)) + foreach(var path in m_snapshot.EnumerateFilesAndFolders(new FilterHandler(m_snapshot, m_attributeFilter, m_filter, m_symlinkPolicy, m_options.HardlinkPolicy, null).AttributeFilter)) { var fa = FileAttributes.Normal; try { fa = m_snapshot.GetAttributes(path); } @@ -345,7 +386,7 @@ namespace Duplicati.Library.Main.Operation m_indexvolume.VolumeID = m_database.RegisterRemoteVolume(m_indexvolume.RemoteFilename, RemoteVolumeType.Index, RemoteVolumeState.Temporary, m_transaction); } - var filterhandler = new FilterHandler(m_snapshot, m_attributeFilter, m_filter, m_symlinkPolicy, m_result); + var filterhandler = new FilterHandler(m_snapshot, m_attributeFilter, m_filter, m_symlinkPolicy, m_options.HardlinkPolicy, m_result); using(new Logging.Timer("BackupMainOperation")) { diff --git a/Duplicati/Library/Main/Operation/TestFilterHandler.cs b/Duplicati/Library/Main/Operation/TestFilterHandler.cs index 749a18d1a..7c3b8799b 100644 --- a/Duplicati/Library/Main/Operation/TestFilterHandler.cs +++ b/Duplicati/Library/Main/Operation/TestFilterHandler.cs @@ -37,7 +37,7 @@ namespace Duplicati.Library.Main.Operation using(var snapshot = BackupHandler.GetSnapshot(sources, m_options, m_result)) { - foreach(var path in snapshot.EnumerateFilesAndFolders(new BackupHandler.FilterHandler(snapshot, m_options.FileAttributeFilter, filter, m_options.SymlinkPolicy, m_result).AttributeFilter)) + foreach(var path in snapshot.EnumerateFilesAndFolders(new BackupHandler.FilterHandler(snapshot, m_options.FileAttributeFilter, filter, m_options.SymlinkPolicy, m_options.HardlinkPolicy, m_result).AttributeFilter)) { var fa = FileAttributes.Normal; try diff --git a/Duplicati/Library/Main/Options.cs b/Duplicati/Library/Main/Options.cs index 6b7793e62..8f9964a81 100644 --- a/Duplicati/Library/Main/Options.cs +++ b/Duplicati/Library/Main/Options.cs @@ -111,6 +111,27 @@ namespace Duplicati.Library.Main /// Ignore } + + /// + /// The possible settings for the hardlink strategy + /// + public enum HardlinkStrategy + { + /// + /// Process only the first hardlink + /// + First, + + /// + /// Process all hardlinks + /// + All, + + /// + /// Ignore all hardlinks + /// + None + } /// /// The possible settings for index file usage @@ -218,6 +239,7 @@ namespace Duplicati.Library.Main "vss-use-mapping", "usn-policy", "symlink-policy", + "hardlink-policy", "exclude-files-attributes", "compression-extension-file", "full-remote-verification" @@ -425,7 +447,8 @@ namespace Duplicati.Library.Main new CommandLineArgument("quota-size", CommandLineArgument.ArgumentType.Size, Strings.Options.QuotasizeShort, Strings.Options.QuotasizeLong), - new CommandLineArgument("symlink-policy", CommandLineArgument.ArgumentType.Enumeration, Strings.Options.SymlinkpolicyShort, string.Format(Strings.Options.SymlinkpolicyLong, "store", "ignore", "follow"), "store", null, Enum.GetNames(typeof(SymlinkStrategy))), + new CommandLineArgument("symlink-policy", CommandLineArgument.ArgumentType.Enumeration, Strings.Options.SymlinkpolicyShort, string.Format(Strings.Options.SymlinkpolicyLong, "store", "ignore", "follow"), Enum.GetName(typeof(SymlinkStrategy), SymlinkStrategy.Store), null, Enum.GetNames(typeof(SymlinkStrategy))), + new CommandLineArgument("hardlink-policy", CommandLineArgument.ArgumentType.Enumeration, Strings.Options.HardlinkpolicyShort, string.Format(Strings.Options.HardlinkpolicyLong, "first", "all", "none"), Enum.GetName(typeof(HardlinkStrategy), HardlinkStrategy.All), null, Enum.GetNames(typeof(HardlinkStrategy))), new CommandLineArgument("exclude-files-attributes", CommandLineArgument.ArgumentType.String, Strings.Options.ExcludefilesattributesShort, string.Format(Strings.Options.ExcludefilesattributesLong, string.Join(", ", Enum.GetNames(typeof(System.IO.FileAttributes))))), new CommandLineArgument("backup-name", CommandLineArgument.ArgumentType.String, Strings.Options.BackupnameShort, Strings.Options.BackupnameLong, DefaultBackupName), new CommandLineArgument("compression-extension-file", CommandLineArgument.ArgumentType.Path, Strings.Options.CompressionextensionfileShort, string.Format(Strings.Options.CompressionextensionfileLong, DEFAULT_COMPRESSED_EXTENSION_FILE), DEFAULT_COMPRESSED_EXTENSION_FILE), @@ -1002,23 +1025,42 @@ namespace Duplicati.Library.Main } /// - /// Gets the snapshot strategy to use + /// Gets the symlink strategy to use /// public SymlinkStrategy SymlinkPolicy { get { - string strategy; - if (!m_options.TryGetValue("symlink-policy", out strategy)) - strategy = ""; + string policy; + if (!m_options.TryGetValue("symlink-policy", out policy)) + policy = ""; SymlinkStrategy r; - if (!Enum.TryParse(strategy, true, out r)) + if (!Enum.TryParse(policy, true, out r)) r = SymlinkStrategy.Store; return r; } } + + /// + /// Gets the hardlink strategy to use + /// + public HardlinkStrategy HardlinkPolicy + { + get + { + string policy; + if (!m_options.TryGetValue("hardlink-policy", out policy)) + policy = ""; + + HardlinkStrategy r; + if (!Enum.TryParse(policy, true, out r)) + r = HardlinkStrategy.All; + + return r; + } + } /// /// Gets the snapshot strategy to use /// diff --git a/Duplicati/Library/Main/Strings/Options.Designer.cs b/Duplicati/Library/Main/Strings/Options.Designer.cs index fe874e4ca..bb826c41f 100644 --- a/Duplicati/Library/Main/Strings/Options.Designer.cs +++ b/Duplicati/Library/Main/Strings/Options.Designer.cs @@ -826,6 +826,12 @@ namespace Duplicati.Library.Main.Strings { } } + internal static string HardlinkpolicyLong { + get { + return ResourceManager.GetString("HardlinkpolicyLong", resourceCulture); + } + } + internal static string DisableStreamingLong { get { return ResourceManager.GetString("DisableStreamingLong", resourceCulture); @@ -982,6 +988,12 @@ namespace Duplicati.Library.Main.Strings { } } + internal static string HardlinkpolicyShort { + get { + return ResourceManager.GetString("HardlinkpolicyShort", resourceCulture); + } + } + internal static string ListfoldercontentsShort { get { return ResourceManager.GetString("ListfoldercontentsShort", resourceCulture); diff --git a/Duplicati/Library/Main/Strings/Options.resx b/Duplicati/Library/Main/Strings/Options.resx index 590f0b94c..97eaa0c0f 100644 --- a/Duplicati/Library/Main/Strings/Options.resx +++ b/Duplicati/Library/Main/Strings/Options.resx @@ -390,6 +390,13 @@ Using this option to handle symlinks different. The "{0}" option will simply record a symlink with its name and destination, and a restore will recreate the symlink as a link. Use the option "{1}" to ignore all symlinks and not store any information about them. Previous versions of Duplicati used the setting "{2}", which will cause symlinked files to be included and restore as normal files. + + Hardlink handling + + + Using this option to handle hardlinks (only works on Linux/OSX). The "{0}" option will record a hardlink ID for each hardlink to avoid storing hardlinked paths multiple times. The option "{1}" will ignore hardlink information, and treat each hardlink as a unique path. The option "{2}" will ignore all hardlinks with more than one link. + + Exclude files by attribute diff --git a/Duplicati/Library/Snapshots/ISnapshotService.cs b/Duplicati/Library/Snapshots/ISnapshotService.cs index 9d0ab621d..94f9061f2 100644 --- a/Duplicati/Library/Snapshots/ISnapshotService.cs +++ b/Duplicati/Library/Snapshots/ISnapshotService.cs @@ -82,5 +82,12 @@ namespace Duplicati.Library.Snapshots /// true if this instance is a block device; otherwise, false. /// The file or folder to examine bool IsBlockDevice(string file); + + /// + /// Gets a unique hardlink target ID + /// + /// The hardlink ID + /// The file or folder to examine + string HardlinkTargetID(string path); } } diff --git a/Duplicati/Library/Snapshots/LinuxSnapshot.cs b/Duplicati/Library/Snapshots/LinuxSnapshot.cs index 5590cfb45..ebde7ca7a 100644 --- a/Duplicati/Library/Snapshots/LinuxSnapshot.cs +++ b/Duplicati/Library/Snapshots/LinuxSnapshot.cs @@ -484,6 +484,22 @@ namespace Duplicati.Library.Snapshots return true; } } + + /// + /// Gets a unique hardlink target ID + /// + /// The hardlink ID + /// The file or folder to examine + public string HardlinkTargetID(string path) + { + var local = ConvertToSnapshotPath(FindSnapShotByLocalPath(path), path); + local = local.EndsWith(DIR_SEP) ? local.Substring(0, local.Length - 1) : local; + + if (UnixSupport.File.GetHardlinkCount(local) <= 1) + return null; + + return UnixSupport.File.GetInodeTargetID(local); + } #endregion #region IDisposable Members diff --git a/Duplicati/Library/Snapshots/NoSnapshot.cs b/Duplicati/Library/Snapshots/NoSnapshot.cs index 1500e6cc1..b2408d462 100644 --- a/Duplicati/Library/Snapshots/NoSnapshot.cs +++ b/Duplicati/Library/Snapshots/NoSnapshot.cs @@ -169,6 +169,13 @@ namespace Duplicati.Library.Snapshots /// true if this instance is a block device; otherwise, false. /// The file or folder to examine public abstract bool IsBlockDevice(string file); + + /// + /// Gets a unique hardlink target ID + /// + /// The hardlink ID + /// The file or folder to examine + public abstract string HardlinkTargetID(string path); #endregion } } diff --git a/Duplicati/Library/Snapshots/NoSnapshotLinux.cs b/Duplicati/Library/Snapshots/NoSnapshotLinux.cs index bf2d19592..8e52040c6 100644 --- a/Duplicati/Library/Snapshots/NoSnapshotLinux.cs +++ b/Duplicati/Library/Snapshots/NoSnapshotLinux.cs @@ -85,7 +85,21 @@ namespace Duplicati.Library.Snapshots default: return true; } - } + } + + /// + /// Gets a unique hardlink target ID + /// + /// The hardlink ID + /// The file or folder to examine + public override string HardlinkTargetID(string path) + { + path = path.EndsWith(DIR_SEP) ? path.Substring(0, path.Length - 1) : path; + if (UnixSupport.File.GetHardlinkCount(path) <= 1) + return null; + + return UnixSupport.File.GetInodeTargetID(path); + } } } diff --git a/Duplicati/Library/Snapshots/NoSnapshotWindows.cs b/Duplicati/Library/Snapshots/NoSnapshotWindows.cs index 5bf9a98da..988e89a1b 100644 --- a/Duplicati/Library/Snapshots/NoSnapshotWindows.cs +++ b/Duplicati/Library/Snapshots/NoSnapshotWindows.cs @@ -154,6 +154,16 @@ namespace Duplicati.Library.Snapshots { return false; } + + /// + /// Gets a unique hardlink target ID + /// + /// The hardlink ID + /// The file or folder to examine + public override string HardlinkTargetID(string path) + { + return null; + } } } diff --git a/Duplicati/Library/Snapshots/WindowsSnapshot.cs b/Duplicati/Library/Snapshots/WindowsSnapshot.cs index 35d8be32a..7692f9189 100644 --- a/Duplicati/Library/Snapshots/WindowsSnapshot.cs +++ b/Duplicati/Library/Snapshots/WindowsSnapshot.cs @@ -383,6 +383,16 @@ namespace Duplicati.Library.Snapshots { return false; } + + /// + /// Gets a unique hardlink target ID + /// + /// The hardlink ID + /// The file or folder to examine + public string HardlinkTargetID(string path) + { + return null; + } #endregion #region IDisposable Members -- cgit v1.2.3