From 9a29218ca6d26c9320a393d0c990778e7eb6981f Mon Sep 17 00:00:00 2001 From: Aron Griffis Date: Wed, 22 Aug 2018 20:18:48 -0400 Subject: walkdir: add option to stay on same file system This commit includes a new method, `same_file_system`, which when enabled, will cause walkdir to only descend into directories that are on the same file system as the root path. Closes #8, Closes #107 --- Cargo.toml | 2 +- examples/walkdir.rs | 100 +++++++++++++++++++++++++++------------------------- src/lib.rs | 86 ++++++++++++++++++++++++++++++++++++++++---- src/tests.rs | 37 +++++++++++++++++++ src/windows.rs | 34 ++++++++++++++++++ 5 files changed, 203 insertions(+), 56 deletions(-) create mode 100644 src/windows.rs diff --git a/Cargo.toml b/Cargo.toml index c2ab187..f4acb33 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ same-file = "1" [target.'cfg(windows)'.dependencies.winapi] version = "0.3" -features = ["std", "winnt"] +features = ["std", "fileapi", "winbase", "winnt"] [dev-dependencies] docopt = "1" diff --git a/examples/walkdir.rs b/examples/walkdir.rs index 5d5fa06..658b317 100644 --- a/examples/walkdir.rs +++ b/examples/walkdir.rs @@ -11,24 +11,25 @@ use walkdir::WalkDir; const USAGE: &'static str = " Usage: - walkdir [options] [] + walkdir [options] [ ...] Options: -h, --help - -L, --follow-links Follow symlinks. - --min-depth NUM Minimum depth. - --max-depth NUM Maximum depth. - -n, --fd-max NUM Maximum open file descriptors. [default: 32] - --tree Show output as a tree. - --sort Sort the output. - -q, --ignore-errors Ignore errors. - -d, --depth Show directory's contents before the directory itself. + -L, --follow-links Follow symlinks. + --min-depth NUM Minimum depth. + --max-depth NUM Maximum depth. + -n, --fd-max NUM Maximum open file descriptors. [default: 32] + --tree Show output as a tree. + --sort Sort the output. + -q, --ignore-errors Ignore errors. + -d, --depth Show directory's contents before the directory itself. + -x, --same-file-system Stay on the same file system. "; #[derive(Debug, Deserialize)] #[allow(dead_code)] struct Args { - arg_dir: Option, + arg_dir: Option>, flag_follow_links: bool, flag_min_depth: Option, flag_max_depth: Option, @@ -37,6 +38,7 @@ struct Args { flag_ignore_errors: bool, flag_sort: bool, flag_depth: bool, + flag_same_file_system: bool, } macro_rules! wout { ($($tt:tt)*) => { {writeln!($($tt)*)}.unwrap() } } @@ -47,47 +49,49 @@ fn main() { .unwrap_or_else(|e| e.exit()); let mind = args.flag_min_depth.unwrap_or(0); let maxd = args.flag_max_depth.unwrap_or(::std::usize::MAX); - let dir = args.arg_dir.clone().unwrap_or(".".to_owned()); - let mut walkdir = WalkDir::new(dir) - .max_open(args.flag_fd_max) - .follow_links(args.flag_follow_links) - .min_depth(mind) - .max_depth(maxd); - if args.flag_sort { - walkdir = walkdir.sort_by(|a,b| a.file_name().cmp(b.file_name())); - } - if args.flag_depth { - walkdir = walkdir.contents_first(true) - } - let it = walkdir.into_iter(); - let mut out = io::BufWriter::new(io::stdout()); - let mut eout = io::stderr(); - if args.flag_tree { - for dent in it { - match dent { - Err(err) => { - out.flush().unwrap(); - wout!(eout, "ERROR: {}", err); - } - Ok(dent) => { - let name = dent.file_name().to_string_lossy(); - wout!(out, "{}{}", indent(dent.depth()), name); - } - } + for dir in args.arg_dir.unwrap_or(vec![".".to_string()]) { + let mut walkdir = WalkDir::new(dir) + .max_open(args.flag_fd_max) + .follow_links(args.flag_follow_links) + .min_depth(mind) + .max_depth(maxd) + .same_file_system(args.flag_same_file_system); + if args.flag_sort { + walkdir = walkdir.sort_by(|a,b| a.file_name().cmp(b.file_name())); } - } else if args.flag_ignore_errors { - for dent in it.filter_map(|e| e.ok()) { - wout!(out, "{}", dent.path().display()); + if args.flag_depth { + walkdir = walkdir.contents_first(true) } - } else { - for dent in it { - match dent { - Err(err) => { - out.flush().unwrap(); - wout!(eout, "ERROR: {}", err); + let it = walkdir.into_iter(); + let mut out = io::BufWriter::new(io::stdout()); + let mut eout = io::stderr(); + if args.flag_tree { + for dent in it { + match dent { + Err(err) => { + out.flush().unwrap(); + wout!(eout, "ERROR: {}", err); + } + Ok(dent) => { + let name = dent.file_name().to_string_lossy(); + wout!(out, "{}{}", indent(dent.depth()), name); + } } - Ok(dent) => { - wout!(out, "{}", dent.path().display()); + } + } else if args.flag_ignore_errors { + for dent in it.filter_map(|e| e.ok()) { + wout!(out, "{}", dent.path().display()); + } + } else { + for dent in it { + match dent { + Err(err) => { + out.flush().unwrap(); + wout!(eout, "ERROR: {}", err); + } + Ok(dent) => { + wout!(out, "{}", dent.path().display()); + } } } } diff --git a/src/lib.rs b/src/lib.rs index 6ce95a3..d60c2d7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -132,6 +132,8 @@ pub use unix::DirEntryExt; mod tests; #[cfg(unix)] mod unix; +#[cfg(windows)] +mod windows; /// Like try, but for iterators that return [`Option>`]. /// @@ -247,6 +249,7 @@ struct WalkDirOptions { FnMut(&DirEntry,&DirEntry) -> Ordering + Send + Sync + 'static >>, contents_first: bool, + same_file_system: bool, } impl fmt::Debug for WalkDirOptions { @@ -264,6 +267,7 @@ impl fmt::Debug for WalkDirOptions { .field("max_depth", &self.max_depth) .field("sorter", &sorter_str) .field("contents_first", &self.contents_first) + .field("same_file_system", &self.same_file_system) .finish() } } @@ -283,6 +287,7 @@ impl WalkDir { max_depth: ::std::usize::MAX, sorter: None, contents_first: false, + same_file_system: false, }, root: root.as_ref().to_path_buf(), } @@ -448,6 +453,19 @@ impl WalkDir { self.opts.contents_first = yes; self } + + /// Do not cross file system boundaries. + /// + /// When this option is enabled, directory traversal will not descend into + /// directories that are on a different file system from the root path. + /// + /// Currently, this option is only supported on Unix and Windows. If this + /// option is used on an unsupported platform, then directory traversal + /// will immediately return an error and will not yield any entries. + pub fn same_file_system(mut self, yes: bool) -> Self { + self.opts.same_file_system = yes; + self + } } impl IntoIterator for WalkDir { @@ -463,6 +481,7 @@ impl IntoIterator for WalkDir { oldest_opened: 0, depth: 0, deferred_dirs: vec![], + root_device: None, } } } @@ -512,6 +531,13 @@ pub struct IntoIter { /// yielded after their contents has been fully yielded. This is only /// used when `contents_first` is enabled. deferred_dirs: Vec, + /// The device of the root file path when the first call to `next` was + /// made. + /// + /// If the `same_file_system` option isn't enabled, then this is always + /// `None`. Conversely, if it is enabled, this is always `Some(...)` after + /// handling the root path. + root_device: Option, } /// An ancestor is an item in the directory tree traversed by walkdir, and is @@ -650,6 +676,11 @@ impl Iterator for IntoIter { /// an error value. The error will be wrapped in an Option::Some. fn next(&mut self) -> Option> { if let Some(start) = self.start.take() { + if self.opts.same_file_system { + let result = device_num(&start) + .map_err(|e| Error::from_path(0, start.clone(), e)); + self.root_device = Some(itry!(result)); + } let dent = itry!(DirEntry::from_path(0, start, false)); if let Some(result) = self.handle_entry(dent) { return Some(result); @@ -668,7 +699,11 @@ impl Iterator for IntoIter { } // Unwrap is safe here because we've verified above that // `self.stack_list` is not empty - match self.stack_list.last_mut().expect("bug in walkdir").next() { + let next = self.stack_list + .last_mut() + .expect("BUG: stack should be non-empty") + .next(); + match next { None => self.pop(), Some(Err(err)) => return Some(Err(err)), Some(Ok(dent)) => { @@ -802,7 +837,13 @@ impl IntoIter { } let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir(); if is_normal_dir { - itry!(self.push(&dent)); + if self.opts.same_file_system && dent.depth > 0 { + if itry!(self.is_same_file_system(&dent)) { + itry!(self.push(&dent)); + } + } else { + itry!(self.push(&dent)); + } } if is_normal_dir && self.opts.contents_first { self.deferred_dirs.push(dent); @@ -820,7 +861,7 @@ impl IntoIter { // Unwrap is safe here because we've guaranteed that // `self.deferred_dirs.len()` can never be less than 1 let deferred: DirEntry = self.deferred_dirs.pop() - .expect("bug in walkdir"); + .expect("BUG: deferred_dirs should be non-empty"); if !self.skippable() { return Some(deferred); } @@ -874,7 +915,7 @@ impl IntoIter { } fn pop(&mut self) { - self.stack_list.pop().expect("cannot pop from empty stack"); + self.stack_list.pop().expect("BUG: cannot pop from empty stack"); if self.opts.follow_links { self.stack_path.pop().expect("BUG: list/path stacks out of sync"); } @@ -920,6 +961,14 @@ impl IntoIter { Ok(()) } + fn is_same_file_system(&mut self, dent: &DirEntry) -> Result { + let dent_device = device_num(&dent.path) + .map_err(|err| Error::from_entry(dent, err))?; + Ok(self.root_device + .map(|d| d == dent_device) + .expect("BUG: called is_same_file_system without root device")) + } + fn skippable(&self) -> bool { self.depth < self.opts.min_depth || self.depth > self.opts.max_depth } @@ -1584,10 +1633,12 @@ impl fmt::Display for Error { } impl From for io::Error { - /// Convert the [`Error`] to an [`io::Error`], preserving the original [`Error`] as the ["inner - /// error"]. Note that this also makes the display of the error include the context. + /// Convert the [`Error`] to an [`io::Error`], preserving the original + /// [`Error`] as the ["inner error"]. Note that this also makes the display + /// of the error include the context. /// - /// This is different from [`into_io_error`] which returns the original [`io::Error`]. + /// This is different from [`into_io_error`] which returns the original + /// [`io::Error`]. /// /// [`Error`]: struct.Error.html /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html @@ -1605,3 +1656,24 @@ impl From for io::Error { io::Error::new(kind, walk_err) } } + +#[cfg(unix)] +fn device_num>(path: P)-> std::io::Result { + use std::os::unix::fs::MetadataExt; + + path.as_ref().metadata().map(|md| md.dev()) +} + + #[cfg(windows)] +fn device_num>(path: P) -> std::io::Result { + windows::windows_file_handle_info(path) + .map(|info| info.dwVolumeSerialNumber as u64) +} + +#[cfg(not(any(unix, windows)))] +fn device_num>(_: P)-> std::io::Result { + Err(io::Error::new( + io::ErrorKind::Other, + "walkdir: same_file_system option not supported on this platform", + )) +} diff --git a/src/tests.rs b/src/tests.rs index 8847323..b1df193 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -828,3 +828,40 @@ fn walk_dir_send_sync_traits() { assert_send::>(); assert_sync::>(); } + +// We cannot mount different volumes for the sake of the test, but +// on Linux systems we can assume that /sys is a mounted volume. +#[test] +#[cfg(target_os = "linux")] +fn walk_dir_stay_on_file_system() { + // If for some reason /sys doesn't exist or isn't a directory, just skip + // this test. + if !Path::new("/sys").is_dir() { + return; + } + + let actual = td("same_file", vec![ + td("a", vec![tld("/sys", "alink")]), + ]); + let unfollowed = td("same_file", vec![ + td("a", vec![tld("/sys", "alink")]), + ]); + let (_tmp, got) = dir_setup_with(&actual, |wd| wd); + assert_tree_eq!(unfollowed, got); + + // Create a symlink to sys and enable following symlinks. If the + // same_file_system option doesn't work, then this probably will hit a + // permission error. Otherwise, it should just skip over the symlink + // completely. + let actual = td("same_file", vec![ + td("a", vec![tld("/sys", "alink")]), + ]); + let followed = td("same_file", vec![ + td("a", vec![td("alink", vec![])]), + ]); + let (_tmp, got) = dir_setup_with(&actual, |wd| { + wd.follow_links(true).same_file_system(true) + }); + assert_tree_eq!(followed, got); +} + diff --git a/src/windows.rs b/src/windows.rs new file mode 100644 index 0000000..85b726e --- /dev/null +++ b/src/windows.rs @@ -0,0 +1,34 @@ +use std::fs::OpenOptions; +use std::io::Error; +use std::mem; +use std::os::windows::fs::OpenOptionsExt; +use std::os::windows::io::AsRawHandle; +use std::path::Path; + +use winapi::um::fileapi::{ + BY_HANDLE_FILE_INFORMATION, + GetFileInformationByHandle, +}; +use winapi::um::winbase::FILE_FLAG_BACKUP_SEMANTICS; + +/// Return metadata for the file at the given path. +pub fn windows_file_handle_info>( + path: P, +) -> Result { + // The FILE_FLAG_BACKUP_SEMANTICS flag is needed to open directories + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa365258(v=vs.85).aspx + let file = OpenOptions::new() + .create(false) + .write(false) + .read(true) + .custom_flags(FILE_FLAG_BACKUP_SEMANTICS) + .open(path)?; + + unsafe { + let mut info = mem::zeroed(); + if GetFileInformationByHandle(file.as_raw_handle(), &mut info) == 0 { + return Err(Error::last_os_error()); + } + Ok(info) + } +} -- cgit v1.2.3