Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/windirstat/walkdir.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2017-10-21 06:50:13 +0300
committerAndrew Gallant <jamslam@gmail.com>2017-10-21 15:10:18 +0300
commit3d4c9f7eca53cd6bd56306d7a1b2c4478cac8acb (patch)
tree0557337320c53b8ba3700d27f543a63b2b03cbd4
parent5ce6b1071682784e4f8d01a739e4455cf3089a7b (diff)
symlinks: optimize check loop on Windows
Broadly speaking, this commit is an attempt to fix this issue: https://github.com/BurntSushi/ripgrep/issues/633 It was reported that symlink checking was taking a long amount of time, and that one possible way to fix this was to reduce number of times a file descriptor is opened. In this commit, we amortize opening file descriptors by keeping a file handle open for each ancestor in the directory tree. We also open a handle for the candidate file path at most once, instead of once every iteration. Note that we only perform this optimization on Windows, where opening a file handle seems inordinately expensive. In particular, this now causes us to potentially open more file descriptors than the limit set by the user, which only happens when following symbolic links. We document this behavior.
-rw-r--r--src/lib.rs75
1 files changed, 67 insertions, 8 deletions
diff --git a/src/lib.rs b/src/lib.rs
index e5db08b..c43734d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -126,7 +126,7 @@ use std::path::{Path, PathBuf};
use std::result;
use std::vec;
-use same_file::is_same_file;
+use same_file::Handle;
#[cfg(unix)]
pub use unix::DirEntryExt;
@@ -356,6 +356,12 @@ impl WalkDir {
///
/// Note that this value does not impact the number of system calls made by
/// an exhausted iterator.
+ ///
+ /// # Platform behavior
+ ///
+ /// On Windows, if `follow_links` is enabled, then this limit is not
+ /// respected. In particular, the maximum number of file descriptors opened
+ /// is proportional to the depth of the directory tree traversed.
pub fn max_open(mut self, mut n: usize) -> Self {
if n == 0 {
n = 1;
@@ -496,7 +502,7 @@ pub struct IntoIter {
/// cases this stack is empty.
///
/// [`follow_links`]: struct.WalkDir.html#method.follow_links
- stack_path: Vec<PathBuf>,
+ stack_path: Vec<Ancestor>,
/// An index into `stack_list` that points to the oldest open directory
/// handle. If the maximum fd limit is reached and a new directory needs to
/// be read, the handle at this index is closed before the new directory is
@@ -511,6 +517,52 @@ pub struct IntoIter {
deferred_dirs: Vec<DirEntry>,
}
+/// An ancestor is an item in the directory tree traversed by walkdir, and is
+/// used to check for loops in the tree when traversing symlinks.
+#[derive(Debug)]
+struct Ancestor {
+ /// The path of this ancestor.
+ path: PathBuf,
+ /// An open file to this ancesor. This is only used on Windows where
+ /// opening a file handle appears to be quite expensive, so we choose to
+ /// cache it. This comes at the cost of not respecting the file descriptor
+ /// limit set by the user.
+ #[cfg(windows)]
+ handle: Handle,
+}
+
+impl Ancestor {
+ /// Create a new ancestor from the given directory path.
+ #[cfg(windows)]
+ fn new(dent: &DirEntry) -> io::Result<Ancestor> {
+ let handle = Handle::from_path(dent.path())?;
+ Ok(Ancestor {
+ path: dent.path().to_path_buf(),
+ handle: handle,
+ })
+ }
+
+ /// Create a new ancestor from the given directory path.
+ #[cfg(not(windows))]
+ fn new(dent: &DirEntry) -> io::Result<Ancestor> {
+ Ok(Ancestor { path: dent.path().to_path_buf() })
+ }
+
+ /// Returns true if and only if the given open file handle corresponds to
+ /// the same directory as this ancestor.
+ #[cfg(windows)]
+ fn is_same(&self, child: &Handle) -> io::Result<bool> {
+ Ok(child == &self.handle)
+ }
+
+ /// Returns true if and only if the given open file handle corresponds to
+ /// the same directory as this ancestor.
+ #[cfg(not(windows))]
+ fn is_same(&self, child: &Handle) -> io::Result<bool> {
+ Ok(child == &Handle::from_path(&self.path)?)
+ }
+}
+
/// A sequence of unconsumed directory entries.
///
/// This represents the opened or closed state of a directory handle. When
@@ -744,7 +796,7 @@ impl IntoIter {
dent = itry!(self.follow(dent));
}
if dent.file_type().is_dir() {
- self.push(&dent);
+ itry!(self.push(&dent));
}
if dent.file_type().is_dir() && self.opts.contents_first {
self.deferred_dirs.push(dent);
@@ -771,7 +823,7 @@ impl IntoIter {
None
}
- fn push(&mut self, dent: &DirEntry) {
+ fn push(&mut self, dent: &DirEntry) -> Result<()> {
// Make room for another open file descriptor if we've hit the max.
let free = self.stack_list
.len()
@@ -805,8 +857,12 @@ impl IntoIter {
}
self.stack_list.push(list);
if self.opts.follow_links {
- self.stack_path.push(dent.path().to_path_buf());
+ let ancestor = Ancestor::new(&dent).map_err(|err| {
+ Error::from_io(self.depth, err)
+ })?;
+ self.stack_path.push(ancestor);
}
+ Ok(())
}
fn pop(&mut self) {
@@ -832,15 +888,18 @@ impl IntoIter {
}
fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> {
+ let hchild = Handle::from_path(&child).map_err(|err| {
+ Error::from_io(self.depth, err)
+ })?;
for ancestor in self.stack_path.iter().rev() {
- let same = is_same_file(ancestor, &child).map_err(|err| {
+ let is_same = ancestor.is_same(&hchild).map_err(|err| {
Error::from_io(self.depth, err)
})?;
- if same {
+ if is_same {
return Err(Error {
depth: self.depth,
inner: ErrorInner::Loop {
- ancestor: ancestor.to_path_buf(),
+ ancestor: ancestor.path.to_path_buf(),
child: child.as_ref().to_path_buf(),
},
});