diff options
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- | src/lib.rs | 1119 |
1 files changed, 11 insertions, 1108 deletions
@@ -1,1127 +1,30 @@ /*! -Crate `walkdir` provides an efficient and cross platform implementation -of recursive directory traversal. Several options are exposed to control -iteration, such as whether to follow symbolic links (default off), limit the -maximum number of simultaneous open file descriptors and the ability to -efficiently skip descending into directories. - -To use this crate, add `walkdir` as a dependency to your project's -`Cargo.toml`: - -```toml -[dependencies] -walkdir = "2" -``` - -# From the top - -The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values -yielded by the iterator. Finally, the [`Error`] type is a small wrapper around -[`std::io::Error`] with additional information, such as if a loop was detected -while following symbolic links (not enabled by default). - -[`WalkDir`]: struct.WalkDir.html -[`DirEntry`]: struct.DirEntry.html -[`Error`]: struct.Error.html -[`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html - -# Example - -The following code recursively iterates over the directory given and prints -the path for each entry: - -```no_run -use walkdir::WalkDir; -# use walkdir::Error; - -# fn try_main() -> Result<(), Error> { -for entry in WalkDir::new("foo") { - println!("{}", entry?.path().display()); -} -# Ok(()) -# } -``` - -Or, if you'd like to iterate over all entries and ignore any errors that -may arise, use [`filter_map`]. (e.g., This code below will silently skip -directories that the owner of the running process does not have permission to -access.) - -```no_run -use walkdir::WalkDir; - -for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) { - println!("{}", entry.path().display()); -} -``` - -[`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map - -# Example: follow symbolic links - -The same code as above, except [`follow_links`] is enabled: - -```no_run -use walkdir::WalkDir; -# use walkdir::Error; - -# fn try_main() -> Result<(), Error> { -for entry in WalkDir::new("foo").follow_links(true) { - println!("{}", entry?.path().display()); -} -# Ok(()) -# } -``` - -[`follow_links`]: struct.WalkDir.html#method.follow_links - -# Example: skip hidden files and directories on unix - -This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files -and directories efficiently (i.e. without recursing into hidden directories): - -```no_run -use walkdir::{DirEntry, WalkDir}; -# use walkdir::Error; - -fn is_hidden(entry: &DirEntry) -> bool { - entry.file_name() - .to_str() - .map(|s| s.starts_with(".")) - .unwrap_or(false) -} - -# fn try_main() -> Result<(), Error> { -let walker = WalkDir::new("foo").into_iter(); -for entry in walker.filter_entry(|e| !is_hidden(e)) { - println!("{}", entry?.path().display()); -} -# Ok(()) -# } -``` - -[`filter_entry`]: struct.IntoIter.html#method.filter_entry +TODO */ -#![deny(missing_docs)] +// #![deny(missing_docs)] #![allow(unknown_lints)] #![allow(warnings)] #[cfg(test)] doc_comment::doctest!("../README.md"); -use std::cmp::{min, Ordering}; -use std::fmt; -use std::fs::{self, ReadDir}; -use std::io; -use std::path::{Path, PathBuf}; -use std::result; -use std::vec; - -use same_file::Handle; - pub use crate::dent::DirEntry; #[cfg(unix)] pub use crate::dent::DirEntryExt; -pub use crate::error::Error; +pub use crate::error::{Error, Result}; +pub use crate::walk::{FilterEntry, IntoIter, WalkDir}; +#[cfg(not(windows))] +pub use cursor::*; + +#[cfg(not(windows))] +mod cursor; mod dent; +mod dir; mod error; pub mod os; #[cfg(test)] mod tests; mod util; - -/// Like try, but for iterators that return [`Option<Result<_, _>>`]. -/// -/// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html -macro_rules! itry { - ($e:expr) => { - match $e { - Ok(v) => v, - Err(err) => return Some(Err(From::from(err))), - } - }; -} - -/// A result type for walkdir operations. -/// -/// Note that this result type embeds the error type in this crate. This -/// is only useful if you care about the additional information provided by -/// the error (such as the path associated with the error or whether a loop -/// was dectected). If you want things to Just Work, then you can use -/// [`io::Result`] instead since the error type in this package will -/// automatically convert to an [`io::Result`] when using the [`try!`] macro. -/// -/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html -/// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html -pub type Result<T> = ::std::result::Result<T, Error>; - -/// A builder to create an iterator for recursively walking a directory. -/// -/// Results are returned in depth first fashion, with directories yielded -/// before their contents. If [`contents_first`] is true, contents are yielded -/// before their directories. The order is unspecified but if [`sort_by`] is -/// given, directory entries are sorted according to this function. Directory -/// entries `.` and `..` are always omitted. -/// -/// If an error occurs at any point during iteration, then it is returned in -/// place of its corresponding directory entry and iteration continues as -/// normal. If an error occurs while opening a directory for reading, then it -/// is not descended into (but the error is still yielded by the iterator). -/// Iteration may be stopped at any time. When the iterator is destroyed, all -/// resources associated with it are freed. -/// -/// [`contents_first`]: struct.WalkDir.html#method.contents_first -/// [`sort_by`]: struct.WalkDir.html#method.sort_by -/// -/// # Usage -/// -/// This type implements [`IntoIterator`] so that it may be used as the subject -/// of a `for` loop. You may need to call [`into_iter`] explicitly if you want -/// to use iterator adapters such as [`filter_entry`]. -/// -/// Idiomatic use of this type should use method chaining to set desired -/// options. For example, this only shows entries with a depth of `1`, `2` or -/// `3` (relative to `foo`): -/// -/// ```no_run -/// use walkdir::WalkDir; -/// # use walkdir::Error; -/// -/// # fn try_main() -> Result<(), Error> { -/// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) { -/// println!("{}", entry?.path().display()); -/// } -/// # Ok(()) -/// # } -/// ``` -/// -/// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html -/// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter -/// [`filter_entry`]: struct.IntoIter.html#method.filter_entry -/// -/// Note that the iterator by default includes the top-most directory. Since -/// this is the only directory yielded with depth `0`, it is easy to ignore it -/// with the [`min_depth`] setting: -/// -/// ```no_run -/// use walkdir::WalkDir; -/// # use walkdir::Error; -/// -/// # fn try_main() -> Result<(), Error> { -/// for entry in WalkDir::new("foo").min_depth(1) { -/// println!("{}", entry?.path().display()); -/// } -/// # Ok(()) -/// # } -/// ``` -/// -/// [`min_depth`]: struct.WalkDir.html#method.min_depth -/// -/// This will only return descendents of the `foo` directory and not `foo` -/// itself. -/// -/// # Loops -/// -/// This iterator (like most/all recursive directory iterators) assumes that -/// no loops can be made with *hard* links on your file system. In particular, -/// this would require creating a hard link to a directory such that it creates -/// a loop. On most platforms, this operation is illegal. -/// -/// Note that when following symbolic/soft links, loops are detected and an -/// error is reported. -#[derive(Debug)] -pub struct WalkDir { - opts: WalkDirOptions, - root: PathBuf, -} - -struct WalkDirOptions { - follow_links: bool, - max_open: usize, - min_depth: usize, - max_depth: usize, - sorter: Option< - Box< - dyn FnMut(&DirEntry, &DirEntry) -> Ordering - + Send - + Sync - + 'static, - >, - >, - contents_first: bool, - same_file_system: bool, -} - -impl fmt::Debug for WalkDirOptions { - fn fmt( - &self, - f: &mut fmt::Formatter<'_>, - ) -> result::Result<(), fmt::Error> { - let sorter_str = if self.sorter.is_some() { - // FnMut isn't `Debug` - "Some(...)" - } else { - "None" - }; - f.debug_struct("WalkDirOptions") - .field("follow_links", &self.follow_links) - .field("max_open", &self.max_open) - .field("min_depth", &self.min_depth) - .field("max_depth", &self.max_depth) - .field("sorter", &sorter_str) - .field("contents_first", &self.contents_first) - .field("same_file_system", &self.same_file_system) - .finish() - } -} - -impl WalkDir { - /// Create a builder for a recursive directory iterator starting at the - /// file path `root`. If `root` is a directory, then it is the first item - /// yielded by the iterator. If `root` is a file, then it is the first - /// and only item yielded by the iterator. If `root` is a symlink, then it - /// is always followed for the purposes of directory traversal. (A root - /// `DirEntry` still obeys its documentation with respect to symlinks and - /// the `follow_links` setting.) - pub fn new<P: AsRef<Path>>(root: P) -> Self { - WalkDir { - opts: WalkDirOptions { - follow_links: false, - max_open: 10, - min_depth: 0, - max_depth: ::std::usize::MAX, - sorter: None, - contents_first: false, - same_file_system: false, - }, - root: root.as_ref().to_path_buf(), - } - } - - /// Set the minimum depth of entries yielded by the iterator. - /// - /// The smallest depth is `0` and always corresponds to the path given - /// to the `new` function on this type. Its direct descendents have depth - /// `1`, and their descendents have depth `2`, and so on. - pub fn min_depth(mut self, depth: usize) -> Self { - self.opts.min_depth = depth; - if self.opts.min_depth > self.opts.max_depth { - self.opts.min_depth = self.opts.max_depth; - } - self - } - - /// Set the maximum depth of entries yield by the iterator. - /// - /// The smallest depth is `0` and always corresponds to the path given - /// to the `new` function on this type. Its direct descendents have depth - /// `1`, and their descendents have depth `2`, and so on. - /// - /// Note that this will not simply filter the entries of the iterator, but - /// it will actually avoid descending into directories when the depth is - /// exceeded. - pub fn max_depth(mut self, depth: usize) -> Self { - self.opts.max_depth = depth; - if self.opts.max_depth < self.opts.min_depth { - self.opts.max_depth = self.opts.min_depth; - } - self - } - - /// Follow symbolic links. By default, this is disabled. - /// - /// When `yes` is `true`, symbolic links are followed as if they were - /// normal directories and files. If a symbolic link is broken or is - /// involved in a loop, an error is yielded. - /// - /// When enabled, the yielded [`DirEntry`] values represent the target of - /// the link while the path corresponds to the link. See the [`DirEntry`] - /// type for more details. - /// - /// [`DirEntry`]: struct.DirEntry.html - pub fn follow_links(mut self, yes: bool) -> Self { - self.opts.follow_links = yes; - self - } - - /// Set the maximum number of simultaneously open file descriptors used - /// by the iterator. - /// - /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set - /// to `1` automatically. If this is not set, then it defaults to some - /// reasonably low number. - /// - /// This setting has no impact on the results yielded by the iterator - /// (even when `n` is `1`). Instead, this setting represents a trade off - /// between scarce resources (file descriptors) and memory. Namely, when - /// the maximum number of file descriptors is reached and a new directory - /// needs to be opened to continue iteration, then a previous directory - /// handle is closed and has its unyielded entries stored in memory. In - /// practice, this is a satisfying trade off because it scales with respect - /// to the *depth* of your file tree. Therefore, low values (even `1`) are - /// acceptable. - /// - /// Note that this value does not impact the number of system calls made by - /// an exhausted iterator. - /// - /// # Platform behavior - /// - /// On Windows, if `follow_links` is enabled, then this limit is not - /// respected. In particular, the maximum number of file descriptors opened - /// is proportional to the depth of the directory tree traversed. - pub fn max_open(mut self, mut n: usize) -> Self { - if n == 0 { - n = 1; - } - self.opts.max_open = n; - self - } - - /// Set a function for sorting directory entries. - /// - /// If a compare function is set, the resulting iterator will return all - /// paths in sorted order. The compare function will be called to compare - /// entries from the same directory. - /// - /// ```rust,no-run - /// use std::cmp; - /// use std::ffi::OsString; - /// use walkdir::WalkDir; - /// - /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name())); - /// ``` - pub fn sort_by<F>(mut self, cmp: F) -> Self - where - F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static, - { - self.opts.sorter = Some(Box::new(cmp)); - self - } - - /// Yield a directory's contents before the directory itself. By default, - /// this is disabled. - /// - /// When `yes` is `false` (as is the default), the directory is yielded - /// before its contents are read. This is useful when, e.g. you want to - /// skip processing of some directories. - /// - /// When `yes` is `true`, the iterator yields the contents of a directory - /// before yielding the directory itself. This is useful when, e.g. you - /// want to recursively delete a directory. - /// - /// # Example - /// - /// Assume the following directory tree: - /// - /// ```text - /// foo/ - /// abc/ - /// qrs - /// tuv - /// def/ - /// ``` - /// - /// With contents_first disabled (the default), the following code visits - /// the directory tree in depth-first order: - /// - /// ```no_run - /// use walkdir::WalkDir; - /// - /// for entry in WalkDir::new("foo") { - /// let entry = entry.unwrap(); - /// println!("{}", entry.path().display()); - /// } - /// - /// // foo - /// // foo/abc - /// // foo/abc/qrs - /// // foo/abc/tuv - /// // foo/def - /// ``` - /// - /// With contents_first enabled: - /// - /// ```no_run - /// use walkdir::WalkDir; - /// - /// for entry in WalkDir::new("foo").contents_first(true) { - /// let entry = entry.unwrap(); - /// println!("{}", entry.path().display()); - /// } - /// - /// // foo/abc/qrs - /// // foo/abc/tuv - /// // foo/abc - /// // foo/def - /// // foo - /// ``` - pub fn contents_first(mut self, yes: bool) -> Self { - self.opts.contents_first = yes; - self - } - - /// Do not cross file system boundaries. - /// - /// When this option is enabled, directory traversal will not descend into - /// directories that are on a different file system from the root path. - /// - /// Currently, this option is only supported on Unix and Windows. If this - /// option is used on an unsupported platform, then directory traversal - /// will immediately return an error and will not yield any entries. - pub fn same_file_system(mut self, yes: bool) -> Self { - self.opts.same_file_system = yes; - self - } -} - -impl IntoIterator for WalkDir { - type Item = Result<DirEntry>; - type IntoIter = IntoIter; - - fn into_iter(self) -> IntoIter { - IntoIter { - opts: self.opts, - start: Some(self.root), - stack_list: vec![], - stack_path: vec![], - oldest_opened: 0, - depth: 0, - deferred_dirs: vec![], - root_device: None, - } - } -} - -/// An iterator for recursively descending into a directory. -/// -/// A value with this type must be constructed with the [`WalkDir`] type, which -/// uses a builder pattern to set options such as min/max depth, max open file -/// descriptors and whether the iterator should follow symbolic links. After -/// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain. -/// -/// The order of elements yielded by this iterator is unspecified. -/// -/// [`WalkDir`]: struct.WalkDir.html -/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v -#[derive(Debug)] -pub struct IntoIter { - /// Options specified in the builder. Depths, max fds, etc. - opts: WalkDirOptions, - /// The start path. - /// - /// This is only `Some(...)` at the beginning. After the first iteration, - /// this is always `None`. - start: Option<PathBuf>, - /// A stack of open (up to max fd) or closed handles to directories. - /// An open handle is a plain [`fs::ReadDir`] while a closed handle is - /// a `Vec<fs::DirEntry>` corresponding to the as-of-yet consumed entries. - /// - /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html - stack_list: Vec<DirList>, - /// A stack of file paths. - /// - /// This is *only* used when [`follow_links`] is enabled. In all other - /// cases this stack is empty. - /// - /// [`follow_links`]: struct.WalkDir.html#method.follow_links - stack_path: Vec<Ancestor>, - /// An index into `stack_list` that points to the oldest open directory - /// handle. If the maximum fd limit is reached and a new directory needs to - /// be read, the handle at this index is closed before the new directory is - /// opened. - oldest_opened: usize, - /// The current depth of iteration (the length of the stack at the - /// beginning of each iteration). - depth: usize, - /// A list of DirEntries corresponding to directories, that are - /// yielded after their contents has been fully yielded. This is only - /// used when `contents_first` is enabled. - deferred_dirs: Vec<DirEntry>, - /// The device of the root file path when the first call to `next` was - /// made. - /// - /// If the `same_file_system` option isn't enabled, then this is always - /// `None`. Conversely, if it is enabled, this is always `Some(...)` after - /// handling the root path. - root_device: Option<u64>, -} - -/// An ancestor is an item in the directory tree traversed by walkdir, and is -/// used to check for loops in the tree when traversing symlinks. -#[derive(Debug)] -struct Ancestor { - /// The path of this ancestor. - path: PathBuf, - /// An open file to this ancesor. This is only used on Windows where - /// opening a file handle appears to be quite expensive, so we choose to - /// cache it. This comes at the cost of not respecting the file descriptor - /// limit set by the user. - #[cfg(windows)] - handle: Handle, -} - -impl Ancestor { - /// Create a new ancestor from the given directory path. - #[cfg(windows)] - fn new(dent: &DirEntry) -> io::Result<Ancestor> { - let handle = Handle::from_path(dent.path())?; - Ok(Ancestor { path: dent.path().to_path_buf(), handle: handle }) - } - - /// Create a new ancestor from the given directory path. - #[cfg(not(windows))] - fn new(dent: &DirEntry) -> io::Result<Ancestor> { - Ok(Ancestor { path: dent.path().to_path_buf() }) - } - - /// Returns true if and only if the given open file handle corresponds to - /// the same directory as this ancestor. - #[cfg(windows)] - fn is_same(&self, child: &Handle) -> io::Result<bool> { - Ok(child == &self.handle) - } - - /// Returns true if and only if the given open file handle corresponds to - /// the same directory as this ancestor. - #[cfg(not(windows))] - fn is_same(&self, child: &Handle) -> io::Result<bool> { - Ok(child == &Handle::from_path(&self.path)?) - } -} - -/// A sequence of unconsumed directory entries. -/// -/// This represents the opened or closed state of a directory handle. When -/// open, future entries are read by iterating over the raw `fs::ReadDir`. -/// When closed, all future entries are read into memory. Iteration then -/// proceeds over a [`Vec<fs::DirEntry>`]. -/// -/// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html -/// [`Vec<fs::DirEntry>`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html -#[derive(Debug)] -enum DirList { - /// An opened handle. - /// - /// This includes the depth of the handle itself. - /// - /// If there was an error with the initial [`fs::read_dir`] call, then it - /// is stored here. (We use an [`Option<...>`] to make yielding the error - /// exactly once simpler.) - /// - /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html - /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html - Opened { depth: usize, it: result::Result<ReadDir, Option<Error>> }, - /// A closed handle. - /// - /// All remaining directory entries are read into memory. - Closed(vec::IntoIter<Result<DirEntry>>), -} - -impl Iterator for IntoIter { - type Item = Result<DirEntry>; - /// Advances the iterator and returns the next value. - /// - /// # Errors - /// - /// If the iterator fails to retrieve the next value, this method returns - /// an error value. The error will be wrapped in an Option::Some. - fn next(&mut self) -> Option<Result<DirEntry>> { - if let Some(start) = self.start.take() { - if self.opts.same_file_system { - let result = util::device_num(&start) - .map_err(|e| Error::from_path(0, start.clone(), e)); - self.root_device = Some(itry!(result)); - } - let dent = itry!(DirEntry::from_path(0, start, false)); - if let Some(result) = self.handle_entry(dent) { - return Some(result); - } - } - while !self.stack_list.is_empty() { - self.depth = self.stack_list.len(); - if let Some(dentry) = self.get_deferred_dir() { - return Some(Ok(dentry)); - } - if self.depth > self.opts.max_depth { - // If we've exceeded the max depth, pop the current dir - // so that we don't descend. - self.pop(); - continue; - } - // Unwrap is safe here because we've verified above that - // `self.stack_list` is not empty - let next = self - .stack_list - .last_mut() - .expect("BUG: stack should be non-empty") - .next(); - match next { - None => self.pop(), - Some(Err(err)) => return Some(Err(err)), - Some(Ok(dent)) => { - if let Some(result) = self.handle_entry(dent) { - return Some(result); - } - } - } - } - if self.opts.contents_first { - self.depth = self.stack_list.len(); - if let Some(dentry) = self.get_deferred_dir() { - return Some(Ok(dentry)); - } - } - None - } -} - -impl IntoIter { - /// Skips the current directory. - /// - /// This causes the iterator to stop traversing the contents of the least - /// recently yielded directory. This means any remaining entries in that - /// directory will be skipped (including sub-directories). - /// - /// Note that the ergonomics of this method are questionable since it - /// borrows the iterator mutably. Namely, you must write out the looping - /// condition manually. For example, to skip hidden entries efficiently on - /// unix systems: - /// - /// ```no_run - /// use walkdir::{DirEntry, WalkDir}; - /// - /// fn is_hidden(entry: &DirEntry) -> bool { - /// entry.file_name() - /// .to_str() - /// .map(|s| s.starts_with(".")) - /// .unwrap_or(false) - /// } - /// - /// let mut it = WalkDir::new("foo").into_iter(); - /// loop { - /// let entry = match it.next() { - /// None => break, - /// Some(Err(err)) => panic!("ERROR: {}", err), - /// Some(Ok(entry)) => entry, - /// }; - /// if is_hidden(&entry) { - /// if entry.file_type().is_dir() { - /// it.skip_current_dir(); - /// } - /// continue; - /// } - /// println!("{}", entry.path().display()); - /// } - /// ``` - /// - /// You may find it more convenient to use the [`filter_entry`] iterator - /// adapter. (See its documentation for the same example functionality as - /// above.) - /// - /// [`filter_entry`]: #method.filter_entry - pub fn skip_current_dir(&mut self) { - if !self.stack_list.is_empty() { - self.pop(); - } - } - - /// Yields only entries which satisfy the given predicate and skips - /// descending into directories that do not satisfy the given predicate. - /// - /// The predicate is applied to all entries. If the predicate is - /// true, iteration carries on as normal. If the predicate is false, the - /// entry is ignored and if it is a directory, it is not descended into. - /// - /// This is often more convenient to use than [`skip_current_dir`]. For - /// example, to skip hidden files and directories efficiently on unix - /// systems: - /// - /// ```no_run - /// use walkdir::{DirEntry, WalkDir}; - /// # use walkdir::Error; - /// - /// fn is_hidden(entry: &DirEntry) -> bool { - /// entry.file_name() - /// .to_str() - /// .map(|s| s.starts_with(".")) - /// .unwrap_or(false) - /// } - /// - /// # fn try_main() -> Result<(), Error> { - /// for entry in WalkDir::new("foo") - /// .into_iter() - /// .filter_entry(|e| !is_hidden(e)) { - /// println!("{}", entry?.path().display()); - /// } - /// # Ok(()) - /// # } - /// ``` - /// - /// Note that the iterator will still yield errors for reading entries that - /// may not satisfy the predicate. - /// - /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not - /// passed to this predicate. - /// - /// Note that if the iterator has `contents_first` enabled, then this - /// method is no different than calling the standard `Iterator::filter` - /// method (because directory entries are yielded after they've been - /// descended into). - /// - /// [`skip_current_dir`]: #method.skip_current_dir - /// [`min_depth`]: struct.WalkDir.html#method.min_depth - /// [`max_depth`]: struct.WalkDir.html#method.max_depth - pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P> - where - P: FnMut(&DirEntry) -> bool, - { - FilterEntry { it: self, predicate: predicate } - } - - fn handle_entry( - &mut self, - mut dent: DirEntry, - ) -> Option<Result<DirEntry>> { - if self.opts.follow_links && dent.file_type().is_symlink() { - dent = itry!(self.follow(dent)); - } - let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir(); - if is_normal_dir { - if self.opts.same_file_system && dent.depth() > 0 { - if itry!(self.is_same_file_system(&dent)) { - itry!(self.push(&dent)); - } - } else { - itry!(self.push(&dent)); - } - } else if dent.depth() == 0 && dent.file_type().is_symlink() { - // As a special case, if we are processing a root entry, then we - // always follow it even if it's a symlink and follow_links is - // false. We are careful to not let this change the semantics of - // the DirEntry however. Namely, the DirEntry should still respect - // the follow_links setting. When it's disabled, it should report - // itself as a symlink. When it's enabled, it should always report - // itself as the target. - let md = itry!(fs::metadata(dent.path()).map_err(|err| { - Error::from_path(dent.depth(), dent.path().to_path_buf(), err) - })); - if md.file_type().is_dir() { - itry!(self.push(&dent)); - } - } - if is_normal_dir && self.opts.contents_first { - self.deferred_dirs.push(dent); - None - } else if self.skippable() { - None - } else { - Some(Ok(dent)) - } - } - - fn get_deferred_dir(&mut self) -> Option<DirEntry> { - if self.opts.contents_first { - if self.depth < self.deferred_dirs.len() { - // Unwrap is safe here because we've guaranteed that - // `self.deferred_dirs.len()` can never be less than 1 - let deferred: DirEntry = self - .deferred_dirs - .pop() - .expect("BUG: deferred_dirs should be non-empty"); - if !self.skippable() { - return Some(deferred); - } - } - } - None - } - - fn push(&mut self, dent: &DirEntry) -> Result<()> { - // Make room for another open file descriptor if we've hit the max. - let free = - self.stack_list.len().checked_sub(self.oldest_opened).unwrap(); - if free == self.opts.max_open { - self.stack_list[self.oldest_opened].close(); - } - // Open a handle to reading the directory's entries. - let rd = fs::read_dir(dent.path()).map_err(|err| { - Some(Error::from_path(self.depth, dent.path().to_path_buf(), err)) - }); - let mut list = DirList::Opened { depth: self.depth, it: rd }; - if let Some(ref mut cmp) = self.opts.sorter { - let mut entries: Vec<_> = list.collect(); - entries.sort_by(|a, b| match (a, b) { - (&Ok(ref a), &Ok(ref b)) => cmp(a, b), - (&Err(_), &Err(_)) => Ordering::Equal, - (&Ok(_), &Err(_)) => Ordering::Greater, - (&Err(_), &Ok(_)) => Ordering::Less, - }); - list = DirList::Closed(entries.into_iter()); - } - if self.opts.follow_links { - let ancestor = Ancestor::new(&dent) - .map_err(|err| Error::from_io(self.depth, err))?; - self.stack_path.push(ancestor); - } - // We push this after stack_path since creating the Ancestor can fail. - // If it fails, then we return the error and won't descend. - self.stack_list.push(list); - // If we had to close out a previous directory stream, then we need to - // increment our index the oldest still-open stream. We do this only - // after adding to our stack, in order to ensure that the oldest_opened - // index remains valid. The worst that can happen is that an already - // closed stream will be closed again, which is a no-op. - // - // We could move the close of the stream above into this if-body, but - // then we would have more than the maximum number of file descriptors - // open at a particular point in time. - if free == self.opts.max_open { - // Unwrap is safe here because self.oldest_opened is guaranteed to - // never be greater than `self.stack_list.len()`, which implies - // that the subtraction won't underflow and that adding 1 will - // never overflow. - self.oldest_opened = self.oldest_opened.checked_add(1).unwrap(); - } - Ok(()) - } - - fn pop(&mut self) { - self.stack_list.pop().expect("BUG: cannot pop from empty stack"); - if self.opts.follow_links { - self.stack_path.pop().expect("BUG: list/path stacks out of sync"); - } - // If everything in the stack is already closed, then there is - // room for at least one more open descriptor and it will - // always be at the top of the stack. - self.oldest_opened = min(self.oldest_opened, self.stack_list.len()); - } - - fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> { - dent = - DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?; - // The only way a symlink can cause a loop is if it points - // to a directory. Otherwise, it always points to a leaf - // and we can omit any loop checks. - if dent.is_dir() { - self.check_loop(dent.path())?; - } - Ok(dent) - } - - fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> { - let hchild = Handle::from_path(&child) - .map_err(|err| Error::from_io(self.depth, err))?; - for ancestor in self.stack_path.iter().rev() { - let is_same = ancestor - .is_same(&hchild) - .map_err(|err| Error::from_io(self.depth, err))?; - if is_same { - return Err(Error::from_loop( - self.depth, - &ancestor.path, - child.as_ref(), - )); - } - } - Ok(()) - } - - fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> { - let dent_device = util::device_num(dent.path()) - .map_err(|err| Error::from_entry(dent, err))?; - Ok(self - .root_device - .map(|d| d == dent_device) - .expect("BUG: called is_same_file_system without root device")) - } - - fn skippable(&self) -> bool { - self.depth < self.opts.min_depth || self.depth > self.opts.max_depth - } -} - -impl DirList { - fn close(&mut self) { - if let DirList::Opened { .. } = *self { - *self = DirList::Closed(self.collect::<Vec<_>>().into_iter()); - } - } -} - -impl Iterator for DirList { - type Item = Result<DirEntry>; - - #[inline(always)] - fn next(&mut self) -> Option<Result<DirEntry>> { - match *self { - DirList::Closed(ref mut it) => it.next(), - DirList::Opened { depth, ref mut it } => match *it { - Err(ref mut err) => err.take().map(Err), - Ok(ref mut rd) => rd.next().map(|r| match r { - Ok(r) => DirEntry::from_entry(depth + 1, &r), - Err(err) => Err(Error::from_io(depth + 1, err)), - }), - }, - } - } -} - -/// A recursive directory iterator that skips entries. -/// -/// Values of this type are created by calling [`.filter_entry()`] on an -/// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`. -/// -/// Directories that fail the predicate `P` are skipped. Namely, they are -/// never yielded and never descended into. -/// -/// Entries that are skipped with the [`min_depth`] and [`max_depth`] options -/// are not passed through this filter. -/// -/// If opening a handle to a directory resulted in an error, then it is yielded -/// and no corresponding call to the predicate is made. -/// -/// Type parameter `I` refers to the underlying iterator and `P` refers to the -/// predicate, which is usually `FnMut(&DirEntry) -> bool`. -/// -/// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry -/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v -/// [`min_depth`]: struct.WalkDir.html#method.min_depth -/// [`max_depth`]: struct.WalkDir.html#method.max_depth -#[derive(Debug)] -pub struct FilterEntry<I, P> { - it: I, - predicate: P, -} - -impl<P> Iterator for FilterEntry<IntoIter, P> -where - P: FnMut(&DirEntry) -> bool, -{ - type Item = Result<DirEntry>; - - /// Advances the iterator and returns the next value. - /// - /// # Errors - /// - /// If the iterator fails to retrieve the next value, this method returns - /// an error value. The error will be wrapped in an `Option::Some`. - fn next(&mut self) -> Option<Result<DirEntry>> { - loop { - let dent = match self.it.next() { - None => return None, - Some(result) => itry!(result), - }; - if !(self.predicate)(&dent) { - if dent.is_dir() { - self.it.skip_current_dir(); - } - continue; - } - return Some(Ok(dent)); - } - } -} - -impl<P> FilterEntry<IntoIter, P> -where - P: FnMut(&DirEntry) -> bool, -{ - /// Yields only entries which satisfy the given predicate and skips - /// descending into directories that do not satisfy the given predicate. - /// - /// The predicate is applied to all entries. If the predicate is - /// true, iteration carries on as normal. If the predicate is false, the - /// entry is ignored and if it is a directory, it is not descended into. - /// - /// This is often more convenient to use than [`skip_current_dir`]. For - /// example, to skip hidden files and directories efficiently on unix - /// systems: - /// - /// ```no_run - /// use walkdir::{DirEntry, WalkDir}; - /// # use walkdir::Error; - /// - /// fn is_hidden(entry: &DirEntry) -> bool { - /// entry.file_name() - /// .to_str() - /// .map(|s| s.starts_with(".")) - /// .unwrap_or(false) - /// } - /// - /// # fn try_main() -> Result<(), Error> { - /// for entry in WalkDir::new("foo") - /// .into_iter() - /// .filter_entry(|e| !is_hidden(e)) { - /// println!("{}", entry?.path().display()); - /// } - /// # Ok(()) - /// # } - /// ``` - /// - /// Note that the iterator will still yield errors for reading entries that - /// may not satisfy the predicate. - /// - /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not - /// passed to this predicate. - /// - /// Note that if the iterator has `contents_first` enabled, then this - /// method is no different than calling the standard `Iterator::filter` - /// method (because directory entries are yielded after they've been - /// descended into). - /// - /// [`skip_current_dir`]: #method.skip_current_dir - /// [`min_depth`]: struct.WalkDir.html#method.min_depth - /// [`max_depth`]: struct.WalkDir.html#method.max_depth - pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> { - FilterEntry { it: self, predicate: predicate } - } - - /// Skips the current directory. - /// - /// This causes the iterator to stop traversing the contents of the least - /// recently yielded directory. This means any remaining entries in that - /// directory will be skipped (including sub-directories). - /// - /// Note that the ergonomics of this method are questionable since it - /// borrows the iterator mutably. Namely, you must write out the looping - /// condition manually. For example, to skip hidden entries efficiently on - /// unix systems: - /// - /// ```no_run - /// use walkdir::{DirEntry, WalkDir}; - /// - /// fn is_hidden(entry: &DirEntry) -> bool { - /// entry.file_name() - /// .to_str() - /// .map(|s| s.starts_with(".")) - /// .unwrap_or(false) - /// } - /// - /// let mut it = WalkDir::new("foo").into_iter(); - /// loop { - /// let entry = match it.next() { - /// None => break, - /// Some(Err(err)) => panic!("ERROR: {}", err), - /// Some(Ok(entry)) => entry, - /// }; - /// if is_hidden(&entry) { - /// if entry.file_type().is_dir() { - /// it.skip_current_dir(); - /// } - /// continue; - /// } - /// println!("{}", entry.path().display()); - /// } - /// ``` - /// - /// You may find it more convenient to use the [`filter_entry`] iterator - /// adapter. (See its documentation for the same example functionality as - /// above.) - /// - /// [`filter_entry`]: #method.filter_entry - pub fn skip_current_dir(&mut self) { - self.it.skip_current_dir(); - } -} +mod walk; |