From 1d7293a5a1ef548ce587a0b08abce5f21571a100 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Wed, 5 Jun 2019 19:01:37 -0400 Subject: progress --- .gitignore | 1 - Cargo.toml | 4 +- build.rs | 18 +- src/cursor.rs | 177 +++++++ src/dir.rs | 100 ++++ src/error.rs | 16 +- src/lib.rs | 1119 +------------------------------------------- src/oldlib.rs | 1123 +++++++++++++++++++++++++++++++++++++++++++++ src/os/linux/mod.rs | 82 +++- src/os/unix/mod.rs | 116 +---- src/os/unix/rawpath.rs | 353 ++++++++++++++ src/os/unix/stat.rs | 297 ++++++++++++ src/os/windows/mod.rs | 236 +++++----- src/os/windows/rawpath.rs | 29 ++ src/os/windows/stat.rs | 269 +++++++++++ src/tests/mod.rs | 4 +- src/tests/scratch.rs | 45 ++ src/walk.rs | 339 ++++++++++++++ 18 files changed, 2961 insertions(+), 1367 deletions(-) create mode 100644 src/cursor.rs create mode 100644 src/dir.rs create mode 100644 src/oldlib.rs create mode 100644 src/os/unix/rawpath.rs create mode 100644 src/os/unix/stat.rs create mode 100644 src/os/windows/rawpath.rs create mode 100644 src/os/windows/stat.rs create mode 100644 src/tests/scratch.rs create mode 100644 src/walk.rs diff --git a/.gitignore b/.gitignore index d63756d..ba188fc 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,5 @@ examples/ss10pusa.csv build target Cargo.lock -scratch* bench_large/huge tmp diff --git a/Cargo.toml b/Cargo.toml index 1b1f691..2f9d7da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,9 @@ cc = "1.0.36" [target.'cfg(windows)'.dependencies.winapi] version = "0.3" -features = ["std", "impl-debug", "fileapi", "handleapi", "winnt"] +features = [ + "std", "impl-debug", "fileapi", "handleapi", "ioapiset", "winioctl", "winnt", +] [target.'cfg(windows)'.dependencies.winapi-util] version = "0.1.1" diff --git a/build.rs b/build.rs index 9f26302..de9ae04 100644 --- a/build.rs +++ b/build.rs @@ -1,9 +1,25 @@ #[cfg(not(target_os = "dragonfly"))] -fn main() {} +fn main() { + enable_getdents(); +} #[cfg(target_os = "dragonfly")] fn main() { 
+ enable_getdents(); cc::Build::new() .file("src/os/unix/errno-dragonfly.c") .compile("errno-dragonfly"); } + +fn enable_getdents() { + if std::env::var_os("CARGO_CFG_WALKDIR_DISABLE_GETDENTS").is_some() { + return; + } + let os = match std::env::var("CARGO_CFG_TARGET_OS") { + Err(_) => return, + Ok(os) => os, + }; + if os == "linux" { + println!("cargo:rustc-cfg=walkdir_getdents"); + } +} diff --git a/src/cursor.rs b/src/cursor.rs new file mode 100644 index 0000000..b736438 --- /dev/null +++ b/src/cursor.rs @@ -0,0 +1,177 @@ +use std::cmp; +use std::fmt; +use std::io; +use std::path::{Path, PathBuf}; + +#[cfg(unix)] +use crate::os::unix as os; +#[cfg(windows)] +use crate::os::windows as os; + +#[derive(Debug)] +pub struct Cursor { + options: Options, + stack: Vec, + + root: bool, + current: PathBuf, + file_type: Option, +} + +impl Cursor { + pub fn new>(root: P) -> Cursor { + Cursor { + options: Options::default(), + stack: vec![], + root: true, + current: root.into(), + file_type: None, + } + } + + pub fn reset>(root: P) { + unimplemented!() + } + + pub fn read(&mut self) -> io::Result> { + if let Some(ft) = self.file_type.take() { + if !ft.is_dir() { + self.current.pop(); + } + } else { + let ft = os::stat(self.current.clone())?.file_type().into_api(); + if ft.is_dir() { + self.push(); + } + self.file_type = Some(ft); + return Ok(Some(CursorEntry { cursor: self })); + } + while !self.stack.is_empty() { + let dcur = self.stack.last_mut().unwrap(); + match dcur.read() { + None => { + self.stack.pop().unwrap(); + // If the stack is empty, then we've reached the root. + // At this point, `current` is just the original root path, + // so we should not pop anything from it. + if !self.stack.is_empty() { + self.current.pop(); + } + } + Some(Err(err)) => return Err(err), + Some(Ok(dent)) => { + let name = dent.file_name_os(); + if name == "." || name == ".." 
{ + continue; + } + self.current.push(name); + self.file_type = + Some(dent.file_type().unwrap().into_api()); + if dent.file_type().unwrap().is_dir() { + self.push(); + } + return Ok(Some(CursorEntry { cursor: self })); + } + } + } + Ok(None) + } + + fn push(&mut self) { + let res = os::Dir::open(self.current.clone()); + self.stack.push(DirCursor(res.map_err(Some))); + } +} + +#[derive(Debug)] +struct DirCursor(Result>); + +impl DirCursor { + fn read(&mut self) -> Option> { + match self.0 { + Err(ref mut err) => err.take().map(Err), + Ok(ref mut dir) => dir.read(), + } + } +} + +#[derive(Debug)] +pub struct CursorEntry<'a> { + cursor: &'a mut Cursor, +} + +impl<'a> CursorEntry<'a> { + pub fn path(&self) -> &Path { + &self.cursor.current + } + + pub fn file_type(&self) -> FileType { + self.cursor.file_type.unwrap() + } +} + +#[derive(Debug)] +struct Options { + follow_links: bool, + max_open: usize, + min_depth: usize, + max_depth: usize, + sorter: Option, + contents_first: bool, + same_file_system: bool, +} + +impl Default for Options { + fn default() -> Options { + Options { + follow_links: false, + max_open: 10, + min_depth: 0, + max_depth: std::usize::MAX, + sorter: None, + contents_first: false, + same_file_system: false, + } + } +} + +struct Sorter( + Box cmp::Ordering + Send + Sync + 'static>, +); + +impl fmt::Debug for Sorter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "") + } +} + +#[derive(Debug)] +pub struct DirEntry { + os: os::DirEntry, + file_type: FileType, +} + +impl DirEntry {} + +#[derive(Clone, Copy, Debug)] +pub struct FileType(os::FileType); + +impl FileType { + pub fn is_file(&self) -> bool { + self.0.is_file() + } + + pub fn is_dir(&self) -> bool { + self.0.is_dir() + } + + pub fn is_symlink(&self) -> bool { + self.0.is_symlink() + } +} + +impl From for FileType { + fn from(osft: os::FileType) -> FileType { + FileType(osft) + } +} diff --git a/src/dir.rs b/src/dir.rs new file mode 100644 index 0000000..667f3a4 --- 
/dev/null +++ b/src/dir.rs @@ -0,0 +1,100 @@ +#[cfg(unix)] +use std::ffi::CStr; +use std::io; +#[cfg(unix)] +use std::os::unix::io::RawFd; + +#[cfg(target_os = "linux")] +use crate::os::linux; +#[cfg(unix)] +use crate::os::unix; +#[cfg(unix)] +use crate::os::unix::RawPathBuf; + +#[derive(Debug)] +pub struct Cursor { + #[cfg(unix)] + dir: unix::Dir, + #[cfg(unix)] + dent: unix::DirEntry, + #[cfg(target_os = "linux")] + linux_cursor: linux::DirEntryCursor, +} + +impl Cursor { + #[cfg(unix)] + pub fn new(parent: RawFd, dir_name: &CStr) -> io::Result { + let dir = unix::Dir::openat_c(parent, dir_name)?; + Ok(Cursor { + dir, + #[cfg(unix)] + dent: unix::DirEntry::empty(), + #[cfg(target_os = "linux")] + linux_cursor: linux::DirEntryCursor::new(), + }) + } + + /// Reset this cursor to the beginning of the given directory. + /// + /// An error is returned if the given directory could not be opened for + /// reading. If an error is returned, the behavior of this cursor is + /// unspecified until a subsequent and successful `reset` call is made. + #[cfg(unix)] + pub fn reset(&mut self, parent: RawFd, dir_name: &CStr) -> io::Result<()> { + self.dir = unix::Dir::openat_c(parent, dir_name)?; + Ok(()) + } + + #[cfg(all(unix, walkdir_getdents))] + pub fn read(&mut self) -> io::Result> { + use std::os::unix::io::AsRawFd; + + let c = &mut self.linux_cursor; + loop { + if c.advance() { + if is_dots(c.current().file_name_bytes()) { + continue; + } + return Ok(Some(CursorEntry { linux_dent: c.current() })); + } + if !linux::getdents(self.dir.as_raw_fd(), c)? { + return Ok(None); + } + // This is guaranteed since getdents returning true means + // that the buffer has at least one item in it. + assert!(c.advance()); + if is_dots(c.current().file_name_bytes()) { + continue; + } + return Ok(Some(CursorEntry { linux_dent: c.current() })); + } + } + + #[cfg(all(unix, not(walkdir_getdents)))] + pub fn read(&mut self) -> io::Result> { + loop { + return if self.dir.read_into(&mut self.dent)? 
{ + if is_dots(dent.file_name_bytes()) { + continue; + } + Ok(Some(CursorEntry { cursor: self })) + } else { + Ok(None) + }; + } + } +} + +#[derive(Debug)] +pub struct CursorEntry<'a> { + #[cfg(not(all(unix, walkdir_getdents)))] + cursor: &'a Cursor, + #[cfg(all(unix, walkdir_getdents))] + linux_dent: linux::DirEntry<'a>, +} + +impl<'a> CursorEntry<'a> {} + +fn is_dots(file_name: &[u8]) -> bool { + file_name == b"." || file_name == b".." +} diff --git a/src/error.rs b/src/error.rs index 3fb619c..49bf0b5 100644 --- a/src/error.rs +++ b/src/error.rs @@ -2,9 +2,23 @@ use std::error; use std::fmt; use std::io; use std::path::{Path, PathBuf}; +use std::result; use crate::DirEntry; +/// A result type for walkdir operations. +/// +/// Note that this result type embeds the error type in this crate. This +/// is only useful if you care about the additional information provided by +/// the error (such as the path associated with the error or whether a loop +/// was dectected). If you want things to Just Work, then you can use +/// [`io::Result`] instead since the error type in this package will +/// automatically convert to an [`io::Result`] when using the [`try!`] macro. +/// +/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html +/// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html +pub type Result = result::Result; + /// An error produced by recursively walking a directory. /// /// This error type is a light wrapper around [`std::io::Error`]. In @@ -98,7 +112,7 @@ impl Error { /// /// # Example /// - /// ```rust,no-run + /// ```rust,no_run /// use std::io; /// use std::path::Path; /// diff --git a/src/lib.rs b/src/lib.rs index b2fb759..e35b922 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,1127 +1,30 @@ /*! -Crate `walkdir` provides an efficient and cross platform implementation -of recursive directory traversal. 
Several options are exposed to control -iteration, such as whether to follow symbolic links (default off), limit the -maximum number of simultaneous open file descriptors and the ability to -efficiently skip descending into directories. - -To use this crate, add `walkdir` as a dependency to your project's -`Cargo.toml`: - -```toml -[dependencies] -walkdir = "2" -``` - -# From the top - -The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values -yielded by the iterator. Finally, the [`Error`] type is a small wrapper around -[`std::io::Error`] with additional information, such as if a loop was detected -while following symbolic links (not enabled by default). - -[`WalkDir`]: struct.WalkDir.html -[`DirEntry`]: struct.DirEntry.html -[`Error`]: struct.Error.html -[`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html - -# Example - -The following code recursively iterates over the directory given and prints -the path for each entry: - -```no_run -use walkdir::WalkDir; -# use walkdir::Error; - -# fn try_main() -> Result<(), Error> { -for entry in WalkDir::new("foo") { - println!("{}", entry?.path().display()); -} -# Ok(()) -# } -``` - -Or, if you'd like to iterate over all entries and ignore any errors that -may arise, use [`filter_map`]. (e.g., This code below will silently skip -directories that the owner of the running process does not have permission to -access.) 
- -```no_run -use walkdir::WalkDir; - -for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) { - println!("{}", entry.path().display()); -} -``` - -[`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map - -# Example: follow symbolic links - -The same code as above, except [`follow_links`] is enabled: - -```no_run -use walkdir::WalkDir; -# use walkdir::Error; - -# fn try_main() -> Result<(), Error> { -for entry in WalkDir::new("foo").follow_links(true) { - println!("{}", entry?.path().display()); -} -# Ok(()) -# } -``` - -[`follow_links`]: struct.WalkDir.html#method.follow_links - -# Example: skip hidden files and directories on unix - -This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files -and directories efficiently (i.e. without recursing into hidden directories): - -```no_run -use walkdir::{DirEntry, WalkDir}; -# use walkdir::Error; - -fn is_hidden(entry: &DirEntry) -> bool { - entry.file_name() - .to_str() - .map(|s| s.starts_with(".")) - .unwrap_or(false) -} - -# fn try_main() -> Result<(), Error> { -let walker = WalkDir::new("foo").into_iter(); -for entry in walker.filter_entry(|e| !is_hidden(e)) { - println!("{}", entry?.path().display()); -} -# Ok(()) -# } -``` - -[`filter_entry`]: struct.IntoIter.html#method.filter_entry +TODO */ -#![deny(missing_docs)] +// #![deny(missing_docs)] #![allow(unknown_lints)] #![allow(warnings)] #[cfg(test)] doc_comment::doctest!("../README.md"); -use std::cmp::{min, Ordering}; -use std::fmt; -use std::fs::{self, ReadDir}; -use std::io; -use std::path::{Path, PathBuf}; -use std::result; -use std::vec; - -use same_file::Handle; - pub use crate::dent::DirEntry; #[cfg(unix)] pub use crate::dent::DirEntryExt; -pub use crate::error::Error; +pub use crate::error::{Error, Result}; +pub use crate::walk::{FilterEntry, IntoIter, WalkDir}; +#[cfg(not(windows))] +pub use cursor::*; + +#[cfg(not(windows))] +mod cursor; mod dent; +mod dir; mod error; pub mod os; 
#[cfg(test)] mod tests; mod util; - -/// Like try, but for iterators that return [`Option>`]. -/// -/// [`Option>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html -macro_rules! itry { - ($e:expr) => { - match $e { - Ok(v) => v, - Err(err) => return Some(Err(From::from(err))), - } - }; -} - -/// A result type for walkdir operations. -/// -/// Note that this result type embeds the error type in this crate. This -/// is only useful if you care about the additional information provided by -/// the error (such as the path associated with the error or whether a loop -/// was dectected). If you want things to Just Work, then you can use -/// [`io::Result`] instead since the error type in this package will -/// automatically convert to an [`io::Result`] when using the [`try!`] macro. -/// -/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html -/// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html -pub type Result = ::std::result::Result; - -/// A builder to create an iterator for recursively walking a directory. -/// -/// Results are returned in depth first fashion, with directories yielded -/// before their contents. If [`contents_first`] is true, contents are yielded -/// before their directories. The order is unspecified but if [`sort_by`] is -/// given, directory entries are sorted according to this function. Directory -/// entries `.` and `..` are always omitted. -/// -/// If an error occurs at any point during iteration, then it is returned in -/// place of its corresponding directory entry and iteration continues as -/// normal. If an error occurs while opening a directory for reading, then it -/// is not descended into (but the error is still yielded by the iterator). -/// Iteration may be stopped at any time. When the iterator is destroyed, all -/// resources associated with it are freed. 
-/// -/// [`contents_first`]: struct.WalkDir.html#method.contents_first -/// [`sort_by`]: struct.WalkDir.html#method.sort_by -/// -/// # Usage -/// -/// This type implements [`IntoIterator`] so that it may be used as the subject -/// of a `for` loop. You may need to call [`into_iter`] explicitly if you want -/// to use iterator adapters such as [`filter_entry`]. -/// -/// Idiomatic use of this type should use method chaining to set desired -/// options. For example, this only shows entries with a depth of `1`, `2` or -/// `3` (relative to `foo`): -/// -/// ```no_run -/// use walkdir::WalkDir; -/// # use walkdir::Error; -/// -/// # fn try_main() -> Result<(), Error> { -/// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) { -/// println!("{}", entry?.path().display()); -/// } -/// # Ok(()) -/// # } -/// ``` -/// -/// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html -/// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter -/// [`filter_entry`]: struct.IntoIter.html#method.filter_entry -/// -/// Note that the iterator by default includes the top-most directory. Since -/// this is the only directory yielded with depth `0`, it is easy to ignore it -/// with the [`min_depth`] setting: -/// -/// ```no_run -/// use walkdir::WalkDir; -/// # use walkdir::Error; -/// -/// # fn try_main() -> Result<(), Error> { -/// for entry in WalkDir::new("foo").min_depth(1) { -/// println!("{}", entry?.path().display()); -/// } -/// # Ok(()) -/// # } -/// ``` -/// -/// [`min_depth`]: struct.WalkDir.html#method.min_depth -/// -/// This will only return descendents of the `foo` directory and not `foo` -/// itself. -/// -/// # Loops -/// -/// This iterator (like most/all recursive directory iterators) assumes that -/// no loops can be made with *hard* links on your file system. In particular, -/// this would require creating a hard link to a directory such that it creates -/// a loop. 
On most platforms, this operation is illegal. -/// -/// Note that when following symbolic/soft links, loops are detected and an -/// error is reported. -#[derive(Debug)] -pub struct WalkDir { - opts: WalkDirOptions, - root: PathBuf, -} - -struct WalkDirOptions { - follow_links: bool, - max_open: usize, - min_depth: usize, - max_depth: usize, - sorter: Option< - Box< - dyn FnMut(&DirEntry, &DirEntry) -> Ordering - + Send - + Sync - + 'static, - >, - >, - contents_first: bool, - same_file_system: bool, -} - -impl fmt::Debug for WalkDirOptions { - fn fmt( - &self, - f: &mut fmt::Formatter<'_>, - ) -> result::Result<(), fmt::Error> { - let sorter_str = if self.sorter.is_some() { - // FnMut isn't `Debug` - "Some(...)" - } else { - "None" - }; - f.debug_struct("WalkDirOptions") - .field("follow_links", &self.follow_links) - .field("max_open", &self.max_open) - .field("min_depth", &self.min_depth) - .field("max_depth", &self.max_depth) - .field("sorter", &sorter_str) - .field("contents_first", &self.contents_first) - .field("same_file_system", &self.same_file_system) - .finish() - } -} - -impl WalkDir { - /// Create a builder for a recursive directory iterator starting at the - /// file path `root`. If `root` is a directory, then it is the first item - /// yielded by the iterator. If `root` is a file, then it is the first - /// and only item yielded by the iterator. If `root` is a symlink, then it - /// is always followed for the purposes of directory traversal. (A root - /// `DirEntry` still obeys its documentation with respect to symlinks and - /// the `follow_links` setting.) - pub fn new>(root: P) -> Self { - WalkDir { - opts: WalkDirOptions { - follow_links: false, - max_open: 10, - min_depth: 0, - max_depth: ::std::usize::MAX, - sorter: None, - contents_first: false, - same_file_system: false, - }, - root: root.as_ref().to_path_buf(), - } - } - - /// Set the minimum depth of entries yielded by the iterator. 
- /// - /// The smallest depth is `0` and always corresponds to the path given - /// to the `new` function on this type. Its direct descendents have depth - /// `1`, and their descendents have depth `2`, and so on. - pub fn min_depth(mut self, depth: usize) -> Self { - self.opts.min_depth = depth; - if self.opts.min_depth > self.opts.max_depth { - self.opts.min_depth = self.opts.max_depth; - } - self - } - - /// Set the maximum depth of entries yield by the iterator. - /// - /// The smallest depth is `0` and always corresponds to the path given - /// to the `new` function on this type. Its direct descendents have depth - /// `1`, and their descendents have depth `2`, and so on. - /// - /// Note that this will not simply filter the entries of the iterator, but - /// it will actually avoid descending into directories when the depth is - /// exceeded. - pub fn max_depth(mut self, depth: usize) -> Self { - self.opts.max_depth = depth; - if self.opts.max_depth < self.opts.min_depth { - self.opts.max_depth = self.opts.min_depth; - } - self - } - - /// Follow symbolic links. By default, this is disabled. - /// - /// When `yes` is `true`, symbolic links are followed as if they were - /// normal directories and files. If a symbolic link is broken or is - /// involved in a loop, an error is yielded. - /// - /// When enabled, the yielded [`DirEntry`] values represent the target of - /// the link while the path corresponds to the link. See the [`DirEntry`] - /// type for more details. - /// - /// [`DirEntry`]: struct.DirEntry.html - pub fn follow_links(mut self, yes: bool) -> Self { - self.opts.follow_links = yes; - self - } - - /// Set the maximum number of simultaneously open file descriptors used - /// by the iterator. - /// - /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set - /// to `1` automatically. If this is not set, then it defaults to some - /// reasonably low number. 
- /// - /// This setting has no impact on the results yielded by the iterator - /// (even when `n` is `1`). Instead, this setting represents a trade off - /// between scarce resources (file descriptors) and memory. Namely, when - /// the maximum number of file descriptors is reached and a new directory - /// needs to be opened to continue iteration, then a previous directory - /// handle is closed and has its unyielded entries stored in memory. In - /// practice, this is a satisfying trade off because it scales with respect - /// to the *depth* of your file tree. Therefore, low values (even `1`) are - /// acceptable. - /// - /// Note that this value does not impact the number of system calls made by - /// an exhausted iterator. - /// - /// # Platform behavior - /// - /// On Windows, if `follow_links` is enabled, then this limit is not - /// respected. In particular, the maximum number of file descriptors opened - /// is proportional to the depth of the directory tree traversed. - pub fn max_open(mut self, mut n: usize) -> Self { - if n == 0 { - n = 1; - } - self.opts.max_open = n; - self - } - - /// Set a function for sorting directory entries. - /// - /// If a compare function is set, the resulting iterator will return all - /// paths in sorted order. The compare function will be called to compare - /// entries from the same directory. - /// - /// ```rust,no-run - /// use std::cmp; - /// use std::ffi::OsString; - /// use walkdir::WalkDir; - /// - /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name())); - /// ``` - pub fn sort_by(mut self, cmp: F) -> Self - where - F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static, - { - self.opts.sorter = Some(Box::new(cmp)); - self - } - - /// Yield a directory's contents before the directory itself. By default, - /// this is disabled. - /// - /// When `yes` is `false` (as is the default), the directory is yielded - /// before its contents are read. This is useful when, e.g. 
you want to - /// skip processing of some directories. - /// - /// When `yes` is `true`, the iterator yields the contents of a directory - /// before yielding the directory itself. This is useful when, e.g. you - /// want to recursively delete a directory. - /// - /// # Example - /// - /// Assume the following directory tree: - /// - /// ```text - /// foo/ - /// abc/ - /// qrs - /// tuv - /// def/ - /// ``` - /// - /// With contents_first disabled (the default), the following code visits - /// the directory tree in depth-first order: - /// - /// ```no_run - /// use walkdir::WalkDir; - /// - /// for entry in WalkDir::new("foo") { - /// let entry = entry.unwrap(); - /// println!("{}", entry.path().display()); - /// } - /// - /// // foo - /// // foo/abc - /// // foo/abc/qrs - /// // foo/abc/tuv - /// // foo/def - /// ``` - /// - /// With contents_first enabled: - /// - /// ```no_run - /// use walkdir::WalkDir; - /// - /// for entry in WalkDir::new("foo").contents_first(true) { - /// let entry = entry.unwrap(); - /// println!("{}", entry.path().display()); - /// } - /// - /// // foo/abc/qrs - /// // foo/abc/tuv - /// // foo/abc - /// // foo/def - /// // foo - /// ``` - pub fn contents_first(mut self, yes: bool) -> Self { - self.opts.contents_first = yes; - self - } - - /// Do not cross file system boundaries. - /// - /// When this option is enabled, directory traversal will not descend into - /// directories that are on a different file system from the root path. - /// - /// Currently, this option is only supported on Unix and Windows. If this - /// option is used on an unsupported platform, then directory traversal - /// will immediately return an error and will not yield any entries. 
- pub fn same_file_system(mut self, yes: bool) -> Self { - self.opts.same_file_system = yes; - self - } -} - -impl IntoIterator for WalkDir { - type Item = Result; - type IntoIter = IntoIter; - - fn into_iter(self) -> IntoIter { - IntoIter { - opts: self.opts, - start: Some(self.root), - stack_list: vec![], - stack_path: vec![], - oldest_opened: 0, - depth: 0, - deferred_dirs: vec![], - root_device: None, - } - } -} - -/// An iterator for recursively descending into a directory. -/// -/// A value with this type must be constructed with the [`WalkDir`] type, which -/// uses a builder pattern to set options such as min/max depth, max open file -/// descriptors and whether the iterator should follow symbolic links. After -/// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain. -/// -/// The order of elements yielded by this iterator is unspecified. -/// -/// [`WalkDir`]: struct.WalkDir.html -/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v -#[derive(Debug)] -pub struct IntoIter { - /// Options specified in the builder. Depths, max fds, etc. - opts: WalkDirOptions, - /// The start path. - /// - /// This is only `Some(...)` at the beginning. After the first iteration, - /// this is always `None`. - start: Option, - /// A stack of open (up to max fd) or closed handles to directories. - /// An open handle is a plain [`fs::ReadDir`] while a closed handle is - /// a `Vec` corresponding to the as-of-yet consumed entries. - /// - /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html - stack_list: Vec, - /// A stack of file paths. - /// - /// This is *only* used when [`follow_links`] is enabled. In all other - /// cases this stack is empty. - /// - /// [`follow_links`]: struct.WalkDir.html#method.follow_links - stack_path: Vec, - /// An index into `stack_list` that points to the oldest open directory - /// handle. 
If the maximum fd limit is reached and a new directory needs to - /// be read, the handle at this index is closed before the new directory is - /// opened. - oldest_opened: usize, - /// The current depth of iteration (the length of the stack at the - /// beginning of each iteration). - depth: usize, - /// A list of DirEntries corresponding to directories, that are - /// yielded after their contents has been fully yielded. This is only - /// used when `contents_first` is enabled. - deferred_dirs: Vec, - /// The device of the root file path when the first call to `next` was - /// made. - /// - /// If the `same_file_system` option isn't enabled, then this is always - /// `None`. Conversely, if it is enabled, this is always `Some(...)` after - /// handling the root path. - root_device: Option, -} - -/// An ancestor is an item in the directory tree traversed by walkdir, and is -/// used to check for loops in the tree when traversing symlinks. -#[derive(Debug)] -struct Ancestor { - /// The path of this ancestor. - path: PathBuf, - /// An open file to this ancesor. This is only used on Windows where - /// opening a file handle appears to be quite expensive, so we choose to - /// cache it. This comes at the cost of not respecting the file descriptor - /// limit set by the user. - #[cfg(windows)] - handle: Handle, -} - -impl Ancestor { - /// Create a new ancestor from the given directory path. - #[cfg(windows)] - fn new(dent: &DirEntry) -> io::Result { - let handle = Handle::from_path(dent.path())?; - Ok(Ancestor { path: dent.path().to_path_buf(), handle: handle }) - } - - /// Create a new ancestor from the given directory path. - #[cfg(not(windows))] - fn new(dent: &DirEntry) -> io::Result { - Ok(Ancestor { path: dent.path().to_path_buf() }) - } - - /// Returns true if and only if the given open file handle corresponds to - /// the same directory as this ancestor. 
- #[cfg(windows)] - fn is_same(&self, child: &Handle) -> io::Result { - Ok(child == &self.handle) - } - - /// Returns true if and only if the given open file handle corresponds to - /// the same directory as this ancestor. - #[cfg(not(windows))] - fn is_same(&self, child: &Handle) -> io::Result { - Ok(child == &Handle::from_path(&self.path)?) - } -} - -/// A sequence of unconsumed directory entries. -/// -/// This represents the opened or closed state of a directory handle. When -/// open, future entries are read by iterating over the raw `fs::ReadDir`. -/// When closed, all future entries are read into memory. Iteration then -/// proceeds over a [`Vec`]. -/// -/// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html -/// [`Vec`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html -#[derive(Debug)] -enum DirList { - /// An opened handle. - /// - /// This includes the depth of the handle itself. - /// - /// If there was an error with the initial [`fs::read_dir`] call, then it - /// is stored here. (We use an [`Option<...>`] to make yielding the error - /// exactly once simpler.) - /// - /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html - /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html - Opened { depth: usize, it: result::Result> }, - /// A closed handle. - /// - /// All remaining directory entries are read into memory. - Closed(vec::IntoIter>), -} - -impl Iterator for IntoIter { - type Item = Result; - /// Advances the iterator and returns the next value. - /// - /// # Errors - /// - /// If the iterator fails to retrieve the next value, this method returns - /// an error value. The error will be wrapped in an Option::Some. 
- fn next(&mut self) -> Option> { - if let Some(start) = self.start.take() { - if self.opts.same_file_system { - let result = util::device_num(&start) - .map_err(|e| Error::from_path(0, start.clone(), e)); - self.root_device = Some(itry!(result)); - } - let dent = itry!(DirEntry::from_path(0, start, false)); - if let Some(result) = self.handle_entry(dent) { - return Some(result); - } - } - while !self.stack_list.is_empty() { - self.depth = self.stack_list.len(); - if let Some(dentry) = self.get_deferred_dir() { - return Some(Ok(dentry)); - } - if self.depth > self.opts.max_depth { - // If we've exceeded the max depth, pop the current dir - // so that we don't descend. - self.pop(); - continue; - } - // Unwrap is safe here because we've verified above that - // `self.stack_list` is not empty - let next = self - .stack_list - .last_mut() - .expect("BUG: stack should be non-empty") - .next(); - match next { - None => self.pop(), - Some(Err(err)) => return Some(Err(err)), - Some(Ok(dent)) => { - if let Some(result) = self.handle_entry(dent) { - return Some(result); - } - } - } - } - if self.opts.contents_first { - self.depth = self.stack_list.len(); - if let Some(dentry) = self.get_deferred_dir() { - return Some(Ok(dentry)); - } - } - None - } -} - -impl IntoIter { - /// Skips the current directory. - /// - /// This causes the iterator to stop traversing the contents of the least - /// recently yielded directory. This means any remaining entries in that - /// directory will be skipped (including sub-directories). - /// - /// Note that the ergonomics of this method are questionable since it - /// borrows the iterator mutably. Namely, you must write out the looping - /// condition manually. 
For example, to skip hidden entries efficiently on - /// unix systems: - /// - /// ```no_run - /// use walkdir::{DirEntry, WalkDir}; - /// - /// fn is_hidden(entry: &DirEntry) -> bool { - /// entry.file_name() - /// .to_str() - /// .map(|s| s.starts_with(".")) - /// .unwrap_or(false) - /// } - /// - /// let mut it = WalkDir::new("foo").into_iter(); - /// loop { - /// let entry = match it.next() { - /// None => break, - /// Some(Err(err)) => panic!("ERROR: {}", err), - /// Some(Ok(entry)) => entry, - /// }; - /// if is_hidden(&entry) { - /// if entry.file_type().is_dir() { - /// it.skip_current_dir(); - /// } - /// continue; - /// } - /// println!("{}", entry.path().display()); - /// } - /// ``` - /// - /// You may find it more convenient to use the [`filter_entry`] iterator - /// adapter. (See its documentation for the same example functionality as - /// above.) - /// - /// [`filter_entry`]: #method.filter_entry - pub fn skip_current_dir(&mut self) { - if !self.stack_list.is_empty() { - self.pop(); - } - } - - /// Yields only entries which satisfy the given predicate and skips - /// descending into directories that do not satisfy the given predicate. - /// - /// The predicate is applied to all entries. If the predicate is - /// true, iteration carries on as normal. If the predicate is false, the - /// entry is ignored and if it is a directory, it is not descended into. - /// - /// This is often more convenient to use than [`skip_current_dir`]. 
For - /// example, to skip hidden files and directories efficiently on unix - /// systems: - /// - /// ```no_run - /// use walkdir::{DirEntry, WalkDir}; - /// # use walkdir::Error; - /// - /// fn is_hidden(entry: &DirEntry) -> bool { - /// entry.file_name() - /// .to_str() - /// .map(|s| s.starts_with(".")) - /// .unwrap_or(false) - /// } - /// - /// # fn try_main() -> Result<(), Error> { - /// for entry in WalkDir::new("foo") - /// .into_iter() - /// .filter_entry(|e| !is_hidden(e)) { - /// println!("{}", entry?.path().display()); - /// } - /// # Ok(()) - /// # } - /// ``` - /// - /// Note that the iterator will still yield errors for reading entries that - /// may not satisfy the predicate. - /// - /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not - /// passed to this predicate. - /// - /// Note that if the iterator has `contents_first` enabled, then this - /// method is no different than calling the standard `Iterator::filter` - /// method (because directory entries are yielded after they've been - /// descended into). - /// - /// [`skip_current_dir`]: #method.skip_current_dir - /// [`min_depth`]: struct.WalkDir.html#method.min_depth - /// [`max_depth`]: struct.WalkDir.html#method.max_depth - pub fn filter_entry

(self, predicate: P) -> FilterEntry - where - P: FnMut(&DirEntry) -> bool, - { - FilterEntry { it: self, predicate: predicate } - } - - fn handle_entry( - &mut self, - mut dent: DirEntry, - ) -> Option> { - if self.opts.follow_links && dent.file_type().is_symlink() { - dent = itry!(self.follow(dent)); - } - let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir(); - if is_normal_dir { - if self.opts.same_file_system && dent.depth() > 0 { - if itry!(self.is_same_file_system(&dent)) { - itry!(self.push(&dent)); - } - } else { - itry!(self.push(&dent)); - } - } else if dent.depth() == 0 && dent.file_type().is_symlink() { - // As a special case, if we are processing a root entry, then we - // always follow it even if it's a symlink and follow_links is - // false. We are careful to not let this change the semantics of - // the DirEntry however. Namely, the DirEntry should still respect - // the follow_links setting. When it's disabled, it should report - // itself as a symlink. When it's enabled, it should always report - // itself as the target. - let md = itry!(fs::metadata(dent.path()).map_err(|err| { - Error::from_path(dent.depth(), dent.path().to_path_buf(), err) - })); - if md.file_type().is_dir() { - itry!(self.push(&dent)); - } - } - if is_normal_dir && self.opts.contents_first { - self.deferred_dirs.push(dent); - None - } else if self.skippable() { - None - } else { - Some(Ok(dent)) - } - } - - fn get_deferred_dir(&mut self) -> Option { - if self.opts.contents_first { - if self.depth < self.deferred_dirs.len() { - // Unwrap is safe here because we've guaranteed that - // `self.deferred_dirs.len()` can never be less than 1 - let deferred: DirEntry = self - .deferred_dirs - .pop() - .expect("BUG: deferred_dirs should be non-empty"); - if !self.skippable() { - return Some(deferred); - } - } - } - None - } - - fn push(&mut self, dent: &DirEntry) -> Result<()> { - // Make room for another open file descriptor if we've hit the max. 
- let free = - self.stack_list.len().checked_sub(self.oldest_opened).unwrap(); - if free == self.opts.max_open { - self.stack_list[self.oldest_opened].close(); - } - // Open a handle to reading the directory's entries. - let rd = fs::read_dir(dent.path()).map_err(|err| { - Some(Error::from_path(self.depth, dent.path().to_path_buf(), err)) - }); - let mut list = DirList::Opened { depth: self.depth, it: rd }; - if let Some(ref mut cmp) = self.opts.sorter { - let mut entries: Vec<_> = list.collect(); - entries.sort_by(|a, b| match (a, b) { - (&Ok(ref a), &Ok(ref b)) => cmp(a, b), - (&Err(_), &Err(_)) => Ordering::Equal, - (&Ok(_), &Err(_)) => Ordering::Greater, - (&Err(_), &Ok(_)) => Ordering::Less, - }); - list = DirList::Closed(entries.into_iter()); - } - if self.opts.follow_links { - let ancestor = Ancestor::new(&dent) - .map_err(|err| Error::from_io(self.depth, err))?; - self.stack_path.push(ancestor); - } - // We push this after stack_path since creating the Ancestor can fail. - // If it fails, then we return the error and won't descend. - self.stack_list.push(list); - // If we had to close out a previous directory stream, then we need to - // increment our index the oldest still-open stream. We do this only - // after adding to our stack, in order to ensure that the oldest_opened - // index remains valid. The worst that can happen is that an already - // closed stream will be closed again, which is a no-op. - // - // We could move the close of the stream above into this if-body, but - // then we would have more than the maximum number of file descriptors - // open at a particular point in time. - if free == self.opts.max_open { - // Unwrap is safe here because self.oldest_opened is guaranteed to - // never be greater than `self.stack_list.len()`, which implies - // that the subtraction won't underflow and that adding 1 will - // never overflow. 
- self.oldest_opened = self.oldest_opened.checked_add(1).unwrap(); - } - Ok(()) - } - - fn pop(&mut self) { - self.stack_list.pop().expect("BUG: cannot pop from empty stack"); - if self.opts.follow_links { - self.stack_path.pop().expect("BUG: list/path stacks out of sync"); - } - // If everything in the stack is already closed, then there is - // room for at least one more open descriptor and it will - // always be at the top of the stack. - self.oldest_opened = min(self.oldest_opened, self.stack_list.len()); - } - - fn follow(&self, mut dent: DirEntry) -> Result { - dent = - DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?; - // The only way a symlink can cause a loop is if it points - // to a directory. Otherwise, it always points to a leaf - // and we can omit any loop checks. - if dent.is_dir() { - self.check_loop(dent.path())?; - } - Ok(dent) - } - - fn check_loop>(&self, child: P) -> Result<()> { - let hchild = Handle::from_path(&child) - .map_err(|err| Error::from_io(self.depth, err))?; - for ancestor in self.stack_path.iter().rev() { - let is_same = ancestor - .is_same(&hchild) - .map_err(|err| Error::from_io(self.depth, err))?; - if is_same { - return Err(Error::from_loop( - self.depth, - &ancestor.path, - child.as_ref(), - )); - } - } - Ok(()) - } - - fn is_same_file_system(&mut self, dent: &DirEntry) -> Result { - let dent_device = util::device_num(dent.path()) - .map_err(|err| Error::from_entry(dent, err))?; - Ok(self - .root_device - .map(|d| d == dent_device) - .expect("BUG: called is_same_file_system without root device")) - } - - fn skippable(&self) -> bool { - self.depth < self.opts.min_depth || self.depth > self.opts.max_depth - } -} - -impl DirList { - fn close(&mut self) { - if let DirList::Opened { .. 
} = *self { - *self = DirList::Closed(self.collect::>().into_iter()); - } - } -} - -impl Iterator for DirList { - type Item = Result; - - #[inline(always)] - fn next(&mut self) -> Option> { - match *self { - DirList::Closed(ref mut it) => it.next(), - DirList::Opened { depth, ref mut it } => match *it { - Err(ref mut err) => err.take().map(Err), - Ok(ref mut rd) => rd.next().map(|r| match r { - Ok(r) => DirEntry::from_entry(depth + 1, &r), - Err(err) => Err(Error::from_io(depth + 1, err)), - }), - }, - } - } -} - -/// A recursive directory iterator that skips entries. -/// -/// Values of this type are created by calling [`.filter_entry()`] on an -/// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`. -/// -/// Directories that fail the predicate `P` are skipped. Namely, they are -/// never yielded and never descended into. -/// -/// Entries that are skipped with the [`min_depth`] and [`max_depth`] options -/// are not passed through this filter. -/// -/// If opening a handle to a directory resulted in an error, then it is yielded -/// and no corresponding call to the predicate is made. -/// -/// Type parameter `I` refers to the underlying iterator and `P` refers to the -/// predicate, which is usually `FnMut(&DirEntry) -> bool`. -/// -/// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry -/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v -/// [`min_depth`]: struct.WalkDir.html#method.min_depth -/// [`max_depth`]: struct.WalkDir.html#method.max_depth -#[derive(Debug)] -pub struct FilterEntry { - it: I, - predicate: P, -} - -impl

Iterator for FilterEntry -where - P: FnMut(&DirEntry) -> bool, -{ - type Item = Result; - - /// Advances the iterator and returns the next value. - /// - /// # Errors - /// - /// If the iterator fails to retrieve the next value, this method returns - /// an error value. The error will be wrapped in an `Option::Some`. - fn next(&mut self) -> Option> { - loop { - let dent = match self.it.next() { - None => return None, - Some(result) => itry!(result), - }; - if !(self.predicate)(&dent) { - if dent.is_dir() { - self.it.skip_current_dir(); - } - continue; - } - return Some(Ok(dent)); - } - } -} - -impl

FilterEntry -where - P: FnMut(&DirEntry) -> bool, -{ - /// Yields only entries which satisfy the given predicate and skips - /// descending into directories that do not satisfy the given predicate. - /// - /// The predicate is applied to all entries. If the predicate is - /// true, iteration carries on as normal. If the predicate is false, the - /// entry is ignored and if it is a directory, it is not descended into. - /// - /// This is often more convenient to use than [`skip_current_dir`]. For - /// example, to skip hidden files and directories efficiently on unix - /// systems: - /// - /// ```no_run - /// use walkdir::{DirEntry, WalkDir}; - /// # use walkdir::Error; - /// - /// fn is_hidden(entry: &DirEntry) -> bool { - /// entry.file_name() - /// .to_str() - /// .map(|s| s.starts_with(".")) - /// .unwrap_or(false) - /// } - /// - /// # fn try_main() -> Result<(), Error> { - /// for entry in WalkDir::new("foo") - /// .into_iter() - /// .filter_entry(|e| !is_hidden(e)) { - /// println!("{}", entry?.path().display()); - /// } - /// # Ok(()) - /// # } - /// ``` - /// - /// Note that the iterator will still yield errors for reading entries that - /// may not satisfy the predicate. - /// - /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not - /// passed to this predicate. - /// - /// Note that if the iterator has `contents_first` enabled, then this - /// method is no different than calling the standard `Iterator::filter` - /// method (because directory entries are yielded after they've been - /// descended into). - /// - /// [`skip_current_dir`]: #method.skip_current_dir - /// [`min_depth`]: struct.WalkDir.html#method.min_depth - /// [`max_depth`]: struct.WalkDir.html#method.max_depth - pub fn filter_entry(self, predicate: P) -> FilterEntry { - FilterEntry { it: self, predicate: predicate } - } - - /// Skips the current directory. - /// - /// This causes the iterator to stop traversing the contents of the least - /// recently yielded directory. 
This means any remaining entries in that - /// directory will be skipped (including sub-directories). - /// - /// Note that the ergonomics of this method are questionable since it - /// borrows the iterator mutably. Namely, you must write out the looping - /// condition manually. For example, to skip hidden entries efficiently on - /// unix systems: - /// - /// ```no_run - /// use walkdir::{DirEntry, WalkDir}; - /// - /// fn is_hidden(entry: &DirEntry) -> bool { - /// entry.file_name() - /// .to_str() - /// .map(|s| s.starts_with(".")) - /// .unwrap_or(false) - /// } - /// - /// let mut it = WalkDir::new("foo").into_iter(); - /// loop { - /// let entry = match it.next() { - /// None => break, - /// Some(Err(err)) => panic!("ERROR: {}", err), - /// Some(Ok(entry)) => entry, - /// }; - /// if is_hidden(&entry) { - /// if entry.file_type().is_dir() { - /// it.skip_current_dir(); - /// } - /// continue; - /// } - /// println!("{}", entry.path().display()); - /// } - /// ``` - /// - /// You may find it more convenient to use the [`filter_entry`] iterator - /// adapter. (See its documentation for the same example functionality as - /// above.) - /// - /// [`filter_entry`]: #method.filter_entry - pub fn skip_current_dir(&mut self) { - self.it.skip_current_dir(); - } -} +mod walk; diff --git a/src/oldlib.rs b/src/oldlib.rs new file mode 100644 index 0000000..711eed9 --- /dev/null +++ b/src/oldlib.rs @@ -0,0 +1,1123 @@ +/*! +Crate `walkdir` provides an efficient and cross platform implementation +of recursive directory traversal. Several options are exposed to control +iteration, such as whether to follow symbolic links (default off), limit the +maximum number of simultaneous open file descriptors and the ability to +efficiently skip descending into directories. + +To use this crate, add `walkdir` as a dependency to your project's +`Cargo.toml`: + +```toml +[dependencies] +walkdir = "2" +``` + +# From the top + +The [`WalkDir`] type builds iterators. 
The [`DirEntry`] type describes values +yielded by the iterator. Finally, the [`Error`] type is a small wrapper around +[`std::io::Error`] with additional information, such as if a loop was detected +while following symbolic links (not enabled by default). + +[`WalkDir`]: struct.WalkDir.html +[`DirEntry`]: struct.DirEntry.html +[`Error`]: struct.Error.html +[`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html + +# Example + +The following code recursively iterates over the directory given and prints +the path for each entry: + +```no_run +use walkdir::WalkDir; +# use walkdir::Error; + +# fn try_main() -> Result<(), Error> { +for entry in WalkDir::new("foo") { + println!("{}", entry?.path().display()); +} +# Ok(()) +# } +``` + +Or, if you'd like to iterate over all entries and ignore any errors that +may arise, use [`filter_map`]. (e.g., This code below will silently skip +directories that the owner of the running process does not have permission to +access.) + +```no_run +use walkdir::WalkDir; + +for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) { + println!("{}", entry.path().display()); +} +``` + +[`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map + +# Example: follow symbolic links + +The same code as above, except [`follow_links`] is enabled: + +```no_run +use walkdir::WalkDir; +# use walkdir::Error; + +# fn try_main() -> Result<(), Error> { +for entry in WalkDir::new("foo").follow_links(true) { + println!("{}", entry?.path().display()); +} +# Ok(()) +# } +``` + +[`follow_links`]: struct.WalkDir.html#method.follow_links + +# Example: skip hidden files and directories on unix + +This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files +and directories efficiently (i.e. 
without recursing into hidden directories): + +```no_run +use walkdir::{DirEntry, WalkDir}; +# use walkdir::Error; + +fn is_hidden(entry: &DirEntry) -> bool { + entry.file_name() + .to_str() + .map(|s| s.starts_with(".")) + .unwrap_or(false) +} + +# fn try_main() -> Result<(), Error> { +let walker = WalkDir::new("foo").into_iter(); +for entry in walker.filter_entry(|e| !is_hidden(e)) { + println!("{}", entry?.path().display()); +} +# Ok(()) +# } +``` + +[`filter_entry`]: struct.IntoIter.html#method.filter_entry +*/ + +#![deny(missing_docs)] +#![allow(warnings)] + +#[cfg(test)] +#[macro_use] +extern crate doc_comment; +#[cfg(unix)] +extern crate libc; +extern crate same_file; +#[cfg(windows)] +extern crate winapi; +#[cfg(windows)] +extern crate winapi_util; + +#[cfg(test)] +doctest!("../README.md"); + +use std::cmp::{Ordering, min}; +use std::fmt; +use std::fs::{self, FileType, ReadDir}; +use std::io; +use std::ffi::OsStr; +use std::path::{Path, PathBuf}; +use std::result; +use std::vec; + +use same_file::Handle; + +pub use dent::DirEntry; +#[cfg(unix)] +pub use dent::DirEntryExt; +pub use error::Error; + +mod dent; +mod error; +pub mod os; +#[cfg(test)] +mod tests; +mod util; + +/// Like try, but for iterators that return [`Option<Result<_, _>>`]. +/// +/// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html +macro_rules! itry { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(err) => return Some(Err(From::from(err))), + } + } +} + +/// A result type for walkdir operations. +/// +/// Note that this result type embeds the error type in this crate. This +/// is only useful if you care about the additional information provided by +/// the error (such as the path associated with the error or whether a loop +/// was detected). If you want things to Just Work, then you can use +/// [`io::Result`] instead since the error type in this package will +/// automatically convert to an [`io::Result`] when using the [`try!`] macro.
+/// +/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html +/// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html +pub type Result = ::std::result::Result; + +/// A builder to create an iterator for recursively walking a directory. +/// +/// Results are returned in depth first fashion, with directories yielded +/// before their contents. If [`contents_first`] is true, contents are yielded +/// before their directories. The order is unspecified but if [`sort_by`] is +/// given, directory entries are sorted according to this function. Directory +/// entries `.` and `..` are always omitted. +/// +/// If an error occurs at any point during iteration, then it is returned in +/// place of its corresponding directory entry and iteration continues as +/// normal. If an error occurs while opening a directory for reading, then it +/// is not descended into (but the error is still yielded by the iterator). +/// Iteration may be stopped at any time. When the iterator is destroyed, all +/// resources associated with it are freed. +/// +/// [`contents_first`]: struct.WalkDir.html#method.contents_first +/// [`sort_by`]: struct.WalkDir.html#method.sort_by +/// +/// # Usage +/// +/// This type implements [`IntoIterator`] so that it may be used as the subject +/// of a `for` loop. You may need to call [`into_iter`] explicitly if you want +/// to use iterator adapters such as [`filter_entry`]. +/// +/// Idiomatic use of this type should use method chaining to set desired +/// options. 
For example, this only shows entries with a depth of `1`, `2` or +/// `3` (relative to `foo`): +/// +/// ```no_run +/// use walkdir::WalkDir; +/// # use walkdir::Error; +/// +/// # fn try_main() -> Result<(), Error> { +/// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) { +/// println!("{}", entry?.path().display()); +/// } +/// # Ok(()) +/// # } +/// ``` +/// +/// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html +/// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter +/// [`filter_entry`]: struct.IntoIter.html#method.filter_entry +/// +/// Note that the iterator by default includes the top-most directory. Since +/// this is the only directory yielded with depth `0`, it is easy to ignore it +/// with the [`min_depth`] setting: +/// +/// ```no_run +/// use walkdir::WalkDir; +/// # use walkdir::Error; +/// +/// # fn try_main() -> Result<(), Error> { +/// for entry in WalkDir::new("foo").min_depth(1) { +/// println!("{}", entry?.path().display()); +/// } +/// # Ok(()) +/// # } +/// ``` +/// +/// [`min_depth`]: struct.WalkDir.html#method.min_depth +/// +/// This will only return descendents of the `foo` directory and not `foo` +/// itself. +/// +/// # Loops +/// +/// This iterator (like most/all recursive directory iterators) assumes that +/// no loops can be made with *hard* links on your file system. In particular, +/// this would require creating a hard link to a directory such that it creates +/// a loop. On most platforms, this operation is illegal. +/// +/// Note that when following symbolic/soft links, loops are detected and an +/// error is reported. 
+#[derive(Debug)] +pub struct WalkDir { + opts: WalkDirOptions, + root: PathBuf, +} + +struct WalkDirOptions { + follow_links: bool, + max_open: usize, + min_depth: usize, + max_depth: usize, + sorter: Option Ordering + Send + Sync + 'static + >>, + contents_first: bool, + same_file_system: bool, +} + +impl fmt::Debug for WalkDirOptions { + fn fmt(&self, f: &mut fmt::Formatter) -> result::Result<(), fmt::Error> { + let sorter_str = if self.sorter.is_some() { + // FnMut isn't `Debug` + "Some(...)" + } else { + "None" + }; + f.debug_struct("WalkDirOptions") + .field("follow_links", &self.follow_links) + .field("max_open", &self.max_open) + .field("min_depth", &self.min_depth) + .field("max_depth", &self.max_depth) + .field("sorter", &sorter_str) + .field("contents_first", &self.contents_first) + .field("same_file_system", &self.same_file_system) + .finish() + } +} + +impl WalkDir { + /// Create a builder for a recursive directory iterator starting at the + /// file path `root`. If `root` is a directory, then it is the first item + /// yielded by the iterator. If `root` is a file, then it is the first + /// and only item yielded by the iterator. If `root` is a symlink, then it + /// is always followed for the purposes of directory traversal. (A root + /// `DirEntry` still obeys its documentation with respect to symlinks and + /// the `follow_links` setting.) + pub fn new>(root: P) -> Self { + WalkDir { + opts: WalkDirOptions { + follow_links: false, + max_open: 10, + min_depth: 0, + max_depth: ::std::usize::MAX, + sorter: None, + contents_first: false, + same_file_system: false, + }, + root: root.as_ref().to_path_buf(), + } + } + + /// Set the minimum depth of entries yielded by the iterator. + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on this type. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. 
+ pub fn min_depth(mut self, depth: usize) -> Self { + self.opts.min_depth = depth; + if self.opts.min_depth > self.opts.max_depth { + self.opts.min_depth = self.opts.max_depth; + } + self + } + + /// Set the maximum depth of entries yielded by the iterator. + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on this type. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. + /// + /// Note that this will not simply filter the entries of the iterator, but + /// it will actually avoid descending into directories when the depth is + /// exceeded. + /// + /// If `depth` is smaller than the current minimum depth, then the maximum + /// depth is set to the minimum depth instead. + pub fn max_depth(mut self, depth: usize) -> Self { + self.opts.max_depth = depth; + if self.opts.max_depth < self.opts.min_depth { + self.opts.max_depth = self.opts.min_depth; + } + self + } + + /// Follow symbolic links. By default, this is disabled. + /// + /// When `yes` is `true`, symbolic links are followed as if they were + /// normal directories and files. If a symbolic link is broken or is + /// involved in a loop, an error is yielded. + /// + /// When enabled, the yielded [`DirEntry`] values represent the target of + /// the link while the path corresponds to the link. See the [`DirEntry`] + /// type for more details. + /// + /// [`DirEntry`]: struct.DirEntry.html + pub fn follow_links(mut self, yes: bool) -> Self { + self.opts.follow_links = yes; + self + } + + /// Set the maximum number of simultaneously open file descriptors used + /// by the iterator. + /// + /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set + /// to `1` automatically. If this is not set, then it defaults to some + /// reasonably low number. + /// + /// This setting has no impact on the results yielded by the iterator + /// (even when `n` is `1`). Instead, this setting represents a trade off + /// between scarce resources (file descriptors) and memory.
Namely, when + /// the maximum number of file descriptors is reached and a new directory + /// needs to be opened to continue iteration, then a previous directory + /// handle is closed and has its unyielded entries stored in memory. In + /// practice, this is a satisfying trade off because it scales with respect + /// to the *depth* of your file tree. Therefore, low values (even `1`) are + /// acceptable. + /// + /// Note that this value does not impact the number of system calls made by + /// an exhausted iterator. + /// + /// # Platform behavior + /// + /// On Windows, if `follow_links` is enabled, then this limit is not + /// respected. In particular, the maximum number of file descriptors opened + /// is proportional to the depth of the directory tree traversed. + pub fn max_open(mut self, mut n: usize) -> Self { + if n == 0 { + n = 1; + } + self.opts.max_open = n; + self + } + + /// Set a function for sorting directory entries. + /// + /// If a compare function is set, the resulting iterator will return all + /// paths in sorted order. The compare function will be called to compare + /// entries from the same directory. + /// + /// ```rust,no_run + /// use std::cmp; + /// use std::ffi::OsString; + /// use walkdir::WalkDir; + /// + /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name())); + /// ``` + pub fn sort_by(mut self, cmp: F) -> Self + where F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static + { + self.opts.sorter = Some(Box::new(cmp)); + self + } + + /// Yield a directory's contents before the directory itself. By default, + /// this is disabled. + /// + /// When `yes` is `false` (as is the default), the directory is yielded + /// before its contents are read. This is useful when, e.g. you want to + /// skip processing of some directories. + /// + /// When `yes` is `true`, the iterator yields the contents of a directory + /// before yielding the directory itself. This is useful when, e.g.
you + /// want to recursively delete a directory. + /// + /// # Example + /// + /// Assume the following directory tree: + /// + /// ```text + /// foo/ + /// abc/ + /// qrs + /// tuv + /// def/ + /// ``` + /// + /// With contents_first disabled (the default), the following code visits + /// the directory tree in depth-first order: + /// + /// ```no_run + /// use walkdir::WalkDir; + /// + /// for entry in WalkDir::new("foo") { + /// let entry = entry.unwrap(); + /// println!("{}", entry.path().display()); + /// } + /// + /// // foo + /// // foo/abc + /// // foo/abc/qrs + /// // foo/abc/tuv + /// // foo/def + /// ``` + /// + /// With contents_first enabled: + /// + /// ```no_run + /// use walkdir::WalkDir; + /// + /// for entry in WalkDir::new("foo").contents_first(true) { + /// let entry = entry.unwrap(); + /// println!("{}", entry.path().display()); + /// } + /// + /// // foo/abc/qrs + /// // foo/abc/tuv + /// // foo/abc + /// // foo/def + /// // foo + /// ``` + pub fn contents_first(mut self, yes: bool) -> Self { + self.opts.contents_first = yes; + self + } + + /// Do not cross file system boundaries. + /// + /// When this option is enabled, directory traversal will not descend into + /// directories that are on a different file system from the root path. + /// + /// Currently, this option is only supported on Unix and Windows. If this + /// option is used on an unsupported platform, then directory traversal + /// will immediately return an error and will not yield any entries. + pub fn same_file_system(mut self, yes: bool) -> Self { + self.opts.same_file_system = yes; + self + } +} + +impl IntoIterator for WalkDir { + type Item = Result; + type IntoIter = IntoIter; + + fn into_iter(self) -> IntoIter { + IntoIter { + opts: self.opts, + start: Some(self.root), + stack_list: vec![], + stack_path: vec![], + oldest_opened: 0, + depth: 0, + deferred_dirs: vec![], + root_device: None, + } + } +} + +/// An iterator for recursively descending into a directory. 
+/// +/// A value with this type must be constructed with the [`WalkDir`] type, which +/// uses a builder pattern to set options such as min/max depth, max open file +/// descriptors and whether the iterator should follow symbolic links. After +/// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain. +/// +/// The order of elements yielded by this iterator is unspecified. +/// +/// [`WalkDir`]: struct.WalkDir.html +/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v +#[derive(Debug)] +pub struct IntoIter { + /// Options specified in the builder. Depths, max fds, etc. + opts: WalkDirOptions, + /// The start path. + /// + /// This is only `Some(...)` at the beginning. After the first iteration, + /// this is always `None`. + start: Option, + /// A stack of open (up to max fd) or closed handles to directories. + /// An open handle is a plain [`fs::ReadDir`] while a closed handle is + /// a `Vec` corresponding to the as-of-yet unconsumed entries. + /// + /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html + stack_list: Vec, + /// A stack of file paths. + /// + /// This is *only* used when [`follow_links`] is enabled. In all other + /// cases this stack is empty. + /// + /// [`follow_links`]: struct.WalkDir.html#method.follow_links + stack_path: Vec, + /// An index into `stack_list` that points to the oldest open directory + /// handle. If the maximum fd limit is reached and a new directory needs to + /// be read, the handle at this index is closed before the new directory is + /// opened. + oldest_opened: usize, + /// The current depth of iteration (the length of the stack at the + /// beginning of each iteration). + depth: usize, + /// A list of DirEntries corresponding to directories, that are + /// yielded after their contents have been fully yielded. This is only + /// used when `contents_first` is enabled. + deferred_dirs: Vec, + /// The device of the root file path when the first call to `next` was + /// made.
+ /// + /// If the `same_file_system` option isn't enabled, then this is always + /// `None`. Conversely, if it is enabled, this is always `Some(...)` after + /// handling the root path. + root_device: Option, +} + +/// An ancestor is an item in the directory tree traversed by walkdir, and is +/// used to check for loops in the tree when traversing symlinks. +#[derive(Debug)] +struct Ancestor { + /// The path of this ancestor. + path: PathBuf, + /// An open file to this ancestor. This is only used on Windows where + /// opening a file handle appears to be quite expensive, so we choose to + /// cache it. This comes at the cost of not respecting the file descriptor + /// limit set by the user. + #[cfg(windows)] + handle: Handle, +} + +impl Ancestor { + /// Create a new ancestor from the given directory path. + #[cfg(windows)] + fn new(dent: &DirEntry) -> io::Result { + let handle = Handle::from_path(dent.path())?; + Ok(Ancestor { + path: dent.path().to_path_buf(), + handle: handle, + }) + } + + /// Create a new ancestor from the given directory path. + #[cfg(not(windows))] + fn new(dent: &DirEntry) -> io::Result { + Ok(Ancestor { path: dent.path().to_path_buf() }) + } + + /// Returns true if and only if the given open file handle corresponds to + /// the same directory as this ancestor. + #[cfg(windows)] + fn is_same(&self, child: &Handle) -> io::Result { + Ok(child == &self.handle) + } + + /// Returns true if and only if the given open file handle corresponds to + /// the same directory as this ancestor. + #[cfg(not(windows))] + fn is_same(&self, child: &Handle) -> io::Result { + Ok(child == &Handle::from_path(&self.path)?) + } +} + +/// A sequence of unconsumed directory entries. +/// +/// This represents the opened or closed state of a directory handle. When +/// open, future entries are read by iterating over the raw `fs::ReadDir`. +/// When closed, all future entries are read into memory. Iteration then +/// proceeds over a [`Vec`].
+/// +/// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html +/// [`Vec`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html +#[derive(Debug)] +enum DirList { + /// An opened handle. + /// + /// This includes the depth of the handle itself. + /// + /// If there was an error with the initial [`fs::read_dir`] call, then it + /// is stored here. (We use an [`Option<...>`] to make yielding the error + /// exactly once simpler.) + /// + /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html + /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html + Opened { depth: usize, it: result::Result> }, + /// A closed handle. + /// + /// All remaining directory entries are read into memory. + Closed(vec::IntoIter>), +} + +impl Iterator for IntoIter { + type Item = Result; + /// Advances the iterator and returns the next value. + /// + /// # Errors + /// + /// If the iterator fails to retrieve the next value, this method returns + /// an error value. The error will be wrapped in an Option::Some. + fn next(&mut self) -> Option> { + if let Some(start) = self.start.take() { + if self.opts.same_file_system { + let result = util::device_num(&start) + .map_err(|e| Error::from_path(0, start.clone(), e)); + self.root_device = Some(itry!(result)); + } + let dent = itry!(DirEntry::from_path(0, start, false)); + if let Some(result) = self.handle_entry(dent) { + return Some(result); + } + } + while !self.stack_list.is_empty() { + self.depth = self.stack_list.len(); + if let Some(dentry) = self.get_deferred_dir() { + return Some(Ok(dentry)); + } + if self.depth > self.opts.max_depth { + // If we've exceeded the max depth, pop the current dir + // so that we don't descend. 
+ self.pop(); + continue; + } + // Unwrap is safe here because we've verified above that + // `self.stack_list` is not empty + let next = self.stack_list + .last_mut() + .expect("BUG: stack should be non-empty") + .next(); + match next { + None => self.pop(), + Some(Err(err)) => return Some(Err(err)), + Some(Ok(dent)) => { + if let Some(result) = self.handle_entry(dent) { + return Some(result); + } + } + } + } + if self.opts.contents_first { + self.depth = self.stack_list.len(); + if let Some(dentry) = self.get_deferred_dir() { + return Some(Ok(dentry)); + } + } + None + } +} + +impl IntoIter { + /// Skips the current directory. + /// + /// This causes the iterator to stop traversing the contents of the least + /// recently yielded directory. This means any remaining entries in that + /// directory will be skipped (including sub-directories). + /// + /// Note that the ergonomics of this method are questionable since it + /// borrows the iterator mutably. Namely, you must write out the looping + /// condition manually. For example, to skip hidden entries efficiently on + /// unix systems: + /// + /// ```no_run + /// use walkdir::{DirEntry, WalkDir}; + /// + /// fn is_hidden(entry: &DirEntry) -> bool { + /// entry.file_name() + /// .to_str() + /// .map(|s| s.starts_with(".")) + /// .unwrap_or(false) + /// } + /// + /// let mut it = WalkDir::new("foo").into_iter(); + /// loop { + /// let entry = match it.next() { + /// None => break, + /// Some(Err(err)) => panic!("ERROR: {}", err), + /// Some(Ok(entry)) => entry, + /// }; + /// if is_hidden(&entry) { + /// if entry.file_type().is_dir() { + /// it.skip_current_dir(); + /// } + /// continue; + /// } + /// println!("{}", entry.path().display()); + /// } + /// ``` + /// + /// You may find it more convenient to use the [`filter_entry`] iterator + /// adapter. (See its documentation for the same example functionality as + /// above.) 
+ /// + /// [`filter_entry`]: #method.filter_entry + pub fn skip_current_dir(&mut self) { + if !self.stack_list.is_empty() { + self.stack_list.pop(); + } + if !self.stack_path.is_empty() { + self.stack_path.pop(); + } + } + + /// Yields only entries which satisfy the given predicate and skips + /// descending into directories that do not satisfy the given predicate. + /// + /// The predicate is applied to all entries. If the predicate is + /// true, iteration carries on as normal. If the predicate is false, the + /// entry is ignored and if it is a directory, it is not descended into. + /// + /// This is often more convenient to use than [`skip_current_dir`]. For + /// example, to skip hidden files and directories efficiently on unix + /// systems: + /// + /// ```no_run + /// use walkdir::{DirEntry, WalkDir}; + /// # use walkdir::Error; + /// + /// fn is_hidden(entry: &DirEntry) -> bool { + /// entry.file_name() + /// .to_str() + /// .map(|s| s.starts_with(".")) + /// .unwrap_or(false) + /// } + /// + /// # fn try_main() -> Result<(), Error> { + /// for entry in WalkDir::new("foo") + /// .into_iter() + /// .filter_entry(|e| !is_hidden(e)) { + /// println!("{}", entry?.path().display()); + /// } + /// # Ok(()) + /// # } + /// ``` + /// + /// Note that the iterator will still yield errors for reading entries that + /// may not satisfy the predicate. + /// + /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not + /// passed to this predicate. + /// + /// Note that if the iterator has `contents_first` enabled, then this + /// method is no different than calling the standard `Iterator::filter` + /// method (because directory entries are yielded after they've been + /// descended into). + /// + /// [`skip_current_dir`]: #method.skip_current_dir + /// [`min_depth`]: struct.WalkDir.html#method.min_depth + /// [`max_depth`]: struct.WalkDir.html#method.max_depth + pub fn filter_entry

(self, predicate: P) -> FilterEntry + where P: FnMut(&DirEntry) -> bool + { + FilterEntry { it: self, predicate: predicate } + } + + fn handle_entry( + &mut self, + mut dent: DirEntry, + ) -> Option> { + if self.opts.follow_links && dent.file_type().is_symlink() { + dent = itry!(self.follow(dent)); + } + let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir(); + if is_normal_dir { + if self.opts.same_file_system && dent.depth() > 0 { + if itry!(self.is_same_file_system(&dent)) { + itry!(self.push(&dent)); + } + } else { + itry!(self.push(&dent)); + } + } else if dent.depth() == 0 && dent.file_type().is_symlink() { + // As a special case, if we are processing a root entry, then we + // always follow it even if it's a symlink and follow_links is + // false. We are careful to not let this change the semantics of + // the DirEntry however. Namely, the DirEntry should still respect + // the follow_links setting. When it's disabled, it should report + // itself as a symlink. When it's enabled, it should always report + // itself as the target. + let md = itry!(fs::metadata(dent.path()).map_err(|err| { + Error::from_path(dent.depth(), dent.path().to_path_buf(), err) + })); + if md.file_type().is_dir() { + itry!(self.push(&dent)); + } + } + if is_normal_dir && self.opts.contents_first { + self.deferred_dirs.push(dent); + None + } else if self.skippable() { + None + } else { + Some(Ok(dent)) + } + } + + fn get_deferred_dir(&mut self) -> Option { + if self.opts.contents_first { + if self.depth < self.deferred_dirs.len() { + // Unwrap is safe here because we've guaranteed that + // `self.deferred_dirs.len()` can never be less than 1 + let deferred: DirEntry = self.deferred_dirs.pop() + .expect("BUG: deferred_dirs should be non-empty"); + if !self.skippable() { + return Some(deferred); + } + } + } + None + } + + fn push(&mut self, dent: &DirEntry) -> Result<()> { + // Make room for another open file descriptor if we've hit the max. 
+ let free = self.stack_list + .len() + .checked_sub(self.oldest_opened).unwrap(); + if free == self.opts.max_open { + self.stack_list[self.oldest_opened].close(); + // Unwrap is safe here because self.oldest_opened is guaranteed to + // never be greater than `self.stack_list.len()`, which implies + // that the subtraction won't underflow and that adding 1 will + // never overflow. + self.oldest_opened = self.oldest_opened.checked_add(1).unwrap(); + } + // Open a handle to reading the directory's entries. + let rd = fs::read_dir(dent.path()).map_err(|err| { + Some(Error::from_path(self.depth, dent.path().to_path_buf(), err)) + }); + let mut list = DirList::Opened { depth: self.depth, it: rd }; + if let Some(ref mut cmp) = self.opts.sorter { + let mut entries: Vec<_> = list.collect(); + entries.sort_by(|a, b| { + match (a, b) { + (&Ok(ref a), &Ok(ref b)) => { + cmp(a, b) + } + (&Err(_), &Err(_)) => Ordering::Equal, + (&Ok(_), &Err(_)) => Ordering::Greater, + (&Err(_), &Ok(_)) => Ordering::Less, + } + }); + list = DirList::Closed(entries.into_iter()); + } + if self.opts.follow_links { + let ancestor = Ancestor::new(&dent).map_err(|err| { + Error::from_io(self.depth, err) + })?; + self.stack_path.push(ancestor); + } + // We push this after stack_path since creating the Ancestor can fail. + // If it fails, then we return the error and won't descend. + self.stack_list.push(list); + Ok(()) + } + + fn pop(&mut self) { + self.stack_list.pop().expect("BUG: cannot pop from empty stack"); + if self.opts.follow_links { + self.stack_path.pop().expect("BUG: list/path stacks out of sync"); + } + // If everything in the stack is already closed, then there is + // room for at least one more open descriptor and it will + // always be at the top of the stack. 
+ self.oldest_opened = min(self.oldest_opened, self.stack_list.len()); + } + + fn follow(&self, mut dent: DirEntry) -> Result { + dent = DirEntry::from_path( + self.depth, + dent.path().to_path_buf(), + true, + )?; + // The only way a symlink can cause a loop is if it points + // to a directory. Otherwise, it always points to a leaf + // and we can omit any loop checks. + if dent.is_dir() { + self.check_loop(dent.path())?; + } + Ok(dent) + } + + fn check_loop>(&self, child: P) -> Result<()> { + let hchild = Handle::from_path(&child).map_err(|err| { + Error::from_io(self.depth, err) + })?; + for ancestor in self.stack_path.iter().rev() { + let is_same = ancestor.is_same(&hchild).map_err(|err| { + Error::from_io(self.depth, err) + })?; + if is_same { + return Err(Error::from_loop( + self.depth, &ancestor.path, child.as_ref(), + )); + } + } + Ok(()) + } + + fn is_same_file_system(&mut self, dent: &DirEntry) -> Result { + let dent_device = util::device_num(dent.path()) + .map_err(|err| Error::from_entry(dent, err))?; + Ok(self.root_device + .map(|d| d == dent_device) + .expect("BUG: called is_same_file_system without root device")) + } + + fn skippable(&self) -> bool { + self.depth < self.opts.min_depth || self.depth > self.opts.max_depth + } +} + +impl DirList { + fn close(&mut self) { + if let DirList::Opened { .. } = *self { + *self = DirList::Closed(self.collect::>().into_iter()); + } + } +} + +impl Iterator for DirList { + type Item = Result; + + #[inline(always)] + fn next(&mut self) -> Option> { + match *self { + DirList::Closed(ref mut it) => it.next(), + DirList::Opened { depth, ref mut it } => match *it { + Err(ref mut err) => err.take().map(Err), + Ok(ref mut rd) => rd.next().map(|r| match r { + Ok(r) => DirEntry::from_entry(depth + 1, &r), + Err(err) => Err(Error::from_io(depth + 1, err)) + }), + } + } + } +} + +/// A recursive directory iterator that skips entries. 
+/// +/// Values of this type are created by calling [`.filter_entry()`] on an +/// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`. +/// +/// Directories that fail the predicate `P` are skipped. Namely, they are +/// never yielded and never descended into. +/// +/// Entries that are skipped with the [`min_depth`] and [`max_depth`] options +/// are not passed through this filter. +/// +/// If opening a handle to a directory resulted in an error, then it is yielded +/// and no corresponding call to the predicate is made. +/// +/// Type parameter `I` refers to the underlying iterator and `P` refers to the +/// predicate, which is usually `FnMut(&DirEntry) -> bool`. +/// +/// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry +/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v +/// [`min_depth`]: struct.WalkDir.html#method.min_depth +/// [`max_depth`]: struct.WalkDir.html#method.max_depth +#[derive(Debug)] +pub struct FilterEntry { + it: I, + predicate: P, +} + +impl

Iterator for FilterEntry +where P: FnMut(&DirEntry) -> bool +{ + type Item = Result; + + /// Advances the iterator and returns the next value. + /// + /// # Errors + /// + /// If the iterator fails to retrieve the next value, this method returns + /// an error value. The error will be wrapped in an `Option::Some`. + fn next(&mut self) -> Option> { + loop { + let dent = match self.it.next() { + None => return None, + Some(result) => itry!(result), + }; + if !(self.predicate)(&dent) { + if dent.is_dir() { + self.it.skip_current_dir(); + } + continue; + } + return Some(Ok(dent)); + } + } +} + +impl

FilterEntry where P: FnMut(&DirEntry) -> bool { + /// Yields only entries which satisfy the given predicate and skips + /// descending into directories that do not satisfy the given predicate. + /// + /// The predicate is applied to all entries. If the predicate is + /// true, iteration carries on as normal. If the predicate is false, the + /// entry is ignored and if it is a directory, it is not descended into. + /// + /// This is often more convenient to use than [`skip_current_dir`]. For + /// example, to skip hidden files and directories efficiently on unix + /// systems: + /// + /// ```no_run + /// use walkdir::{DirEntry, WalkDir}; + /// # use walkdir::Error; + /// + /// fn is_hidden(entry: &DirEntry) -> bool { + /// entry.file_name() + /// .to_str() + /// .map(|s| s.starts_with(".")) + /// .unwrap_or(false) + /// } + /// + /// # fn try_main() -> Result<(), Error> { + /// for entry in WalkDir::new("foo") + /// .into_iter() + /// .filter_entry(|e| !is_hidden(e)) { + /// println!("{}", entry?.path().display()); + /// } + /// # Ok(()) + /// # } + /// ``` + /// + /// Note that the iterator will still yield errors for reading entries that + /// may not satisfy the predicate. + /// + /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not + /// passed to this predicate. + /// + /// Note that if the iterator has `contents_first` enabled, then this + /// method is no different than calling the standard `Iterator::filter` + /// method (because directory entries are yielded after they've been + /// descended into). + /// + /// [`skip_current_dir`]: #method.skip_current_dir + /// [`min_depth`]: struct.WalkDir.html#method.min_depth + /// [`max_depth`]: struct.WalkDir.html#method.max_depth + pub fn filter_entry(self, predicate: P) -> FilterEntry { + FilterEntry { it: self, predicate: predicate } + } + + /// Skips the current directory. + /// + /// This causes the iterator to stop traversing the contents of the least + /// recently yielded directory. 
This means any remaining entries in that + /// directory will be skipped (including sub-directories). + /// + /// Note that the ergonomics of this method are questionable since it + /// borrows the iterator mutably. Namely, you must write out the looping + /// condition manually. For example, to skip hidden entries efficiently on + /// unix systems: + /// + /// ```no_run + /// use walkdir::{DirEntry, WalkDir}; + /// + /// fn is_hidden(entry: &DirEntry) -> bool { + /// entry.file_name() + /// .to_str() + /// .map(|s| s.starts_with(".")) + /// .unwrap_or(false) + /// } + /// + /// let mut it = WalkDir::new("foo").into_iter(); + /// loop { + /// let entry = match it.next() { + /// None => break, + /// Some(Err(err)) => panic!("ERROR: {}", err), + /// Some(Ok(entry)) => entry, + /// }; + /// if is_hidden(&entry) { + /// if entry.file_type().is_dir() { + /// it.skip_current_dir(); + /// } + /// continue; + /// } + /// println!("{}", entry.path().display()); + /// } + /// ``` + /// + /// You may find it more convenient to use the [`filter_entry`] iterator + /// adapter. (See its documentation for the same example functionality as + /// above.) + /// + /// [`filter_entry`]: #method.filter_entry + pub fn skip_current_dir(&mut self) { + self.it.skip_current_dir(); + } +} diff --git a/src/os/linux/mod.rs b/src/os/linux/mod.rs index b6255ad..5021fba 100644 --- a/src/os/linux/mod.rs +++ b/src/os/linux/mod.rs @@ -181,6 +181,8 @@ pub struct DirEntryCursor { capacity: usize, /// The current position of this buffer as a pointer into `raw`. cursor: NonNull, + /// Whether the cursor has been advanced at least once. + advanced: bool, } impl Drop for DirEntryCursor { @@ -236,7 +238,7 @@ impl DirEntryCursor { Some(raw) => raw, None => handle_alloc_error(lay), }; - DirEntryCursor { raw, len: 0, capacity, cursor: raw } + DirEntryCursor { raw, len: 0, capacity, cursor: raw, advanced: false } } /// Read the next directory entry from this cursor. 
If the cursor has been @@ -249,31 +251,72 @@ impl DirEntryCursor { /// /// Note that no filtering of entries (such as `.` and `..`) is performed. pub fn read<'a>(&'a mut self) -> Option> { - if self.cursor.as_ptr() >= self.raw.as_ptr().wrapping_add(self.len) { + if !self.advance() { return None; } - // SAFETY: This is safe by the contract of getdents64. Namely, that it - // writes structures of type `RawDirEntry` to `raw`. The lifetime of - // this raw dirent is also tied to this buffer via the type signature - // of this method, which prevents use-after-free. Moreover, our - // allocation layout guarantees that the cursor is correctly aligned - // for RawDirEntry. - let raw_dirent = - unsafe { &*(self.cursor.as_ptr() as *const RawDirEntry) }; - let ent = DirEntry { - // SAFETY: This is safe since we are asking for the file name on a - // `RawDirEntry` that resides in its original buffer. - file_name: unsafe { raw_dirent.file_name() }, - file_type: raw_dirent.file_type(), - ino: raw_dirent.ino(), - }; + Some(self.current()) + } + + /// Advance this cursor to the next directory entry. If there are no more + /// directory entries to read, then this returns false. + /// + /// Calling `current()` after `advance` is guaranteed to panic when this + /// returns false. Conversely, calling `current()` after `advance` is + /// guaranteed not to panic when this returns true. + pub fn advance(&mut self) -> bool { + if self.is_done() { + return false; + } + if !self.advanced { + self.advanced = true; + return true; + } // SAFETY: This is safe by the assumption that `d_reclen` on the raw // dirent is correct. self.cursor = unsafe { - let next = self.cursor.as_ptr().add(raw_dirent.record_len()); + let raw = self.current_raw(); + let next = self.cursor.as_ptr().add(raw.record_len()); NonNull::new_unchecked(next) }; - Some(ent) + !self.is_done() + } + + /// Return the current directory entry in this cursor. 
+ /// + /// This panics if the cursor has been exhausted, or if the cursor has not + /// yet had `advance` called. + /// + /// Calling `current()` after `advance` is guaranteed to panic when + /// `advance` returns false. Conversely, calling `current()` after `advance` + /// is guaranteed not to panic when `advance` returns true. + pub fn current<'a>(&'a self) -> DirEntry<'a> { + let raw = self.current_raw(); + DirEntry { + // SAFETY: This is safe since we are asking for the file name on a + // `RawDirEntry` that resides in its original buffer. + file_name: unsafe { raw.file_name() }, + file_type: raw.file_type(), + ino: raw.ino(), + } + } + + fn current_raw(&self) -> &RawDirEntry { + assert!(self.advanced); + assert!(!self.is_done()); + // SAFETY: This is safe by the contract of getdents64. Namely, that it + // writes structures of type `RawDirEntry` to `raw`. The lifetime of + // this raw dirent is also tied to this buffer via the type signature + // of this method, which prevents use-after-free. Moreover, our + // allocation layout guarantees that the cursor is correctly aligned + // for RawDirEntry. Finally, we assert that self.cursor has not + // reached the end yet, and since the cursor is only ever incremented + // by correct amounts, we know it points to the beginning of a valid + // directory entry. 
+ unsafe { &*(self.cursor.as_ptr() as *const RawDirEntry) } + } + + fn is_done(&self) -> bool { + self.cursor.as_ptr() >= self.raw.as_ptr().wrapping_add(self.len) } /// Read the next directory entry from this cursor as an owned Unix @@ -311,5 +354,6 @@ impl DirEntryCursor { fn clear(&mut self) { self.cursor = self.raw; self.len = 0; + self.advanced = false; } } diff --git a/src/os/unix/mod.rs b/src/os/unix/mod.rs index 694f5ac..4306090 100644 --- a/src/os/unix/mod.rs +++ b/src/os/unix/mod.rs @@ -36,9 +36,16 @@ use libc::readdir64 as readdir; #[cfg(target_os = "linux")] use crate::os::linux::DirEntry as LinuxDirEntry; use crate::os::unix::dirent::RawDirEntry; +pub use crate::os::unix::rawpath::RawPathBuf; +pub use crate::os::unix::stat::{ + lstat, lstat_c, lstatat, lstatat_c, stat, stat_c, statat, statat_c, + FileType, Metadata, +}; mod dirent; pub(crate) mod errno; +mod rawpath; +mod stat; /// A low-level Unix specific directory entry. /// @@ -250,7 +257,9 @@ impl DirFd { /// This is just like `DirFd::open`, except it accepts a pre-made C string. /// As such, this only returns an error when opening the directory fails. pub fn open_c(dir_path: &CStr) -> io::Result { - let flags = libc::O_RDONLY | libc::O_DIRECTORY | libc::O_CLOEXEC; + let flags = libc::O_RDONLY | libc::O_CLOEXEC; + #[cfg(not(target_os = "solaris"))] + let flags = flags | libc::O_DIRECTORY; // SAFETY: This is safe since we've guaranteed that cstr has no // interior NUL bytes and is terminated by a NUL. let fd = unsafe { libc::open(dir_path.as_ptr(), flags) }; @@ -292,7 +301,9 @@ impl DirFd { parent_dirfd: RawFd, dir_name: &CStr, ) -> io::Result { - let flags = libc::O_RDONLY | libc::O_DIRECTORY | libc::O_CLOEXEC; + let flags = libc::O_RDONLY | libc::O_CLOEXEC; + #[cfg(not(target_os = "solaris"))] + let flags = flags | libc::O_DIRECTORY; // SAFETY: This is safe since we've guaranteed that cstr has no // interior NUL bytes and is terminated by a NUL. 
let fd = @@ -514,107 +525,6 @@ impl Dir { } } -/// One of seven possible file types on Unix. -#[derive(Clone, Copy)] -pub struct FileType(libc::mode_t); - -impl fmt::Debug for FileType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let human = if self.is_file() { - "File" - } else if self.is_dir() { - "Directory" - } else if self.is_symlink() { - "Symbolic Link" - } else if self.is_block_device() { - "Block Device" - } else if self.is_char_device() { - "Char Device" - } else if self.is_fifo() { - "FIFO" - } else if self.is_socket() { - "Socket" - } else { - "Unknown" - }; - write!(f, "FileType({})", human) - } -} - -impl FileType { - /// Create a new file type from a directory entry's type field. - /// - /// If the given type is not recognized or is `DT_UNKNOWN`, then `None` - /// is returned. - pub fn from_dirent_type(d_type: u8) -> Option { - Some(FileType(match d_type { - libc::DT_REG => libc::S_IFREG, - libc::DT_DIR => libc::S_IFDIR, - libc::DT_LNK => libc::S_IFLNK, - libc::DT_BLK => libc::S_IFBLK, - libc::DT_CHR => libc::S_IFCHR, - libc::DT_FIFO => libc::S_IFIFO, - libc::DT_SOCK => libc::S_IFSOCK, - libc::DT_UNKNOWN => return None, - _ => return None, // wat? - })) - } - - /// Create a new file type from a stat's `st_mode` field. - pub fn from_stat_mode(st_mode: u64) -> FileType { - FileType(st_mode as libc::mode_t) - } - - /// Returns true if this file type is a regular file. - /// - /// This corresponds to the `S_IFREG` value on Unix. - pub fn is_file(&self) -> bool { - self.0 & libc::S_IFMT == libc::S_IFREG - } - - /// Returns true if this file type is a directory. - /// - /// This corresponds to the `S_IFDIR` value on Unix. - pub fn is_dir(&self) -> bool { - self.0 & libc::S_IFMT == libc::S_IFDIR - } - - /// Returns true if this file type is a symbolic link. - /// - /// This corresponds to the `S_IFLNK` value on Unix. 
- pub fn is_symlink(&self) -> bool { - self.0 & libc::S_IFMT == libc::S_IFLNK - } - - /// Returns true if this file type is a block device. - /// - /// This corresponds to the `S_IFBLK` value on Unix. - pub fn is_block_device(&self) -> bool { - self.0 & libc::S_IFMT == libc::S_IFBLK - } - - /// Returns true if this file type is a character device. - /// - /// This corresponds to the `S_IFCHR` value on Unix. - pub fn is_char_device(&self) -> bool { - self.0 & libc::S_IFMT == libc::S_IFCHR - } - - /// Returns true if this file type is a FIFO. - /// - /// This corresponds to the `S_IFIFO` value on Unix. - pub fn is_fifo(&self) -> bool { - self.0 & libc::S_IFMT == libc::S_IFIFO - } - - /// Returns true if this file type is a socket. - /// - /// This corresponds to the `S_IFSOCK` value on Unix. - pub fn is_socket(&self) -> bool { - self.0 & libc::S_IFMT == libc::S_IFSOCK - } -} - /// Return a convenience ASCII-only debug representation of the given bytes. /// In essence, non-ASCII and non-printable bytes are escaped. pub(crate) fn escaped_bytes(bytes: &[u8]) -> String { diff --git a/src/os/unix/rawpath.rs b/src/os/unix/rawpath.rs new file mode 100644 index 0000000..170c44c --- /dev/null +++ b/src/os/unix/rawpath.rs @@ -0,0 +1,353 @@ +use std::ffi::{CStr, CString, OsStr, OsString}; +use std::fmt; +use std::os::unix::ffi::{OsStrExt, OsStringExt}; +use std::path::{Path, PathBuf}; + +// Currently, these types are not exported in the public API of this crate, +// even though they (or something like them) are seemingly necessary to +// implement recursive directory traversal without superfluous allocations. +// Figuring out how to expose them is tricky, since invariably, they _aren't_ +// the same type with the same API. So they wind up being a hazard if one +// accidentally tries to treat them as a platform independent type. + +/// A platform dependent representation of a file path. 
+/// +/// Unlike Rust's standard library `PathBuf`, a `RawPathBuf` uses the same +/// in-memory representation of a file path as the platform itself. Moreover, +/// the APIs of each `RawPathBuf` are also platform dependent. For example, +/// on Unix, a `RawPathBuf` can be cheaply converted between types such as +/// `Vec` and `CString`. But on Windows, since its internal representation +/// is a sequence of 16-bit integers, these conversions are not available. +#[derive(Clone)] +pub struct RawPathBuf { + /// Buf always has length at least 1 and always ends with a zero byte. + /// Buf only ever contains exactly 1 zero byte. (i.e., no interior NULs.) + buf: Vec, +} + +impl fmt::Debug for RawPathBuf { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use crate::os::unix::escaped_bytes; + + f.debug_struct("RawPathBuf") + .field("buf", &escaped_bytes(self.as_code_units())) + .finish() + } +} + +impl<'a> From<&'a str> for RawPathBuf { + fn from(s: &'a str) -> RawPathBuf { + RawPathBuf::from(s.to_string()) + } +} + +impl From for RawPathBuf { + fn from(s: String) -> RawPathBuf { + let mut buf = s.into_bytes(); + buf.push(0); + RawPathBuf { buf } + } +} + +impl From for RawPathBuf { + fn from(cstr: CString) -> RawPathBuf { + RawPathBuf { buf: cstr.into_bytes_with_nul() } + } +} + +impl From for CString { + fn from(rawp: RawPathBuf) -> CString { + // SAFETY: Our internal buffer is guaranteed to end with a NUL and have + // no interior NULs. + unsafe { CString::from_vec_unchecked(rawp.buf) } + } +} + +impl From for RawPathBuf { + fn from(osstr: OsString) -> RawPathBuf { + let mut buf = osstr.into_vec(); + buf.push(0); + RawPathBuf { buf } + } +} + +impl From for OsString { + fn from(mut rawp: RawPathBuf) -> OsString { + // SAFETY: We are dropping this raw path and converting it into an + // OS string, which has no NUL terminator. 
+ unsafe { + rawp.drop_nul(); + } + OsString::from_vec(rawp.buf) + } +} + +impl From for RawPathBuf { + fn from(path: PathBuf) -> RawPathBuf { + RawPathBuf::from(path.into_os_string()) + } +} + +impl From for PathBuf { + fn from(rawp: RawPathBuf) -> PathBuf { + PathBuf::from(OsString::from(rawp)) + } +} + +impl RawPathBuf { + /// Returns the code units (bytes) of this path without the NUL terminator. + pub fn as_code_units(&self) -> &[u8] { + &self.buf[..self.buf.len() - 1] + } + + /// Returns this raw path as a C string slice. + pub fn as_cstr(&self) -> &CStr { + // SAFETY: buf is guaranteed to have a NUL terminator with no interior + // NULs. + unsafe { CStr::from_bytes_with_nul_unchecked(&self.buf) } + } + + /// Returns this raw path as a OS string slice. + pub fn as_os_str(&self) -> &OsStr { + OsStr::from_bytes(self.as_code_units()) + } + + /// Return this raw path as a standard library path. + pub fn as_path(&self) -> &Path { + Path::new(self.as_os_str()) + } + + /// Push the given C string slice to the end of this path. + pub fn push_cstr(&mut self, slice: &CStr) { + // SAFETY: The internal buffer is guaranteed to have a NUL byte at + // this point, and we always add it back below via the CStr's NUL + // byte. + unsafe { + self.drop_nul(); + } + self.buf.extend_from_slice(slice.to_bytes_with_nul()); + } + + /// Join the given C string slice to this path in place via a path + /// separator. + /// + /// If this path ends with a `/`, and/or if name starts with a `/`, then + /// only one separator will be used to join them. This otherwise does no + /// other normalization. e.g., joining `a/b//` with `/c` will result in + /// `a/b//c`. + pub fn join(&mut self, name: &CStr) { + // SAFETY: The internal buffer is guaranteed to have a NUL byte at + // this point, and we always add it back below via the CStr's NUL + // byte. 
+ unsafe { + self.drop_nul(); + } + if self.buf.last() != Some(&b'/') { + self.buf.push(b'/'); + } + if name.to_bytes().get(0) == Some(&b'/') { + debug_assert_eq!(self.buf.last(), Some(&b'/')); + self.buf.pop(); + } + self.buf.extend_from_slice(name.to_bytes_with_nul()); + } + + /// Pop the last element in this path. Return true if an element was + /// popped. An element isn't popped if the path is empty or represents + /// a root path. + pub fn pop(&mut self) -> bool { + // Move backwards through the path, finding the first location that + // ends the parent element, if one exists. Basically, we want to + // implement the following regex: + // + // ^.*?(/*[^/]+/*)$ + // + // Where everything in the capturing group is deleted. + + // First, start by skipping through all repeated separators in reverse. + let mut new_len = self.buf.len() - 1; + while new_len > 0 && self.buf[new_len - 1] == b'/' { + new_len -= 1; + } + // The path is either empty, or just made up of separators. + if new_len == 0 { + return false; + } + // Now find either the first preceding / or the beginning. + while new_len > 0 && self.buf[new_len - 1] != b'/' { + new_len -= 1; + } + // And now finally, remove all trailing separators. + // But we're careful not to remove a root slash if it's present. + while new_len > 1 && self.buf[new_len - 1] == b'/' { + new_len -= 1; + } + self.buf[new_len] = 0; + + // SAFETY: This is safe because our buffer contains Copy data and + // `new_len + 1` is guaranteed to be <= the original length of the + // buffer. Therefore, we do not need to worry about uninitialized data. + unsafe { + self.buf.set_len(new_len + 1); + } + true + } + + /// Drop the trailing NUL byte from the internal buffer in place. + /// + /// # Safety + /// + /// This is unsafe to call because it removes the NUL byte from the buffer, + /// which is necessary for safety in many contexts. 
+ /// + /// When callers use this method, they MUST ensure that a NUL byte is + /// added back to the internal buffer before its absence can be observed + /// by callers. + /// + /// Callers must also never call this method if the NUL byte has already + /// been removed. + unsafe fn drop_nul(&mut self) { + // SAFETY: This is safe since the new length is always <= than the + // old length, and thus there are no initialization worries. Moreover, + // since the buffer stores Copy data, there are no leaks. + debug_assert_eq!(*self.buf.last().unwrap(), 0); + self.buf.set_len(self.buf.len() - 1); + } + + /// Add a trailing NUL byte to the internal buffer. + /// + /// # Safety + /// + /// This is unsafe to call because it could create an interior NUL byte + /// if the internal buffer already ends with a NUL byte. Therefore, this + /// must only be called when the caller knows that the buffer does not end + /// with a NUL byte. + unsafe fn add_nul(&mut self) { + debug_assert_ne!(*self.buf.last().unwrap(), 0); + self.buf.push(0); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::CStr; + + fn tostr(p: &RawPathBuf) -> &str { + std::str::from_utf8(p.as_code_units()).unwrap() + } + + fn cstr(s: &str) -> &CStr { + CStr::from_bytes_with_nul(s.as_bytes()).unwrap() + } + + #[test] + fn push1() { + let mut p = RawPathBuf::from("a/b"); + p.join(cstr("c\0")); + assert_eq!("a/b/c", tostr(&p)); + } + + #[test] + fn push2() { + let mut p = RawPathBuf::from("a/b/"); + p.join(cstr("c\0")); + assert_eq!("a/b/c", tostr(&p)); + } + + #[test] + fn push3() { + let mut p = RawPathBuf::from("a/b"); + p.join(cstr("/c\0")); + assert_eq!("a/b/c", tostr(&p)); + } + + #[test] + fn push4() { + let mut p = RawPathBuf::from("a/b/"); + p.join(cstr("/c\0")); + assert_eq!("a/b/c", tostr(&p)); + } + + #[test] + fn push5() { + let mut p = RawPathBuf::from("a/b//"); + p.join(cstr("/c\0")); + assert_eq!("a/b//c", tostr(&p)); + } + + #[test] + fn pop1() { + let mut p = 
RawPathBuf::from("/foo/bar////baz/"); + + assert!(p.pop()); + assert_eq!("/foo/bar", tostr(&p)); + + assert!(p.pop()); + assert_eq!("/foo", tostr(&p)); + + assert!(p.pop()); + assert_eq!("/", tostr(&p)); + + assert!(!p.pop()); + assert_eq!("/", tostr(&p)); + } + + #[test] + fn pop2() { + let mut p = RawPathBuf::from("////foo/"); + + assert!(p.pop()); + assert_eq!("/", tostr(&p)); + + assert!(!p.pop()); + assert_eq!("/", tostr(&p)); + } + + #[test] + fn pop3() { + let mut p = RawPathBuf::from("foo/bar/baz"); + + assert!(p.pop()); + assert_eq!("foo/bar", tostr(&p)); + + assert!(p.pop()); + assert_eq!("foo", tostr(&p)); + + assert!(p.pop()); + assert_eq!("", tostr(&p)); + + assert!(!p.pop()); + assert_eq!("", tostr(&p)); + } + + #[test] + fn pop4() { + let mut p = RawPathBuf::from("////"); + + assert!(!p.pop()); + assert_eq!("////", tostr(&p)); + } + + #[test] + fn pop5() { + let mut p = RawPathBuf::from("////a"); + + assert!(p.pop()); + assert_eq!("/", tostr(&p)); + + assert!(!p.pop()); + assert_eq!("/", tostr(&p)); + } + + #[test] + fn pop6() { + let mut p = RawPathBuf::from("foo"); + + assert!(p.pop()); + assert_eq!("", tostr(&p)); + + assert!(!p.pop()); + assert_eq!("", tostr(&p)); + } +} diff --git a/src/os/unix/stat.rs b/src/os/unix/stat.rs new file mode 100644 index 0000000..a367ed1 --- /dev/null +++ b/src/os/unix/stat.rs @@ -0,0 +1,297 @@ +use std::ffi::{CStr, CString, OsString}; +use std::fmt; +use std::io; +use std::mem; +use std::os::unix::ffi::OsStringExt; +use std::os::unix::io::RawFd; +use std::path::PathBuf; +use std::time::{Duration, SystemTime}; + +use libc; + +#[cfg(not(any(target_os = "linux", target_os = "android",)))] +use libc::{fstatat as fstatat64, lstat as lstat64, stat as stat64}; +#[cfg(any(target_os = "linux", target_os = "android",))] +use libc::{fstatat64, lstat64, stat64}; + +pub struct Metadata { + stat: stat64, +} + +impl Metadata { + pub fn file_type(&self) -> FileType { + FileType::from_stat_mode(self.stat.st_mode as u64) + } + + pub 
fn len(&self) -> u64 { + self.stat.st_size as u64 + } + + pub fn dev(&self) -> u64 { + self.stat.st_dev + } + + pub fn ino(&self) -> u64 { + self.stat.st_ino + } + + pub fn mode(&self) -> u64 { + self.stat.st_mode as u64 + } + + pub fn permissions(&self) -> ! { + unimplemented!() + } +} + +#[cfg(target_os = "netbsd")] +impl Metadata { + pub fn modified(&self) -> io::Result { + let dur = Duration::new( + self.stat.st_mtime as u64, + self.stat.st_mtimensec as u32, + ); + Ok(SystemTime::UNIX_EPOCH + dur) + } + + pub fn accessed(&self) -> io::Result { + let dur = Duration::new( + self.stat.st_atime as u64, + self.stat.st_atimensec as u32, + ); + Ok(SystemTime::UNIX_EPOCH + dur) + } + + pub fn created(&self) -> io::Result { + let dur = Duration::new( + self.stat.st_birthtime as u64, + self.stat.st_birthtimensec as u32, + ); + Ok(SystemTime::UNIX_EPOCH + dur) + } +} + +#[cfg(not(target_os = "netbsd"))] +impl Metadata { + pub fn modified(&self) -> io::Result { + let dur = Duration::new( + self.stat.st_mtime as u64, + self.stat.st_mtime_nsec as u32, + ); + Ok(SystemTime::UNIX_EPOCH + dur) + } + + pub fn accessed(&self) -> io::Result { + let dur = Duration::new( + self.stat.st_atime as u64, + self.stat.st_atime_nsec as u32, + ); + Ok(SystemTime::UNIX_EPOCH + dur) + } + + #[cfg(any( + target_os = "freebsd", + target_os = "openbsd", + target_os = "macos", + target_os = "ios" + ))] + pub fn created(&self) -> io::Result { + let dur = Duration::new( + self.stat.st_birthtime as u64, + self.stat.st_birthtime_nsec as u32, + ); + Ok(SystemTime::UNIX_EPOCH + dur) + } + + #[cfg(not(any( + target_os = "freebsd", + target_os = "openbsd", + target_os = "macos", + target_os = "ios" + )))] + pub fn created(&self) -> io::Result { + Err(io::Error::new( + io::ErrorKind::Other, + "creation time is not available on this platform currently", + )) + } +} + +/// One of seven possible file types on Unix. 
+#[derive(Clone, Copy)] +pub struct FileType(libc::mode_t); + +impl fmt::Debug for FileType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let human = if self.is_file() { + "File" + } else if self.is_dir() { + "Directory" + } else if self.is_symlink() { + "Symbolic Link" + } else if self.is_block_device() { + "Block Device" + } else if self.is_char_device() { + "Char Device" + } else if self.is_fifo() { + "FIFO" + } else if self.is_socket() { + "Socket" + } else { + "Unknown" + }; + write!(f, "FileType({})", human) + } +} + +impl FileType { + /// Create a new file type from a directory entry's type field. + /// + /// If the given type is not recognized or is `DT_UNKNOWN`, then `None` + /// is returned. + pub fn from_dirent_type(d_type: u8) -> Option { + Some(FileType(match d_type { + libc::DT_REG => libc::S_IFREG, + libc::DT_DIR => libc::S_IFDIR, + libc::DT_LNK => libc::S_IFLNK, + libc::DT_BLK => libc::S_IFBLK, + libc::DT_CHR => libc::S_IFCHR, + libc::DT_FIFO => libc::S_IFIFO, + libc::DT_SOCK => libc::S_IFSOCK, + libc::DT_UNKNOWN => return None, + _ => return None, // wat? + })) + } + + /// Create a new file type from a stat's `st_mode` field. + pub fn from_stat_mode(st_mode: u64) -> FileType { + FileType(st_mode as libc::mode_t) + } + + /// Convert this file type to the platform independent file type. + pub fn into_api(self) -> crate::FileType { + crate::FileType::from(self) + } + + /// Returns true if this file type is a regular file. + /// + /// This corresponds to the `S_IFREG` value on Unix. + pub fn is_file(&self) -> bool { + self.0 & libc::S_IFMT == libc::S_IFREG + } + + /// Returns true if this file type is a directory. + /// + /// This corresponds to the `S_IFDIR` value on Unix. + pub fn is_dir(&self) -> bool { + self.0 & libc::S_IFMT == libc::S_IFDIR + } + + /// Returns true if this file type is a symbolic link. + /// + /// This corresponds to the `S_IFLNK` value on Unix. 
+ pub fn is_symlink(&self) -> bool { + self.0 & libc::S_IFMT == libc::S_IFLNK + } + + /// Returns true if this file type is a block device. + /// + /// This corresponds to the `S_IFBLK` value on Unix. + pub fn is_block_device(&self) -> bool { + self.0 & libc::S_IFMT == libc::S_IFBLK + } + + /// Returns true if this file type is a character device. + /// + /// This corresponds to the `S_IFCHR` value on Unix. + pub fn is_char_device(&self) -> bool { + self.0 & libc::S_IFMT == libc::S_IFCHR + } + + /// Returns true if this file type is a FIFO. + /// + /// This corresponds to the `S_IFIFO` value on Unix. + pub fn is_fifo(&self) -> bool { + self.0 & libc::S_IFMT == libc::S_IFIFO + } + + /// Returns true if this file type is a socket. + /// + /// This corresponds to the `S_IFSOCK` value on Unix. + pub fn is_socket(&self) -> bool { + self.0 & libc::S_IFMT == libc::S_IFSOCK + } +} + +pub fn stat>(path: P) -> io::Result { + let bytes = path.into().into_os_string().into_vec(); + stat_c(&CString::new(bytes)?) +} + +pub fn stat_c(path: &CStr) -> io::Result { + let mut stat: stat64 = unsafe { mem::zeroed() }; + let res = unsafe { stat64(path.as_ptr(), &mut stat) }; + if res < 0 { + Err(io::Error::last_os_error()) + } else { + Ok(Metadata { stat }) + } +} + +pub fn lstat>(path: P) -> io::Result { + let bytes = path.into().into_os_string().into_vec(); + lstat_c(&CString::new(bytes)?) +} + +pub fn lstat_c(path: &CStr) -> io::Result { + let mut stat: stat64 = unsafe { mem::zeroed() }; + let res = unsafe { lstat64(path.as_ptr(), &mut stat) }; + if res < 0 { + Err(io::Error::last_os_error()) + } else { + Ok(Metadata { stat }) + } +} + +pub fn statat>( + parent_dirfd: RawFd, + name: N, +) -> io::Result { + let bytes = name.into().into_vec(); + statat_c(parent_dirfd, &CString::new(bytes)?) 
+} + +pub fn statat_c(parent_dirfd: RawFd, name: &CStr) -> io::Result { + let mut stat: stat64 = unsafe { mem::zeroed() }; + let res = unsafe { fstatat64(parent_dirfd, name.as_ptr(), &mut stat, 0) }; + if res < 0 { + Err(io::Error::last_os_error()) + } else { + Ok(Metadata { stat }) + } +} + +pub fn lstatat>( + parent_dirfd: RawFd, + name: N, +) -> io::Result { + let bytes = name.into().into_vec(); + lstatat_c(parent_dirfd, &CString::new(bytes)?) +} + +pub fn lstatat_c(parent_dirfd: RawFd, name: &CStr) -> io::Result { + let mut stat: stat64 = unsafe { mem::zeroed() }; + let res = unsafe { + fstatat64( + parent_dirfd, + name.as_ptr(), + &mut stat, + libc::AT_SYMLINK_NOFOLLOW, + ) + }; + if res < 0 { + Err(io::Error::last_os_error()) + } else { + Ok(Metadata { stat }) + } +} diff --git a/src/os/windows/mod.rs b/src/os/windows/mod.rs index d144f98..3f311f0 100644 --- a/src/os/windows/mod.rs +++ b/src/os/windows/mod.rs @@ -10,6 +10,7 @@ use std::io; use std::mem; use std::os::windows::ffi::{OsStrExt, OsStringExt}; use std::path::Path; +use std::time::{self, SystemTime}; use winapi::shared::minwindef::{DWORD, FILETIME}; use winapi::shared::winerror::ERROR_NO_MORE_FILES; @@ -19,6 +20,11 @@ use winapi::um::handleapi::INVALID_HANDLE_VALUE; use winapi::um::minwinbase::WIN32_FIND_DATAW; use winapi::um::winnt::HANDLE; +pub use crate::os::windows::stat::FileType; + +mod rawpath; +mod stat; + /// A low-level Windows specific directory entry. /// /// This type corresponds as closely as possible to the `WIN32_FIND_DATA` @@ -93,53 +99,67 @@ impl DirEntry { self.attr } - /// Return a 64-bit representation of the creation time of the underlying - /// file. - /// - /// The 64-bit value returned is equivalent to winapi's `FILETIME` - /// structure, which represents the number of 100-nanosecond intervals - /// since January 1, 1601 (UTC). - /// - /// If the underlying file system does not support creation time, then - /// `0` is returned. 
+ /// Returns true if this file is marked as hidden via the + /// `FILE_ATTRIBUTE_HIDDEN` marker. #[inline] - pub fn creation_time(&self) -> u64 { - self.creation_time + pub fn is_hidden(&self) -> bool { + use winapi::um::winnt::FILE_ATTRIBUTE_HIDDEN; + self.file_attributes() & FILE_ATTRIBUTE_HIDDEN != 0 } - /// Return a 64-bit representation of the last access time of the - /// underlying file. + /// Return the creation time of the underlying file as a system time. /// - /// The 64-bit value returned is equivalent to winapi's `FILETIME` - /// structure, which represents the number of 100-nanosecond intervals - /// since January 1, 1601 (UTC). - /// - /// If the underlying file system does not support last access time, then - /// `0` is returned. + /// If the underlying file system does not support creation time, then an + /// error is returned. #[inline] - pub fn last_access_time(&self) -> u64 { - self.last_access_time + pub fn created(&self) -> io::Result { + if self.creation_time == 0 { + Err(io::Error::new( + io::ErrorKind::Other, + "creation time is not available on this platform currently", + )) + } else { + Ok(intervals_to_system_time(self.creation_time)) + } } - /// Return a 64-bit representation of the last write time of the - /// underlying file. + /// Return last access time of the underlying file as a system time. /// - /// The 64-bit value returned is equivalent to winapi's `FILETIME` - /// structure, which represents the number of 100-nanosecond intervals - /// since January 1, 1601 (UTC). + /// If the underlying file system does not support creation time, then an + /// error is returned. + #[inline] + pub fn accessed(&self) -> io::Result { + if self.last_access_time == 0 { + Err(io::Error::new( + io::ErrorKind::Other, + "last access time is not available on this platform currently", + )) + } else { + Ok(intervals_to_system_time(self.last_access_time)) + } + } + + /// Return the last modified time of the underlying file as a system time. 
/// - /// If the underlying file system does not support last write time, then - /// `0` is returned. + /// If the underlying file system does not support creation time, then an + /// error is returned. #[inline] - pub fn last_write_time(&self) -> u64 { - self.last_write_time + pub fn modified(&self) -> io::Result { + if self.last_write_time == 0 { + Err(io::Error::new( + io::ErrorKind::Other, + "last write time is not available on this platform currently", + )) + } else { + Ok(intervals_to_system_time(self.last_write_time)) + } } /// Return the file size, in bytes, of the corresponding file. /// /// This value has no meaning if this entry corresponds to a directory. #[inline] - pub fn file_size(&self) -> u64 { + pub fn len(&self) -> u64 { self.file_size } @@ -180,107 +200,6 @@ impl DirEntry { } } -/// File type information discoverable from the `FindNextFile` winapi routines. -/// -/// Note that this does not include all possible file types on Windows. -/// Instead, this only differentiates between directories, regular files and -/// symlinks. Additional file type information (such as whether a file handle -/// is a socket) can only be retrieved via the `GetFileType` winapi routines. -/// A safe wrapper for it is -/// [available in the `winapi-util` crate](https://docs.rs/winapi-util/*/x86_64-pc-windows-msvc/winapi_util/file/fn.typ.html). -#[derive(Clone, Copy)] -pub struct FileType { - attr: DWORD, - reparse_tag: DWORD, -} - -impl fmt::Debug for FileType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let human = if self.is_file() { - "File" - } else if self.is_dir() { - "Directory" - } else if self.is_symlink_file() { - "Symbolic Link (File)" - } else if self.is_symlink_dir() { - "Symbolic Link (Directory)" - } else { - "Unknown" - }; - write!(f, "FileType({})", human) - } -} - -impl FileType { - /// Create a file type from its raw winapi components. 
- /// - /// `attr` should be a file attribute bitset, corresponding to the - /// `dwFileAttributes` member of file information structs. - /// - /// `reparse_tag` should be a valid reparse tag value when the - /// `FILE_ATTRIBUTE_REPARSE_POINT` bit is set in `attr`. If the bit isn't - /// set or if the tag is not available, then the tag can be any value. - pub fn from_attr(attr: u32, reparse_tag: u32) -> FileType { - FileType { attr: attr, reparse_tag: reparse_tag } - } - - /// Returns true if this file type is a regular file. - /// - /// This corresponds to any file that is neither a symlink nor a directory. - pub fn is_file(&self) -> bool { - !self.is_dir() && !self.is_symlink() - } - - /// Returns true if this file type is a directory. - /// - /// This corresponds to any file that has the `FILE_ATTRIBUTE_DIRECTORY` - /// attribute and is not a symlink. - pub fn is_dir(&self) -> bool { - use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; - - self.attr & FILE_ATTRIBUTE_DIRECTORY != 0 && !self.is_symlink() - } - - /// Returns true if this file type is a symlink. This could be a symlink - /// to a directory or to a file. To distinguish between them, use - /// `is_symlink_file` and `is_symlink_dir`. - /// - /// This corresponds to any file that has a surrogate reparse point. - pub fn is_symlink(&self) -> bool { - use winapi::um::winnt::IsReparseTagNameSurrogate; - - self.reparse_tag().map_or(false, IsReparseTagNameSurrogate) - } - - /// Returns true if this file type is a symlink to a file. - /// - /// This corresponds to any file that has a surrogate reparse point and - /// is not a symlink to a directory. - pub fn is_symlink_file(&self) -> bool { - !self.is_symlink_dir() && self.is_symlink() - } - - /// Returns true if this file type is a symlink to a file. - /// - /// This corresponds to any file that has a surrogate reparse point and has - /// the `FILE_ATTRIBUTE_DIRECTORY` attribute. 
- pub fn is_symlink_dir(&self) -> bool { - use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; - - self.attr & FILE_ATTRIBUTE_DIRECTORY != 0 && self.is_symlink() - } - - fn reparse_tag(&self) -> Option { - use winapi::um::winnt::FILE_ATTRIBUTE_REPARSE_POINT; - - if self.attr & FILE_ATTRIBUTE_REPARSE_POINT != 0 { - Some(self.reparse_tag) - } else { - None - } - } -} - /// A handle to a directory stream. /// /// The handle is automatically closed when it's dropped. @@ -498,6 +417,17 @@ fn time_as_u64(time: &FILETIME) -> u64 { (time.dwHighDateTime as u64) << 32 | time.dwLowDateTime as u64 } +fn intervals_to_system_time(intervals: u64) -> SystemTime { + const NANOS_IN_SECOND: u64 = 1_000_000_000; + const NANOS_PER_INTERVAL: u64 = 100; + const SECONDS_TO_UNIX: u64 = 11_644_473_600; + + let seconds_from_unix = + (intervals / (NANOS_IN_SECOND / NANOS_PER_INTERVAL)) - SECONDS_TO_UNIX; + let dur_from_unix = time::Duration::from_secs(seconds_from_unix); + SystemTime::UNIX_EPOCH + dur_from_unix +} + fn to_utf16>(t: T, buf: &mut Vec) -> io::Result<()> { for cu16 in t.as_ref().encode_wide() { if cu16 == 0 { @@ -517,3 +447,45 @@ fn truncate_utf16(slice: &[u16]) -> &[u16] { None => slice, } } + +pub(crate) fn escaped_u16s(slice: &[u16]) -> String { + use std::char; + + let mut buf = String::with_capacity(slice.len()); + for result in char::decode_utf16(slice.iter().cloned()) { + match result { + Ok(ch) => buf.push(ch), + Err(err) => { + let bad = err.unpaired_surrogate(); + buf.push_str(&format!(r"\u{{{:X}}}", bad)); + } + } + } + buf +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn escaping1() { + let x: Vec = "foo☃bar".encode_utf16().collect(); + let escaped = escaped_u16s(&x); + assert_eq!("foo☃bar", escaped); + } + + #[test] + fn escaping2() { + let mut x = vec![]; + x.push(0xD800); + x.extend("a".encode_utf16()); + x.push(0xDA02); + x.extend("b".encode_utf16()); + x.push(0xDFFF); + x.extend("c".encode_utf16()); + + let escaped = escaped_u16s(&x); + 
assert_eq!(r"\u{D800}a\u{DA02}b\u{DFFF}c", escaped); + } +} diff --git a/src/os/windows/rawpath.rs b/src/os/windows/rawpath.rs new file mode 100644 index 0000000..ff6d19a --- /dev/null +++ b/src/os/windows/rawpath.rs @@ -0,0 +1,29 @@ +use std::fmt; + +#[derive(Clone)] +pub struct RawPathBuf { + /// Buf always has length at least 1 and always ends with a zero u16. + /// Buf only ever contains exactly 1 zero u16. (i.e., no interior NULs.) + buf: Vec, +} + +impl fmt::Debug for RawPathBuf { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use crate::os::windows::escaped_u16s; + + f.debug_struct("RawPathBuf") + .field("buf", &escaped_u16s(self.as_code_units())) + .finish() + } +} + +impl RawPathBuf { + /// Returns the code units (u16s) of this path without the NUL terminator. + pub fn as_code_units(&self) -> &[u16] { + &self.buf[..self.buf.len() - 1] + } + + unsafe fn drop_nul(&mut self) { + self.buf.set_len(self.buf.len() - 1); + } +} diff --git a/src/os/windows/stat.rs b/src/os/windows/stat.rs new file mode 100644 index 0000000..69de8db --- /dev/null +++ b/src/os/windows/stat.rs @@ -0,0 +1,269 @@ +use std::fmt; +use std::fs::OpenOptions; +use std::io; +use std::mem; +use std::os::windows::fs::OpenOptionsExt; +use std::os::windows::io::{AsRawHandle, RawHandle}; +use std::path::Path; +use std::time::SystemTime; + +use winapi::shared::minwindef::DWORD; +use winapi::um::fileapi::{ + GetFileInformationByHandle, BY_HANDLE_FILE_INFORMATION, +}; +use winapi::um::winbase::{ + FILE_FLAG_BACKUP_SEMANTICS, FILE_FLAG_OPEN_REPARSE_POINT, +}; + +use crate::os::windows::{intervals_to_system_time, time_as_u64}; + +#[derive(Clone)] +pub struct Metadata { + info: BY_HANDLE_FILE_INFORMATION, + reparse_tag: DWORD, +} + +impl Metadata { + pub fn file_attributes(&self) -> u32 { + self.info.dwFileAttributes + } + + pub fn file_type(&self) -> FileType { + FileType::from_attr(self.file_attributes(), self.reparse_tag) + } + + pub fn is_hidden(&self) -> bool { + use 
winapi::um::winnt::FILE_ATTRIBUTE_HIDDEN; + self.file_attributes() & FILE_ATTRIBUTE_HIDDEN != 0 + } + + pub fn created(&self) -> io::Result { + let intervals = time_as_u64(&self.info.ftCreationTime); + if intervals == 0 { + Err(io::Error::new( + io::ErrorKind::Other, + "creation time is not available on this platform currently", + )) + } else { + Ok(intervals_to_system_time(intervals)) + } + } + + pub fn accessed(&self) -> io::Result { + let intervals = time_as_u64(&self.info.ftLastAccessTime); + if intervals == 0 { + Err(io::Error::new( + io::ErrorKind::Other, + "last access time is not available on this platform currently", + )) + } else { + Ok(intervals_to_system_time(intervals)) + } + } + + pub fn modified(&self) -> io::Result { + let intervals = time_as_u64(&self.info.ftLastWriteTime); + if intervals == 0 { + Err(io::Error::new( + io::ErrorKind::Other, + "last write time is not available on this platform currently", + )) + } else { + Ok(intervals_to_system_time(intervals)) + } + } + + pub fn len(&self) -> u64 { + ((self.info.nFileSizeHigh as u64) << 32) + | (self.info.nFileSizeLow as u64) + } + + pub fn number_of_links(&self) -> u64 { + self.info.nNumberOfLinks as u64 + } + + pub fn volume_serial_number(&self) -> u64 { + self.info.dwVolumeSerialNumber as u64 + } + + pub fn file_index(&self) -> u64 { + ((self.info.nFileIndexHigh as u64) << 32) + | (self.info.nFileIndexLow as u64) + } +} + +/// File type information discoverable from the `FindNextFile` winapi routines. +/// +/// Note that this does not include all possible file types on Windows. +/// Instead, this only differentiates between directories, regular files and +/// symlinks. Additional file type information (such as whether a file handle +/// is a socket) can only be retrieved via the `GetFileType` winapi routines. +/// A safe wrapper for it is +/// [available in the `winapi-util` crate](https://docs.rs/winapi-util/*/x86_64-pc-windows-msvc/winapi_util/file/fn.typ.html). 
+#[derive(Clone, Copy)] +pub struct FileType { + attr: DWORD, + reparse_tag: DWORD, +} + +impl fmt::Debug for FileType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let human = if self.is_file() { + "File" + } else if self.is_dir() { + "Directory" + } else if self.is_symlink_file() { + "Symbolic Link (File)" + } else if self.is_symlink_dir() { + "Symbolic Link (Directory)" + } else { + "Unknown" + }; + write!(f, "FileType({})", human) + } +} + +impl FileType { + /// Create a file type from its raw winapi components. + /// + /// `attr` should be a file attribute bitset, corresponding to the + /// `dwFileAttributes` member of file information structs. + /// + /// `reparse_tag` should be a valid reparse tag value when the + /// `FILE_ATTRIBUTE_REPARSE_POINT` bit is set in `attr`. If the bit isn't + /// set or if the tag is not available, then the tag can be any value. + pub fn from_attr(attr: u32, reparse_tag: u32) -> FileType { + FileType { attr, reparse_tag } + } + + /// Returns true if this file type is a regular file. + /// + /// This corresponds to any file that is neither a symlink nor a directory. + pub fn is_file(&self) -> bool { + !self.is_dir() && !self.is_symlink() + } + + /// Returns true if this file type is a directory. + /// + /// This corresponds to any file that has the `FILE_ATTRIBUTE_DIRECTORY` + /// attribute and is not a symlink. + pub fn is_dir(&self) -> bool { + use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; + + self.attr & FILE_ATTRIBUTE_DIRECTORY != 0 && !self.is_symlink() + } + + /// Returns true if this file type is a symlink. This could be a symlink + /// to a directory or to a file. To distinguish between them, use + /// `is_symlink_file` and `is_symlink_dir`. + /// + /// This corresponds to any file that has a surrogate reparse point. 
+ pub fn is_symlink(&self) -> bool { + use winapi::um::winnt::IsReparseTagNameSurrogate; + + self.reparse_tag().map_or(false, IsReparseTagNameSurrogate) + } + + /// Returns true if this file type is a symlink to a file. + /// + /// This corresponds to any file that has a surrogate reparse point and + /// is not a symlink to a directory. + pub fn is_symlink_file(&self) -> bool { + !self.is_symlink_dir() && self.is_symlink() + } + + /// Returns true if this file type is a symlink to a file. + /// + /// This corresponds to any file that has a surrogate reparse point and has + /// the `FILE_ATTRIBUTE_DIRECTORY` attribute. + pub fn is_symlink_dir(&self) -> bool { + use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; + + self.attr & FILE_ATTRIBUTE_DIRECTORY != 0 && self.is_symlink() + } + + fn reparse_tag(&self) -> Option { + use winapi::um::winnt::FILE_ATTRIBUTE_REPARSE_POINT; + + if self.attr & FILE_ATTRIBUTE_REPARSE_POINT != 0 { + Some(self.reparse_tag) + } else { + None + } + } +} + +pub fn stat>(path: P) -> io::Result { + let file = OpenOptions::new() + // Neither read nor write permissions are needed. + .access_mode(0) + .custom_flags(FILE_FLAG_BACKUP_SEMANTICS) + .open(path)?; + statat(file.as_raw_handle()) +} + +pub fn lstat>(path: P) -> io::Result { + let file = OpenOptions::new() + // Neither read nor write permissions are needed. + .access_mode(0) + .custom_flags( + FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT, + ) + .open(path)?; + statat(file.as_raw_handle()) +} + +fn statat(handle: RawHandle) -> io::Result { + use winapi::um::winnt::FILE_ATTRIBUTE_REPARSE_POINT; + + let info: BY_HANDLE_FILE_INFORMATION = unsafe { + let mut info = mem::zeroed(); + let res = GetFileInformationByHandle(handle, &mut info); + if res == 0 { + return Err(io::Error::last_os_error()); + } + info + }; + let reparse_tag = + if info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT != 0 { + get_reparse_tag(handle)? 
+ } else { + 0 + }; + Ok(Metadata { info, reparse_tag }) +} + +fn get_reparse_tag(handle: RawHandle) -> io::Result { + use std::ptr; + use winapi::ctypes::{c_uint, c_ushort}; + use winapi::um::ioapiset::DeviceIoControl; + use winapi::um::winioctl::FSCTL_GET_REPARSE_POINT; + use winapi::um::winnt::MAXIMUM_REPARSE_DATA_BUFFER_SIZE; + + #[repr(C)] + struct REPARSE_DATA_BUFFER { + ReparseTag: c_uint, + ReparseDataLength: c_ushort, + Reserved: c_ushort, + rest: (), + } + + let mut buf = [0; MAXIMUM_REPARSE_DATA_BUFFER_SIZE as usize]; + let res = unsafe { + DeviceIoControl( + handle, + FSCTL_GET_REPARSE_POINT, + ptr::null_mut(), + 0, + buf.as_mut_ptr() as *mut _, + buf.len() as DWORD, + &mut 0, + ptr::null_mut(), + ) + }; + if res == 0 { + return Err(io::Error::last_os_error()); + } + let data = buf.as_ptr() as *const REPARSE_DATA_BUFFER; + Ok(unsafe { (*data).ReparseTag }) +} diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 4d95cfa..4c2cbf2 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,10 +1,12 @@ #[macro_use] mod util; -mod recursive; +// mod recursive; #[cfg(target_os = "linux")] mod linux; #[cfg(unix)] +mod scratch; +#[cfg(unix)] mod unix; #[cfg(windows)] mod windows; diff --git a/src/tests/scratch.rs b/src/tests/scratch.rs new file mode 100644 index 0000000..17bf95a --- /dev/null +++ b/src/tests/scratch.rs @@ -0,0 +1,45 @@ +use std::fs; +use std::path::PathBuf; + +use crate::tests::util::{self, Dir}; +use crate::Cursor; + +#[test] +fn many_mixed() { + let dir = Dir::tmp(); + dir.mkdirp("foo/a"); + dir.mkdirp("foo/c"); + dir.mkdirp("foo/e"); + dir.touch_all(&["foo/b", "foo/d", "foo/f"]); + dir.touch_all(&["foo/c/bar", "foo/c/baz"]); + dir.touch_all(&["foo/a/quux"]); + + let mut cur = Cursor::new(dir.path()); + loop { + match cur.read() { + Ok(None) => break, + Ok(Some(entry)) => { + println!("{:?}", entry.path()); + } + Err(err) => { + println!("ERROR: {}", err); + break; + } + } + } + + // let r = dir.run_recursive(wd); + // r.assert_no_errors(); 
+ // + // let expected = vec![ + // dir.path().to_path_buf(), + // dir.join("foo"), + // dir.join("foo").join("a"), + // dir.join("foo").join("b"), + // dir.join("foo").join("c"), + // dir.join("foo").join("d"), + // dir.join("foo").join("e"), + // dir.join("foo").join("f"), + // ]; + // assert_eq!(expected, r.sorted_paths()); +} diff --git a/src/walk.rs b/src/walk.rs new file mode 100644 index 0000000..019f118 --- /dev/null +++ b/src/walk.rs @@ -0,0 +1,339 @@ +use std::cmp; +use std::fmt; +use std::path::{Path, PathBuf}; +use std::result; +use std::usize; + +use crate::dent::DirEntry; +use crate::error::Result; + +struct WalkDirOptions { + follow_links: bool, + max_open: usize, + min_depth: usize, + max_depth: usize, + sorter: Option< + Box< + FnMut(&DirEntry, &DirEntry) -> cmp::Ordering + + Send + + Sync + + 'static, + >, + >, + contents_first: bool, + same_file_system: bool, +} + +impl fmt::Debug for WalkDirOptions { + fn fmt(&self, f: &mut fmt::Formatter) -> result::Result<(), fmt::Error> { + let sorter_str = if self.sorter.is_some() { + // FnMut isn't `Debug` + "Some(...)" + } else { + "None" + }; + f.debug_struct("WalkDirOptions") + .field("follow_links", &self.follow_links) + .field("max_open", &self.max_open) + .field("min_depth", &self.min_depth) + .field("max_depth", &self.max_depth) + .field("sorter", &sorter_str) + .field("contents_first", &self.contents_first) + .field("same_file_system", &self.same_file_system) + .finish() + } +} + +/// TODO +#[derive(Debug)] +pub struct WalkDir { + root: PathBuf, + opts: WalkDirOptions, +} + +impl IntoIterator for WalkDir { + type Item = Result; + type IntoIter = IntoIter; + + fn into_iter(self) -> IntoIter { + unimplemented!() + } +} + +impl WalkDir { + /// Create a builder for a recursive directory iterator starting at the + /// file path `root`. If `root` is a directory, then it is the first item + /// yielded by the iterator. If `root` is a file, then it is the first + /// and only item yielded by the iterator. 
If `root` is a symlink, then it + /// is always followed for the purposes of directory traversal. (A root + /// `DirEntry` still obeys its documentation with respect to symlinks and + /// the `follow_links` setting.) + pub fn new>(root: P) -> WalkDir { + WalkDir { + root: root.into(), + opts: WalkDirOptions { + follow_links: false, + max_open: 10, + min_depth: 0, + max_depth: usize::MAX, + sorter: None, + contents_first: false, + same_file_system: false, + }, + } + } + + /// Set the minimum depth of entries yielded by the iterator. + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on this type. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. + pub fn min_depth(mut self, depth: usize) -> WalkDir { + self.opts.min_depth = depth; + if self.opts.min_depth > self.opts.max_depth { + self.opts.min_depth = self.opts.max_depth; + } + self + } + + /// Set the maximum depth of entries yield by the iterator. + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on this type. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. + /// + /// Note that this will not simply filter the entries of the iterator, but + /// it will actually avoid descending into directories when the depth is + /// exceeded. + pub fn max_depth(mut self, depth: usize) -> WalkDir { + self.opts.max_depth = depth; + if self.opts.max_depth < self.opts.min_depth { + self.opts.max_depth = self.opts.min_depth; + } + self + } + + /// Follow symbolic links. By default, this is disabled. + /// + /// When `yes` is `true`, symbolic links are followed as if they were + /// normal directories and files. If a symbolic link is broken or is + /// involved in a loop, an error is yielded. + /// + /// When enabled, the yielded [`DirEntry`] values represent the target of + /// the link while the path corresponds to the link. 
See the [`DirEntry`] + /// type for more details. + /// + /// [`DirEntry`]: struct.DirEntry.html + pub fn follow_links(mut self, yes: bool) -> WalkDir { + self.opts.follow_links = yes; + self + } + + /// Set the maximum number of simultaneously open file descriptors used + /// by the iterator. + /// + /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set + /// to `1` automatically. If this is not set, then it defaults to some + /// reasonably low number. + /// + /// This setting has no impact on the results yielded by the iterator + /// (even when `n` is `1`). Instead, this setting represents a trade off + /// between scarce resources (file descriptors) and memory. Namely, when + /// the maximum number of file descriptors is reached and a new directory + /// needs to be opened to continue iteration, then a previous directory + /// handle is closed and has its unyielded entries stored in memory. In + /// practice, this is a satisfying trade off because it scales with respect + /// to the *depth* of your file tree. Therefore, low values (even `1`) are + /// acceptable. + /// + /// Note that this value does not impact the number of system calls made by + /// an exhausted iterator. + /// + /// # Platform behavior + /// + /// On Windows, if `follow_links` is enabled, then this limit is not + /// respected. In particular, the maximum number of file descriptors opened + /// is proportional to the depth of the directory tree traversed. + pub fn max_open(mut self, mut n: usize) -> WalkDir { + if n == 0 { + n = 1; + } + self.opts.max_open = n; + self + } + + /// Set a function for sorting directory entries. + /// + /// If a compare function is set, the resulting iterator will return all + /// paths in sorted order. The compare function will be called to compare + /// entries from the same directory. 
+ /// + /// ```rust,no-run + /// use std::cmp; + /// use std::ffi::OsString; + /// use walkdir::WalkDir; + /// + /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name())); + /// ``` + pub fn sort_by(mut self, cmp: F) -> WalkDir + where + F: FnMut(&DirEntry, &DirEntry) -> cmp::Ordering + + Send + + Sync + + 'static, + { + self.opts.sorter = Some(Box::new(cmp)); + self + } + + /// Yield a directory's contents before the directory itself. By default, + /// this is disabled. + /// + /// When `yes` is `false` (as is the default), the directory is yielded + /// before its contents are read. This is useful when, e.g. you want to + /// skip processing of some directories. + /// + /// When `yes` is `true`, the iterator yields the contents of a directory + /// before yielding the directory itself. This is useful when, e.g. you + /// want to recursively delete a directory. + /// + /// # Example + /// + /// Assume the following directory tree: + /// + /// ```text + /// foo/ + /// abc/ + /// qrs + /// tuv + /// def/ + /// ``` + /// + /// With contents_first disabled (the default), the following code visits + /// the directory tree in depth-first order: + /// + /// ```no_run + /// use walkdir::WalkDir; + /// + /// for entry in WalkDir::new("foo") { + /// let entry = entry.unwrap(); + /// println!("{}", entry.path().display()); + /// } + /// + /// // foo + /// // foo/abc + /// // foo/abc/qrs + /// // foo/abc/tuv + /// // foo/def + /// ``` + /// + /// With contents_first enabled: + /// + /// ```no_run + /// use walkdir::WalkDir; + /// + /// for entry in WalkDir::new("foo").contents_first(true) { + /// let entry = entry.unwrap(); + /// println!("{}", entry.path().display()); + /// } + /// + /// // foo/abc/qrs + /// // foo/abc/tuv + /// // foo/abc + /// // foo/def + /// // foo + /// ``` + pub fn contents_first(mut self, yes: bool) -> WalkDir { + self.opts.contents_first = yes; + self + } + + /// Do not cross file system boundaries. 
+ /// + /// When this option is enabled, directory traversal will not descend into + /// directories that are on a different file system from the root path. + /// + /// Currently, this option is only supported on Unix and Windows. If this + /// option is used on an unsupported platform, then directory traversal + /// will immediately return an error and will not yield any entries. + pub fn same_file_system(mut self, yes: bool) -> WalkDir { + self.opts.same_file_system = yes; + self + } +} + +#[derive(Debug)] +struct Walker { + root: PathBuf, + depth: usize, + opts: WalkDirOptions, +} + +impl Walker { + fn new() -> Walker { + Walker { + root: PathBuf::new(), + depth: 0, + opts: WalkDirOptions { + follow_links: false, + max_open: 10, + min_depth: 0, + max_depth: usize::MAX, + sorter: None, + contents_first: false, + same_file_system: false, + }, + } + } +} + +/// TODO +#[derive(Debug)] +pub struct IntoIter {} + +impl Iterator for IntoIter { + type Item = Result; + + fn next(&mut self) -> Option> { + unimplemented!() + } +} + +impl IntoIter { + /// TODO + pub fn filter_entry

(self, predicate: P) -> FilterEntry + where + P: FnMut(&DirEntry) -> bool, + { + FilterEntry { it: self, predicate: predicate } + } + + /// TODO + pub fn skip_current_dir(&mut self) { + unimplemented!() + } +} + +/// TODO +#[derive(Debug)] +pub struct FilterEntry { + it: I, + predicate: P, +} + +impl

Iterator for FilterEntry +where + P: FnMut(&DirEntry) -> bool, +{ + type Item = Result; + + /// Advances the iterator and returns the next value. + /// + /// # Errors + /// + /// If the iterator fails to retrieve the next value, this method returns + /// an error value. The error will be wrapped in an `Option::Some`. + fn next(&mut self) -> Option> { + unimplemented!() + } +} -- cgit v1.2.3