Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/windirstat/walkdir.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/os/linux/mod.rs')
-rw-r--r--src/os/linux/mod.rs315
1 files changed, 315 insertions, 0 deletions
diff --git a/src/os/linux/mod.rs b/src/os/linux/mod.rs
new file mode 100644
index 0000000..b6255ad
--- /dev/null
+++ b/src/os/linux/mod.rs
@@ -0,0 +1,315 @@
+/*!
+Low level Linux specific APIs for reading directory entries via `getdents64`.
+*/
+
+use std::alloc::{alloc_zeroed, dealloc, handle_alloc_error, Layout};
+use std::ffi::{CStr, CString, OsStr};
+use std::fmt;
+use std::io;
+use std::mem;
+use std::os::unix::ffi::{OsStrExt, OsStringExt};
+use std::os::unix::io::{AsRawFd, RawFd};
+use std::path::PathBuf;
+use std::ptr::NonNull;
+
+use libc::{syscall, SYS_getdents64};
+
+use crate::os::linux::dirent::RawDirEntry;
+use crate::os::unix::{
+ errno, escaped_bytes, DirEntry as UnixDirEntry, DirFd, FileType,
+};
+
+mod dirent;
+
+/// A safe function for calling Linux's `getdents64` API.
+///
+/// The basic idea of `getdents` is that it executes a single syscall but
+/// returns potentially many directory entries in a single buffer. This can
+/// provide a small speed boost when compared with the typical `readdir` POSIX
+/// API, depending on your platform's implementation.
+///
+/// This routine will read directory entries from the given file descriptor
+/// into the given cursor. The cursor can then be used to cheaply and safely
+/// iterate over the directory entries that were read.
+///
+/// When all directory entries have been read from the given file descriptor,
+/// then this function will return `false`. Otherwise, it returns `true`.
+///
+/// If there was a problem calling the underlying `getdents64` syscall, then
+/// an error is returned.
+pub fn getdents(fd: RawFd, cursor: &mut DirEntryCursor) -> io::Result<bool> {
+ cursor.clear();
+ let res = unsafe {
+ syscall(
+ SYS_getdents64,
+ fd,
+ cursor.raw.as_ptr() as *mut RawDirEntry,
+ cursor.capacity,
+ )
+ };
+ match res {
+ -1 => Err(io::Error::last_os_error()),
+ 0 => Ok(false),
+ nwritten => {
+ cursor.len = nwritten as usize;
+ Ok(true)
+ }
+ }
+}
+
+/// A Linux specific directory entry.
+///
+/// This directory entry is just like the Unix `DirEntry`, except its file
+/// name is borrowed from a `DirEntryCursor`'s internal buffer. This makes
+/// it possible to iterate over directory entries on Linux by reusing the
+/// cursor's internal buffer with no additional allocations or copying.
+///
+/// In practice, if one needs an owned directory entry, then convert it to a
+/// Unix `DirEntry` either via the Unix methods on this `DirEntry`, or by
+/// simply reading a Unix `DirEntry` directly from `DirEntryCursor`.
+#[derive(Clone)]
+pub struct DirEntry<'a> {
+ /// A borrowed version of the `d_name` field found in the raw directory
+ /// entry. This field is the only reason why this type exists, otherwise
+ /// we'd just expose `RawDirEntry` directly to users. The issue with
+ /// exposing the raw directory entry is that its size isn't correct (since
+ /// the file name may extend beyond the end of the struct).
+ ///
+ /// This borrow ties this entry to the `DirEntryBuffer`.
+ file_name: &'a CStr,
+ /// The file type, as is, from the raw dirent.
+ file_type: Option<FileType>,
+ /// The file serial number, as is, from the raw dirent.
+ ino: u64,
+}
+
+impl<'a> fmt::Debug for DirEntry<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ use crate::os::unix::escaped_bytes;
+
+ f.debug_struct("DirEntry")
+ .field("file_name", &escaped_bytes(self.file_name_bytes()))
+ .field("file_type", &self.file_type)
+ .field("ino", &self.ino)
+ .finish()
+ }
+}
+
+impl<'a> DirEntry<'a> {
+ /// Return the file name in this directory entry as a C string.
+ #[inline]
+ pub fn file_name(&self) -> &CStr {
+ self.file_name
+ }
+
+ /// Return the file name in this directory entry as raw bytes without
+ /// a `NUL` terminator.
+ #[inline]
+ pub fn file_name_bytes(&self) -> &[u8] {
+ self.file_name.to_bytes()
+ }
+
+ /// Return the file name in this directory entry as an OS string without
+ /// a `NUL` terminator.
+ #[inline]
+ pub fn file_name_os(&self) -> &OsStr {
+ OsStr::from_bytes(self.file_name_bytes())
+ }
+
+ /// Return the file type of this directory entry, if one exists.
+ ///
+ /// A file type may not exist if the underlying file system reports an
+ /// unknown file type in the directory entry.
+ #[inline]
+ pub fn file_type(&self) -> Option<FileType> {
+ self.file_type
+ }
+
+ /// Returns the underlying file serial number for this directory entry.
+ #[inline]
+ pub fn ino(&self) -> u64 {
+ self.ino
+ }
+
+ /// Convert this directory entry into an owned Unix `DirEntry`. If you
+ /// want to be able to reuse allocations, then use `write_to_unix` instead.
+ #[inline]
+ pub fn to_unix(&self) -> UnixDirEntry {
+ let mut ent = UnixDirEntry::empty();
+ self.write_to_unix(&mut ent);
+ ent
+ }
+
+ /// Write this directory entry into the given Unix `DirEntry`. This makes
+ /// it possible to amortize allocation.
+ #[inline]
+ pub fn write_to_unix(&self, unix_dirent: &mut UnixDirEntry) {
+ unix_dirent.from_linux_raw(self)
+ }
+}
+
+/// A cursor for reading directory entries from a `getdents` buffer.
+///
+/// This cursor allocates space internally for storing one or more Linux
+/// directory entries, and exposes an API for cheaply iterating over those
+/// directory entries.
+///
+/// A cursor can and should be reused across multiple calls to `getdents`. A
+/// cursor is not tied to any one particular directory.
+#[derive(Clone, Debug)]
+pub struct DirEntryCursor {
+ /// Spiritually, this is a *mut RawDirEntry. Unfortunately, this doesn't
+ /// quite make sense since a value with type `RawDirEntry` does not
+ /// actually have a size of `size_of::<RawDirEntry>()` due to the way in
+ /// which the entry's name is stored in a flexible array member.
+ ///
+ /// With that said, we do transmute bytes in this buffer to a
+ /// `RawDirEntry`, which lets us read the members of the struct (including
+ /// the flexible array member) correctly. However, because of that, we need
+ /// to make sure our memory has the correct alignment. Hence, this is why
+ /// we use a raw `*mut u8` created by the std::alloc APIs. If there was an
+ /// easy way to control alignment with a `Vec<u8>`, then we could use that
+ /// instead. (It is indeed possible, but seems fragile.)
+ ///
+ /// Since a `RawDirEntry` is inherently unsafe to use because of its
+ /// flexible array member, it is converted to a `DirEntry` (cheaply,
+ /// without allocation) before being exposed to the caller.
+ raw: NonNull<u8>,
+ /// The lenth, in bytes, of all valid entries in `raw`.
+ len: usize,
+ /// The lenth, in bytes, of `raw`.
+ capacity: usize,
+ /// The current position of this buffer as a pointer into `raw`.
+ cursor: NonNull<u8>,
+}
+
+impl Drop for DirEntryCursor {
+ fn drop(&mut self) {
+ unsafe {
+ dealloc(self.raw.as_ptr(), layout(self.capacity));
+ }
+ }
+}
+
+/// Returns the allocation layout used for constructing the getdents buffer
+/// with the given capacity (in bytes).
+///
+/// This panics if the given length isn't a multiple of the alignment of
+/// `RawDirEntry` or is `0`.
+fn layout(capacity: usize) -> Layout {
+ let align = mem::align_of::<RawDirEntry>();
+ assert!(capacity > 0, "capacity must be greater than 0");
+ assert!(capacity % align == 0, "capacity must be a multiple of alignment");
+ Layout::from_size_align(capacity, align).expect("failed to create Layout")
+}
+
+impl DirEntryCursor {
+ /// Create a new cursor for reading directory entries.
+ ///
+ /// It is beneficial to reuse a cursor in multiple calls to `getdents`. A
+ /// cursor can be used with any number of directories.
+ pub fn new() -> DirEntryCursor {
+ DirEntryCursor::with_capacity(32 * (1 << 10))
+ }
+
+ /// Create a new cursor with the specified capacity. The capacity given
+ /// should be in bytes, and must be a multiple of the alignment of a raw
+ /// directory entry.
+ fn with_capacity(capacity: usize) -> DirEntryCursor {
+ // TODO: It would be nice to expose a way to control the capacity to
+ // the caller, but we'd really like the capacity to be a multiple of
+ // the alignment. (Technically, the only restriction is that
+ // the capacity and the alignment have a least common multiple that
+ // doesn't overflow `usize::MAX`. But requiring the size to be a
+ // multiple of alignment just seems like good sense in this case.)
+ //
+ // Anyway, exposing raw capacity to the caller is weird, because they
+ // shouldn't need to care about the alignment of an internal type.
+ // We *could* expose capacity in "units" of `RawDirEntry` itself, but
+ // even this is somewhat incorrect because the size of `RawDirEntry`
+ // is smaller than what it typically is, since the size doesn't account
+ // for file names. We could just pick a fixed approximate size for
+ // file names and add that to the size of `RawDirEntry`. But let's wait
+ // for a more concrete use case to emerge before exposing anything.
+ let lay = layout(capacity);
+ let raw = match NonNull::new(unsafe { alloc_zeroed(lay) }) {
+ Some(raw) => raw,
+ None => handle_alloc_error(lay),
+ };
+ DirEntryCursor { raw, len: 0, capacity, cursor: raw }
+ }
+
+ /// Read the next directory entry from this cursor. If the cursor has been
+ /// exhausted, then return `None`.
+ ///
+ /// The returned directory entry contains a file name that is borrowed from
+ /// this cursor's internal buffer. In particular, no allocation is
+ /// performed by this routine. If you need an owned directory entry, then
+ /// use `read_unix` or `read_unix_into`.
+ ///
+ /// Note that no filtering of entries (such as `.` and `..`) is performed.
+ pub fn read<'a>(&'a mut self) -> Option<DirEntry<'a>> {
+ if self.cursor.as_ptr() >= self.raw.as_ptr().wrapping_add(self.len) {
+ return None;
+ }
+ // SAFETY: This is safe by the contract of getdents64. Namely, that it
+ // writes structures of type `RawDirEntry` to `raw`. The lifetime of
+ // this raw dirent is also tied to this buffer via the type signature
+ // of this method, which prevents use-after-free. Moreover, our
+ // allocation layout guarantees that the cursor is correctly aligned
+ // for RawDirEntry.
+ let raw_dirent =
+ unsafe { &*(self.cursor.as_ptr() as *const RawDirEntry) };
+ let ent = DirEntry {
+ // SAFETY: This is safe since we are asking for the file name on a
+ // `RawDirEntry` that resides in its original buffer.
+ file_name: unsafe { raw_dirent.file_name() },
+ file_type: raw_dirent.file_type(),
+ ino: raw_dirent.ino(),
+ };
+ // SAFETY: This is safe by the assumption that `d_reclen` on the raw
+ // dirent is correct.
+ self.cursor = unsafe {
+ let next = self.cursor.as_ptr().add(raw_dirent.record_len());
+ NonNull::new_unchecked(next)
+ };
+ Some(ent)
+ }
+
+ /// Read the next directory entry from this cursor as an owned Unix
+ /// `DirEntry`. If the cursor has been exhausted, then return `None`.
+ ///
+ /// This will allocate new space to store the file name in the directory
+ /// entry. To reuse a previous allocation, use `read_unix_into` instead.
+ ///
+ /// Note that no filtering of entries (such as `.` and `..`) is performed.
+ pub fn read_unix(&mut self) -> Option<UnixDirEntry> {
+ self.read().map(|ent| ent.to_unix())
+ }
+
+ /// Read the next directory entry from this cursor into the given Unix
+ /// `DirEntry`. If the cursor has been exhausted, then return `false`.
+ /// Otherwise return `true`.
+ ///
+ /// Note that no filtering of entries (such as `.` and `..`) is performed.
+ pub fn read_unix_into(&mut self, unix_dirent: &mut UnixDirEntry) -> bool {
+ match self.read() {
+ None => false,
+ Some(dent) => {
+ dent.write_to_unix(unix_dirent);
+ true
+ }
+ }
+ }
+
+ /// Rewind this cursor such that it points to the first directory entry.
+ pub fn rewind(&mut self) {
+ self.cursor = self.raw;
+ }
+
+ /// Clear this cursor such that it has no entries.
+ fn clear(&mut self) {
+ self.cursor = self.raw;
+ self.len = 0;
+ }
+}