Welcome to mirror list, hosted at ThFree Co, Russian Federation.

mod.rs « linux « os « src - github.com/windirstat/walkdir.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: b6255ad052eea2bae05d9b6885d2d5a83d942c21 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
/*!
Low-level, Linux-specific APIs for reading directory entries via `getdents64`.
*/

use std::alloc::{alloc_zeroed, dealloc, handle_alloc_error, Layout};
use std::ffi::{CStr, CString, OsStr};
use std::fmt;
use std::io;
use std::mem;
use std::os::unix::ffi::{OsStrExt, OsStringExt};
use std::os::unix::io::{AsRawFd, RawFd};
use std::path::PathBuf;
use std::ptr::NonNull;

use libc::{syscall, SYS_getdents64};

use crate::os::linux::dirent::RawDirEntry;
use crate::os::unix::{
    errno, escaped_bytes, DirEntry as UnixDirEntry, DirFd, FileType,
};

mod dirent;

/// Fill `cursor` with the next batch of directory entries from `fd` using
/// Linux's `getdents64` syscall.
///
/// A single `getdents64` call may hand back many directory entries at once
/// in one buffer. Compared with the typical POSIX `readdir` API this can give
/// a small speed boost, depending on the platform's implementation.
///
/// Any entries previously held by the cursor are discarded before the syscall
/// is made. Afterwards, the cursor can be used to cheaply and safely iterate
/// over the entries that were read.
///
/// Returns `Ok(true)` when entries were read into the cursor, and `Ok(false)`
/// once every directory entry has been read from the file descriptor.
///
/// # Errors
///
/// If the underlying `getdents64` syscall fails, the corresponding OS error
/// is returned.
pub fn getdents(fd: RawFd, cursor: &mut DirEntryCursor) -> io::Result<bool> {
    cursor.clear();
    // SAFETY: The pointer and length handed to the kernel describe the
    // cursor's own buffer, which is `cursor.capacity` bytes long.
    let nread = unsafe {
        syscall(
            SYS_getdents64,
            fd,
            cursor.raw.as_ptr() as *mut RawDirEntry,
            cursor.capacity,
        )
    };
    if nread == -1 {
        return Err(io::Error::last_os_error());
    }
    if nread == 0 {
        // Nothing was written: the directory stream is exhausted.
        return Ok(false);
    }
    // Any other value is the number of bytes the kernel wrote.
    cursor.len = nread as usize;
    Ok(true)
}

/// A Linux specific directory entry.
///
/// This directory entry is just like the Unix `DirEntry`, except its file
/// name is borrowed from a `DirEntryCursor`'s internal buffer. This makes
/// it possible to iterate over directory entries on Linux by reusing the
/// cursor's internal buffer with no additional allocations or copying.
///
/// In practice, if one needs an owned directory entry, then convert it to a
/// Unix `DirEntry` either via the Unix methods on this `DirEntry` (`to_unix`
/// or `write_to_unix`), or by simply reading a Unix `DirEntry` directly from
/// a `DirEntryCursor` (`read_unix` or `read_unix_into`).
#[derive(Clone)]
pub struct DirEntry<'a> {
    /// A borrowed version of the `d_name` field found in the raw directory
    /// entry. This field is the only reason why this type exists, otherwise
    /// we'd just expose `RawDirEntry` directly to users. The issue with
    /// exposing the raw directory entry is that its size isn't correct (since
    /// the file name may extend beyond the end of the struct).
    ///
    /// This borrow ties this entry to the `DirEntryCursor` it was read from.
    file_name: &'a CStr,
    /// The file type, as is, from the raw dirent. `None` when the raw dirent
    /// does not report one.
    file_type: Option<FileType>,
    /// The file serial number, as is, from the raw dirent.
    ino: u64,
}

impl<'a> fmt::Debug for DirEntry<'a> {
    /// Format this entry as a `DirEntry` struct showing its `file_name`,
    /// `file_type` and `ino` fields. The file name bytes are rendered through
    /// the `escaped_bytes` helper rather than printed as a raw `CStr`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use crate::os::unix::escaped_bytes;

        let name = escaped_bytes(self.file_name_bytes());
        let mut out = f.debug_struct("DirEntry");
        out.field("file_name", &name);
        out.field("file_type", &self.file_type);
        out.field("ino", &self.ino);
        out.finish()
    }
}

impl<'a> DirEntry<'a> {
    /// The file name of this directory entry, as a C string.
    #[inline]
    pub fn file_name(&self) -> &CStr {
        self.file_name
    }

    /// The file name of this directory entry, as raw bytes. The trailing
    /// `NUL` terminator is not included.
    #[inline]
    pub fn file_name_bytes(&self) -> &[u8] {
        self.file_name().to_bytes()
    }

    /// The file name of this directory entry, as an OS string. The trailing
    /// `NUL` terminator is not included.
    #[inline]
    pub fn file_name_os(&self) -> &OsStr {
        let bytes = self.file_name().to_bytes();
        OsStr::from_bytes(bytes)
    }

    /// The file type of this directory entry, if there is one.
    ///
    /// This is `None` when the underlying file system reports an unknown
    /// file type in the directory entry.
    #[inline]
    pub fn file_type(&self) -> Option<FileType> {
        self.file_type
    }

    /// The underlying file serial number of this directory entry.
    #[inline]
    pub fn ino(&self) -> u64 {
        self.ino
    }

    /// Build a new, owned Unix `DirEntry` from this directory entry.
    ///
    /// Each call starts from a fresh empty Unix `DirEntry`; use
    /// `write_to_unix` instead if you want to reuse allocations.
    #[inline]
    pub fn to_unix(&self) -> UnixDirEntry {
        let mut owned = UnixDirEntry::empty();
        owned.from_linux_raw(self);
        owned
    }

    /// Overwrite the given Unix `DirEntry` with the contents of this
    /// directory entry. Reusing one Unix `DirEntry` across many calls makes
    /// it possible to amortize allocation.
    #[inline]
    pub fn write_to_unix(&self, unix_dirent: &mut UnixDirEntry) {
        unix_dirent.from_linux_raw(self)
    }
}

/// A cursor for reading directory entries from a `getdents` buffer.
///
/// This cursor allocates space internally for storing one or more Linux
/// directory entries, and exposes an API for cheaply iterating over those
/// directory entries.
///
/// A cursor can and should be reused across multiple calls to `getdents`. A
/// cursor is not tied to any one particular directory.
#[derive(Clone, Debug)]
pub struct DirEntryCursor {
    /// Spiritually, this is a *mut RawDirEntry. Unfortunately, this doesn't
    /// quite make sense since a value with type `RawDirEntry` does not
    /// actually have a size of `size_of::<RawDirEntry>()` due to the way in
    /// which the entry's name is stored in a flexible array member.
    ///
    /// With that said, we do transmute bytes in this buffer to a
    /// `RawDirEntry`, which lets us read the members of the struct (including
    /// the flexible array member) correctly. However, because of that, we need
    /// to make sure our memory has the correct alignment. Hence, this is why
    /// we use a raw `*mut u8` created by the std::alloc APIs. If there was an
    /// easy way to control alignment with a `Vec<u8>`, then we could use that
    /// instead. (It is indeed possible, but seems fragile.)
    ///
    /// Since a `RawDirEntry` is inherently unsafe to use because of its
    /// flexible array member, it is converted to a `DirEntry` (cheaply,
    /// without allocation) before being exposed to the caller.
    raw: NonNull<u8>,
    /// The lenth, in bytes, of all valid entries in `raw`.
    len: usize,
    /// The lenth, in bytes, of `raw`.
    capacity: usize,
    /// The current position of this buffer as a pointer into `raw`.
    cursor: NonNull<u8>,
}

impl Drop for DirEntryCursor {
    fn drop(&mut self) {
        unsafe {
            dealloc(self.raw.as_ptr(), layout(self.capacity));
        }
    }
}

/// Returns the allocation layout used for constructing the getdents buffer
/// with the given capacity (in bytes).
///
/// This panics if the given length isn't a multiple of the alignment of
/// `RawDirEntry` or is `0`.
fn layout(capacity: usize) -> Layout {
    let align = mem::align_of::<RawDirEntry>();
    assert!(capacity > 0, "capacity must be greater than 0");
    assert!(capacity % align == 0, "capacity must be a multiple of alignment");
    Layout::from_size_align(capacity, align).expect("failed to create Layout")
}

impl DirEntryCursor {
    /// Create a new cursor for reading directory entries, backed by a 32 KiB
    /// buffer.
    ///
    /// A cursor can be used with any number of directories, and it is
    /// beneficial to reuse one across multiple calls to `getdents`.
    pub fn new() -> DirEntryCursor {
        const DEFAULT_CAPACITY: usize = 32 * (1 << 10);
        DirEntryCursor::with_capacity(DEFAULT_CAPACITY)
    }

    /// Create a new cursor whose buffer holds `capacity` bytes. The capacity
    /// must be non-zero and a multiple of the alignment of a raw directory
    /// entry; otherwise this panics.
    fn with_capacity(capacity: usize) -> DirEntryCursor {
        // TODO: It would be nice to let callers choose the capacity, but we
        // really want it to be a multiple of the alignment. (Strictly
        // speaking, the only hard requirement is that the capacity and the
        // alignment have a least common multiple that doesn't overflow
        // `usize::MAX`, but insisting on a multiple of the alignment just
        // seems like good sense here.)
        //
        // Exposing a raw byte capacity is awkward though, since callers
        // shouldn't have to know the alignment of an internal type. Capacity
        // could instead be expressed in "units" of `RawDirEntry`, but that is
        // also somewhat off: the size of `RawDirEntry` doesn't account for
        // file names, so real entries are typically bigger. Picking a fixed
        // approximate file name size and adding it to the size of
        // `RawDirEntry` would work too. Wait for a concrete use case before
        // exposing anything.
        let buf_layout = layout(capacity);
        // SAFETY: `layout` panics on a zero capacity, so the layout has the
        // non-zero size that `alloc_zeroed` requires.
        let ptr = unsafe { alloc_zeroed(buf_layout) };
        let raw = NonNull::new(ptr)
            .unwrap_or_else(|| handle_alloc_error(buf_layout));
        DirEntryCursor { raw, len: 0, capacity, cursor: raw }
    }

    /// Read the next directory entry from this cursor, or return `None` if
    /// the cursor has been exhausted.
    ///
    /// The file name in the returned directory entry is borrowed from this
    /// cursor's internal buffer, so this routine performs no allocation. If
    /// you need an owned directory entry, use `read_unix` or
    /// `read_unix_into` instead.
    ///
    /// Note that no filtering of entries (such as `.` and `..`) is performed.
    pub fn read<'a>(&'a mut self) -> Option<DirEntry<'a>> {
        let end = self.raw.as_ptr().wrapping_add(self.len);
        let here = self.cursor.as_ptr();
        if here >= end {
            return None;
        }
        // SAFETY: This is safe by the contract of getdents64, which writes
        // structures of type `RawDirEntry` to `raw`. The type signature of
        // this method ties the lifetime of the returned entry to this cursor,
        // which prevents use-after-free. The allocation layout also
        // guarantees that the cursor is correctly aligned for RawDirEntry.
        let raw_dirent = unsafe { &*(here as *const RawDirEntry) };
        // SAFETY: The file name is requested from a `RawDirEntry` that still
        // resides in its original buffer.
        let file_name = unsafe { raw_dirent.file_name() };
        let file_type = raw_dirent.file_type();
        let ino = raw_dirent.ino();
        // SAFETY: This relies on `d_reclen` in the raw dirent being correct,
        // so that advancing by it lands on the next record (or the end).
        let next = unsafe { here.add(raw_dirent.record_len()) };
        // SAFETY: `next` is derived from a non-null pointer by moving
        // forward within the buffer, under the same `d_reclen` assumption.
        self.cursor = unsafe { NonNull::new_unchecked(next) };
        Some(DirEntry { file_name, file_type, ino })
    }

    /// Read the next directory entry from this cursor as an owned Unix
    /// `DirEntry`, or return `None` if the cursor has been exhausted.
    ///
    /// A new Unix `DirEntry` is created for every call. To reuse a previous
    /// allocation, use `read_unix_into` instead.
    ///
    /// Note that no filtering of entries (such as `.` and `..`) is performed.
    pub fn read_unix(&mut self) -> Option<UnixDirEntry> {
        let ent = self.read()?;
        Some(ent.to_unix())
    }

    /// Read the next directory entry from this cursor into the given Unix
    /// `DirEntry`. Returns `true` if an entry was written, and `false` if
    /// the cursor has been exhausted (in which case `unix_dirent` is left
    /// untouched).
    ///
    /// Note that no filtering of entries (such as `.` and `..`) is performed.
    pub fn read_unix_into(&mut self, unix_dirent: &mut UnixDirEntry) -> bool {
        if let Some(dent) = self.read() {
            dent.write_to_unix(unix_dirent);
            true
        } else {
            false
        }
    }

    /// Move this cursor back to the first directory entry in its buffer. The
    /// entries themselves are kept.
    pub fn rewind(&mut self) {
        self.cursor = self.raw;
    }

    /// Reset this cursor so that it holds no entries at all.
    fn clear(&mut self) {
        self.len = 0;
        self.cursor = self.raw;
    }
}