Welcome to mirror list, hosted at ThFree Co, Russian Federation.

rawpath.rs « unix « os « src - github.com/windirstat/walkdir.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 170c44c8df83ae03eca24fad7e449d793af3148c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
use std::ffi::{CStr, CString, OsStr, OsString};
use std::fmt;
use std::os::unix::ffi::{OsStrExt, OsStringExt};
use std::path::{Path, PathBuf};

// Currently, these types are not exported in the public API of this crate,
// even though they (or something like them) are seemingly necessary to
// implement recursive directory traversal without superfluous allocations.
// Figuring out how to expose them is tricky, since invariably, they _aren't_
// the same type with the same API. So they wind up being a hazard if one
// accidentally tries to treat them as a platform independent type.

/// A platform dependent representation of a file path.
///
/// Unlike Rust's standard library `PathBuf`, a `RawPathBuf` uses the same
/// in-memory representation of a file path as the platform itself. Moreover,
/// the APIs of each `RawPathBuf` are also platform dependent. For example,
/// on Unix, a `RawPathBuf` can be cheaply converted between types such as
/// `Vec<u8>` and `CString`. But on Windows, since its internal representation
/// is a sequence of 16-bit integers, these conversions are not available.
///
/// On Unix the path is kept in one NUL-terminated byte buffer, which is what
/// allows `as_cstr` to borrow a `&CStr` directly from the path without
/// copying.
#[derive(Clone)]
pub struct RawPathBuf {
    /// The bytes of the path followed by a single NUL terminator.
    ///
    /// Invariants:
    ///
    /// * `buf` always has length at least 1 and always ends with a zero byte.
    /// * `buf` contains exactly one zero byte (i.e., no interior NULs).
    ///
    /// The unchecked C string conversions in this module rely on both
    /// invariants, so any code that touches `buf` directly must restore them
    /// before the buffer can be observed again.
    buf: Vec<u8>,
}

impl fmt::Debug for RawPathBuf {
    /// Formats the path as a `RawPathBuf` struct with a single `buf` field
    /// holding the path's bytes (without the NUL terminator) as rendered by
    /// `escaped_bytes`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let shown = crate::os::unix::escaped_bytes(self.as_code_units());
        let mut out = f.debug_struct("RawPathBuf");
        out.field("buf", &shown);
        out.finish()
    }
}

impl<'a> From<&'a str> for RawPathBuf {
    fn from(s: &'a str) -> RawPathBuf {
        RawPathBuf::from(s.to_string())
    }
}

impl From<String> for RawPathBuf {
    /// Converts a string into a raw path, reusing the string's allocation and
    /// appending the NUL terminator.
    ///
    /// # Panics
    ///
    /// Panics if `s` contains a NUL byte. A NUL inside the path would break
    /// the "no interior NULs" invariant of the internal buffer, which the
    /// unchecked C string conversions of this type rely on for soundness.
    fn from(s: String) -> RawPathBuf {
        let mut buf = s.into_bytes();
        // This conversion is safe to call, so the invariant must be enforced
        // here rather than assumed.
        assert!(
            !buf.contains(&0),
            "path must not contain an interior NUL byte"
        );
        buf.push(0);
        RawPathBuf { buf }
    }
}

impl From<CString> for RawPathBuf {
    /// Takes over the C string's bytes, terminator included, as the path
    /// buffer.
    fn from(cstr: CString) -> RawPathBuf {
        let buf = cstr.into_bytes_with_nul();
        RawPathBuf { buf }
    }
}

impl From<RawPathBuf> for CString {
    /// Converts the raw path into an owned C string, reusing its buffer.
    fn from(rawp: RawPathBuf) -> CString {
        let mut bytes = rawp.buf;
        // `CString::from_vec_unchecked` appends the NUL terminator itself, so
        // ours has to be removed first. Handing it the terminated buffer
        // would produce a C string with two trailing NULs, i.e. one with an
        // interior NUL.
        let terminator = bytes.pop();
        debug_assert_eq!(terminator, Some(0));
        // SAFETY: The internal buffer is guaranteed to contain exactly one
        // NUL byte, the terminator, which was just removed. Therefore `bytes`
        // contains no NUL bytes at all.
        unsafe { CString::from_vec_unchecked(bytes) }
    }
}

impl From<OsString> for RawPathBuf {
    /// Converts an OS string into a raw path, reusing its allocation and
    /// appending the NUL terminator.
    ///
    /// # Panics
    ///
    /// Panics if `osstr` contains a NUL byte. A NUL inside the path would
    /// break the "no interior NULs" invariant of the internal buffer, which
    /// the unchecked C string conversions of this type rely on for soundness.
    fn from(osstr: OsString) -> RawPathBuf {
        let mut buf = osstr.into_vec();
        // This conversion is safe to call, so the invariant must be enforced
        // here rather than assumed.
        assert!(
            !buf.contains(&0),
            "path must not contain an interior NUL byte"
        );
        buf.push(0);
        RawPathBuf { buf }
    }
}

impl From<RawPathBuf> for OsString {
    /// Converts the raw path into an OS string, reusing its buffer.
    fn from(rawp: RawPathBuf) -> OsString {
        // The raw path is consumed here, so its terminator can simply be
        // taken off: an OS string carries no NUL terminator.
        let mut bytes = rawp.buf;
        let terminator = bytes.pop();
        debug_assert_eq!(terminator, Some(0));
        OsString::from_vec(bytes)
    }
}

impl From<PathBuf> for RawPathBuf {
    fn from(path: PathBuf) -> RawPathBuf {
        RawPathBuf::from(path.into_os_string())
    }
}

impl From<RawPathBuf> for PathBuf {
    /// Converts a raw path into a standard library path by way of an OS
    /// string.
    fn from(rawp: RawPathBuf) -> PathBuf {
        let os_path: OsString = rawp.into();
        os_path.into()
    }
}

impl RawPathBuf {
    /// Returns the code units (bytes) of this path without the NUL terminator.
    pub fn as_code_units(&self) -> &[u8] {
        // The buffer always holds at least the terminator, so this cannot
        // underflow.
        &self.buf[..self.buf.len() - 1]
    }

    /// Returns this raw path as a C string slice.
    pub fn as_cstr(&self) -> &CStr {
        // SAFETY: buf is guaranteed to have a NUL terminator with no interior
        // NULs.
        unsafe { CStr::from_bytes_with_nul_unchecked(&self.buf) }
    }

    /// Returns this raw path as an OS string slice.
    pub fn as_os_str(&self) -> &OsStr {
        OsStr::from_bytes(self.as_code_units())
    }

    /// Return this raw path as a standard library path.
    pub fn as_path(&self) -> &Path {
        Path::new(self.as_os_str())
    }

    /// Push the given C string slice to the end of this path.
    ///
    /// The bytes are appended verbatim: no separator is inserted and no
    /// normalization is performed.
    pub fn push_cstr(&mut self, slice: &CStr) {
        // SAFETY: The internal buffer is guaranteed to have a NUL byte at
        // this point, and we always add it back below via the CStr's NUL
        // byte.
        unsafe {
            self.drop_nul();
        }
        self.buf.extend_from_slice(slice.to_bytes_with_nul());
    }

    /// Join the given C string slice to this path in place via a path
    /// separator.
    ///
    /// If this path ends with a `/`, and/or if name starts with a `/`, then
    /// only one separator will be used to join them. This otherwise does no
    /// other normalization. e.g., joining `a/b//` with `/c` will result in
    /// `a/b//c`.
    ///
    /// If this path is empty, then no separator is inserted, so that joining
    /// a relative name to an empty path results in that relative name and not
    /// in a path rooted at `/`.
    pub fn join(&mut self, name: &CStr) {
        // SAFETY: The internal buffer is guaranteed to have a NUL byte at
        // this point, and we always add it back below via the CStr's NUL
        // byte.
        unsafe {
            self.drop_nul();
        }
        let ends_with_sep = self.buf.last() == Some(&b'/');
        let starts_with_sep = name.to_bytes().first() == Some(&b'/');
        if ends_with_sep && starts_with_sep {
            // Both sides bring a separator, so drop ours.
            self.buf.pop();
        } else if !ends_with_sep && !starts_with_sep && !self.buf.is_empty() {
            // Neither side brings a separator, so add one. An empty path is
            // left alone, since a leading `/` would make the result absolute.
            self.buf.push(b'/');
        }
        self.buf.extend_from_slice(name.to_bytes_with_nul());
    }

    /// Pop the last element in this path. Return true if an element was
    /// popped. An element isn't popped if the path is empty or represents
    /// a root path.
    pub fn pop(&mut self) -> bool {
        // Move backwards through the path, finding the first location that
        // ends the parent element, if one exists. Basically, we want to
        // implement the following regex:
        //
        //     ^.*?(/*[^/]+/*)$
        //
        // Where everything in the capturing group is deleted.
        let new_len = {
            let path = self.as_code_units();
            // Skip any trailing separators to find where the last element
            // ends. If there is no such element, then the path is either
            // empty or made up of only separators.
            let end = match path.iter().rposition(|&b| b != b'/') {
                Some(i) => i + 1,
                None => return false,
            };
            // The last element starts just after the preceding separator, or
            // at the beginning if there is none.
            let start = path[..end]
                .iter()
                .rposition(|&b| b == b'/')
                .map_or(0, |i| i + 1);
            // Finally, remove the separators in front of the last element,
            // but keep a root slash if that is all that remains.
            match path[..start].iter().rposition(|&b| b != b'/') {
                Some(i) => i + 1,
                None => start.min(1),
            }
        };
        // `new_len` is strictly less than the old length, so re-adding the
        // terminator never needs to grow the allocation.
        self.buf.truncate(new_len);
        self.buf.push(0);
        true
    }

    /// Drop the trailing NUL byte from the internal buffer in place.
    ///
    /// # Safety
    ///
    /// This is unsafe to call because it removes the NUL byte from the buffer,
    /// which is necessary for safety in many contexts.
    ///
    /// When callers use this method, they MUST ensure that a NUL byte is
    /// added back to the internal buffer before its absence can be observed
    /// by callers.
    ///
    /// Callers must also never call this method if the NUL byte has already
    /// been removed.
    unsafe fn drop_nul(&mut self) {
        let terminator = self.buf.pop();
        debug_assert_eq!(terminator, Some(0));
    }

    /// Add a trailing NUL byte to the internal buffer.
    ///
    /// # Safety
    ///
    /// This is unsafe to call because it could create an interior NUL byte
    /// if the internal buffer already ends with a NUL byte. Therefore, this
    /// must only be called when the caller knows that the buffer does not end
    /// with a NUL byte.
    unsafe fn add_nul(&mut self) {
        // The buffer is legitimately empty here when the terminator of an
        // empty path was dropped, so compare against the optional last byte
        // instead of unwrapping it.
        debug_assert_ne!(self.buf.last(), Some(&0));
        self.buf.push(0);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::CStr;

    /// Views the path, without its NUL terminator, as UTF-8 text.
    fn tostr(p: &RawPathBuf) -> &str {
        std::str::from_utf8(p.as_code_units()).unwrap()
    }

    /// Builds a C string slice from a string that ends with an explicit
    /// `\0`.
    fn cstr(s: &str) -> &CStr {
        CStr::from_bytes_with_nul(s.as_bytes()).unwrap()
    }

    // Tests for `join`: exactly one separator ends up between the path and
    // the joined name unless the path already ends with several.

    #[test]
    fn join1() {
        let mut p = RawPathBuf::from("a/b");
        p.join(cstr("c\0"));
        assert_eq!("a/b/c", tostr(&p));
    }

    #[test]
    fn join2() {
        let mut p = RawPathBuf::from("a/b/");
        p.join(cstr("c\0"));
        assert_eq!("a/b/c", tostr(&p));
    }

    #[test]
    fn join3() {
        let mut p = RawPathBuf::from("a/b");
        p.join(cstr("/c\0"));
        assert_eq!("a/b/c", tostr(&p));
    }

    #[test]
    fn join4() {
        let mut p = RawPathBuf::from("a/b/");
        p.join(cstr("/c\0"));
        assert_eq!("a/b/c", tostr(&p));
    }

    #[test]
    fn join5() {
        let mut p = RawPathBuf::from("a/b//");
        p.join(cstr("/c\0"));
        assert_eq!("a/b//c", tostr(&p));
    }

    // Tests for `push_cstr`: the bytes are appended verbatim, without any
    // separator handling, and the result stays NUL terminated.

    #[test]
    fn push_cstr1() {
        let mut p = RawPathBuf::from("a/b");
        p.push_cstr(cstr("c\0"));
        assert_eq!("a/bc", tostr(&p));
        assert_eq!(cstr("a/bc\0"), p.as_cstr());
    }

    #[test]
    fn push_cstr2() {
        let mut p = RawPathBuf::from("a/b/");
        p.push_cstr(cstr("/c\0"));
        assert_eq!("a/b//c", tostr(&p));
        assert_eq!(cstr("a/b//c\0"), p.as_cstr());
    }

    // Tests for `pop`.

    #[test]
    fn pop1() {
        let mut p = RawPathBuf::from("/foo/bar////baz/");

        assert!(p.pop());
        assert_eq!("/foo/bar", tostr(&p));

        assert!(p.pop());
        assert_eq!("/foo", tostr(&p));

        assert!(p.pop());
        assert_eq!("/", tostr(&p));

        assert!(!p.pop());
        assert_eq!("/", tostr(&p));
    }

    #[test]
    fn pop2() {
        let mut p = RawPathBuf::from("////foo/");

        assert!(p.pop());
        assert_eq!("/", tostr(&p));

        assert!(!p.pop());
        assert_eq!("/", tostr(&p));
    }

    #[test]
    fn pop3() {
        let mut p = RawPathBuf::from("foo/bar/baz");

        assert!(p.pop());
        assert_eq!("foo/bar", tostr(&p));

        assert!(p.pop());
        assert_eq!("foo", tostr(&p));

        assert!(p.pop());
        assert_eq!("", tostr(&p));

        assert!(!p.pop());
        assert_eq!("", tostr(&p));
    }

    #[test]
    fn pop4() {
        let mut p = RawPathBuf::from("////");

        assert!(!p.pop());
        assert_eq!("////", tostr(&p));
    }

    #[test]
    fn pop5() {
        let mut p = RawPathBuf::from("////a");

        assert!(p.pop());
        assert_eq!("/", tostr(&p));

        assert!(!p.pop());
        assert_eq!("/", tostr(&p));
    }

    #[test]
    fn pop6() {
        let mut p = RawPathBuf::from("foo");

        assert!(p.pop());
        assert_eq!("", tostr(&p));

        assert!(!p.pop());
        assert_eq!("", tostr(&p));
    }
}