// src/mft.rs from github.com/windirstat/mft.git
// (blob 9d78543d14ffb79d3b4537aae3fc1b661244d443), retrieved through a cgit mirror.
use crate::entry::MftEntry;
use crate::err::{self, Result};

use crate::{EntryHeader, ReadSeek};
use log::{debug, trace};
use snafu::ResultExt;

use lru::LruCache;
use std::fs::{self, File};
use std::io::{BufReader, Cursor, SeekFrom};
use std::path::{Path, PathBuf};

/// Parser over a raw MFT stream.
///
/// Entries are read from `data` as fixed-size records of `entry_size` bytes each,
/// and resolved paths are memoized in `entries_cache`.
pub struct MftParser<T: ReadSeek> {
    /// The underlying reader the entries are read from.
    data: T,
    /// Entry size is present in the volume header, but this is not available to us.
    /// Instead this will be guessed by the entry size of the first entry.
    entry_size: u32,
    /// Total size of the MFT stream, in bytes (supplied by the caller or found by
    /// seeking to the end of `data`).
    size: u64,
    /// LRU cache mapping an entry number to the path already resolved for it.
    entries_cache: LruCache<u64, PathBuf>,
}

impl MftParser<BufReader<File>> {
    /// Instantiates an instance of the parser from a file path.
    /// Does not mutate the file contents in any way.
    pub fn from_path(filename: impl AsRef<Path>) -> Result<Self> {
        let f = filename.as_ref();

        let mft_fh = File::open(f).context(err::FailedToOpenFile { path: f.to_owned() })?;
        let size = fs::metadata(f)?.len();

        Self::from_read_seek(BufReader::with_capacity(4096, mft_fh), Some(size))
    }
}

impl MftParser<Cursor<Vec<u8>>> {
    /// Builds a parser over an in-memory buffer holding a complete MFT file.
    ///
    /// Handy in tests, where no file on disk is needed.
    pub fn from_buffer(buffer: Vec<u8>) -> Result<Self> {
        // The length has to be captured before the buffer is moved into the cursor.
        let total_len = buffer.len() as u64;

        Self::from_read_seek(Cursor::new(buffer), Some(total_len))
    }
}

impl<T: ReadSeek> MftParser<T> {
    /// Builds a parser on top of an arbitrary seekable reader.
    ///
    /// `size` is the total length of the stream in bytes. When it is `None`, the
    /// length is found by seeking to the end of `data`. The reader is rewound to
    /// offset 0 before the parser is returned.
    ///
    /// # Errors
    ///
    /// Fails when the first entry header cannot be read or when seeking fails.
    pub fn from_read_seek(mut data: T, size: Option<u64>) -> Result<Self> {
        // The size of the first record is taken as the size of every record.
        let header = EntryHeader::from_reader(&mut data, 0)?;
        let entry_size = header.total_entry_size;

        let size = if let Some(known_size) = size {
            known_size
        } else {
            data.seek(SeekFrom::End(0))?
        };

        data.seek(SeekFrom::Start(0))?;

        let entries_cache = LruCache::new(1000);

        Ok(Self {
            data,
            entry_size,
            size,
            entries_cache,
        })
    }

    /// Returns how many whole entries of `entry_size` bytes fit in the MFT stream.
    ///
    /// `entry_size` is taken from the first entry of the (possibly corrupted) input,
    /// so it may be zero. In that case no entry can be addressed and 0 is returned
    /// instead of panicking on a division by zero.
    pub fn get_entry_count(&self) -> u64 {
        self.size
            .checked_div(u64::from(self.entry_size))
            .unwrap_or(0)
    }

    /// Reads an entry from the MFT by entry number.
    ///
    /// The entry is located at byte offset `entry_number * entry_size` and exactly
    /// `entry_size` bytes are read for it.
    ///
    /// # Errors
    ///
    /// Fails when the byte offset of the entry does not fit in a `u64`, when seeking
    /// or reading the underlying stream fails, or when the bytes read cannot be
    /// parsed as an entry.
    pub fn get_entry(&mut self, entry_number: u64) -> Result<MftEntry> {
        debug!("Reading entry {}", entry_number);

        // Entry numbers may originate from parent references stored in the MFT
        // itself, so the multiplication must not be trusted to stay in range.
        let offset = entry_number
            .checked_mul(u64::from(self.entry_size))
            .ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    "MFT entry offset overflows u64",
                )
            })?;

        self.data.seek(SeekFrom::Start(offset))?;

        let mut entry_buffer = vec![0; self.entry_size as usize];
        self.data.read_exact(&mut entry_buffer)?;

        Ok(MftEntry::from_buffer(entry_buffer, entry_number)?)
    }

    /// Returns an iterator that reads every entry of the MFT in order, starting at
    /// entry 0.
    ///
    /// Each item is the result of reading that entry, so a failure on one entry
    /// does not end the iteration.
    pub fn iter_entries(&mut self) -> impl Iterator<Item = Result<MftEntry>> + '_ {
        let entry_count = self.get_entry_count();
        let entry_numbers = 0..entry_count;

        entry_numbers.map(move |entry_number| self.get_entry(entry_number))
    }

    /// Resolves the path of entry `parent_entry_id` and, when `entry_name` is given,
    /// appends that name to it.
    ///
    /// The parent's path is served from the cache when available; otherwise the
    /// parent entry is read, its full path is resolved recursively, and the result
    /// is cached.
    ///
    /// Reference cycles (an entry that is, directly or indirectly, its own
    /// ancestor) would otherwise recurse until the stack overflows. To stop them,
    /// the cache is seeded with an `[Orphaned]` placeholder for the parent before
    /// recursing, so a cycle finds the placeholder and terminates. The placeholder
    /// is replaced by the real path once it is known.
    // NOTE(review): a cycle longer than the cache capacity could evict the
    // placeholder before it is revisited; such inputs are not guarded here.
    fn inner_get_entry(&mut self, parent_entry_id: u64, entry_name: Option<&str>) -> PathBuf {
        // If my parent path is known, then my path is parent's full path + my name.
        if let Some(cached_parent_path) = self.entries_cache.get(&parent_entry_id) {
            return match entry_name {
                // `join` already allocates a new `PathBuf`; no clone is needed first.
                Some(name) => cached_parent_path.join(name),
                None => cached_parent_path.clone(),
            };
        }

        // Mark the parent as "being resolved" so that a reference cycle leading
        // back to it terminates instead of recursing forever.
        self.entries_cache
            .put(parent_entry_id, PathBuf::from("[Orphaned]"));

        // Else, retrieve and cache my parent's path.
        let path = match self.get_entry(parent_entry_id).ok() {
            Some(parent) => match self.get_full_path_for_entry(&parent) {
                Ok(Some(path)) => path,
                // I have a parent, which doesn't have a filename attribute.
                // Default to root.
                _ => PathBuf::new(),
            },
            // Parent is maybe corrupted or incomplete, use a sentinel instead.
            None => PathBuf::from("[Unknown]"),
        };

        // Overwrites the placeholder with the resolved path.
        self.entries_cache.put(parent_entry_id, path.clone());

        match entry_name {
            Some(name) => path.join(name),
            None => path,
        }
    }

    /// Resolves the full path of `entry`, reusing cached parent paths when possible.
    ///
    /// Returns `Ok(None)` when the entry has neither a name attribute nor a
    /// non-zero base reference, i.e. when nothing is known about its location.
    pub fn get_full_path_for_entry(&mut self, entry: &MftEntry) -> Result<Option<PathBuf>> {
        let entry_id = entry.header.record_number;

        let filename_header = match entry.find_best_name_attribute() {
            Some(header) => header,
            None => {
                return Ok(match entry.header.base_reference.entry {
                    // I don't have a parent reference, and no X30 attribute. Tough luck.
                    0 => None,
                    base_entry_id => Some(self.inner_get_entry(base_entry_id, None)),
                });
            }
        };

        let parent_entry_id = filename_header.parent.entry;

        let full_path = match parent_entry_id {
            // MFT entry 5 is the root path.
            5 => PathBuf::from(filename_header.name),
            // The entry names itself as its own parent.
            id if id == entry_id => {
                trace!(
                    "Found self-referential file path, for entry ID {}",
                    entry_id
                );
                PathBuf::from("[Orphaned]").join(filename_header.name)
            }
            // No parent at all: file the entry under the orphan marker and remember it.
            0 => {
                trace!("Found orphaned entry ID {}", entry_id);

                let orphan = PathBuf::from("[Orphaned]").join(filename_header.name);
                self.entries_cache.put(entry_id, orphan.clone());
                orphan
            }
            // Regular case: parent's path + my name.
            id => self.inner_get_entry(id, Some(&filename_header.name)),
        };

        Ok(Some(full_path))
    }
}

#[cfg(test)]
mod tests {
    use crate::tests::fixtures::mft_sample;
    use crate::{MftEntry, MftParser};

    // Entrypoint for the CLion profiler.
    // The name is kept as-is even though up to 10000 entries are processed.
    #[test]
    fn test_process_90_mft_entries() {
        let sample = mft_sample();

        let mut parser = MftParser::from_path(sample).unwrap();

        // Walk every attribute of every readable entry, counting them as we go.
        let mut count = 0;
        for record in parser.iter_entries().take(10000).filter_map(|a| a.ok()) {
            for _attribute in record.iter_attributes() {
                count += 1;
            }
        }

        // Without this the counter is write-only and the test checks nothing.
        assert!(count > 0, "expected the sample to contain attributes");
    }

    // Resolves the path of each of the first 1000 readable entries of the sample.
    #[test]
    fn test_get_full_path() {
        let sample = mft_sample();
        let mut parser = MftParser::from_path(sample).unwrap();

        let mut paths = Vec::with_capacity(1000);
        // Collected up front: the iterator borrows the parser mutably, and so does
        // the path resolution below.
        let entries: Vec<MftEntry> = parser
            .iter_entries()
            .take(1000)
            .filter_map(Result::ok)
            .collect();

        for entry in entries {
            if let Some(path) = parser.get_full_path_for_entry(&entry).unwrap() {
                paths.push(path);
            }
        }

        assert_eq!(paths.len(), 988);
    }

    // Smoke test: reading entry 5 and resolving its path must not fail.
    #[test]
    fn test_get_full_name() {
        let sample = mft_sample();
        let mut parser = MftParser::from_path(sample).unwrap();

        let e = parser.get_entry(5).unwrap();
        parser.get_full_path_for_entry(&e).unwrap();
    }
}