Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/windirstat/ntfs.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorColin Finck <colin@reactos.org>2021-08-02 00:18:14 +0300
committerColin Finck <colin@reactos.org>2021-08-02 00:18:14 +0300
commit6210c625de3f617b0553ae8176f2483133517161 (patch)
tree09ed0a59abd45e1c5691c652011baf2733184e8e
parent943309f22ed6e208b7dcc1b24ae8879eb88f8a39 (diff)
Implement finding files by name in a file name index.
This introduces parsing the $UpCase file to perform case-insensitive searches.
-rw-r--r--src/error.rs2
-rw-r--r--src/index.rs95
-rw-r--r--src/indexes/file_name.rs29
-rw-r--r--src/lib.rs1
-rw-r--r--src/ntfs.rs29
-rw-r--r--src/string.rs77
-rw-r--r--src/upcase_table.rs103
7 files changed, 323 insertions, 13 deletions
diff --git a/src/error.rs b/src/error.rs
index 89e8d7b..85b6d3e 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -118,6 +118,8 @@ pub enum NtfsError {
expected: &'static [u8],
actual: [u8; 2],
},
+ /// The Upcase Table should have a size of {expected} bytes, but it has {actual} bytes
+ InvalidUpcaseTableSize { expected: u64, actual: u64 },
/// The VCN {vcn} read from the NTFS data run header at byte position {position:#010x} cannot be added to the LCN {previous_lcn} calculated from previous data runs
InvalidVcnInDataRunHeader {
position: u64,
diff --git a/src/index.rs b/src/index.rs
index e6abd7a..f9769d9 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -7,6 +7,7 @@ use crate::indexes::NtfsIndexEntryType;
use crate::structured_values::{NtfsIndexAllocation, NtfsIndexRoot};
use alloc::vec::Vec;
use binread::io::{Read, Seek};
+use core::cmp::Ordering;
use core::marker::PhantomData;
pub struct NtfsIndex<'n, 'f, E>
@@ -41,6 +42,12 @@ where
})
}
+ /// Returns an [`NtfsIndexFinder`] structure to efficiently find an entry in this index.
+ pub fn finder<'i>(&'i self) -> NtfsIndexFinder<'n, 'f, 'i, E> {
+ NtfsIndexFinder::new(self)
+ }
+
+ /// Returns an [`NtfsIndexEntries`] iterator to perform an in-order traversal of this index.
pub fn iter<'i>(&'i self) -> NtfsIndexEntries<'n, 'f, 'i, E> {
NtfsIndexEntries::new(self)
}
@@ -152,3 +159,91 @@ where
Some(Ok(entry))
}
}
+
+/// Helper structure to efficiently find an entry in an index, created by [`NtfsIndex::finder`].
+///
+/// This helper is required, because the returned entry borrows from the iterator it was created from.
+/// The idea is that you copy the field(s) you need from the returned entry and then drop the entry and the finder.
+pub struct NtfsIndexFinder<'n, 'f, 'i, E>
+where
+ E: NtfsIndexEntryType,
+{
+ index: &'i NtfsIndex<'n, 'f, E>,
+ inner_iterator: IndexNodeEntryRanges<E>,
+}
+
+impl<'n, 'f, 'i, E> NtfsIndexFinder<'n, 'f, 'i, E>
+where
+ E: NtfsIndexEntryType,
+{
+ fn new(index: &'i NtfsIndex<'n, 'f, E>) -> Self {
+ // This is superfluous and done again in `find`, but doesn't justify using an `Option` here.
+ let inner_iterator = index.index_root.entry_ranges();
+
+ Self {
+ index,
+ inner_iterator,
+ }
+ }
+
+ /// Finds an entry in this index using the given comparison function and returns an [`NtfsIndexEntry`]
+ /// (if there is one).
+ pub fn find<'a, T, F>(&'a mut self, fs: &mut T, cmp: F) -> Option<Result<NtfsIndexEntry<'a, E>>>
+ where
+ T: Read + Seek,
+ F: Fn(&E::KeyType) -> Ordering,
+ {
+ // Always (re)start by iterating through the Index Root entry ranges.
+ self.inner_iterator = self.index.index_root.entry_ranges();
+
+ loop {
+ // Get the next entry.
+ //
+ // A textbook B-tree search algorithm would get the middle entry and perform binary search.
+ // But we can't do that here, as we are dealing with variable-length entries.
+ let entry_range = self.inner_iterator.next()?;
+ let entry = entry_range.to_entry(self.inner_iterator.data());
+
+ // Check if this entry has a key.
+ if let Some(key) = entry.key() {
+ // The entry has a key, so compare it using the given function.
+ let key = iter_try!(key);
+
+ match cmp(&key) {
+ Ordering::Equal => {
+ // We found what we were looking for!
+ // Recreate `entry` from the last `self.inner_iterator` to please the borrow checker.
+ let entry = entry_range.to_entry(self.inner_iterator.data());
+ return Some(Ok(entry));
+ }
+ Ordering::Less => {
+ // What we are looking for comes BEFORE this entry.
+ // Hence, it must be in a subnode of this entry and we continue below.
+ }
+ Ordering::Greater => {
+ // What we are looking for comes AFTER this entry.
+ // Keep searching on the same subnode level.
+ continue;
+ }
+ }
+ }
+
+ // Either this entry has no key (= is the last one on this subnode level) or
+ // it comes lexicographically AFTER what we're looking for.
+ // In both cases, we have to continue iterating in the subnode of this entry (if there is any).
+ let subnode_vcn = entry.subnode_vcn()?;
+ let index_allocation =
+ iter_try!(self.index.index_allocation.as_ref().ok_or_else(|| {
+ NtfsError::MissingIndexAllocation {
+ position: self.index.index_root.position(),
+ }
+ }));
+ let subnode = iter_try!(index_allocation.record_from_vcn(
+ fs,
+ &self.index.index_root,
+ subnode_vcn
+ ));
+ self.inner_iterator = subnode.into_entry_ranges();
+ }
+ }
+}
diff --git a/src/indexes/file_name.rs b/src/indexes/file_name.rs
index 069836a..5f77bc7 100644
--- a/src/indexes/file_name.rs
+++ b/src/indexes/file_name.rs
@@ -1,12 +1,41 @@
// Copyright 2021 Colin Finck <colin@reactos.org>
// SPDX-License-Identifier: GPL-2.0-or-later
+use crate::error::Result;
+use crate::index::NtfsIndexFinder;
+use crate::index_entry::NtfsIndexEntry;
use crate::indexes::{NtfsIndexEntryHasFileReference, NtfsIndexEntryType};
+use crate::ntfs::Ntfs;
+use crate::string::UpcaseOrd;
use crate::structured_values::NtfsFileName;
+use binread::io::{Read, Seek};
+/// Defines the [`NtfsIndexEntryType`] for filename indexes (more commonly known as "directories").
#[derive(Debug)]
pub struct NtfsFileNameIndex {}
+impl NtfsFileNameIndex {
+ /// Finds a file in a filename index by name and returns the [`NtfsIndexEntry`] (if any).
+ /// The name is compared case-insensitively based on the filesystem's $UpCase table.
+ ///
+ /// # Panics
+ ///
+ /// Panics if [`read_upcase_table`][Ntfs::read_upcase_table] had not been called on the passed [`Ntfs`] object.
+ pub fn find<'a, 'n, T>(
+ index_finder: &'a mut NtfsIndexFinder<Self>,
+ ntfs: &'n Ntfs,
+ fs: &mut T,
+ name: &str,
+ ) -> Option<Result<NtfsIndexEntry<'a, Self>>>
+ where
+ T: Read + Seek,
+ {
+ // TODO: This always performs a case-insensitive comparison.
+ // There are some corner cases where NTFS uses case-sensitive filenames. These need to be considered!
+ index_finder.find(fs, |file_name| name.upcase_cmp(ntfs, &file_name.name()))
+ }
+}
+
impl NtfsIndexEntryType for NtfsFileNameIndex {
type KeyType = NtfsFileName;
}
diff --git a/src/lib.rs b/src/lib.rs
index 6215182..70723fb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -27,6 +27,7 @@ pub mod structured_values;
mod time;
mod traits;
mod types;
+mod upcase_table;
pub use crate::attribute::*;
pub use crate::attribute_value::*;
diff --git a/src/ntfs.rs b/src/ntfs.rs
index aeb6dd8..ee2f7fc 100644
--- a/src/ntfs.rs
+++ b/src/ntfs.rs
@@ -6,6 +6,7 @@ use crate::boot_sector::BootSector;
use crate::error::{NtfsError, Result};
use crate::ntfs_file::{KnownNtfsFile, NtfsFile};
use crate::structured_values::{NtfsVolumeInformation, NtfsVolumeName};
+use crate::upcase_table::UpcaseTable;
use binread::io::{Read, Seek, SeekFrom};
use binread::BinReaderExt;
@@ -23,6 +24,8 @@ pub struct Ntfs {
file_record_size: u32,
/// Serial number of the NTFS volume.
serial_number: u64,
+ /// Table of Unicode uppercase characters (only required for case-insensitive comparisons).
+ upcase_table: Option<UpcaseTable>,
}
impl Ntfs {
@@ -42,6 +45,7 @@ impl Ntfs {
let mft_position = 0;
let file_record_size = bpb.file_record_size()?;
let serial_number = bpb.serial_number();
+ let upcase_table = None;
let mut ntfs = Self {
cluster_size,
@@ -50,6 +54,7 @@ impl Ntfs {
mft_position,
file_record_size,
serial_number,
+ upcase_table,
};
ntfs.mft_position = bpb.mft_lcn().position(&ntfs)?;
@@ -88,6 +93,19 @@ impl Ntfs {
NtfsFile::new(&self, fs, position)
}
+ /// Reads the $UpCase file from the filesystem and stores it in this [`Ntfs`] object.
+ ///
+ /// This function only needs to be called if case-insensitive comparisons are later performed
+ /// (i.e. finding files).
+ pub fn read_upcase_table<T>(&mut self, fs: &mut T) -> Result<()>
+ where
+ T: Read + Seek,
+ {
+ let upcase_table = UpcaseTable::read(self, fs)?;
+ self.upcase_table = Some(upcase_table);
+ Ok(())
+ }
+
/// Returns the root [`Dir`] of this NTFS volume.
pub fn root_dir(&self) -> ! {
panic!("TODO")
@@ -108,6 +126,17 @@ impl Ntfs {
self.size
}
+ /// Returns the stored [`UpcaseTable`].
+ ///
+ /// # Panics
+ ///
+ /// Panics if [`read_upcase_table`][Ntfs::read_upcase_table] had not been called.
+ pub(crate) fn upcase_table(&self) -> &UpcaseTable {
+ self.upcase_table
+ .as_ref()
+ .expect("You need to call read_upcase_table first")
+ }
+
/// Returns an [`NtfsVolumeInformation`] containing general information about
/// the volume, like the NTFS version.
pub fn volume_info<T>(&self, fs: &mut T) -> Result<NtfsVolumeInformation>
diff --git a/src/string.rs b/src/string.rs
index ce0109b..2d6d4a9 100644
--- a/src/string.rs
+++ b/src/string.rs
@@ -1,10 +1,11 @@
// Copyright 2021 Colin Finck <colin@reactos.org>
// SPDX-License-Identifier: GPL-2.0-or-later
+use crate::ntfs::Ntfs;
use alloc::string::String;
use core::char;
use core::cmp::Ordering;
-use core::convert::TryInto;
+use core::convert::{identity, TryInto};
use core::fmt;
/// Zero-copy representation of a string stored in an NTFS filesystem structure.
@@ -12,17 +13,21 @@ use core::fmt;
pub struct NtfsString<'a>(pub &'a [u8]);
impl<'a> NtfsString<'a> {
- fn cmp_iter<TI, OI>(mut this_iter: TI, mut other_iter: OI) -> Ordering
+ fn cmp_iter<TI, OI, F>(mut this_iter: TI, mut other_iter: OI, code_unit_fn: F) -> Ordering
where
TI: Iterator<Item = u16>,
OI: Iterator<Item = u16>,
+ F: Fn(u16) -> u16,
{
loop {
match (this_iter.next(), other_iter.next()) {
(Some(this_code_unit), Some(other_code_unit)) => {
// We have two UTF-16 code units to compare.
- if this_code_unit != other_code_unit {
- return this_code_unit.cmp(&other_code_unit);
+ let this_upper = code_unit_fn(this_code_unit);
+ let other_upper = code_unit_fn(other_code_unit);
+
+ if this_upper != other_upper {
+ return this_upper.cmp(&other_upper);
}
}
(Some(_), None) => {
@@ -91,7 +96,7 @@ impl<'a> fmt::Display for NtfsString<'a> {
impl<'a> Ord for NtfsString<'a> {
fn cmp(&self, other: &Self) -> Ordering {
- NtfsString::cmp_iter(self.u16_iter(), other.u16_iter())
+ NtfsString::cmp_iter(self.u16_iter(), other.u16_iter(), identity)
}
}
@@ -104,25 +109,25 @@ impl<'a> PartialEq for NtfsString<'a> {
impl<'a> PartialEq<str> for NtfsString<'a> {
fn eq(&self, other: &str) -> bool {
- NtfsString::cmp_iter(self.u16_iter(), other.encode_utf16()) == Ordering::Equal
+ NtfsString::cmp_iter(self.u16_iter(), other.encode_utf16(), identity) == Ordering::Equal
}
}
impl<'a> PartialEq<NtfsString<'a>> for str {
fn eq(&self, other: &NtfsString<'a>) -> bool {
- NtfsString::cmp_iter(self.encode_utf16(), other.u16_iter()) == Ordering::Equal
+ NtfsString::cmp_iter(self.encode_utf16(), other.u16_iter(), identity) == Ordering::Equal
}
}
impl<'a> PartialEq<&str> for NtfsString<'a> {
fn eq(&self, other: &&str) -> bool {
- NtfsString::cmp_iter(self.u16_iter(), other.encode_utf16()) == Ordering::Equal
+ NtfsString::cmp_iter(self.u16_iter(), other.encode_utf16(), identity) == Ordering::Equal
}
}
impl<'a> PartialEq<NtfsString<'a>> for &str {
fn eq(&self, other: &NtfsString<'a>) -> bool {
- NtfsString::cmp_iter(self.encode_utf16(), other.u16_iter()) == Ordering::Equal
+ NtfsString::cmp_iter(self.encode_utf16(), other.u16_iter(), identity) == Ordering::Equal
}
}
@@ -134,24 +139,70 @@ impl<'a> PartialOrd for NtfsString<'a> {
impl<'a> PartialOrd<str> for NtfsString<'a> {
fn partial_cmp(&self, other: &str) -> Option<Ordering> {
- Some(NtfsString::cmp_iter(self.u16_iter(), other.encode_utf16()))
+ Some(NtfsString::cmp_iter(
+ self.u16_iter(),
+ other.encode_utf16(),
+ identity,
+ ))
}
}
impl<'a> PartialOrd<NtfsString<'a>> for str {
fn partial_cmp(&self, other: &NtfsString<'a>) -> Option<Ordering> {
- Some(NtfsString::cmp_iter(self.encode_utf16(), other.u16_iter()))
+ Some(NtfsString::cmp_iter(
+ self.encode_utf16(),
+ other.u16_iter(),
+ identity,
+ ))
}
}
impl<'a> PartialOrd<&str> for NtfsString<'a> {
fn partial_cmp(&self, other: &&str) -> Option<Ordering> {
- Some(NtfsString::cmp_iter(self.u16_iter(), other.encode_utf16()))
+ Some(NtfsString::cmp_iter(
+ self.u16_iter(),
+ other.encode_utf16(),
+ identity,
+ ))
}
}
impl<'a> PartialOrd<NtfsString<'a>> for &str {
fn partial_cmp(&self, other: &NtfsString<'a>) -> Option<Ordering> {
- Some(NtfsString::cmp_iter(self.encode_utf16(), other.u16_iter()))
+ Some(NtfsString::cmp_iter(
+ self.encode_utf16(),
+ other.u16_iter(),
+ identity,
+ ))
+ }
+}
+
+pub trait UpcaseOrd<Rhs> {
+ /// Performs a case-insensitive ordering based on the upcase table read from the filesystem.
+ ///
+ /// # Panics
+ ///
+ /// Panics if [`read_upcase_table`][Ntfs::read_upcase_table] had not been called on the passed [`Ntfs`] object.
+ fn upcase_cmp(&self, ntfs: &Ntfs, other: &Rhs) -> Ordering;
+}
+
+impl<'a> UpcaseOrd<NtfsString<'a>> for NtfsString<'a> {
+ fn upcase_cmp(&self, ntfs: &Ntfs, other: &NtfsString<'a>) -> Ordering {
+ let upcase_fn = |x| ntfs.upcase_table().u16_to_uppercase(x);
+ NtfsString::cmp_iter(self.u16_iter(), other.u16_iter(), upcase_fn)
+ }
+}
+
+impl<'a> UpcaseOrd<&str> for NtfsString<'a> {
+ fn upcase_cmp(&self, ntfs: &Ntfs, other: &&str) -> Ordering {
+ let upcase_fn = |x| ntfs.upcase_table().u16_to_uppercase(x);
+ NtfsString::cmp_iter(self.u16_iter(), other.encode_utf16(), upcase_fn)
+ }
+}
+
+impl<'a> UpcaseOrd<NtfsString<'a>> for &str {
+ fn upcase_cmp(&self, ntfs: &Ntfs, other: &NtfsString<'a>) -> Ordering {
+ let upcase_fn = |x| ntfs.upcase_table().u16_to_uppercase(x);
+ NtfsString::cmp_iter(self.encode_utf16(), other.u16_iter(), upcase_fn)
}
}
diff --git a/src/upcase_table.rs b/src/upcase_table.rs
new file mode 100644
index 0000000..b612fa6
--- /dev/null
+++ b/src/upcase_table.rs
@@ -0,0 +1,103 @@
+// Copyright 2021 Colin Finck <colin@reactos.org>
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+use crate::attribute::NtfsAttributeType;
+use crate::error::{NtfsError, Result};
+use crate::ntfs::Ntfs;
+use crate::ntfs_file::KnownNtfsFile;
+use crate::traits::NtfsReadSeek;
+use binread::io::{Read, Seek};
+use core::convert::TryInto;
+use core::mem;
+
+/// The Upcase Table contains an uppercase character for each Unicode character of the Basic Multilingual Plane.
+const UPCASE_CHARACTER_COUNT: usize = 65536;
+
+/// Hence, the table has a size of 128 KiB.
+const UPCASE_TABLE_SIZE: u64 = (UPCASE_CHARACTER_COUNT * mem::size_of::<u16>()) as u64;
+
+/// Manages a table for converting characters to uppercase.
+/// This table is used for case-insensitive file name comparisons.
+///
+/// NTFS stores such a table in the special $UpCase file on every filesystem.
+/// As this table is slightly different depending on the Windows version used for creating the filesystem,
+/// it is very important to always read the table from the filesystem itself.
+/// Hence, this table is not hardcoded into the crate.
+#[derive(Clone, Debug)]
+pub(crate) struct UpcaseTable {
+ uppercase_characters: Vec<u16>,
+}
+
+impl UpcaseTable {
+ /// Reads the $UpCase file from the given filesystem into a new [`UpcaseTable`] object.
+ pub(crate) fn read<T>(ntfs: &Ntfs, fs: &mut T) -> Result<Self>
+ where
+ T: Read + Seek,
+ {
+ // Lookup the $UpCase file and its $DATA attribute.
+ let upcase_file = ntfs.ntfs_file(fs, KnownNtfsFile::UpCase as u64)?;
+ let data_attribute = upcase_file
+ .attributes()
+ .find(|attribute| {
+ // TODO: Replace by attribute.ty().contains() once https://github.com/rust-lang/rust/issues/62358 has landed.
+ attribute
+ .ty()
+ .map(|ty| ty == NtfsAttributeType::Data)
+ .unwrap_or(false)
+ })
+ .ok_or(NtfsError::AttributeNotFound {
+ position: upcase_file.position(),
+ ty: NtfsAttributeType::VolumeName,
+ })?;
+ if data_attribute.value_length() != UPCASE_TABLE_SIZE {
+ return Err(NtfsError::InvalidUpcaseTableSize {
+ expected: UPCASE_TABLE_SIZE,
+ actual: data_attribute.value_length(),
+ });
+ }
+
+ // Read the entire raw data from the $DATA attribute.
+ let mut data_value = data_attribute.value()?;
+ let mut data = vec![0u8; UPCASE_TABLE_SIZE as usize];
+ data_value.read_exact(fs, &mut data)?;
+
+ // Store it in an array of `u16` uppercase characters.
+ // Any endianness conversion is done here once, which makes `u16_to_uppercase` fast.
+ let uppercase_characters = data
+ .chunks_exact(2)
+ .map(|two_bytes| u16::from_le_bytes(two_bytes.try_into().unwrap()))
+ .collect();
+
+ Ok(Self {
+ uppercase_characters,
+ })
+ }
+
+ /// Returns the uppercase variant of the given UCS-2 character (i.e. a Unicode character
+ /// from the Basic Multilingual Plane) based on the stored conversion table.
+ /// A character without an uppercase equivalent is returned as-is.
+ pub(crate) fn u16_to_uppercase(&self, character: u16) -> u16 {
+ self.uppercase_characters[character as usize]
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_upcase_table() {
+ let mut testfs1 = crate::helpers::tests::testfs1();
+ let ntfs = Ntfs::new(&mut testfs1).unwrap();
+ let upcase_table = UpcaseTable::read(&ntfs, &mut testfs1).unwrap();
+
+ // Prove that at least the lowercase English characters are mapped to their uppercase equivalents.
+ // It makes no sense to check everything here.
+ for (lowercase, uppercase) in (b'a'..=b'z').zip(b'A'..=b'Z') {
+ assert_eq!(
+ upcase_table.u16_to_uppercase(lowercase as u16),
+ uppercase as u16
+ );
+ }
+ }
+}