diff options
Diffstat (limited to 'src/upcase_table.rs')
-rw-r--r-- | src/upcase_table.rs | 103 |
1 files changed, 103 insertions, 0 deletions
diff --git a/src/upcase_table.rs b/src/upcase_table.rs new file mode 100644 index 0000000..b612fa6 --- /dev/null +++ b/src/upcase_table.rs @@ -0,0 +1,103 @@ +// Copyright 2021 Colin Finck <colin@reactos.org> +// SPDX-License-Identifier: GPL-2.0-or-later + +use crate::attribute::NtfsAttributeType; +use crate::error::{NtfsError, Result}; +use crate::ntfs::Ntfs; +use crate::ntfs_file::KnownNtfsFile; +use crate::traits::NtfsReadSeek; +use binread::io::{Read, Seek}; +use core::convert::TryInto; +use core::mem; + +/// The Upcase Table contains an uppercase character for each Unicode character of the Basic Multilingual Plane. +const UPCASE_CHARACTER_COUNT: usize = 65536; + +/// Hence, the table has a size of 128 KiB. +const UPCASE_TABLE_SIZE: u64 = (UPCASE_CHARACTER_COUNT * mem::size_of::<u16>()) as u64; + +/// Manages a table for converting characters to uppercase. +/// This table is used for case-insensitive file name comparisons. +/// +/// NTFS stores such a table in the special $UpCase file on every filesystem. +/// As this table is slightly different depending on the Windows version used for creating the filesystem, +/// it is very important to always read the table from the filesystem itself. +/// Hence, this table is not hardcoded into the crate. +#[derive(Clone, Debug)] +pub(crate) struct UpcaseTable { + uppercase_characters: Vec<u16>, +} + +impl UpcaseTable { + /// Reads the $UpCase file from the given filesystem into a new [`UpcaseTable`] object. + pub(crate) fn read<T>(ntfs: &Ntfs, fs: &mut T) -> Result<Self> + where + T: Read + Seek, + { + // Lookup the $UpCase file and its $DATA attribute. + let upcase_file = ntfs.ntfs_file(fs, KnownNtfsFile::UpCase as u64)?; + let data_attribute = upcase_file + .attributes() + .find(|attribute| { + // TODO: Replace by attribute.ty().contains() once https://github.com/rust-lang/rust/issues/62358 has landed. + attribute + .ty() + .map(|ty| ty == NtfsAttributeType::Data) + .unwrap_or(false) + }) + .ok_or(NtfsError::AttributeNotFound { + position: upcase_file.position(), + ty: NtfsAttributeType::VolumeName, + })?; + if data_attribute.value_length() != UPCASE_TABLE_SIZE { + return Err(NtfsError::InvalidUpcaseTableSize { + expected: UPCASE_TABLE_SIZE, + actual: data_attribute.value_length(), + }); + } + + // Read the entire raw data from the $DATA attribute. + let mut data_value = data_attribute.value()?; + let mut data = vec![0u8; UPCASE_TABLE_SIZE as usize]; + data_value.read_exact(fs, &mut data)?; + + // Store it in an array of `u16` uppercase characters. + // Any endianness conversion is done here once, which makes `u16_to_uppercase` fast. + let uppercase_characters = data + .chunks_exact(2) + .map(|two_bytes| u16::from_le_bytes(two_bytes.try_into().unwrap())) + .collect(); + + Ok(Self { + uppercase_characters, + }) + } + + /// Returns the uppercase variant of the given UCS-2 character (i.e. a Unicode character + /// from the Basic Multilingual Plane) based on the stored conversion table. + /// A character without an uppercase equivalent is returned as-is. + pub(crate) fn u16_to_uppercase(&self, character: u16) -> u16 { + self.uppercase_characters[character as usize] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_upcase_table() { + let mut testfs1 = crate::helpers::tests::testfs1(); + let ntfs = Ntfs::new(&mut testfs1).unwrap(); + let upcase_table = UpcaseTable::read(&ntfs, &mut testfs1).unwrap(); + + // Prove that at least the lowercase English characters are mapped to their uppercase equivalents. + // It makes no sense to check everything here. + for (lowercase, uppercase) in (b'a'..=b'z').zip(b'A'..=b'Z') { + assert_eq!( + upcase_table.u16_to_uppercase(lowercase as u16), + uppercase as u16 + ); + } + } +} |