From c134deb4335f7234ab2d057e9928845e7710766a Mon Sep 17 00:00:00 2001 From: Colin Finck Date: Sun, 3 Oct 2021 11:11:18 +0200 Subject: Refactor `NtfsAttributeValue` into `NtfsValue` and add `NtfsAttributeListNonResidentAttributeValue`. This enables us to read a data value spread over multiple data runs of multiple connected attributes just like a single contiguous value. A caller can read an attribute value the same way, no matter if it's internally stored as a resident attribute, non-resident attribute, or non-resident attribute within an AttributeList. --- src/attribute.rs | 209 +++++- src/attribute_value.rs | 700 --------------------- src/file.rs | 54 +- src/index_record.rs | 2 +- src/lib.rs | 3 +- src/ntfs.rs | 19 +- src/structured_values/attribute_list.rs | 37 +- src/structured_values/file_name.rs | 2 +- src/structured_values/index_allocation.rs | 2 +- src/structured_values/mod.rs | 2 +- src/structured_values/standard_information.rs | 2 +- src/value/attribute_list_non_resident_attribute.rs | 292 +++++++++ src/value/mod.rs | 157 +++++ src/value/non_resident_attribute.rs | 670 ++++++++++++++++++++ src/value/slice.rs | 86 +++ 15 files changed, 1477 insertions(+), 760 deletions(-) delete mode 100644 src/attribute_value.rs create mode 100644 src/value/attribute_list_non_resident_attribute.rs create mode 100644 src/value/mod.rs create mode 100644 src/value/non_resident_attribute.rs create mode 100644 src/value/slice.rs diff --git a/src/attribute.rs b/src/attribute.rs index a7a1fc1..8de17b6 100644 --- a/src/attribute.rs +++ b/src/attribute.rs @@ -1,16 +1,18 @@ // Copyright 2021 Colin Finck // SPDX-License-Identifier: GPL-2.0-or-later -use crate::attribute_value::{ - NtfsAttributeValue, NtfsNonResidentAttributeValue, NtfsResidentAttributeValue, -}; use crate::error::{NtfsError, Result}; use crate::file::NtfsFile; use crate::string::NtfsString; use crate::structured_values::{ - NtfsStructuredValueFromNonResidentAttributeValue, NtfsStructuredValueFromSlice, + 
NtfsAttributeList, NtfsAttributeListEntries, NtfsStructuredValueFromNonResidentAttributeValue, + NtfsStructuredValueFromSlice, }; use crate::types::Vcn; +use crate::value::attribute_list_non_resident_attribute::NtfsAttributeListNonResidentAttributeValue; +use crate::value::non_resident_attribute::NtfsNonResidentAttributeValue; +use crate::value::slice::NtfsSliceValue; +use crate::value::NtfsValue; use binread::io::{Read, Seek}; use bitflags::bitflags; use byteorder::{ByteOrder, LittleEndian}; @@ -116,15 +118,26 @@ pub enum NtfsAttributeType { End = 0xFFFF_FFFF, } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct NtfsAttribute<'n, 'f> { file: &'f NtfsFile<'n>, offset: usize, + /// Has a value if this attribute's value may be split over multiple attributes. + /// The connected attributes can be iterated using the encapsulated iterator. + list_entries: Option<&'f NtfsAttributeListEntries<'n, 'f>>, } impl<'n, 'f> NtfsAttribute<'n, 'f> { - fn new(file: &'f NtfsFile<'n>, offset: usize) -> Self { - Self { file, offset } + pub(crate) fn new( + file: &'f NtfsFile<'n>, + offset: usize, + list_entries: Option<&'f NtfsAttributeListEntries<'n, 'f>>, + ) -> Self { + Self { + file, + offset, + list_entries, + } } /// Returns the length of this NTFS attribute, in bytes. @@ -215,12 +228,8 @@ impl<'n, 'f> NtfsAttribute<'n, 'f> { S::from_non_resident_attribute_value(fs, self.non_resident_value()?) 
} - fn non_resident_value(&self) -> Result> { - debug_assert!(!self.is_resident()); - let start = self.offset + self.non_resident_value_data_runs_offset() as usize; - let end = start + self.attribute_length() as usize; - let data = &self.file.record_data()[start..end]; - let position = self.file.position() + start as u64; + pub(crate) fn non_resident_value(&self) -> Result> { + let (data, position) = self.non_resident_value_data_and_position(); NtfsNonResidentAttributeValue::new( self.file.ntfs(), @@ -230,6 +239,16 @@ impl<'n, 'f> NtfsAttribute<'n, 'f> { ) } + pub(crate) fn non_resident_value_data_and_position(&self) -> (&'f [u8], u64) { + debug_assert!(!self.is_resident()); + let start = self.offset + self.non_resident_value_data_runs_offset() as usize; + let end = start + self.attribute_length() as usize; + let data = &self.file.record_data()[start..end]; + let position = self.file.position() + start as u64; + + (data, position) + } + fn non_resident_value_data_size(&self) -> u64 { debug_assert!(!self.is_resident()); let start = self.offset + offset_of!(NtfsNonResidentAttributeHeader, data_size); @@ -242,6 +261,10 @@ impl<'n, 'f> NtfsAttribute<'n, 'f> { LittleEndian::read_u16(&self.file.record_data()[start..]) } + pub(crate) fn offset(&self) -> usize { + self.offset + } + /// Returns the absolute position of this NTFS attribute within the filesystem, in bytes. 
pub fn position(&self) -> u64 { self.file.position() + self.offset as u64 @@ -269,7 +292,7 @@ impl<'n, 'f> NtfsAttribute<'n, 'f> { S::from_slice(resident_value.data(), self.position()) } - pub(crate) fn resident_value(&self) -> Result> { + pub(crate) fn resident_value(&self) -> Result> { debug_assert!(self.is_resident()); self.validate_resident_value_sizes()?; @@ -277,7 +300,7 @@ impl<'n, 'f> NtfsAttribute<'n, 'f> { let end = start + self.resident_value_length() as usize; let data = &self.file.record_data()[start..end]; - Ok(NtfsResidentAttributeValue::new(data, self.position())) + Ok(NtfsSliceValue::new(data, self.position())) } fn resident_value_length(&self) -> u32 { @@ -364,13 +387,27 @@ impl<'n, 'f> NtfsAttribute<'n, 'f> { } /// Returns an [`NtfsAttributeValue`] structure to read the value of this NTFS attribute. - pub fn value(&self) -> Result> { - if self.is_resident() { - let resident_value = self.resident_value()?; - Ok(NtfsAttributeValue::Resident(resident_value)) + pub fn value(&self) -> Result> { + if let Some(list_entries) = self.list_entries { + // The first attribute reports the entire data size for all connected attributes + // (remaining ones are set to zero). 
+ // Fortunately, we are the first attribute :) + let data_size = self.non_resident_value_data_size(); + + let value = NtfsAttributeListNonResidentAttributeValue::new( + self.file.ntfs(), + list_entries.clone(), + self.instance(), + self.ty()?, + data_size, + ); + Ok(NtfsValue::AttributeListNonResidentAttribute(value)) + } else if self.is_resident() { + let value = self.resident_value()?; + Ok(NtfsValue::Slice(value)) } else { - let non_resident_value = self.non_resident_value()?; - Ok(NtfsAttributeValue::NonResident(non_resident_value)) + let value = self.non_resident_value()?; + Ok(NtfsValue::NonResidentAttribute(value)) } } @@ -384,13 +421,127 @@ impl<'n, 'f> NtfsAttribute<'n, 'f> { } } -pub struct NtfsAttributes<'n, 'a> { - file: &'a NtfsFile<'n>, +pub struct NtfsAttributes<'n, 'f> { + raw_iter: NtfsAttributesRaw<'n, 'f>, + list_entries: Option>, + list_skip_info: Option<(u16, NtfsAttributeType)>, +} + +impl<'n, 'f> NtfsAttributes<'n, 'f> { + pub(crate) fn new(file: &'f NtfsFile<'n>) -> Self { + Self { + raw_iter: NtfsAttributesRaw::new(file), + list_entries: None, + list_skip_info: None, + } + } + + pub fn next(&mut self, fs: &mut T) -> Option>> + where + T: Read + Seek, + { + loop { + if let Some(attribute_list_entries) = &mut self.list_entries { + loop { + // If the next AttributeList entry turns out to be a non-resident attribute, that attribute's + // value may be split over multiple (adjacent) attributes. + // To view this value as a single one, we need an `AttributeListConnectedEntries` iterator + // and that iterator needs `NtfsAttributeListEntries` where the next call to `next` yields + // the first connected attribute. + // Therefore, we need to clone `attribute_list_entries` before every call. 
+ let attribute_list_entries_clone = attribute_list_entries.clone(); + + let entry = match attribute_list_entries.next(fs) { + Some(Ok(entry)) => entry, + Some(Err(e)) => return Some(Err(e)), + None => break, + }; + let entry_instance = entry.instance(); + let entry_record_number = entry.base_file_reference().file_record_number(); + let entry_ty = iter_try!(entry.ty()); + + // Ignore all AttributeList entries that just repeat attributes of the raw iterator. + if entry_record_number == self.raw_iter.file.file_record_number() { + continue; + } + + // Ignore all AttributeList entries that are connected attributes of a previous one. + if let Some((skip_instance, skip_ty)) = self.list_skip_info { + if entry_instance == skip_instance && entry_ty == skip_ty { + continue; + } + } + + // We found an attribute that we want to return. + self.list_skip_info = None; + + let ntfs = self.raw_iter.file.ntfs(); + let entry_file = iter_try!(entry.to_file(ntfs, fs)); + let entry_attribute = iter_try!(entry.to_attribute(&entry_file)); + let attribute_offset = entry_attribute.offset(); + + let mut list_entries = None; + if !entry_attribute.is_resident() { + list_entries = Some(attribute_list_entries_clone); + self.list_skip_info = Some((entry_instance, entry_ty)); + } + + let item = NtfsAttributeItem { + attribute_file: self.raw_iter.file, + attribute_value_file: Some(entry_file), + attribute_offset, + list_entries, + }; + return Some(Ok(item)); + } + } + + let attribute = self.raw_iter.next()?; + if let Ok(NtfsAttributeType::AttributeList) = attribute.ty() { + let attribute_list = + iter_try!(attribute.structured_value::(fs)); + self.list_entries = Some(attribute_list.iter()); + } else { + let item = NtfsAttributeItem { + attribute_file: self.raw_iter.file, + attribute_value_file: None, + attribute_offset: attribute.offset(), + list_entries: None, + }; + return Some(Ok(item)); + } + } + } +} + +pub struct NtfsAttributeItem<'n, 'f> { + attribute_file: &'f NtfsFile<'n>, + 
attribute_value_file: Option>, + attribute_offset: usize, + list_entries: Option>, +} + +impl<'n, 'f> NtfsAttributeItem<'n, 'f> { + pub fn to_attribute<'i>(&'i self) -> NtfsAttribute<'n, 'i> { + if let Some(file) = &self.attribute_value_file { + NtfsAttribute::new(file, self.attribute_offset, self.list_entries.as_ref()) + } else { + NtfsAttribute::new( + self.attribute_file, + self.attribute_offset, + self.list_entries.as_ref(), + ) + } + } +} + +pub struct NtfsAttributesRaw<'n, 'f> { + file: &'f NtfsFile<'n>, items_range: Range, } -impl<'n, 'a> NtfsAttributes<'n, 'a> { - pub(crate) fn new(file: &'a NtfsFile<'n>) -> Self { +impl<'n, 'f> NtfsAttributesRaw<'n, 'f> { + pub(crate) fn new(file: &'f NtfsFile<'n>) -> Self { let start = file.first_attribute_offset() as usize; let end = file.used_size() as usize; let items_range = start..end; @@ -399,8 +550,8 @@ impl<'n, 'a> NtfsAttributes<'n, 'a> { } } -impl<'n, 'a> Iterator for NtfsAttributes<'n, 'a> { - type Item = NtfsAttribute<'n, 'a>; +impl<'n, 'f> Iterator for NtfsAttributesRaw<'n, 'f> { + type Item = NtfsAttribute<'n, 'f>; fn next(&mut self) -> Option { if self.items_range.is_empty() { @@ -415,11 +566,11 @@ impl<'n, 'a> Iterator for NtfsAttributes<'n, 'a> { } // It's a real attribute. 
- let attribute = NtfsAttribute::new(self.file, self.items_range.start); + let attribute = NtfsAttribute::new(self.file, self.items_range.start, None); self.items_range.start += attribute.attribute_length() as usize; Some(attribute) } } -impl<'n, 'a> FusedIterator for NtfsAttributes<'n, 'a> {} +impl<'n, 'f> FusedIterator for NtfsAttributesRaw<'n, 'f> {} diff --git a/src/attribute_value.rs b/src/attribute_value.rs deleted file mode 100644 index 522e22b..0000000 --- a/src/attribute_value.rs +++ /dev/null @@ -1,700 +0,0 @@ -// Copyright 2021 Colin Finck -// SPDX-License-Identifier: GPL-2.0-or-later - -use crate::error::{NtfsError, Result}; -use crate::ntfs::Ntfs; -use crate::traits::NtfsReadSeek; -use crate::types::{Lcn, Vcn}; -use binread::io; -use binread::io::Cursor; -use binread::io::{Read, Seek, SeekFrom}; -use binread::BinRead; -use core::convert::TryFrom; -use core::iter::FusedIterator; -use core::{cmp, mem}; - -#[derive(Clone, Debug)] -pub enum NtfsAttributeValue<'n, 'f> { - Resident(NtfsResidentAttributeValue<'f>), - NonResident(NtfsNonResidentAttributeValue<'n, 'f>), -} - -impl<'n, 'f> NtfsAttributeValue<'n, 'f> { - pub fn attach<'a, T>(self, fs: &'a mut T) -> NtfsAttributeValueAttached<'n, 'f, 'a, T> - where - T: Read + Seek, - { - NtfsAttributeValueAttached::new(fs, self) - } - - pub fn data_position(&self) -> Option { - match self { - Self::Resident(inner) => inner.data_position(), - Self::NonResident(inner) => inner.data_position(), - } - } - - pub fn len(&self) -> u64 { - match self { - Self::Resident(inner) => inner.len(), - Self::NonResident(inner) => inner.len(), - } - } -} - -impl<'n, 'f> NtfsReadSeek for NtfsAttributeValue<'n, 'f> { - fn read(&mut self, fs: &mut T, buf: &mut [u8]) -> Result - where - T: Read + Seek, - { - match self { - Self::Resident(inner) => inner.read(fs, buf), - Self::NonResident(inner) => inner.read(fs, buf), - } - } - - fn seek(&mut self, fs: &mut T, pos: SeekFrom) -> Result - where - T: Read + Seek, - { - match self { - 
Self::Resident(inner) => inner.seek(fs, pos), - Self::NonResident(inner) => inner.seek(fs, pos), - } - } - - fn stream_position(&self) -> u64 { - match self { - Self::Resident(inner) => inner.stream_position(), - Self::NonResident(inner) => inner.stream_position(), - } - } -} - -pub struct NtfsAttributeValueAttached<'n, 'f, 'a, T: Read + Seek> { - fs: &'a mut T, - value: NtfsAttributeValue<'n, 'f>, -} - -impl<'n, 'f, 'a, T> NtfsAttributeValueAttached<'n, 'f, 'a, T> -where - T: Read + Seek, -{ - fn new(fs: &'a mut T, value: NtfsAttributeValue<'n, 'f>) -> Self { - Self { fs, value } - } - - pub fn data_position(&self) -> Option { - self.value.data_position() - } - - pub fn detach(self) -> NtfsAttributeValue<'n, 'f> { - self.value - } - - pub fn len(&self) -> u64 { - self.value.len() - } -} - -impl<'n, 'f, 'a, T> Read for NtfsAttributeValueAttached<'n, 'f, 'a, T> -where - T: Read + Seek, -{ - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.value.read(self.fs, buf).map_err(io::Error::from) - } -} - -impl<'n, 'f, 'a, T> Seek for NtfsAttributeValueAttached<'n, 'f, 'a, T> -where - T: Read + Seek, -{ - fn seek(&mut self, pos: SeekFrom) -> io::Result { - self.value.seek(self.fs, pos).map_err(io::Error::from) - } -} - -#[derive(Clone, Debug)] -pub struct NtfsDataRun { - /// Absolute position of the attribute's value within the filesystem, in bytes. - /// This may be zero if this is a "sparse" data run. - position: u64, - /// Total length of the attribute's value, in bytes. - length: u64, - /// Current relative position within the value, in bytes. 
- stream_position: u64, -} - -impl NtfsDataRun { - pub(crate) fn new(ntfs: &Ntfs, lcn: Lcn, cluster_count: u64) -> Result { - let position = lcn.position(ntfs)?; - let length = cluster_count - .checked_mul(ntfs.cluster_size() as u64) - .ok_or(NtfsError::InvalidClusterCount { cluster_count })?; - - Ok(Self { - position, - length, - stream_position: 0, - }) - } - - /// Returns the absolute current data seek position within the filesystem, in bytes. - /// This may be `None` if: - /// * The current seek position is outside the valid range, or - /// * The data run is a "sparse" data run - pub fn data_position(&self) -> Option { - if self.position > 0 && self.stream_position < self.len() { - Some(self.position + self.stream_position) - } else { - None - } - } - - pub fn len(&self) -> u64 { - self.length - } - - fn remaining_len(&self) -> u64 { - self.len().saturating_sub(self.stream_position) - } -} - -impl NtfsReadSeek for NtfsDataRun { - fn read(&mut self, fs: &mut T, buf: &mut [u8]) -> Result - where - T: Read + Seek, - { - if self.remaining_len() == 0 { - return Ok(0); - } - - let bytes_to_read = cmp::min(buf.len(), self.remaining_len() as usize); - let work_slice = &mut buf[..bytes_to_read]; - - if self.position == 0 { - // This is a sparse data run. - work_slice.fill(0); - } else { - // This data run contains "real" data. - // We have already performed all necessary sanity checks above, so we can just unwrap here. 
- fs.seek(SeekFrom::Start(self.data_position().unwrap()))?; - fs.read(work_slice)?; - } - - self.stream_position += bytes_to_read as u64; - Ok(bytes_to_read) - } - - fn seek(&mut self, _fs: &mut T, pos: SeekFrom) -> Result - where - T: Read + Seek, - { - let length = self.len(); - seek_contiguous(&mut self.stream_position, length, pos) - } - - fn stream_position(&self) -> u64 { - self.stream_position - } -} - -#[derive(Clone, Debug)] -pub struct NtfsDataRuns<'n, 'f> { - ntfs: &'n Ntfs, - data: &'f [u8], - position: u64, - previous_lcn: Lcn, -} - -impl<'n, 'f> NtfsDataRuns<'n, 'f> { - fn new(ntfs: &'n Ntfs, data: &'f [u8], position: u64) -> Self { - Self { - ntfs, - data, - position, - previous_lcn: Lcn::from(0), - } - } - - fn read_variable_length_bytes( - &self, - cursor: &mut Cursor<&[u8]>, - byte_count: u8, - ) -> Result<[u8; 8]> { - const MAX_BYTE_COUNT: u8 = mem::size_of::() as u8; - - if byte_count > MAX_BYTE_COUNT { - return Err(NtfsError::InvalidByteCountInDataRunHeader { - position: self.position, - expected: byte_count, - actual: MAX_BYTE_COUNT, - }); - } - - let mut buf = [0u8; MAX_BYTE_COUNT as usize]; - cursor.read_exact(&mut buf[..byte_count as usize])?; - - Ok(buf) - } - - fn read_variable_length_signed_integer( - &self, - cursor: &mut Cursor<&[u8]>, - byte_count: u8, - ) -> Result { - let buf = self.read_variable_length_bytes(cursor, byte_count)?; - let mut integer = i64::from_le_bytes(buf); - - // We have read `byte_count` bytes into a zeroed buffer and just interpreted that as an `i64`. - // Sign-extend `integer` to make it replicate the proper value. 
- let unused_bits = (mem::size_of::() as u32 - byte_count as u32) * 8; - integer = integer.wrapping_shl(unused_bits).wrapping_shr(unused_bits); - - Ok(integer) - } - - fn read_variable_length_unsigned_integer( - &self, - cursor: &mut Cursor<&[u8]>, - byte_count: u8, - ) -> Result { - let buf = self.read_variable_length_bytes(cursor, byte_count)?; - let integer = u64::from_le_bytes(buf); - Ok(integer) - } -} - -impl<'n, 'f> Iterator for NtfsDataRuns<'n, 'f> { - type Item = Result; - - fn next(&mut self) -> Option> { - if self.data.is_empty() { - return None; - } - - // Read the single header byte. - let mut cursor = Cursor::new(self.data); - let header = iter_try!(u8::read(&mut cursor)); - - // A zero byte marks the end of the data runs. - if header == 0 { - // Ensure `self.data.is_empty` returns true, so any further call uses the fast path above. - self.data = &[]; - return None; - } - - // The lower nibble indicates the length of the following cluster count variable length integer. - let cluster_count_byte_count = header & 0x0f; - let cluster_count = iter_try!( - self.read_variable_length_unsigned_integer(&mut cursor, cluster_count_byte_count) - ); - - // The upper nibble indicates the length of the following VCN variable length integer. - let vcn_byte_count = (header & 0xf0) >> 4; - let vcn = Vcn::from(iter_try!( - self.read_variable_length_signed_integer(&mut cursor, vcn_byte_count) - )); - - // Turn the read VCN into an absolute LCN. - let lcn = iter_try!(self.previous_lcn.checked_add(vcn).ok_or({ - NtfsError::InvalidVcnInDataRunHeader { - position: self.position, - vcn, - previous_lcn: self.previous_lcn, - } - })); - self.previous_lcn = lcn; - - // Only advance after having checked for success. - // In case of an error, a subsequent call shall output the same error again. 
- let bytes_to_advance = cursor.stream_position().unwrap(); - self.data = &self.data[bytes_to_advance as usize..]; - self.position += bytes_to_advance; - - let data_run = iter_try!(NtfsDataRun::new(self.ntfs, lcn, cluster_count)); - Some(Ok(data_run)) - } -} - -impl<'n, 'f> FusedIterator for NtfsDataRuns<'n, 'f> {} - -#[derive(Clone, Debug)] -pub struct NtfsNonResidentAttributeValue<'n, 'f> { - /// Reference to the base `Ntfs` object of this filesystem. - ntfs: &'n Ntfs, - /// Attribute bytes where the data run information of this non-resident value is stored on the filesystem. - data: &'f [u8], - /// Absolute position of the data run information within the filesystem, in bytes. - position: u64, - /// Total size of the data spread among all data runs, in bytes. - data_size: u64, - /// Iterator of data runs used for reading/seeking. - stream_data_runs: NtfsDataRuns<'n, 'f>, - /// Current data run we are reading from. - stream_data_run: Option, - /// Total stream position, in bytes. - stream_position: u64, -} - -impl<'n, 'f> NtfsNonResidentAttributeValue<'n, 'f> { - pub(crate) fn new( - ntfs: &'n Ntfs, - data: &'f [u8], - position: u64, - data_size: u64, - ) -> Result { - let mut stream_data_runs = NtfsDataRuns::new(ntfs, data, position); - - // Get the first data run already here to let `data_position` return something meaningful. - let stream_data_run = match stream_data_runs.next() { - Some(Ok(data_run)) => Some(data_run), - Some(Err(e)) => return Err(e), - None => None, - }; - - Ok(Self { - ntfs, - data, - position, - data_size, - stream_data_runs, - stream_data_run, - stream_position: 0, - }) - } - - pub fn attach<'a, T>( - self, - fs: &'a mut T, - ) -> NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> - where - T: Read + Seek, - { - NtfsNonResidentAttributeValueAttached::new(fs, self) - } - - /// Returns the absolute current data seek position within the filesystem, in bytes. 
- /// This may be `None` if: - /// * The current seek position is outside the valid range, or - /// * The current data run is a "sparse" data run - pub fn data_position(&self) -> Option { - let stream_data_run = self.stream_data_run.as_ref()?; - stream_data_run.data_position() - } - - pub fn data_runs(&self) -> NtfsDataRuns<'n, 'f> { - NtfsDataRuns::new(self.ntfs, self.data, self.position) - } - - pub fn len(&self) -> u64 { - self.data_size - } - - pub fn ntfs(&self) -> &'n Ntfs { - self.ntfs - } - - /// Returns the absolute position of the data run information within the filesystem, in bytes. - pub fn position(&self) -> u64 { - self.position - } - - fn do_seek(&mut self, fs: &mut T, mut bytes_to_seek: SeekFrom) -> Result - where - T: Read + Seek, - { - // Translate `SeekFrom::Start(n)` into a more efficient `SeekFrom::Current` - // if n >= self.stream_position. - // We don't need to traverse data runs from the very beginning then. - if let SeekFrom::Start(n) = bytes_to_seek { - if let Some(n_from_current) = n.checked_sub(self.stream_position) { - if let Ok(signed_n_from_current) = i64::try_from(n_from_current) { - bytes_to_seek = SeekFrom::Current(signed_n_from_current); - } - } - } - - let mut bytes_left_to_seek = match bytes_to_seek { - SeekFrom::Start(n) => { - // Reset `stream_data_runs` and `stream_data_run` to read from the very beginning. - self.stream_data_runs = NtfsDataRuns::new(self.ntfs, self.data, self.position); - self.stream_data_run = None; - n - } - SeekFrom::Current(n) if n >= 0 => n as u64, - _ => panic!("do_seek only accepts positive seeks from Start or Current!"), - }; - - while bytes_left_to_seek > 0 { - if let Some(data_run) = &mut self.stream_data_run { - if bytes_left_to_seek < data_run.remaining_len() { - // We have found the right data run, now we have to seek inside the data run. - // - // If we were called to seek from the very beginning, we can be sure that this - // data run is also seeked from the beginning. 
- // Hence, we can use SeekFrom::Start and use the full u64 range. - // - // If we were called to seek from the current position, we have to use - // SeekFrom::Current and can only use the positive part of the i64 range. - // This is no problem though, as `bytes_left_to_seek` was also created from a - // positive i64 value in that case. - let pos = match bytes_to_seek { - SeekFrom::Start(_) => SeekFrom::Start(bytes_left_to_seek), - SeekFrom::Current(_) => SeekFrom::Current(bytes_left_to_seek as i64), - _ => unreachable!(), - }; - - data_run.seek(fs, pos)?; - break; - } else { - // We can skip the entire data run. - bytes_left_to_seek -= data_run.remaining_len(); - } - } - - match self.stream_data_runs.next() { - Some(Ok(data_run)) => self.stream_data_run = Some(data_run), - Some(Err(e)) => return Err(e), - None => break, - } - } - - match bytes_to_seek { - SeekFrom::Start(n) => self.stream_position = n, - SeekFrom::Current(n) => self.stream_position += n as u64, - _ => unreachable!(), - } - - Ok(self.stream_position) - } -} - -impl<'n, 'f> NtfsReadSeek for NtfsNonResidentAttributeValue<'n, 'f> { - fn read(&mut self, fs: &mut T, buf: &mut [u8]) -> Result - where - T: Read + Seek, - { - let mut bytes_read = 0usize; - - while bytes_read < buf.len() { - if let Some(data_run) = &mut self.stream_data_run { - if data_run.stream_position() < data_run.len() { - let bytes_read_in_data_run = data_run.read(fs, &mut buf[bytes_read..])?; - bytes_read += bytes_read_in_data_run; - self.stream_position += bytes_read_in_data_run as u64; - continue; - } - } - - // We still have bytes to read, but no data run or the previous data run has been read to its end. - // Get the next data run and try again. 
- match self.stream_data_runs.next() { - Some(Ok(data_run)) => self.stream_data_run = Some(data_run), - Some(Err(e)) => return Err(e), - None => break, - } - } - - Ok(bytes_read) - } - - fn seek(&mut self, fs: &mut T, pos: SeekFrom) -> Result - where - T: Read + Seek, - { - match pos { - SeekFrom::Start(n) => { - // Seek n bytes from the very beginning. - return self.do_seek(fs, SeekFrom::Start(n)); - } - SeekFrom::End(n) => { - if n >= 0 { - if let Some(bytes_to_seek) = self.data_size.checked_add(n as u64) { - // Seek data_size + n bytes from the very beginning. - return self.do_seek(fs, SeekFrom::Start(bytes_to_seek)); - } - } else { - if let Some(bytes_to_seek) = self.data_size.checked_sub(n.wrapping_neg() as u64) - { - // Seek data_size + n bytes (with n being negative) from the very beginning. - return self.do_seek(fs, SeekFrom::Start(bytes_to_seek)); - } - } - } - SeekFrom::Current(n) => { - if n >= 0 { - if self.stream_position.checked_add(n as u64).is_some() { - // Seek n bytes from the current position. - // This is an optimization for the common case, as we don't need to traverse all - // data runs from the very beginning. - return self.do_seek(fs, SeekFrom::Current(n)); - } - } else { - if let Some(bytes_to_seek) = - self.stream_position.checked_sub(n.wrapping_neg() as u64) - { - // Seek stream_position + n bytes (with n being negative) from the very beginning. 
- return self.do_seek(fs, SeekFrom::Start(bytes_to_seek)); - } - } - } - } - - Err(NtfsError::Io(io::Error::new( - io::ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - ))) - } - - fn stream_position(&self) -> u64 { - self.stream_position - } -} - -pub struct NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T: Read + Seek> { - fs: &'a mut T, - value: NtfsNonResidentAttributeValue<'n, 'f>, -} - -impl<'n, 'f, 'a, T> NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> -where - T: Read + Seek, -{ - fn new(fs: &'a mut T, value: NtfsNonResidentAttributeValue<'n, 'f>) -> Self { - Self { fs, value } - } - - pub fn data_position(&self) -> Option { - self.value.data_position() - } - - pub fn detach(self) -> NtfsNonResidentAttributeValue<'n, 'f> { - self.value - } - - pub fn len(&self) -> u64 { - self.value.len() - } -} - -impl<'n, 'f, 'a, T> Read for NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> -where - T: Read + Seek, -{ - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.value.read(self.fs, buf).map_err(io::Error::from) - } -} - -impl<'n, 'f, 'a, T> Seek for NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> -where - T: Read + Seek, -{ - fn seek(&mut self, pos: SeekFrom) -> io::Result { - self.value.seek(self.fs, pos).map_err(io::Error::from) - } -} - -#[derive(Clone, Debug)] -pub struct NtfsResidentAttributeValue<'f> { - data: &'f [u8], - position: u64, - stream_position: u64, -} - -impl<'f> NtfsResidentAttributeValue<'f> { - pub(crate) fn new(data: &'f [u8], position: u64) -> Self { - Self { - data, - position, - stream_position: 0, - } - } - - pub fn data(&self) -> &'f [u8] { - self.data - } - - /// Returns the absolute current data seek position within the filesystem, in bytes. - /// This may be `None` if the current seek position is outside the valid range. 
- pub fn data_position(&self) -> Option { - if self.stream_position < self.len() { - Some(self.position + self.stream_position) - } else { - None - } - } - - pub fn len(&self) -> u64 { - self.data.len() as u64 - } - - fn remaining_len(&self) -> u64 { - self.len().saturating_sub(self.stream_position) - } -} - -impl<'f> NtfsReadSeek for NtfsResidentAttributeValue<'f> { - fn read(&mut self, _fs: &mut T, buf: &mut [u8]) -> Result - where - T: Read + Seek, - { - if self.remaining_len() == 0 { - return Ok(0); - } - - let bytes_to_read = cmp::min(buf.len(), self.remaining_len() as usize); - let work_slice = &mut buf[..bytes_to_read]; - - let start = self.stream_position as usize; - let end = start + bytes_to_read; - work_slice.copy_from_slice(&self.data[start..end]); - - self.stream_position += bytes_to_read as u64; - Ok(bytes_to_read) - } - - fn seek(&mut self, _fs: &mut T, pos: SeekFrom) -> Result - where - T: Read + Seek, - { - let length = self.len(); - seek_contiguous(&mut self.stream_position, length, pos) - } - - fn stream_position(&self) -> u64 { - self.stream_position - } -} - -fn seek_contiguous(stream_position: &mut u64, length: u64, pos: SeekFrom) -> Result { - // This implementation is taken from https://github.com/rust-lang/rust/blob/18c524fbae3ab1bf6ed9196168d8c68fc6aec61a/library/std/src/io/cursor.rs - // It handles all signed/unsigned arithmetics properly and outputs the known `io` error message. 
- let (base_pos, offset) = match pos { - SeekFrom::Start(n) => { - *stream_position = n; - return Ok(n); - } - SeekFrom::End(n) => (length, n), - SeekFrom::Current(n) => (*stream_position, n), - }; - - let new_pos = if offset >= 0 { - base_pos.checked_add(offset as u64) - } else { - base_pos.checked_sub(offset.wrapping_neg() as u64) - }; - - match new_pos { - Some(n) => { - *stream_position = n; - Ok(*stream_position) - } - None => Err(NtfsError::Io(io::Error::new( - io::ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - ))), - } -} diff --git a/src/file.rs b/src/file.rs index 27d6a29..147c986 100644 --- a/src/file.rs +++ b/src/file.rs @@ -1,7 +1,9 @@ // Copyright 2021 Colin Finck // SPDX-License-Identifier: GPL-2.0-or-later -use crate::attribute::{NtfsAttribute, NtfsAttributeType, NtfsAttributes}; +use crate::attribute::{ + NtfsAttribute, NtfsAttributeItem, NtfsAttributeType, NtfsAttributes, NtfsAttributesRaw, +}; use crate::error::{NtfsError, Result}; use crate::file_reference::NtfsFileReference; use crate::index::NtfsIndex; @@ -54,7 +56,7 @@ bitflags! { } } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct NtfsFile<'n> { record: Record<'n>, file_record_number: u64, @@ -97,7 +99,7 @@ impl<'n> NtfsFile<'n> { &'f self, ty: NtfsAttributeType, ) -> Result> { - self.attributes() + self.attributes_raw() .find(|attribute| { // TODO: Replace by attribute.ty().contains() once https://github.com/rust-lang/rust/issues/62358 has landed. attribute.ty().map(|x| x == ty).unwrap_or(false) @@ -108,8 +110,15 @@ impl<'n> NtfsFile<'n> { }) } + /// This provides a flattened "data-centric" view of the attributes and abstracts away the filesystem details + /// to deal with many or large attributes (Attribute Lists and split attributes). + /// Use [`NtfsFile::attributes_raw`] to iterate over the plain attributes on the filesystem. 
pub fn attributes<'f>(&'f self) -> NtfsAttributes<'n, 'f> { - NtfsAttributes::new(self) + NtfsAttributes::<'n, 'f>::new(self) + } + + pub fn attributes_raw<'f>(&'f self) -> NtfsAttributesRaw<'n, 'f> { + NtfsAttributesRaw::new(self) } /// Convenience function to get a $DATA attribute of this file. @@ -120,22 +129,31 @@ impl<'n> NtfsFile<'n> { /// /// If you need more control over which $DATA attribute is available and picked up, /// you can use [`NtfsFile::attributes`] to iterate over all attributes of this file. - pub fn data<'f>(&'f self, data_stream_name: &str) -> Option>> { - // Create an iterator that emits all $DATA attributes. - let iter = self.attributes().filter(|attribute| { - // TODO: Replace by attribute.ty().contains() once https://github.com/rust-lang/rust/issues/62358 has landed. - attribute - .ty() - .map(|ty| ty == NtfsAttributeType::Data) - .unwrap_or(false) - }); - - for attribute in iter { - let name = iter_try!(attribute.name()); + pub fn data<'f, T>( + &'f self, + fs: &mut T, + data_stream_name: &str, + ) -> Option>> + where + T: Read + Seek, + { + let mut iter = self.attributes(); - if data_stream_name == name { - return Some(Ok(attribute)); + while let Some(item) = iter.next(fs) { + let item = iter_try!(item); + let attribute = item.to_attribute(); + + let ty = iter_try!(attribute.ty()); + if ty != NtfsAttributeType::Data { + continue; } + + let name = iter_try!(attribute.name()); + if name != data_stream_name { + continue; + } + + return Some(Ok(item)); } None diff --git a/src/index_record.rs b/src/index_record.rs index ddec81a..b05b6e4 100644 --- a/src/index_record.rs +++ b/src/index_record.rs @@ -1,7 +1,6 @@ // Copyright 2021 Colin Finck // SPDX-License-Identifier: GPL-2.0-or-later -use crate::attribute_value::NtfsNonResidentAttributeValue; use crate::error::{NtfsError, Result}; use crate::index_entry::{IndexNodeEntryRanges, NtfsIndexNodeEntries}; use crate::indexes::NtfsIndexEntryType; @@ -9,6 +8,7 @@ use crate::record::Record; use 
crate::record::RecordHeader; use crate::traits::NtfsReadSeek; use crate::types::Vcn; +use crate::value::non_resident_attribute::NtfsNonResidentAttributeValue; use binread::io::{Read, Seek}; use byteorder::{ByteOrder, LittleEndian}; use core::ops::Range; diff --git a/src/lib.rs b/src/lib.rs index 347932b..7eef943 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,6 @@ extern crate alloc; mod helpers; mod attribute; -mod attribute_value; mod boot_sector; mod error; mod file; @@ -28,9 +27,9 @@ mod time; mod traits; mod types; mod upcase_table; +pub mod value; pub use crate::attribute::*; -pub use crate::attribute_value::*; pub use crate::error::*; pub use crate::file::*; pub use crate::file_reference::*; diff --git a/src/ntfs.rs b/src/ntfs.rs index bb99e92..9ef8734 100644 --- a/src/ntfs.rs +++ b/src/ntfs.rs @@ -79,11 +79,22 @@ impl Ntfs { .checked_mul(self.file_record_size as u64) .ok_or(NtfsError::InvalidFileRecordNumber { file_record_number })?; + // The MFT may be split into multiple data runs, referenced by its $DATA attribute. + // We therefore read it just like any other non-resident attribute value. + // However, this code assumes that the MFT does not have an AttributeList! 
let mft = NtfsFile::new(&self, fs, self.mft_position, 0)?; - let mft_data_attribute = mft.data("").ok_or(NtfsError::AttributeNotFound { - position: self.mft_position, - ty: NtfsAttributeType::Data, - })??; + let mft_data_attribute = mft + .attributes_raw() + .find(|attribute| { + attribute + .ty() + .map(|ty| ty == NtfsAttributeType::Data) + .unwrap_or(false) + }) + .ok_or(NtfsError::AttributeNotFound { + position: self.mft_position, + ty: NtfsAttributeType::Data, + })?; let mut mft_data_value = mft_data_attribute.value()?; mft_data_value.seek(fs, SeekFrom::Start(offset))?; diff --git a/src/structured_values/attribute_list.rs b/src/structured_values/attribute_list.rs index da1f891..334b17c 100644 --- a/src/structured_values/attribute_list.rs +++ b/src/structured_values/attribute_list.rs @@ -1,10 +1,11 @@ // Copyright 2021 Colin Finck // SPDX-License-Identifier: GPL-2.0-or-later -use crate::attribute::NtfsAttributeType; -use crate::attribute_value::NtfsNonResidentAttributeValue; +use crate::attribute::{NtfsAttribute, NtfsAttributeType}; use crate::error::{NtfsError, Result}; +use crate::file::NtfsFile; use crate::file_reference::NtfsFileReference; +use crate::ntfs::Ntfs; use crate::string::NtfsString; use crate::structured_values::{ NtfsStructuredValue, NtfsStructuredValueFromNonResidentAttributeValue, @@ -12,6 +13,7 @@ use crate::structured_values::{ }; use crate::traits::NtfsReadSeek; use crate::types::Vcn; +use crate::value::non_resident_attribute::NtfsNonResidentAttributeValue; use arrayvec::ArrayVec; use binread::io::{Cursor, Read, Seek, SeekFrom}; use binread::{BinRead, BinReaderExt}; @@ -221,6 +223,37 @@ impl NtfsAttributeListEntry { Ok(()) } + pub fn to_attribute<'n, 'f>(&self, file: &'f NtfsFile<'n>) -> Result> { + let file_record_number = self.base_file_reference().file_record_number(); + assert_eq!( + file.file_record_number(), + file_record_number, + "The given NtfsFile's record number does not match the expected record number. 
\ + Always use NtfsAttributeListEntry::to_file to retrieve the correct NtfsFile." + ); + + let instance = self.instance(); + let ty = self.ty()?; + + file.attributes_raw() + .find(|attribute| { + attribute.instance() == instance + && attribute.ty().map(|attr_ty| attr_ty == ty).unwrap_or(false) + }) + .ok_or(NtfsError::AttributeNotFound { + position: file.position(), + ty, + }) + } + + pub fn to_file<'n, T>(&self, ntfs: &'n Ntfs, fs: &mut T) -> Result> + where + T: Read + Seek, + { + let file_record_number = self.base_file_reference().file_record_number(); + ntfs.file(fs, file_record_number) + } + /// Returns the type of this NTFS attribute, or [`NtfsError::UnsupportedAttributeType`] /// if it's an unknown type. pub fn ty(&self) -> Result { diff --git a/src/structured_values/file_name.rs b/src/structured_values/file_name.rs index 237c923..22f104e 100644 --- a/src/structured_values/file_name.rs +++ b/src/structured_values/file_name.rs @@ -197,7 +197,7 @@ mod tests { let mft = ntfs .file(&mut testfs1, KnownNtfsFileRecordNumber::MFT as u64) .unwrap(); - let mut mft_attributes = mft.attributes(); + let mut mft_attributes = mft.attributes_raw(); // Check the FileName attribute of the MFT. 
let attribute = mft_attributes.nth(1).unwrap(); diff --git a/src/structured_values/index_allocation.rs b/src/structured_values/index_allocation.rs index a1e52f2..a062a79 100644 --- a/src/structured_values/index_allocation.rs +++ b/src/structured_values/index_allocation.rs @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later use crate::attribute::NtfsAttributeType; -use crate::attribute_value::NtfsNonResidentAttributeValue; use crate::error::{NtfsError, Result}; use crate::index_record::NtfsIndexRecord; use crate::structured_values::index_root::NtfsIndexRoot; @@ -11,6 +10,7 @@ use crate::structured_values::{ }; use crate::traits::NtfsReadSeek; use crate::types::Vcn; +use crate::value::non_resident_attribute::NtfsNonResidentAttributeValue; use binread::io::{Read, Seek, SeekFrom}; use core::iter::FusedIterator; diff --git a/src/structured_values/mod.rs b/src/structured_values/mod.rs index a07764d..68971f4 100644 --- a/src/structured_values/mod.rs +++ b/src/structured_values/mod.rs @@ -22,8 +22,8 @@ pub use volume_information::*; pub use volume_name::*; use crate::attribute::NtfsAttributeType; -use crate::attribute_value::NtfsNonResidentAttributeValue; use crate::error::Result; +use crate::value::non_resident_attribute::NtfsNonResidentAttributeValue; use binread::io::{Read, Seek}; use bitflags::bitflags; diff --git a/src/structured_values/standard_information.rs b/src/structured_values/standard_information.rs index 9cf55fd..bcd1cee 100644 --- a/src/structured_values/standard_information.rs +++ b/src/structured_values/standard_information.rs @@ -135,7 +135,7 @@ mod tests { let mft = ntfs .file(&mut testfs1, KnownNtfsFileRecordNumber::MFT as u64) .unwrap(); - let mut mft_attributes = mft.attributes(); + let mut mft_attributes = mft.attributes_raw(); // Check the StandardInformation attribute of the MFT. 
let attribute = mft_attributes.nth(0).unwrap(); diff --git a/src/value/attribute_list_non_resident_attribute.rs b/src/value/attribute_list_non_resident_attribute.rs new file mode 100644 index 0000000..730b094 --- /dev/null +++ b/src/value/attribute_list_non_resident_attribute.rs @@ -0,0 +1,292 @@ +// Copyright 2021 Colin Finck +// SPDX-License-Identifier: GPL-2.0-or-later +// +//! This module implements a reader for a non-resident attribute value that is part of an AttributeList. +//! Such values are not only split up into data runs, but may also be continued by connected attributes which are listed in the same AttributeList. +//! This reader provides one contiguous data stream for all data runs in all connected attributes. +// +// It is important to note that `NtfsAttributeListNonResidentAttributeValue` can't just encapsulate `NtfsNonResidentAttributeValue` and provide one +// layer on top to connect the attributes! +// Connected attributes are stored in a way that the first attribute reports the entire data size and all further attributes report a zero value length. +// We have to go down to the data run level to get trustable lengths again, and this is what `NtfsAttributeListNonResidentAttributeValue` does here. + +use crate::attribute::{NtfsAttribute, NtfsAttributeType}; +use crate::error::{NtfsError, Result}; +use crate::file::NtfsFile; +use crate::ntfs::Ntfs; +use crate::structured_values::{NtfsAttributeListEntries, NtfsAttributeListEntry}; +use crate::traits::NtfsReadSeek; +use crate::value::non_resident_attribute::{DataRunsState, NtfsDataRuns, StreamState}; +use binread::io::{Read, Seek, SeekFrom}; + +#[derive(Clone, Debug)] +pub struct NtfsAttributeListNonResidentAttributeValue<'n, 'f> { + /// Reference to the base `Ntfs` object of this filesystem. + ntfs: &'n Ntfs, + /// An untouched copy of the `attribute_list_entries` passed in [`Self::new`] to rewind to the beginning when desired. 
+ initial_attribute_list_entries: NtfsAttributeListEntries<'n, 'f>, + /// Iterator through all connected attributes of this attribute in the AttributeList. + connected_entries: AttributeListConnectedEntries<'n, 'f>, + /// File, location, and data runs iteration state of the current attribute. + attribute_state: Option>, + /// Iteration state of the current data run. + stream_state: StreamState, +} + +impl<'n, 'f> NtfsAttributeListNonResidentAttributeValue<'n, 'f> { + pub(crate) fn new( + ntfs: &'n Ntfs, + attribute_list_entries: NtfsAttributeListEntries<'n, 'f>, + instance: u16, + ty: NtfsAttributeType, + data_size: u64, + ) -> Self { + let connected_entries = + AttributeListConnectedEntries::new(attribute_list_entries.clone(), instance, ty); + + Self { + ntfs, + initial_attribute_list_entries: attribute_list_entries, + connected_entries, + attribute_state: None, + stream_state: StreamState::new(data_size), + } + } + + /// Returns the absolute current data seek position within the filesystem, in bytes. + /// This may be `None` if: + /// * The current seek position is outside the valid range, or + /// * The current data run is a "sparse" data run + pub fn data_position(&self) -> Option { + self.stream_state.data_position() + } + + pub fn len(&self) -> u64 { + self.stream_state.data_size() + } + + /// Returns whether we got another data run. + fn next_data_run(&mut self) -> Result { + // Do we have a file and a (non-resident) attribute to iterate through its data runs? + let attribute_state = match &mut self.attribute_state { + Some(attribute_state) => attribute_state, + None => return Ok(false), + }; + + // Get the state of the `NtfsDataRuns` iterator of that attribute. + let data_runs_state = match attribute_state.data_runs_state.take() { + Some(data_runs_state) => data_runs_state, + None => return Ok(false), + }; + + // Deserialize the state into an `NtfsDataRuns` iterator. 
+ let attribute = NtfsAttribute::new( + &attribute_state.file, + attribute_state.attribute_offset, + None, + ); + let (data, position) = attribute.non_resident_value_data_and_position(); + let mut stream_data_runs = + NtfsDataRuns::from_state(self.ntfs, data, position, data_runs_state); + + // Do we have a next data run? Save that. + let stream_data_run = match stream_data_runs.next() { + Some(stream_data_run) => stream_data_run, + None => return Ok(false), + }; + let stream_data_run = stream_data_run?; + self.stream_state.set_stream_data_run(stream_data_run); + + // We got another data run, so serialize the updated `NtfsDataRuns` state for the next iteration. + // This step is skipped when we got no data run, because it means we have fully iterated this iterator (and hence also the attribute and file). + attribute_state.data_runs_state = Some(stream_data_runs.into_state()); + + Ok(true) + } + + /// Returns whether we got another connected attribute. + fn next_attribute(&mut self, fs: &mut T) -> Result + where + T: Read + Seek, + { + // Do we have another connected attribute? + let entry = match self.connected_entries.next(fs) { + Some(entry) => entry, + None => return Ok(false), + }; + + // Read the corresponding FILE record into an `NtfsFile` and get the corresponding `NtfsAttribute`. + let entry = entry?; + let file = entry.to_file(self.ntfs, fs)?; + let attribute = entry.to_attribute(&file)?; + let attribute_offset = attribute.offset(); + + // Connected attributes must always be non-resident. Verify that. + if attribute.is_resident() { + return Err(NtfsError::UnexpectedResidentAttribute { + position: attribute.position(), + }); + } + + // Get an `NtfsDataRuns` iterator for iterating through the attribute value's data runs.
+ let (data, position) = attribute.non_resident_value_data_and_position(); + let mut stream_data_runs = NtfsDataRuns::new(self.ntfs, data, position); + + // Get the first data run already here to save time and let `data_position` return something meaningful. + let stream_data_run = match stream_data_runs.next() { + Some(stream_data_run) => stream_data_run, + None => return Ok(false), + }; + let stream_data_run = stream_data_run?; + self.stream_state.set_stream_data_run(stream_data_run); + + // Store the `NtfsFile` and serialize the `NtfsDataRuns` state for a later iteration. + let data_runs_state = Some(stream_data_runs.into_state()); + self.attribute_state = Some(AttributeState { + file, + attribute_offset, + data_runs_state, + }); + + Ok(true) + } +} + +impl<'n, 'f> NtfsReadSeek for NtfsAttributeListNonResidentAttributeValue<'n, 'f> { + fn read(&mut self, fs: &mut T, buf: &mut [u8]) -> Result + where + T: Read + Seek, + { + let mut bytes_read = 0usize; + + while bytes_read < buf.len() { + // Read from the current data run if there is one. + if self.stream_state.read_data_run(fs, buf, &mut bytes_read)? { + // We read something, so check the loop condition again if we need to read more. + continue; + } + + // Move to the next data run of the current attribute. + if self.next_data_run()? { + // We got another data run of the current attribute, so read again. + continue; + } + + // Move to the first data run of the next connected attribute. + if self.next_attribute(fs)? { + // We got another attribute, so read again. + continue; + } else { + // We read everything we could. + break; + } + } + + Ok(bytes_read) + } + + fn seek(&mut self, fs: &mut T, pos: SeekFrom) -> Result + where + T: Read + Seek, + { + let pos = self.stream_state.optimize_seek(pos, self.len())?; + + let mut bytes_left_to_seek = match pos { + SeekFrom::Start(n) => { + // Rewind to the very beginning. 
+ self.connected_entries.attribute_list_entries = + Some(self.initial_attribute_list_entries.clone()); + self.attribute_state = None; + self.stream_state = StreamState::new(self.len()); + n + } + SeekFrom::Current(n) if n >= 0 => n as u64, + _ => unreachable!(), + }; + + while bytes_left_to_seek > 0 { + // Seek inside the current data run if there is one. + if self + .stream_state + .seek_data_run(fs, pos, &mut bytes_left_to_seek)? + { + // We have reached our final seek position. + break; + } + + // Move to the next data run of the current attribute. + if self.next_data_run()? { + // We got another data run of the current attribute, so seek some more. + continue; + } + + // Move to the first data run of the next connected attribute. + if self.next_attribute(fs)? { + // We got another connected attribute, so seek some more. + continue; + } else { + // We seeked as far as we could. + break; + } + } + + match pos { + SeekFrom::Start(n) => self.stream_state.set_stream_position(n), + SeekFrom::Current(n) => self + .stream_state + .set_stream_position(self.stream_position() + n as u64), + _ => unreachable!(), + } + + Ok(self.stream_position()) + } + + fn stream_position(&self) -> u64 { + self.stream_state.stream_position() + } +} + +#[derive(Clone, Debug)] +struct AttributeListConnectedEntries<'n, 'f> { + attribute_list_entries: Option>, + instance: u16, + ty: NtfsAttributeType, +} + +impl<'n, 'f> AttributeListConnectedEntries<'n, 'f> { + fn new( + attribute_list_entries: NtfsAttributeListEntries<'n, 'f>, + instance: u16, + ty: NtfsAttributeType, + ) -> Self { + Self { + attribute_list_entries: Some(attribute_list_entries), + instance, + ty, + } + } + + fn next(&mut self, fs: &mut T) -> Option> + where + T: Read + Seek, + { + let attribute_list_entries = self.attribute_list_entries.as_mut()?; + + let entry = iter_try!(attribute_list_entries.next(fs)?); + if entry.instance() == self.instance && iter_try!(entry.ty()) == self.ty { + Some(Ok(entry)) + } else { + 
self.attribute_list_entries = None; + None + } + } +} + +#[derive(Clone, Debug)] +struct AttributeState<'n> { + file: NtfsFile<'n>, + attribute_offset: usize, + /// We cannot store an `NtfsDataRuns` here, because it has a reference to the `NtfsFile` that is also stored here. + /// This is why we have to go via `DataRunsState` in an `Option` to take() it and deserialize it into an `NtfsDataRuns` whenever necessary. + data_runs_state: Option, +} diff --git a/src/value/mod.rs b/src/value/mod.rs new file mode 100644 index 0000000..fb39a42 --- /dev/null +++ b/src/value/mod.rs @@ -0,0 +1,157 @@ +// Copyright 2021 Colin Finck +// SPDX-License-Identifier: GPL-2.0-or-later + +pub(crate) mod attribute_list_non_resident_attribute; +pub(crate) mod non_resident_attribute; +pub(crate) mod slice; + +use binread::io; +use binread::io::{Read, Seek, SeekFrom}; + +use crate::error::{NtfsError, Result}; +use crate::traits::NtfsReadSeek; +use attribute_list_non_resident_attribute::NtfsAttributeListNonResidentAttributeValue; +use non_resident_attribute::NtfsNonResidentAttributeValue; +use slice::NtfsSliceValue; + +#[derive(Clone, Debug)] +pub enum NtfsValue<'n, 'f> { + Slice(NtfsSliceValue<'f>), + NonResidentAttribute(NtfsNonResidentAttributeValue<'n, 'f>), + AttributeListNonResidentAttribute(NtfsAttributeListNonResidentAttributeValue<'n, 'f>), +} + +impl<'n, 'f> NtfsValue<'n, 'f> { + pub fn attach<'a, T>(self, fs: &'a mut T) -> NtfsValueAttached<'n, 'f, 'a, T> + where + T: Read + Seek, + { + NtfsValueAttached::new(fs, self) + } + + pub fn data_position(&self) -> Option { + match self { + Self::Slice(inner) => inner.data_position(), + Self::NonResidentAttribute(inner) => inner.data_position(), + Self::AttributeListNonResidentAttribute(inner) => inner.data_position(), + } + } + + pub fn len(&self) -> u64 { + match self { + Self::Slice(inner) => inner.len(), + Self::NonResidentAttribute(inner) => inner.len(), + Self::AttributeListNonResidentAttribute(inner) => inner.len(), + } + } +} + 
+impl<'n, 'f> NtfsReadSeek for NtfsValue<'n, 'f> { + fn read(&mut self, fs: &mut T, buf: &mut [u8]) -> Result + where + T: Read + Seek, + { + match self { + Self::Slice(inner) => inner.read(fs, buf), + Self::NonResidentAttribute(inner) => inner.read(fs, buf), + Self::AttributeListNonResidentAttribute(inner) => inner.read(fs, buf), + } + } + + fn seek(&mut self, fs: &mut T, pos: SeekFrom) -> Result + where + T: Read + Seek, + { + match self { + Self::Slice(inner) => inner.seek(fs, pos), + Self::NonResidentAttribute(inner) => inner.seek(fs, pos), + Self::AttributeListNonResidentAttribute(inner) => inner.seek(fs, pos), + } + } + + fn stream_position(&self) -> u64 { + match self { + Self::Slice(inner) => inner.stream_position(), + Self::NonResidentAttribute(inner) => inner.stream_position(), + Self::AttributeListNonResidentAttribute(inner) => inner.stream_position(), + } + } +} + +pub struct NtfsValueAttached<'n, 'f, 'a, T: Read + Seek> { + fs: &'a mut T, + value: NtfsValue<'n, 'f>, +} + +impl<'n, 'f, 'a, T> NtfsValueAttached<'n, 'f, 'a, T> +where + T: Read + Seek, +{ + fn new(fs: &'a mut T, value: NtfsValue<'n, 'f>) -> Self { + Self { fs, value } + } + + pub fn data_position(&self) -> Option { + self.value.data_position() + } + + pub fn detach(self) -> NtfsValue<'n, 'f> { + self.value + } + + pub fn len(&self) -> u64 { + self.value.len() + } +} + +impl<'n, 'f, 'a, T> Read for NtfsValueAttached<'n, 'f, 'a, T> +where + T: Read + Seek, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.value.read(self.fs, buf).map_err(io::Error::from) + } +} + +impl<'n, 'f, 'a, T> Seek for NtfsValueAttached<'n, 'f, 'a, T> +where + T: Read + Seek, +{ + fn seek(&mut self, pos: SeekFrom) -> io::Result { + self.value.seek(self.fs, pos).map_err(io::Error::from) + } +} + +pub(crate) fn seek_contiguous( + stream_position: &mut u64, + length: u64, + pos: SeekFrom, +) -> Result { + // This implementation is taken from 
https://github.com/rust-lang/rust/blob/18c524fbae3ab1bf6ed9196168d8c68fc6aec61a/library/std/src/io/cursor.rs + // It handles all signed/unsigned arithmetics properly and outputs the known `io` error message. + let (base_pos, offset) = match pos { + SeekFrom::Start(n) => { + *stream_position = n; + return Ok(n); + } + SeekFrom::End(n) => (length, n), + SeekFrom::Current(n) => (*stream_position, n), + }; + + let new_pos = if offset >= 0 { + base_pos.checked_add(offset as u64) + } else { + base_pos.checked_sub(offset.wrapping_neg() as u64) + }; + + match new_pos { + Some(n) => { + *stream_position = n; + Ok(*stream_position) + } + None => Err(NtfsError::Io(io::Error::new( + io::ErrorKind::InvalidInput, + "invalid seek to a negative or overflowing position", + ))), + } +} diff --git a/src/value/non_resident_attribute.rs b/src/value/non_resident_attribute.rs new file mode 100644 index 0000000..4639261 --- /dev/null +++ b/src/value/non_resident_attribute.rs @@ -0,0 +1,670 @@ +// Copyright 2021 Colin Finck +// SPDX-License-Identifier: GPL-2.0-or-later +// +//! This module implements a reader for a non-resident attribute value (that is not part of an AttributeList). +//! Non-resident attribute values are split up into one or more data runs, which are spread across the filesystem. +//! This reader provides one contiguous data stream for all data runs. + +use core::convert::TryFrom; +use core::iter::FusedIterator; +use core::mem; + +use binread::io; +use binread::io::Cursor; +use binread::io::{Read, Seek, SeekFrom}; +use binread::BinRead; + +use super::seek_contiguous; +use crate::error::{NtfsError, Result}; +use crate::ntfs::Ntfs; +use crate::traits::NtfsReadSeek; +use crate::types::{Lcn, Vcn}; + +#[derive(Clone, Debug)] +pub struct NtfsNonResidentAttributeValue<'n, 'f> { + /// Reference to the base `Ntfs` object of this filesystem. + ntfs: &'n Ntfs, + /// Attribute bytes where the data run information of this non-resident value is stored on the filesystem. 
+ data: &'f [u8], + /// Absolute position of the data run information within the filesystem, in bytes. + position: u64, + /// Iterator of data runs used for reading/seeking. + stream_data_runs: NtfsDataRuns<'n, 'f>, + /// Iteration state of the current data run. + stream_state: StreamState, +} + +impl<'n, 'f> NtfsNonResidentAttributeValue<'n, 'f> { + pub(crate) fn new( + ntfs: &'n Ntfs, + data: &'f [u8], + position: u64, + data_size: u64, + ) -> Result { + let mut stream_data_runs = NtfsDataRuns::new(ntfs, data, position); + let mut stream_state = StreamState::new(data_size); + + // Get the first data run already here to let `data_position` return something meaningful. + if let Some(stream_data_run) = stream_data_runs.next() { + let stream_data_run = stream_data_run?; + stream_state.set_stream_data_run(stream_data_run); + } + + Ok(Self { + ntfs, + data, + position, + stream_data_runs, + stream_state, + }) + } + + pub fn attach<'a, T>( + self, + fs: &'a mut T, + ) -> NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> + where + T: Read + Seek, + { + NtfsNonResidentAttributeValueAttached::new(fs, self) + } + + /// Returns the absolute current data seek position within the filesystem, in bytes. + /// This may be `None` if: + /// * The current seek position is outside the valid range, or + /// * The current data run is a "sparse" data run + pub fn data_position(&self) -> Option { + self.stream_state.data_position() + } + + pub fn data_runs(&self) -> NtfsDataRuns<'n, 'f> { + NtfsDataRuns::new(self.ntfs, self.data, self.position) + } + + pub fn len(&self) -> u64 { + self.stream_state.data_size() + } + + /// Returns whether we got another data run. 
+ fn next_data_run(&mut self) -> Result { + let stream_data_run = match self.stream_data_runs.next() { + Some(stream_data_run) => stream_data_run, + None => return Ok(false), + }; + let stream_data_run = stream_data_run?; + self.stream_state.set_stream_data_run(stream_data_run); + + Ok(true) + } + + pub fn ntfs(&self) -> &'n Ntfs { + self.ntfs + } + + /// Returns the absolute position of the data run information within the filesystem, in bytes. + pub fn position(&self) -> u64 { + self.position + } +} + +impl<'n, 'f> NtfsReadSeek for NtfsNonResidentAttributeValue<'n, 'f> { + fn read(&mut self, fs: &mut T, buf: &mut [u8]) -> Result + where + T: Read + Seek, + { + let mut bytes_read = 0usize; + + while bytes_read < buf.len() { + // Read from the current data run if there is one. + if self.stream_state.read_data_run(fs, buf, &mut bytes_read)? { + // We read something, so check the loop condition again if we need to read more. + continue; + } + + // Move to the next data run. + if self.next_data_run()? { + // We got another data run, so read again. + continue; + } else { + // We read everything we could. + break; + } + } + + Ok(bytes_read) + } + + fn seek(&mut self, fs: &mut T, pos: SeekFrom) -> Result + where + T: Read + Seek, + { + let pos = self.stream_state.optimize_seek(pos, self.len())?; + + let mut bytes_left_to_seek = match pos { + SeekFrom::Start(n) => { + // Rewind to the very beginning. + self.stream_data_runs = self.data_runs(); + self.stream_state = StreamState::new(self.len()); + n + } + SeekFrom::Current(n) if n >= 0 => n as u64, + _ => unreachable!(), + }; + + while bytes_left_to_seek > 0 { + // Seek inside the current data run if there is one. + if self + .stream_state + .seek_data_run(fs, pos, &mut bytes_left_to_seek)? + { + // We have reached our final seek position. + break; + } + + // Move to the next data run. + if self.next_data_run()? { + // We got another data run, so seek some more. + continue; + } else { + // We seeked as far as we could. 
+ break; + } + } + + match pos { + SeekFrom::Start(n) => self.stream_state.set_stream_position(n), + SeekFrom::Current(n) => self + .stream_state + .set_stream_position(self.stream_position() + n as u64), + _ => unreachable!(), + } + + Ok(self.stream_position()) + } + + fn stream_position(&self) -> u64 { + self.stream_state.stream_position() + } +} + +pub struct NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T: Read + Seek> { + fs: &'a mut T, + value: NtfsNonResidentAttributeValue<'n, 'f>, +} + +impl<'n, 'f, 'a, T> NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> +where + T: Read + Seek, +{ + fn new(fs: &'a mut T, value: NtfsNonResidentAttributeValue<'n, 'f>) -> Self { + Self { fs, value } + } + + pub fn data_position(&self) -> Option { + self.value.data_position() + } + + pub fn detach(self) -> NtfsNonResidentAttributeValue<'n, 'f> { + self.value + } + + pub fn len(&self) -> u64 { + self.value.len() + } +} + +impl<'n, 'f, 'a, T> Read for NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> +where + T: Read + Seek, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.value.read(self.fs, buf).map_err(io::Error::from) + } +} + +impl<'n, 'f, 'a, T> Seek for NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> +where + T: Read + Seek, +{ + fn seek(&mut self, pos: SeekFrom) -> io::Result { + self.value.seek(self.fs, pos).map_err(io::Error::from) + } +} + +#[derive(Clone, Debug)] +pub struct NtfsDataRuns<'n, 'f> { + ntfs: &'n Ntfs, + data: &'f [u8], + position: u64, + state: DataRunsState, +} + +impl<'n, 'f> NtfsDataRuns<'n, 'f> { + pub(crate) fn new(ntfs: &'n Ntfs, data: &'f [u8], position: u64) -> Self { + let state = DataRunsState { + offset: 0, + previous_lcn: Lcn::from(0), + }; + + Self { + ntfs, + data, + position, + state, + } + } + + pub(crate) fn from_state( + ntfs: &'n Ntfs, + data: &'f [u8], + position: u64, + state: DataRunsState, + ) -> Self { + Self { + ntfs, + data, + position, + state, + } + } + + pub(crate) fn into_state(self) -> 
DataRunsState { + self.state + } + + pub fn position(&self) -> u64 { + self.position + self.state.offset as u64 + } + + fn read_variable_length_bytes( + &self, + cursor: &mut Cursor<&[u8]>, + byte_count: u8, + ) -> Result<[u8; 8]> { + const MAX_BYTE_COUNT: u8 = mem::size_of::() as u8; + + if byte_count > MAX_BYTE_COUNT { + return Err(NtfsError::InvalidByteCountInDataRunHeader { + position: self.position(), + expected: byte_count, + actual: MAX_BYTE_COUNT, + }); + } + + let mut buf = [0u8; MAX_BYTE_COUNT as usize]; + cursor.read_exact(&mut buf[..byte_count as usize])?; + + Ok(buf) + } + + fn read_variable_length_signed_integer( + &self, + cursor: &mut Cursor<&[u8]>, + byte_count: u8, + ) -> Result { + let buf = self.read_variable_length_bytes(cursor, byte_count)?; + let mut integer = i64::from_le_bytes(buf); + + // We have read `byte_count` bytes into a zeroed buffer and just interpreted that as an `i64`. + // Sign-extend `integer` to make it replicate the proper value. + let unused_bits = (mem::size_of::() as u32 - byte_count as u32) * 8; + integer = integer.wrapping_shl(unused_bits).wrapping_shr(unused_bits); + + Ok(integer) + } + + fn read_variable_length_unsigned_integer( + &self, + cursor: &mut Cursor<&[u8]>, + byte_count: u8, + ) -> Result { + let buf = self.read_variable_length_bytes(cursor, byte_count)?; + let integer = u64::from_le_bytes(buf); + Ok(integer) + } +} + +impl<'n, 'f> Iterator for NtfsDataRuns<'n, 'f> { + type Item = Result; + + fn next(&mut self) -> Option> { + if self.state.offset >= self.data.len() { + return None; + } + + // Read the single header byte. + let mut cursor = Cursor::new(&self.data[self.state.offset..]); + let header = iter_try!(u8::read(&mut cursor)); + + // A zero byte marks the end of the data runs. + if header == 0 { + // Ensure that any further call uses the fast path above. 
+ self.state.offset = self.data.len(); + return None; + } + + // The lower nibble indicates the length of the following cluster count variable length integer. + let cluster_count_byte_count = header & 0x0f; + let cluster_count = iter_try!( + self.read_variable_length_unsigned_integer(&mut cursor, cluster_count_byte_count) + ); + + // The upper nibble indicates the length of the following VCN variable length integer. + let vcn_byte_count = (header & 0xf0) >> 4; + let vcn = Vcn::from(iter_try!( + self.read_variable_length_signed_integer(&mut cursor, vcn_byte_count) + )); + + // Turn the read VCN into an absolute LCN. + let lcn = iter_try!(self.state.previous_lcn.checked_add(vcn).ok_or({ + NtfsError::InvalidVcnInDataRunHeader { + position: NtfsDataRuns::position(self), + vcn, + previous_lcn: self.state.previous_lcn, + } + })); + self.state.previous_lcn = lcn; + + // Only advance after having checked for success. + // In case of an error, a subsequent call shall output the same error again. + let bytes_to_advance = cursor.stream_position().unwrap() as usize; + self.state.offset += bytes_to_advance; + + let data_run = iter_try!(NtfsDataRun::new(self.ntfs, lcn, cluster_count)); + Some(Ok(data_run)) + } +} + +impl<'n, 'f> FusedIterator for NtfsDataRuns<'n, 'f> {} + +#[derive(Clone, Debug)] +pub(crate) struct DataRunsState { + offset: usize, + previous_lcn: Lcn, +} + +/// Describes a single NTFS data run, which is a continuous cluster range of a non-resident value. +/// +/// A data run's size is a multiple of the cluster size configured for the filesystem. +/// However, a data run does not know about the actual size used by data. This information is only available in the corresponding attribute. +/// Keep this in mind when doing reads and seeks on data runs. You may end up on allocated but unused data. +#[derive(Clone, Debug)] +pub struct NtfsDataRun { + /// Absolute position of the data run within the filesystem, in bytes. 
+    /// This may be zero if this is a "sparse" data run.
+    position: u64,
+    /// Total allocated size of the data run, in bytes.
+    /// The actual size used by data may be lower, but a data run does not know about that.
+    allocated_size: u64,
+    /// Current relative position within the data run value, in bytes.
+    stream_position: u64,
+}
+
+impl NtfsDataRun {
+    /// Creates a data run that starts at `lcn` and spans `cluster_count` clusters.
+    ///
+    /// The stream position of the new data run is zero.
+    ///
+    /// # Errors
+    ///
+    /// * Propagates any error of `lcn.position(ntfs)`.
+    /// * Returns [`NtfsError::InvalidClusterCount`] if the allocated size in bytes
+    ///   (`cluster_count` multiplied by the cluster size) does not fit into a `u64`.
+    pub(crate) fn new(ntfs: &Ntfs, lcn: Lcn, cluster_count: u64) -> Result<Self> {
+        let position = lcn.position(ntfs)?;
+        let allocated_size = cluster_count
+            .checked_mul(ntfs.cluster_size() as u64)
+            .ok_or(NtfsError::InvalidClusterCount { cluster_count })?;
+
+        Ok(Self {
+            position,
+            allocated_size,
+            stream_position: 0,
+        })
+    }
+
+    /// Returns the absolute current data seek position within the filesystem, in bytes.
+    /// This may be `None` if:
+    ///   * The current seek position is outside the valid range, or
+    ///   * The data run is a "sparse" data run
+    pub fn data_position(&self) -> Option<u64> {
+        if self.position > 0 && self.stream_position < self.len() {
+            Some(self.position + self.stream_position)
+        } else {
+            None
+        }
+    }
+
+    /// Returns the allocated size of the data run, in bytes.
+    pub fn len(&self) -> u64 {
+        self.allocated_size
+    }
+
+    /// Returns the number of bytes between the current stream position and the end of the
+    /// allocated size. This is zero if the stream position is at or past the end.
+    pub(crate) fn remaining_len(&self) -> u64 {
+        self.len().saturating_sub(self.stream_position)
+    }
+}
+
+impl NtfsReadSeek for NtfsDataRun {
+    /// Reads up to `buf.len()` bytes from the current stream position of this data run.
+    ///
+    /// Returns the number of bytes written to the beginning of `buf`, which is the smaller of
+    /// `buf.len()` and the remaining length of the data run. `Ok(0)` is returned when the
+    /// stream position is at or past the end of the data run.
+    ///
+    /// A sparse data run yields zero bytes without touching `fs`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if seeking `fs` fails or if `fs` cannot supply all requested bytes.
+    fn read<T>(&mut self, fs: &mut T, buf: &mut [u8]) -> Result<usize>
+    where
+        T: Read + Seek,
+    {
+        let remaining_len = self.remaining_len();
+        if remaining_len == 0 {
+            return Ok(0);
+        }
+
+        // Clamp in the u64 domain: the result never exceeds `buf.len()`, so the cast back to
+        // usize is lossless even on targets where usize is narrower than u64.
+        let bytes_to_read = u64::min(buf.len() as u64, remaining_len) as usize;
+        let work_slice = &mut buf[..bytes_to_read];
+
+        // The stream position is inside the data run (checked above), so `data_position`
+        // is `None` exactly when this is a sparse data run.
+        match self.data_position() {
+            Some(data_position) => {
+                // This data run contains "real" data.
+                // A plain `read` may return fewer bytes than requested. As we report and
+                // advance by `bytes_to_read`, the slice must be filled completely.
+                fs.seek(SeekFrom::Start(data_position))?;
+                fs.read_exact(work_slice)?;
+            }
+            None => {
+                // This is a sparse data run.
+                work_slice.fill(0);
+            }
+        }
+
+        self.stream_position += bytes_to_read as u64;
+        Ok(bytes_to_read)
+    }
+
+    /// Updates the stream position within this data run by delegating to `seek_contiguous`
+    /// with the allocated size as length. `_fs` is not accessed.
+    fn seek<T>(&mut self, _fs: &mut T, pos: SeekFrom) -> Result<u64>
+    where
+        T: Read + Seek,
+    {
+        let length = self.len();
+        seek_contiguous(&mut self.stream_position, length, pos)
+    }
+
+    /// Returns the current relative position within the data run, in bytes.
+    fn stream_position(&self) -> u64 {
+        self.stream_position
+    }
+}
+
+/// Tracks the data run currently being read and the position within the entire value.
+#[derive(Clone, Debug)]
+pub(crate) struct StreamState {
+    /// Current data run we are reading from.
+    stream_data_run: Option<NtfsDataRun>,
+    /// Current relative position within the entire value, in bytes.
+    stream_position: u64,
+    /// Total (used) data size, in bytes.
+    data_size: u64,
+}
+
+impl StreamState {
+    /// Creates a state for a value of `data_size` bytes, positioned at the beginning and
+    /// without a current data run.
+    pub(crate) const fn new(data_size: u64) -> Self {
+        Self {
+            stream_data_run: None,
+            stream_position: 0,
+            data_size,
+        }
+    }
+
+    /// Returns the absolute current data seek position within the filesystem, in bytes.
+    /// This may be `None` if:
+    ///   * There is no current data run, or
+    ///   * The current seek position is outside the valid range, or
+    ///   * The current data run is a "sparse" data run
+    pub(crate) fn data_position(&self) -> Option<u64> {
+        let stream_data_run = self.stream_data_run.as_ref()?;
+        stream_data_run.data_position()
+    }
+
+    /// Returns the total (used) data size of the value, in bytes.
+    pub(crate) fn data_size(&self) -> u64 {
+        self.data_size
+    }
+
+    /// Reduces `pos` to a forward-only seek, preferring [`SeekFrom::Current`] where possible.
+    ///
+    /// `data_size` is the size that [`SeekFrom::End`] is resolved against.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the target position would be negative or overflow a `u64`.
+    pub(crate) fn optimize_seek(&self, pos: SeekFrom, data_size: u64) -> Result<SeekFrom> {
+        let mut pos = self.simplify_seek(pos, data_size)?;
+
+        // Translate `SeekFrom::Start(n)` into a more efficient `SeekFrom::Current` if n >= self.stream_position.
+        // We don't need to traverse data runs from the very beginning then.
+        if let SeekFrom::Start(n) = pos {
+            if let Some(n_from_current) = n.checked_sub(self.stream_position()) {
+                // Distances beyond the i64 range keep the absolute `SeekFrom::Start` form.
+                if let Ok(signed_n_from_current) = i64::try_from(n_from_current) {
+                    pos = SeekFrom::Current(signed_n_from_current);
+                }
+            }
+        }
+
+        Ok(pos)
+    }
+
+    /// Simplifies any [`SeekFrom`] to the two cases [`SeekFrom::Start(n)`] and [`SeekFrom::Current(n)`], with n >= 0.
+    /// This is necessary, because an NTFS data run has necessary information for the next data run, but not the other way round.
+    /// Hence, we can't efficiently move backwards.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`NtfsError::Io`] with [`io::ErrorKind::InvalidInput`] if the target position
+    /// would be negative or overflow a `u64`.
+    fn simplify_seek(&self, pos: SeekFrom, data_size: u64) -> Result<SeekFrom> {
+        let simplified = match pos {
+            // Seek n bytes from the very beginning.
+            SeekFrom::Start(n) => Some(SeekFrom::Start(n)),
+            // Seek data_size + n bytes from the very beginning.
+            SeekFrom::End(n) if n >= 0 => data_size.checked_add(n as u64).map(SeekFrom::Start),
+            // Seek data_size + n bytes (with n being negative) from the very beginning.
+            // `wrapping_neg() as u64` yields the magnitude of n, including for `i64::MIN`.
+            SeekFrom::End(n) => data_size
+                .checked_sub(n.wrapping_neg() as u64)
+                .map(SeekFrom::Start),
+            // Seek n bytes from the current position.
+            // This is an optimization for the common case, as we don't need to traverse all
+            // data runs from the very beginning.
+            SeekFrom::Current(n) if n >= 0 => self
+                .stream_position()
+                .checked_add(n as u64)
+                .map(|_| SeekFrom::Current(n)),
+            // Seek stream_position + n bytes (with n being negative) from the very beginning.
+            SeekFrom::Current(n) => self
+                .stream_position()
+                .checked_sub(n.wrapping_neg() as u64)
+                .map(SeekFrom::Start),
+        };
+
+        simplified.ok_or_else(|| {
+            NtfsError::Io(io::Error::new(
+                io::ErrorKind::InvalidInput,
+                "invalid seek to a negative or overflowing position",
+            ))
+        })
+    }
+
+    /// Reads from the current data run into `buf[*bytes_read..]` and advances `*bytes_read`
+    /// and the stream position by the number of bytes read.
+    ///
+    /// Returns `Ok(true)` if a read on the current data run was performed.
+    /// Returns `Ok(false)` if there is no current data run, the current data run is exhausted,
+    /// or the (used) data size of the value has been reached.
+    ///
+    /// Expects `*bytes_read <= buf.len()`.
+    pub(crate) fn read_data_run<T>(
+        &mut self,
+        fs: &mut T,
+        buf: &mut [u8],
+        bytes_read: &mut usize,
+    ) -> Result<bool>
+    where
+        T: Read + Seek,
+    {
+        // Is there a data run to read from?
+        let data_run = match &mut self.stream_data_run {
+            Some(data_run) => data_run,
+            None => return Ok(false),
+        };
+
+        // Have we already seeked past the size of the data run?
+        if data_run.stream_position() >= data_run.len() {
+            return Ok(false);
+        }
+
+        // We also must not read past the (used) data size of the entire value.
+        // (remember that a data run only knows about its allocated size, not its used size!)
+        let remaining_data_size = self.data_size.saturating_sub(self.stream_position);
+        if remaining_data_size == 0 {
+            return Ok(false);
+        }
+
+        // Read up to the buffer length or up to the (used) data size, whatever comes first.
+        // The minimum is taken in the u64 domain, so the cast back to usize cannot truncate.
+        let start = *bytes_read;
+        let remaining_buf_len = buf.len() - start;
+        let end = start + u64::min(remaining_buf_len as u64, remaining_data_size) as usize;
+
+        // Perform the actual read.
+        let bytes_read_in_data_run = data_run.read(fs, &mut buf[start..end])?;
+        *bytes_read += bytes_read_in_data_run;
+        self.stream_position += bytes_read_in_data_run as u64;
+
+        Ok(true)
+    }
+
+    /// Returns whether we have reached the final seek position within this data run and can therefore stop seeking.
+    ///
+    /// In all other cases, the caller should move to the next data run and seek again.
+    /// `*bytes_left_to_seek` is reduced by the remaining length of every skipped data run.
+    ///
+    /// `bytes_to_seek` must be [`SeekFrom::Start`] or [`SeekFrom::Current`]; only its variant
+    /// is inspected.
+    pub(crate) fn seek_data_run<T>(
+        &mut self,
+        fs: &mut T,
+        bytes_to_seek: SeekFrom,
+        bytes_left_to_seek: &mut u64,
+    ) -> Result<bool>
+    where
+        T: Read + Seek,
+    {
+        // Is there a data run to seek in?
+        let data_run = match &mut self.stream_data_run {
+            Some(data_run) => data_run,
+            None => return Ok(false),
+        };
+
+        if *bytes_left_to_seek < data_run.remaining_len() {
+            // We have found the right data run, now we have to seek inside the data run.
+            //
+            // If we were called to seek from the very beginning, we can be sure that this
+            // data run is also seeked from the beginning.
+            // Hence, we can use SeekFrom::Start and use the full u64 range.
+            //
+            // If we were called to seek from the current position, we have to use
+            // SeekFrom::Current and can only use the positive part of the i64 range.
+            // This is no problem though, as `bytes_left_to_seek` was also created from a
+            // positive i64 value in that case.
+            let pos = match bytes_to_seek {
+                SeekFrom::Start(_) => SeekFrom::Start(*bytes_left_to_seek),
+                SeekFrom::Current(_) => SeekFrom::Current(*bytes_left_to_seek as i64),
+                _ => unreachable!(),
+            };
+
+            data_run.seek(fs, pos)?;
+            Ok(true)
+        } else {
+            // We can skip the entire data run.
+            *bytes_left_to_seek -= data_run.remaining_len();
+            Ok(false)
+        }
+    }
+
+    /// Replaces the data run we are currently reading from.
+    pub(crate) fn set_stream_data_run(&mut self, stream_data_run: NtfsDataRun) {
+        self.stream_data_run = Some(stream_data_run);
+    }
+
+    /// Overwrites the current relative position within the entire value, in bytes.
+    pub(crate) fn set_stream_position(&mut self, stream_position: u64) {
+        self.stream_position = stream_position;
+    }
+
+    /// Returns the current relative position within the entire value, in bytes.
+    pub(crate) fn stream_position(&self) -> u64 {
+        self.stream_position
+    }
+}
diff --git a/src/value/slice.rs b/src/value/slice.rs
new file mode 100644
index 0000000..abfb0f9
--- /dev/null
+++ b/src/value/slice.rs
@@ -0,0 +1,86 @@
+// Copyright 2021 Colin Finck
+// SPDX-License-Identifier: GPL-2.0-or-later
+//
+//! This module implements a reader for a value that is already in memory and can therefore be accessed via a slice.
+//! This is the case for all resident attribute values and index record values.
+//! Such values are part of NTFS records. NTFS records can't be directly read from the filesystem, which is why they
+//! are always read into a buffer first and then fixed up in memory.
+//! Further accesses to the record data can then happen via slices.
+ +use binread::io::{Read, Seek, SeekFrom}; + +use super::seek_contiguous; +use crate::error::Result; +use crate::traits::NtfsReadSeek; + +#[derive(Clone, Debug)] +pub struct NtfsSliceValue<'f> { + data: &'f [u8], + position: u64, + stream_position: u64, +} + +impl<'f> NtfsSliceValue<'f> { + pub(crate) fn new(data: &'f [u8], position: u64) -> Self { + Self { + data, + position, + stream_position: 0, + } + } + + pub fn data(&self) -> &'f [u8] { + self.data + } + + /// Returns the absolute current data seek position within the filesystem, in bytes. + /// This may be `None` if the current seek position is outside the valid range. + pub fn data_position(&self) -> Option { + if self.stream_position < self.len() { + Some(self.position + self.stream_position) + } else { + None + } + } + + pub fn len(&self) -> u64 { + self.data.len() as u64 + } + + fn remaining_len(&self) -> u64 { + self.len().saturating_sub(self.stream_position) + } +} + +impl<'f> NtfsReadSeek for NtfsSliceValue<'f> { + fn read(&mut self, _fs: &mut T, buf: &mut [u8]) -> Result + where + T: Read + Seek, + { + if self.remaining_len() == 0 { + return Ok(0); + } + + let bytes_to_read = usize::min(buf.len(), self.remaining_len() as usize); + let work_slice = &mut buf[..bytes_to_read]; + + let start = self.stream_position as usize; + let end = start + bytes_to_read; + work_slice.copy_from_slice(&self.data[start..end]); + + self.stream_position += bytes_to_read as u64; + Ok(bytes_to_read) + } + + fn seek(&mut self, _fs: &mut T, pos: SeekFrom) -> Result + where + T: Read + Seek, + { + let length = self.len(); + seek_contiguous(&mut self.stream_position, length, pos) + } + + fn stream_position(&self) -> u64 { + self.stream_position + } +} -- cgit v1.2.3