// Copyright 2021 Colin Finck // SPDX-License-Identifier: MIT OR Apache-2.0 // //! This module implements a reader for a non-resident attribute value (that is not part of an Attribute List). //! Non-resident attribute values are split up into one or more data runs, which are spread across the filesystem. //! This reader provides one contiguous data stream for all data runs. use core::convert::TryFrom; use core::iter::FusedIterator; use core::mem; use binread::io; use binread::io::Cursor; use binread::io::{Read, Seek, SeekFrom}; use binread::BinRead; use super::seek_contiguous; use crate::error::{NtfsError, Result}; use crate::ntfs::Ntfs; use crate::traits::NtfsReadSeek; use crate::types::{Lcn, Vcn}; /// Reader for a non-resident attribute value (whose data is in a cluster range outside the File Record). #[derive(Clone, Debug)] pub struct NtfsNonResidentAttributeValue<'n, 'f> { /// Reference to the base `Ntfs` object of this filesystem. ntfs: &'n Ntfs, /// Attribute bytes where the Data Run information of this non-resident value is stored on the filesystem. data: &'f [u8], /// Absolute position of the Data Run information within the filesystem, in bytes. position: u64, /// Iterator of data runs used for reading/seeking. stream_data_runs: NtfsDataRuns<'n, 'f>, /// Iteration state of the current Data Run. stream_state: StreamState, } impl<'n, 'f> NtfsNonResidentAttributeValue<'n, 'f> { pub(crate) fn new( ntfs: &'n Ntfs, data: &'f [u8], position: u64, data_size: u64, ) -> Result { let mut stream_data_runs = NtfsDataRuns::new(ntfs, data, position); let mut stream_state = StreamState::new(data_size); // Get the first Data Run already here to let `data_position` return something meaningful. if let Some(stream_data_run) = stream_data_runs.next() { let stream_data_run = stream_data_run?; stream_state.set_stream_data_run(stream_data_run); } Ok(Self { ntfs, data, position, stream_data_runs, stream_state, }) } /// Returns a variant of this reader that implements [`Read`] and [`Seek`] /// by mutably borrowing the filesystem reader. pub fn attach<'a, T>( self, fs: &'a mut T, ) -> NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> where T: Read + Seek, { NtfsNonResidentAttributeValueAttached::new(fs, self) } /// Returns the absolute current data seek position within the filesystem, in bytes. /// This may be `None` if: /// * The current seek position is outside the valid range, or /// * The current Data Run is a "sparse" Data Run pub fn data_position(&self) -> Option { self.stream_state.data_position() } /// Returns an iterator over all data runs of this non-resident attribute. pub fn data_runs(&self) -> NtfsDataRuns<'n, 'f> { NtfsDataRuns::new(self.ntfs, self.data, self.position) } /// Returns the total length of the non-resident attribute value data, in bytes. pub fn len(&self) -> u64 { self.stream_state.data_size() } /// Returns whether we got another Data Run. fn next_data_run(&mut self) -> Result { let stream_data_run = match self.stream_data_runs.next() { Some(stream_data_run) => stream_data_run, None => return Ok(false), }; let stream_data_run = stream_data_run?; self.stream_state.set_stream_data_run(stream_data_run); Ok(true) } /// Returns the [`Ntfs`] object reference associated to this value. pub fn ntfs(&self) -> &'n Ntfs { self.ntfs } /// Returns the absolute position of the Data Run information within the filesystem, in bytes. pub fn position(&self) -> u64 { self.position } } impl<'n, 'f> NtfsReadSeek for NtfsNonResidentAttributeValue<'n, 'f> { fn read(&mut self, fs: &mut T, buf: &mut [u8]) -> Result where T: Read + Seek, { let mut bytes_read = 0usize; while bytes_read < buf.len() { // Read from the current Data Run if there is one. if self.stream_state.read_data_run(fs, buf, &mut bytes_read)? { // We read something, so check the loop condition again if we need to read more. continue; } // Move to the next Data Run. if self.next_data_run()? { // We got another Data Run, so read again. continue; } else { // We read everything we could. break; } } Ok(bytes_read) } fn seek(&mut self, fs: &mut T, pos: SeekFrom) -> Result where T: Read + Seek, { let pos = self.stream_state.optimize_seek(pos, self.len())?; let mut bytes_left_to_seek = match pos { SeekFrom::Start(n) => { // Rewind to the very beginning. self.stream_data_runs = self.data_runs(); self.stream_state = StreamState::new(self.len()); n } SeekFrom::Current(n) if n >= 0 => n as u64, _ => unreachable!(), }; while bytes_left_to_seek > 0 { // Seek inside the current Data Run if there is one. if self .stream_state .seek_data_run(fs, pos, &mut bytes_left_to_seek)? { // We have reached our final seek position. break; } // Move to the next Data Run. if self.next_data_run()? { // We got another Data Run, so seek some more. continue; } else { // We seeked as far as we could. break; } } match pos { SeekFrom::Start(n) => self.stream_state.set_stream_position(n), SeekFrom::Current(n) => self .stream_state .set_stream_position(self.stream_position() + n as u64), _ => unreachable!(), } Ok(self.stream_position()) } fn stream_position(&self) -> u64 { self.stream_state.stream_position() } } /// A variant of [`NtfsNonResidentAttributeValue`] that implements [`Read`] and [`Seek`] /// by mutably borrowing the filesystem reader. #[derive(Debug)] pub struct NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T: Read + Seek> { fs: &'a mut T, value: NtfsNonResidentAttributeValue<'n, 'f>, } impl<'n, 'f, 'a, T> NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> where T: Read + Seek, { fn new(fs: &'a mut T, value: NtfsNonResidentAttributeValue<'n, 'f>) -> Self { Self { fs, value } } /// Returns the absolute current data seek position within the filesystem, in bytes. /// This may be `None` if: /// * The current seek position is outside the valid range, or /// * The current Data Run is a "sparse" Data Run. pub fn data_position(&self) -> Option { self.value.data_position() } /// Consumes this reader and returns the inner [`NtfsNonResidentAttributeValue`]. pub fn detach(self) -> NtfsNonResidentAttributeValue<'n, 'f> { self.value } /// Returns the total length of the attribute value, in bytes. pub fn len(&self) -> u64 { self.value.len() } } impl<'n, 'f, 'a, T> Read for NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> where T: Read + Seek, { fn read(&mut self, buf: &mut [u8]) -> io::Result { self.value.read(self.fs, buf).map_err(io::Error::from) } } impl<'n, 'f, 'a, T> Seek for NtfsNonResidentAttributeValueAttached<'n, 'f, 'a, T> where T: Read + Seek, { fn seek(&mut self, pos: SeekFrom) -> io::Result { self.value.seek(self.fs, pos).map_err(io::Error::from) } } /// Iterator over /// all data runs of a non-resident attribute, /// returning an [`NtfsDataRun`] for each entry, /// implementing [`Iterator`] and [`FusedIterator`]. /// /// This iterator is returned from the [`NtfsNonResidentAttributeValue::data_runs`] function. #[derive(Clone, Debug)] pub struct NtfsDataRuns<'n, 'f> { ntfs: &'n Ntfs, data: &'f [u8], position: u64, state: DataRunsState, } impl<'n, 'f> NtfsDataRuns<'n, 'f> { pub(crate) fn new(ntfs: &'n Ntfs, data: &'f [u8], position: u64) -> Self { let state = DataRunsState { offset: 0, previous_lcn: Lcn::from(0), }; Self { ntfs, data, position, state, } } pub(crate) fn from_state( ntfs: &'n Ntfs, data: &'f [u8], position: u64, state: DataRunsState, ) -> Self { Self { ntfs, data, position, state, } } pub(crate) fn into_state(self) -> DataRunsState { self.state } /// Returns the absolute position of the current Data Run header within the filesystem, in bytes. pub fn position(&self) -> u64 { self.position + self.state.offset as u64 } fn read_variable_length_bytes( &self, cursor: &mut Cursor<&[u8]>, byte_count: u8, ) -> Result<[u8; 8]> { const MAX_BYTE_COUNT: u8 = mem::size_of::() as u8; if byte_count > MAX_BYTE_COUNT { return Err(NtfsError::InvalidByteCountInDataRunHeader { position: self.position(), expected: byte_count, actual: MAX_BYTE_COUNT, }); } let mut buf = [0u8; MAX_BYTE_COUNT as usize]; cursor.read_exact(&mut buf[..byte_count as usize])?; Ok(buf) } fn read_variable_length_signed_integer( &self, cursor: &mut Cursor<&[u8]>, byte_count: u8, ) -> Result { let buf = self.read_variable_length_bytes(cursor, byte_count)?; let mut integer = i64::from_le_bytes(buf); // We have read `byte_count` bytes into a zeroed buffer and just interpreted that as an `i64`. // Sign-extend `integer` to make it replicate the proper value. let unused_bits = (mem::size_of::() as u32 - byte_count as u32) * 8; integer = integer.wrapping_shl(unused_bits).wrapping_shr(unused_bits); Ok(integer) } fn read_variable_length_unsigned_integer( &self, cursor: &mut Cursor<&[u8]>, byte_count: u8, ) -> Result { let buf = self.read_variable_length_bytes(cursor, byte_count)?; let integer = u64::from_le_bytes(buf); Ok(integer) } } impl<'n, 'f> Iterator for NtfsDataRuns<'n, 'f> { type Item = Result; fn next(&mut self) -> Option> { if self.state.offset >= self.data.len() { return None; } // Read the single header byte. let mut cursor = Cursor::new(&self.data[self.state.offset..]); let header = iter_try!(u8::read(&mut cursor)); // A zero byte marks the end of the data runs. if header == 0 { // Ensure that any further call uses the fast path above. self.state.offset = self.data.len(); return None; } // The lower nibble indicates the length of the following cluster count variable length integer. let cluster_count_byte_count = header & 0x0f; let cluster_count = iter_try!( self.read_variable_length_unsigned_integer(&mut cursor, cluster_count_byte_count) ); // The upper nibble indicates the length of the following VCN variable length integer. let vcn_byte_count = (header & 0xf0) >> 4; let vcn = Vcn::from(iter_try!( self.read_variable_length_signed_integer(&mut cursor, vcn_byte_count) )); // Turn the read VCN into an absolute LCN. let lcn = iter_try!(self.state.previous_lcn.checked_add(vcn).ok_or({ NtfsError::InvalidVcnInDataRunHeader { position: NtfsDataRuns::position(self), vcn, previous_lcn: self.state.previous_lcn, } })); self.state.previous_lcn = lcn; // Only advance after having checked for success. // In case of an error, a subsequent call shall output the same error again. let bytes_to_advance = cursor.stream_position().unwrap() as usize; self.state.offset += bytes_to_advance; let data_run = iter_try!(NtfsDataRun::new(self.ntfs, lcn, cluster_count)); Some(Ok(data_run)) } } impl<'n, 'f> FusedIterator for NtfsDataRuns<'n, 'f> {} #[derive(Clone, Debug)] pub(crate) struct DataRunsState { offset: usize, previous_lcn: Lcn, } /// A single NTFS Data Run, which is a continuous cluster range of a non-resident value. /// /// A Data Run's size is a multiple of the cluster size configured for the filesystem. /// However, a Data Run does not know about the actual size used by data. This information is only available in the corresponding attribute. /// Keep this in mind when doing reads and seeks on data runs. You may end up on allocated but unused data. #[derive(Clone, Debug)] pub struct NtfsDataRun { /// Absolute position of the Data Run within the filesystem, in bytes. /// This may be zero if this is a "sparse" Data Run. position: u64, /// Total allocated size of the Data Run, in bytes. /// The actual size used by data may be lower, but a Data Run does not know about that. allocated_size: u64, /// Current relative position within the Data Run value, in bytes. stream_position: u64, } impl NtfsDataRun { pub(crate) fn new(ntfs: &Ntfs, lcn: Lcn, cluster_count: u64) -> Result { let position = lcn.position(ntfs)?; let allocated_size = cluster_count .checked_mul(ntfs.cluster_size() as u64) .ok_or(NtfsError::InvalidClusterCount { cluster_count })?; Ok(Self { position, allocated_size, stream_position: 0, }) } /// Returns the absolute current data seek position within the filesystem, in bytes. /// This may be `None` if: /// * The current seek position is outside the valid range, or /// * The Data Run is a "sparse" Data Run pub fn data_position(&self) -> Option { if self.position > 0 && self.stream_position < self.len() { Some(self.position + self.stream_position) } else { None } } /// Returns the allocated size of the Data Run, in bytes. pub fn len(&self) -> u64 { self.allocated_size } pub(crate) fn remaining_len(&self) -> u64 { self.len().saturating_sub(self.stream_position) } } impl NtfsReadSeek for NtfsDataRun { fn read(&mut self, fs: &mut T, buf: &mut [u8]) -> Result where T: Read + Seek, { if self.remaining_len() == 0 { return Ok(0); } let bytes_to_read = usize::min(buf.len(), self.remaining_len() as usize); let work_slice = &mut buf[..bytes_to_read]; if self.position == 0 { // This is a sparse Data Run. work_slice.fill(0); } else { // This Data Run contains "real" data. // We have already performed all necessary sanity checks above, so we can just unwrap here. fs.seek(SeekFrom::Start(self.data_position().unwrap()))?; fs.read(work_slice)?; } self.stream_position += bytes_to_read as u64; Ok(bytes_to_read) } fn seek(&mut self, _fs: &mut T, pos: SeekFrom) -> Result where T: Read + Seek, { let length = self.len(); seek_contiguous(&mut self.stream_position, length, pos) } fn stream_position(&self) -> u64 { self.stream_position } } #[derive(Clone, Debug)] pub(crate) struct StreamState { /// Current Data Run we are reading from. stream_data_run: Option, /// Current relative position within the entire value, in bytes. stream_position: u64, /// Total (used) data size, in bytes. data_size: u64, } impl StreamState { pub(crate) const fn new(data_size: u64) -> Self { Self { stream_data_run: None, stream_position: 0, data_size, } } /// Returns the absolute current data seek position within the filesystem, in bytes. /// This may be `None` if: /// * The current seek position is outside the valid range, or /// * The current Data Run is a "sparse" Data Run pub(crate) fn data_position(&self) -> Option { let stream_data_run = self.stream_data_run.as_ref()?; stream_data_run.data_position() } /// Returns the total (used) data size of the value, in bytes. pub(crate) fn data_size(&self) -> u64 { self.data_size } pub(crate) fn optimize_seek(&self, pos: SeekFrom, data_size: u64) -> Result { let mut pos = self.simplify_seek(pos, data_size)?; // Translate `SeekFrom::Start(n)` into a more efficient `SeekFrom::Current` if n >= self.stream_position. // We don't need to traverse data runs from the very beginning then. if let SeekFrom::Start(n) = pos { if let Some(n_from_current) = n.checked_sub(self.stream_position()) { if let Ok(signed_n_from_current) = i64::try_from(n_from_current) { pos = SeekFrom::Current(signed_n_from_current); } } } Ok(pos) } /// Simplifies any [`SeekFrom`] to the two cases [`SeekFrom::Start(n)`] and [`SeekFrom::Current(n)`], with n >= 0. /// This is necessary, because an NTFS Data Run has necessary information for the next Data Run, but not the other way round. /// Hence, we can't efficiently move backwards. fn simplify_seek(&self, pos: SeekFrom, data_size: u64) -> Result { match pos { SeekFrom::Start(n) => { // Seek n bytes from the very beginning. return Ok(SeekFrom::Start(n)); } SeekFrom::End(n) => { if n >= 0 { if let Some(bytes_to_seek) = data_size.checked_add(n as u64) { // Seek data_size + n bytes from the very beginning. return Ok(SeekFrom::Start(bytes_to_seek)); } } else { if let Some(bytes_to_seek) = data_size.checked_sub(n.wrapping_neg() as u64) { // Seek data_size + n bytes (with n being negative) from the very beginning. return Ok(SeekFrom::Start(bytes_to_seek)); } } } SeekFrom::Current(n) => { if n >= 0 { if self.stream_position().checked_add(n as u64).is_some() { // Seek n bytes from the current position. // This is an optimization for the common case, as we don't need to traverse all // data runs from the very beginning. return Ok(SeekFrom::Current(n)); } } else { if let Some(bytes_to_seek) = self.stream_position().checked_sub(n.wrapping_neg() as u64) { // Seek stream_position + n bytes (with n being negative) from the very beginning. return Ok(SeekFrom::Start(bytes_to_seek)); } } } } Err(NtfsError::Io(io::Error::new( io::ErrorKind::InvalidInput, "invalid seek to a negative or overflowing position", ))) } /// Returns whether we read some bytes. pub(crate) fn read_data_run( &mut self, fs: &mut T, buf: &mut [u8], bytes_read: &mut usize, ) -> Result where T: Read + Seek, { // Is there a Data Run to read from? let data_run = match &mut self.stream_data_run { Some(data_run) => data_run, None => return Ok(false), }; // Have we already seeked past the size of the Data Run? if data_run.stream_position() >= data_run.len() { return Ok(false); } // We also must not read past the (used) data size of the entire value. // (remember that a Data Run only knows about its allocated size, not its used size!) let remaining_data_size = self.data_size.saturating_sub(self.stream_position); if remaining_data_size == 0 { return Ok(false); } // Read up to the buffer length or up to the (used) data size, whatever comes first. let start = *bytes_read; let remaining_buf_len = buf.len() - start; let end = start + usize::min(remaining_buf_len, remaining_data_size as usize); // Perform the actual read. let bytes_read_in_data_run = data_run.read(fs, &mut buf[start..end])?; *bytes_read += bytes_read_in_data_run; self.stream_position += bytes_read_in_data_run as u64; Ok(true) } /// Returns whether we have reached the final seek position within this Data Run and can therefore stop seeking. /// /// In all other cases, the caller should move to the next Data Run and seek again. pub(crate) fn seek_data_run( &mut self, fs: &mut T, bytes_to_seek: SeekFrom, bytes_left_to_seek: &mut u64, ) -> Result where T: Read + Seek, { // Is there a Data Run to seek in? let data_run = match &mut self.stream_data_run { Some(data_run) => data_run, None => return Ok(false), }; if *bytes_left_to_seek < data_run.remaining_len() { // We have found the right Data Run, now we have to seek inside the Data Run. // // If we were called to seek from the very beginning, we can be sure that this // Data Run is also seeked from the beginning. // Hence, we can use SeekFrom::Start and use the full u64 range. // // If we were called to seek from the current position, we have to use // SeekFrom::Current and can only use the positive part of the i64 range. // This is no problem though, as `bytes_left_to_seek` was also created from a // positive i64 value in that case. let pos = match bytes_to_seek { SeekFrom::Start(_) => SeekFrom::Start(*bytes_left_to_seek), SeekFrom::Current(_) => SeekFrom::Current(*bytes_left_to_seek as i64), _ => unreachable!(), }; data_run.seek(fs, pos)?; Ok(true) } else { // We can skip the entire Data Run. *bytes_left_to_seek -= data_run.remaining_len(); Ok(false) } } pub(crate) fn set_stream_data_run(&mut self, stream_data_run: NtfsDataRun) { self.stream_data_run = Some(stream_data_run); } pub(crate) fn set_stream_position(&mut self, stream_position: u64) { self.stream_position = stream_position; } /// Returns the current relative position within the entire value, in bytes. pub(crate) fn stream_position(&self) -> u64 { self.stream_position } }