diff options
author | Omer BenAmram <omerbenamram@gmail.com> | 2019-05-23 18:42:17 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-23 18:42:17 +0300 |
commit | 963dcdaee596a3ab69a3eeec9c3403b4f6b0047b (patch) | |
tree | bb8069806db5563b5e7034f3bd1d6d5210edd8da | |
parent | 1c874804967b3eee6f7e204e0da73f4fbef71d71 (diff) | |
parent | 92c7a80671497acad380da54f13d6ffd9d965285 (diff) |
Merge pull request #1 from omerbenamram/feature/csv
CSV Output
-rw-r--r-- | Cargo.toml | 4 | ||||
-rw-r--r-- | src/attribute/header.rs | 26 | ||||
-rw-r--r-- | src/attribute/mod.rs | 84 | ||||
-rw-r--r-- | src/attribute/raw.rs | 44 | ||||
-rw-r--r-- | src/attribute/x10.rs | 21 | ||||
-rw-r--r-- | src/attribute/x30.rs | 53 | ||||
-rw-r--r-- | src/attribute/x40.rs | 40 | ||||
-rw-r--r-- | src/attribute/x80.rs | 31 | ||||
-rw-r--r-- | src/attribute/x90.rs | 31 | ||||
-rw-r--r-- | src/bin/mft_dump.rs | 83 | ||||
-rw-r--r-- | src/csv.rs | 113 | ||||
-rw-r--r-- | src/entry.rs | 189 | ||||
-rw-r--r-- | src/err.rs | 4 | ||||
-rw-r--r-- | src/lib.rs | 5 | ||||
-rw-r--r-- | src/mft.rs | 47 |
15 files changed, 562 insertions, 213 deletions
@@ -7,7 +7,7 @@ license = "MIT/Apache-2.0" readme = "README.md" version = "0.1.2-alpha.0" -authors = ["Omer Ben-Amram <omerbenamram@gmail.com>", "matthew seyer <matthew.seyer@gmail.com>"] +authors = ["Omer Ben-Amram <omerbenamram@gmail.com>"] edition = "2018" [dependencies] @@ -18,12 +18,14 @@ byteorder = "1.3.1" bitflags = "1.0.4" serde = {version = "1.0.91", features = ["derive"]} serde_json = "1.0.39" +csv = "1.0.7" env_logger = "0.6.1" snafu = {version="0.3.1", features = ["backtraces", "rust_1_30"]} num-traits = "0.2" num-derive = "0.2" winstructs = {git = "https://github.com/omerbenamram/winstructs.git", branch = "master"} cached = "0.8.0" +itertools = "0.8.0" [dependencies.chrono] version = "0.4.6" diff --git a/src/attribute/header.rs b/src/attribute/header.rs index 4920e9a..3d3121d 100644 --- a/src/attribute/header.rs +++ b/src/attribute/header.rs @@ -1,4 +1,4 @@ -use crate::attribute::{AttributeDataFlags, AttributeType}; +use crate::attribute::{AttributeDataFlags, MftAttributeType}; use crate::err::{self, Result}; use crate::utils::read_utf16_string; use crate::ReadSeek; @@ -11,8 +11,8 @@ use std::io::Read; /// Represents the union defined in /// https://docs.microsoft.com/en-us/windows/desktop/devnotes/attribute-record-header #[derive(Serialize, Clone, Debug)] -pub struct AttributeHeader { - pub type_code: AttributeType, +pub struct MftAttributeHeader { + pub type_code: MftAttributeType, /// The size of the attribute record, in bytes. /// This value reflects the required size for the record variant and is always rounded to the nearest quadword boundary. pub record_length: u32, @@ -39,17 +39,17 @@ pub enum ResidentialHeader { NonResident(NonResidentHeader), } -impl AttributeHeader { +impl MftAttributeHeader { /// Tries to read an AttributeHeader from the stream. /// Will return `None` if the type code is $END. - pub fn from_stream<S: ReadSeek>(stream: &mut S) -> Result<Option<AttributeHeader>> { + pub fn from_stream<S: ReadSeek>(stream: &mut S) -> Result<Option<MftAttributeHeader>> { let type_code_value = stream.read_u32::<LittleEndian>()?; if type_code_value == 0xFFFF_FFFF { return Ok(None); } - let type_code = match AttributeType::from_u32(type_code_value) { + let type_code = match MftAttributeType::from_u32(type_code_value) { Some(attribute_type) => attribute_type, None => { return err::UnknownAttributeType { @@ -94,7 +94,7 @@ impl AttributeHeader { String::new() }; - Ok(Some(AttributeHeader { + Ok(Some(MftAttributeHeader { type_code, record_length: attribute_size, form_code: resident_flag, @@ -190,8 +190,8 @@ impl NonResidentHeader { #[cfg(test)] mod tests { - use super::AttributeHeader; - use crate::attribute::AttributeType; + use super::MftAttributeHeader; + use crate::attribute::MftAttributeType; use std::io::Cursor; #[test] @@ -203,13 +203,13 @@ mod tests { let mut cursor = Cursor::new(raw); - let attribute_header = AttributeHeader::from_stream(&mut cursor) + let attribute_header = MftAttributeHeader::from_stream(&mut cursor) .expect("Should not be $End") .expect("Shold parse correctly"); assert_eq!( attribute_header.type_code, - AttributeType::StandardInformation + MftAttributeType::StandardInformation ); assert_eq!(attribute_header.record_length, 96); assert_eq!(attribute_header.form_code, 0); @@ -230,11 +230,11 @@ mod tests { let mut cursor = Cursor::new(raw); - let attribute_header = AttributeHeader::from_stream(&mut cursor) + let attribute_header = MftAttributeHeader::from_stream(&mut cursor) .expect("Should not be $End") .expect("Shold parse correctly"); - assert_eq!(attribute_header.type_code, AttributeType::DATA); + assert_eq!(attribute_header.type_code, MftAttributeType::DATA); assert_eq!(attribute_header.record_length, 80); assert_eq!(attribute_header.form_code, 1); assert_eq!(attribute_header.name_size, 0); diff --git a/src/attribute/mod.rs b/src/attribute/mod.rs index 8e6b100..8b3acde 100644 --- a/src/attribute/mod.rs +++ b/src/attribute/mod.rs @@ -2,42 +2,84 @@ pub mod header; pub mod raw; pub mod x10; pub mod x30; +pub mod x40; +pub mod x80; +pub mod x90; - -use crate::impl_serialize_for_bitflags; +use crate::err::Result; +use crate::{impl_serialize_for_bitflags, ReadSeek}; use bitflags::bitflags; -use num_traits::FromPrimitive; use crate::attribute::raw::RawAttribute; use crate::attribute::x10::StandardInfoAttr; use crate::attribute::x30::FileNameAttr; -use crate::attribute::header::AttributeHeader; - -use serde::{Serialize}; - +use crate::attribute::header::{MftAttributeHeader, ResidentHeader}; +use crate::attribute::x40::ObjectIdAttr; +use crate::attribute::x80::DataAttr; +use crate::attribute::x90::IndexRootAttr; +use serde::Serialize; #[derive(Serialize, Clone, Debug)] -pub struct Attribute { - pub header: AttributeHeader, +pub struct MftAttribute { + pub header: MftAttributeHeader, pub data: MftAttributeContent, } +impl MftAttributeContent { + pub fn from_stream_resident<S: ReadSeek>( + stream: &mut S, + header: &MftAttributeHeader, + resident: &ResidentHeader, + ) -> Result<Self> { + match header.type_code { + MftAttributeType::StandardInformation => Ok(MftAttributeContent::AttrX10( + StandardInfoAttr::from_reader(stream)?, + )), + MftAttributeType::FileName => Ok(MftAttributeContent::AttrX30( + FileNameAttr::from_stream(stream)?, + )), + // Resident DATA + MftAttributeType::DATA => Ok(MftAttributeContent::AttrX80(DataAttr::from_stream( + stream, + resident.data_size as usize, + )?)), + // Always Resident + MftAttributeType::ObjectId => Ok(MftAttributeContent::AttrX40( + ObjectIdAttr::from_stream(stream, resident.data_size as usize)?, + )), + // Always Resident + MftAttributeType::IndexRoot => Ok(MftAttributeContent::AttrX90( + IndexRootAttr::from_stream(stream)?, + )), + // An unparsed resident attribute + _ => Ok(MftAttributeContent::Raw(RawAttribute::from_stream( + stream, + header.type_code.clone(), + resident.data_size as usize, + )?)), + } + } +} + #[derive(Serialize, Clone, Debug)] #[serde(untagged)] pub enum MftAttributeContent { Raw(RawAttribute), + AttrX80(DataAttr), AttrX10(StandardInfoAttr), AttrX30(FileNameAttr), + AttrX40(ObjectIdAttr), + AttrX90(IndexRootAttr), /// Empty - used when data is non resident. None, } /// MFT Possible attribute types, from https://docs.microsoft.com/en-us/windows/desktop/devnotes/attribute-list-entry -#[derive(Serialize, Debug, Clone, FromPrimitive, PartialOrd, PartialEq)] +#[derive(Serialize, Debug, Clone, FromPrimitive, ToPrimitive, PartialOrd, PartialEq)] #[repr(u32)] -pub enum AttributeType { +pub enum MftAttributeType { /// File attributes (such as read-only and archive), time stamps (such as file creation and last modified), and the hard link count. StandardInformation = 0x10_u32, /// A list of attributes that make up the file and the file reference of the MFT file record in which each attribute is located. @@ -65,6 +107,26 @@ pub enum AttributeType { } bitflags! { + pub struct FileAttributeFlags: u32 { + const FILE_ATTRIBUTE_READONLY = 0x0000_0001; + const FILE_ATTRIBUTE_HIDDEN = 0x0000_0002; + const FILE_ATTRIBUTE_SYSTEM = 0x0000_0004; + const FILE_ATTRIBUTE_ARCHIVE = 0x0000_0020; + const FILE_ATTRIBUTE_DEVICE = 0x0000_0040; + const FILE_ATTRIBUTE_NORMAL = 0x0000_0080; + const FILE_ATTRIBUTE_TEMPORARY = 0x0000_0100; + const FILE_ATTRIBUTE_SPARSE_FILE = 0x0000_0200; + const FILE_ATTRIBUTE_REPARSE_POINT = 0x0000_0400; + const FILE_ATTRIBUTE_COMPRESSED = 0x0000_0800; + const FILE_ATTRIBUTE_OFFLINE = 0x0000_1000; + const FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 0x0000_2000; + const FILE_ATTRIBUTE_ENCRYPTED = 0x0000_4000; + } +} + +impl_serialize_for_bitflags! {FileAttributeFlags} + +bitflags! { #[derive(Default)] pub struct AttributeDataFlags: u16 { const IS_COMPRESSED = 0x0001; diff --git a/src/attribute/raw.rs b/src/attribute/raw.rs index bfc7fac..1e24969 100644 --- a/src/attribute/raw.rs +++ b/src/attribute/raw.rs @@ -1,15 +1,37 @@ -use crate::utils; -use serde::ser; +use crate::attribute::MftAttributeType; +use crate::err::{self, Result}; +use crate::{utils, ReadSeek}; +use serde::{ser, Serialize}; +use snafu::ResultExt; /// Placeholder attribute for currently unparsed attributes. -#[derive(Clone, Debug)] -pub struct RawAttribute(pub Vec<u8>); - -impl ser::Serialize for RawAttribute { - fn serialize<S>(&self, serializer: S) -> ::std::result::Result<S::Ok, S::Error> - where - S: ser::Serializer, - { - serializer.serialize_str(&utils::to_hex_string(&self.0).to_string()) +#[derive(Serialize, Clone, Debug)] +pub struct RawAttribute { + pub attribute_type: MftAttributeType, + #[serde(serialize_with = "data_as_hex")] + pub data: Vec<u8>, +} + +impl RawAttribute { + pub fn from_stream<S: ReadSeek>( + stream: &mut S, + attribute_type: MftAttributeType, + data_size: usize, + ) -> Result<Self> { + let mut data = vec![0_u8; data_size]; + + stream.read_exact(&mut data).context(err::IoError)?; + + Ok(RawAttribute { + attribute_type, + data, + }) } } + +fn data_as_hex<S>(x: &Vec<u8>, s: S) -> std::result::Result<S::Ok, S::Error> +where + S: ser::Serializer, +{ + s.serialize_str(&utils::to_hex_string(x)) +} diff --git a/src/attribute/x10.rs b/src/attribute/x10.rs index 7e08ec0..ce7a610 100644 --- a/src/attribute/x10.rs +++ b/src/attribute/x10.rs @@ -1,6 +1,7 @@ +use crate::attribute::FileAttributeFlags; use crate::err::{self, Result}; - use crate::ReadSeek; + use byteorder::{LittleEndian, ReadBytesExt}; use chrono::{DateTime, Utc}; use log::trace; @@ -14,7 +15,8 @@ pub struct StandardInfoAttr { pub modified: DateTime<Utc>, pub mft_modified: DateTime<Utc>, pub accessed: DateTime<Utc>, - pub file_flags: u32, + /// DOS File Permissions + pub file_flags: FileAttributeFlags, pub max_version: u32, pub version: u32, pub class_id: u32, @@ -33,8 +35,8 @@ impl StandardInfoAttr { /// /// ``` /// use mft::attribute::x10::StandardInfoAttr; + /// use mft::attribute::FileAttributeFlags; /// # use std::io::Cursor; - /// # fn test_standard_information() { /// let attribute_buffer: &[u8] = &[ /// 0x2F,0x6D,0xB6,0x6F,0x0C,0x97,0xCE,0x01,0x56,0xCD,0x1A,0x75,0x73,0xB5,0xCE,0x01, /// 0x56,0xCD,0x1A,0x75,0x73,0xB5,0xCE,0x01,0x56,0xCD,0x1A,0x75,0x73,0xB5,0xCE,0x01, @@ -45,18 +47,17 @@ impl StandardInfoAttr { /// /// let attribute = StandardInfoAttr::from_reader(&mut Cursor::new(attribute_buffer)).unwrap(); /// - /// assert_eq!(attribute.created.timestamp(), 130207518909951279); - /// assert_eq!(attribute.modified.timestamp(), 130240946730880342); - /// assert_eq!(attribute.mft_modified.timestamp(), 130240946730880342); - /// assert_eq!(attribute.accessed.timestamp(), 130240946730880342); - /// assert_eq!(attribute.file_flags, 32); + /// assert_eq!(attribute.created.timestamp(), 1376278290); + /// assert_eq!(attribute.modified.timestamp(), 1379621073); + /// assert_eq!(attribute.mft_modified.timestamp(), 1379621073); + /// assert_eq!(attribute.accessed.timestamp(), 1379621073); + /// assert_eq!(attribute.file_flags.bits(), 32); /// assert_eq!(attribute.max_version, 0); /// assert_eq!(attribute.version, 0); /// assert_eq!(attribute.class_id, 0); /// assert_eq!(attribute.security_id, 1456); /// assert_eq!(attribute.quota, 0); /// assert_eq!(attribute.usn, 8768215144); - /// # } /// ``` pub fn from_reader<S: ReadSeek>(reader: &mut S) -> Result<StandardInfoAttr> { trace!("Offset {}: StandardInfoAttr", reader.tell()?); @@ -78,7 +79,7 @@ impl StandardInfoAttr { modified, mft_modified, accessed, - file_flags: reader.read_u32::<LittleEndian>()?, + file_flags: FileAttributeFlags::from_bits_truncate(reader.read_u32::<LittleEndian>()?), max_version: reader.read_u32::<LittleEndian>()?, version: reader.read_u32::<LittleEndian>()?, class_id: reader.read_u32::<LittleEndian>()?, diff --git a/src/attribute/x30.rs b/src/attribute/x30.rs index 9fa4453..1e764fb 100644 --- a/src/attribute/x30.rs +++ b/src/attribute/x30.rs @@ -1,19 +1,30 @@ +use crate::attribute::FileAttributeFlags; use crate::err::{self, Result}; -use crate::{impl_serialize_for_bitflags, ReadSeek}; +use crate::ReadSeek; use log::trace; +use snafu::OptionExt; -use bitflags::bitflags; use byteorder::{LittleEndian, ReadBytesExt}; use encoding::all::UTF_16LE; use encoding::{DecoderTrap, Encoding}; use chrono::{DateTime, Utc}; +use num_traits::FromPrimitive; use serde::Serialize; use snafu::ResultExt; use winstructs::ntfs::mft_reference::MftReference; use winstructs::timestamp::WinTimestamp; +#[derive(FromPrimitive, Serialize, Clone, Debug, PartialOrd, PartialEq)] +#[repr(u8)] +pub enum FileNamespace { + POSIX = 0, + Win32 = 1, + DOS = 2, + Win32AndDos = 3, +} + #[derive(Serialize, Clone, Debug)] pub struct FileNameAttr { pub parent: MftReference, @@ -26,30 +37,10 @@ pub struct FileNameAttr { pub flags: FileAttributeFlags, pub reparse_value: u32, pub name_length: u8, - pub namespace: u8, + pub namespace: FileNamespace, pub name: String, } -bitflags! { - pub struct FileAttributeFlags: u32 { - const FILE_ATTRIBUTE_READONLY = 0x0000_0001; - const FILE_ATTRIBUTE_HIDDEN = 0x0000_0002; - const FILE_ATTRIBUTE_SYSTEM = 0x0000_0004; - const FILE_ATTRIBUTE_ARCHIVE = 0x0000_0020; - const FILE_ATTRIBUTE_DEVICE = 0x0000_0040; - const FILE_ATTRIBUTE_NORMAL = 0x0000_0080; - const FILE_ATTRIBUTE_TEMPORARY = 0x0000_0100; - const FILE_ATTRIBUTE_SPARSE_FILE = 0x0000_0200; - const FILE_ATTRIBUTE_REPARSE_POINT = 0x0000_0400; - const FILE_ATTRIBUTE_COMPRESSED = 0x0000_0800; - const FILE_ATTRIBUTE_OFFLINE = 0x0000_1000; - const FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 0x0000_2000; - const FILE_ATTRIBUTE_ENCRYPTED = 0x0000_4000; - } -} - -impl_serialize_for_bitflags! {FileAttributeFlags} - impl FileNameAttr { /// Parse a Filename attrbiute buffer. /// @@ -60,7 +51,6 @@ impl FileNameAttr { /// ``` /// use mft::attribute::x30::FileNameAttr; /// # use std::io::Cursor; - /// # fn test_filename_attribute() { /// let attribute_buffer: &[u8] = &[ /// 0x05,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0xD5,0x2D,0x48,0x58,0x43,0x5F,0xCE,0x01, /// 0xD5,0x2D,0x48,0x58,0x43,0x5F,0xCE,0x01,0xD5,0x2D,0x48,0x58,0x43,0x5F,0xCE,0x01, @@ -72,19 +62,18 @@ impl FileNameAttr { /// /// let attribute = FileNameAttr::from_stream(&mut Cursor::new(attribute_buffer)).unwrap(); /// - /// assert_eq!(attribute.parent.entry, 1407374883553285); - /// assert_eq!(attribute.created.timestamp(), 130146182088895957); - /// assert_eq!(attribute.modified.timestamp(), 130146182088895957); - /// assert_eq!(attribute.mft_modified.timestamp(), 130146182088895957); - /// assert_eq!(attribute.accessed.timestamp(), 130146182088895957); + /// assert_eq!(attribute.parent.entry, 5); + /// assert_eq!(attribute.created.timestamp(), 1370144608); + /// assert_eq!(attribute.modified.timestamp(), 1370144608); + /// assert_eq!(attribute.mft_modified.timestamp(), 1370144608); + /// assert_eq!(attribute.accessed.timestamp(), 1370144608); /// assert_eq!(attribute.logical_size, 67108864); /// assert_eq!(attribute.physical_size, 67108864); - /// assert_eq!(attribute.flags, 6); + /// assert_eq!(attribute.flags.bits(), 6); /// assert_eq!(attribute.reparse_value, 0); /// assert_eq!(attribute.name_length, 8); /// assert_eq!(attribute.namespace, 3); /// assert_eq!(attribute.name, "$LogFile"); - /// # } /// ``` pub fn from_stream<S: ReadSeek>(stream: &mut S) -> Result<FileNameAttr> { trace!("Offset {}: FilenameAttr", stream.tell()?); @@ -108,6 +97,8 @@ impl FileNameAttr { let reparse_value = stream.read_u32::<LittleEndian>()?; let name_length = stream.read_u8()?; let namespace = stream.read_u8()?; + let namespace = + FileNamespace::from_u8(namespace).context(err::UnknownNamespace { namespace })?; let mut name_buffer = vec![0; (name_length as usize * 2) as usize]; stream.read_exact(&mut name_buffer)?; diff --git a/src/attribute/x40.rs b/src/attribute/x40.rs new file mode 100644 index 0000000..beea829 --- /dev/null +++ b/src/attribute/x40.rs @@ -0,0 +1,40 @@ +use crate::err::{self, Result}; +use crate::ReadSeek; +use serde::Serialize; +use snafu::ResultExt; +use winstructs::guid::Guid; + +/// $Data Attribute +#[derive(Serialize, Clone, Debug)] +pub struct ObjectIdAttr { + /// Unique Id assigned to file + pub object_id: Guid, + /// Volume where file was created + pub birth_volume_id: Option<Guid>, + /// Original Object Id of file + pub birth_object_id: Option<Guid>, + /// Domain in which object was created + pub domain_id: Option<Guid>, +} + +impl ObjectIdAttr { + /// Data size should be either 16 or 64 + pub fn from_stream<S: ReadSeek>(stream: &mut S, data_size: usize) -> Result<ObjectIdAttr> { + let object_id = Guid::from_stream(stream).context(err::FailedToReadGuid)?; + let (birth_volume_id, birth_object_id, domain_id) = if data_size == 64 { + let g1 = Guid::from_stream(stream).context(err::FailedToReadGuid)?; + let g2 = Guid::from_stream(stream).context(err::FailedToReadGuid)?; + let g3 = Guid::from_stream(stream).context(err::FailedToReadGuid)?; + (Some(g1), Some(g2), Some(g3)) + } else { + (None, None, None) + }; + + Ok(ObjectIdAttr { + object_id, + birth_volume_id, + birth_object_id, + domain_id, + }) + } +} diff --git a/src/attribute/x80.rs b/src/attribute/x80.rs new file mode 100644 index 0000000..f194a0e --- /dev/null +++ b/src/attribute/x80.rs @@ -0,0 +1,31 @@ +use crate::err::{self, Result}; +use crate::{utils, ReadSeek}; +use serde::ser; +use snafu::ResultExt; + +/// $Data Attribute +#[derive(Clone, Debug)] +pub struct DataAttr(Vec<u8>); + +impl DataAttr { + pub fn from_stream<S: ReadSeek>(stream: &mut S, data_size: usize) -> Result<DataAttr> { + let mut data = vec![0_u8; data_size]; + + stream.read_exact(&mut data).context(err::IoError)?; + + Ok(DataAttr(data)) + } + + pub fn data(&self) -> &[u8] { + &self.0 + } +} + +impl ser::Serialize for DataAttr { + fn serialize<S>(&self, serializer: S) -> ::std::result::Result<S::Ok, S::Error> + where + S: ser::Serializer, + { + serializer.serialize_str(&utils::to_hex_string(&self.0).to_string()) + } +} diff --git a/src/attribute/x90.rs b/src/attribute/x90.rs new file mode 100644 index 0000000..80379cc --- /dev/null +++ b/src/attribute/x90.rs @@ -0,0 +1,31 @@ +use crate::err::Result; +use crate::ReadSeek; +use byteorder::{LittleEndian, ReadBytesExt}; + +use serde::Serialize; + +/// $IndexRoot Attribute +#[derive(Serialize, Clone, Debug)] +pub struct IndexRootAttr { + /// Unique Id assigned to file + pub attribute_type: u32, + /// Collation rule used to sort the index entries. + /// If type is $FILENAME, this must be COLLATION_FILENAME + pub collation_rule: u32, + /// The index entry size + pub index_entry_size: u32, + /// The index entry number of cluster blocks + pub index_entry_number_of_cluster_blocks: u32, +} + +impl IndexRootAttr { + /// Data size should be either 16 or 64 + pub fn from_stream<S: ReadSeek>(stream: &mut S) -> Result<IndexRootAttr> { + Ok(IndexRootAttr { + attribute_type: stream.read_u32::<LittleEndian>()?, + collation_rule: stream.read_u32::<LittleEndian>()?, + index_entry_size: stream.read_u32::<LittleEndian>()?, + index_entry_number_of_cluster_blocks: stream.read_u32::<LittleEndian>()?, + }) + } +} diff --git a/src/bin/mft_dump.rs b/src/bin/mft_dump.rs index a3fe3dd..fdb1f26 100644 --- a/src/bin/mft_dump.rs +++ b/src/bin/mft_dump.rs @@ -3,6 +3,11 @@ use env_logger; use log::info; use mft::mft::MftParser; +use mft::{MftEntry, ReadSeek}; + +use mft::csv::FlatMftEntryWithName; +use std::io; +use std::io::Write; use std::path::PathBuf; enum OutputFormat { @@ -10,9 +15,20 @@ enum OutputFormat { CSV, } +impl OutputFormat { + pub fn from_str(s: &str) -> Option<Self> { + match s { + "json" => Some(OutputFormat::JSON), + "csv" => Some(OutputFormat::CSV), + _ => None, + } + } +} + struct MftDump { filepath: PathBuf, indent: bool, + output_format: OutputFormat, } impl MftDump { @@ -20,9 +36,34 @@ impl MftDump { MftDump { filepath: PathBuf::from(matches.value_of("INPUT").expect("Required argument")), indent: !matches.is_present("no-indent"), + output_format: OutputFormat::from_str( + matches.value_of("output-format").unwrap_or_default(), + ) + .expect("Validated with clap default values"), } } + pub fn print_json_entry(&self, entry: &MftEntry) { + let json_str = if self.indent { + serde_json::to_string_pretty(&entry).expect("It should be valid UTF-8") + } else { + serde_json::to_string(&entry).expect("It should be valid UTF-8") + }; + + println!("{}", json_str); + } + + pub fn print_csv_entry<W: Write>( + &self, + entry: &MftEntry, + parser: &mut MftParser<impl ReadSeek>, + writer: &mut csv::Writer<W>, + ) { + let flat_entry = FlatMftEntryWithName::from_entry(&entry, parser); + + writer.serialize(flat_entry).expect("Writing to CSV failed"); + } + pub fn parse_file(&self) { info!("Opening file {:?}", &self.filepath); let mut mft_handler = match MftParser::from_path(&self.filepath) { @@ -36,21 +77,32 @@ impl MftDump { } }; - for (i, entry) in mft_handler.iter_entries().enumerate() { - match entry { - Ok(mft_entry) => { - let json_str = if self.indent { - serde_json::to_string_pretty(&mft_entry).unwrap() - } else { - serde_json::to_string(&mft_entry).unwrap() - }; + let mut csv_writer = match self.output_format { + OutputFormat::CSV => Some(csv::Writer::from_writer(io::stdout())), + _ => None, + }; - println!("{}", json_str); - } + let number_of_entries = mft_handler.get_entry_count(); + for i in 0..number_of_entries { + let entry = mft_handler.get_entry(i); + + let entry = match entry { + Ok(entry) => entry, Err(error) => { eprintln!("Failed to parse MFT entry {}, failed with: [{}]", i, error); continue; } + }; + + match self.output_format { + OutputFormat::JSON => self.print_json_entry(&entry), + OutputFormat::CSV => self.print_csv_entry( + &entry, + &mut mft_handler, + csv_writer + .as_mut() + .expect("CSV Writer is for OutputFormat::CSV"), + ), } } } @@ -68,7 +120,16 @@ fn main() { Arg::with_name("no-indent") .long("--no-indent") .takes_value(false) - .help("When set, output will not be indented."), + .help("When set, output will not be indented (works only with JSON output)."), + ) + .arg( + Arg::with_name("output-format") + .short("-o") + .long("--output-format") + .takes_value(true) + .possible_values(&["csv", "json"]) + .default_value("json") + .help("Output format."), ) .get_matches(); diff --git a/src/csv.rs b/src/csv.rs new file mode 100644 index 0000000..04d96f5 --- /dev/null +++ b/src/csv.rs @@ -0,0 +1,113 @@ +use crate::attribute::x30::FileNamespace; +use crate::attribute::{FileAttributeFlags, MftAttributeContent, MftAttributeType}; +use crate::entry::EntryFlags; +use crate::{MftAttribute, MftEntry, MftParser, ReadSeek}; + +use serde::Serialize; + +use chrono::{DateTime, Utc}; +use std::path::PathBuf; + +/// Used for CSV output +#[derive(Serialize)] +#[serde(rename_all = "PascalCase")] +pub struct FlatMftEntryWithName { + pub signature: String, + + pub entry_id: u64, + pub sequence: u16, + + pub base_entry_id: u64, + pub base_entry_sequence: u16, + + pub hard_link_count: u16, + pub flags: EntryFlags, + + /// The size of the file, in bytes. + pub used_entry_size: u32, + pub total_entry_size: u32, + + /// Indicates whether the record is a directory. + pub is_a_directory: bool, + + /// Indicates whether the record has alternate data streams. + pub has_alternate_data_streams: bool, + + /// All of these fields are present for entries that have an 0x10 attribute. + pub standard_info_flags: Option<FileAttributeFlags>, + pub standard_info_last_modified: Option<DateTime<Utc>>, + pub standard_info_last_access: Option<DateTime<Utc>>, + pub standard_info_created: Option<DateTime<Utc>>, + /// All of these fields are present for entries that have an 0x30 attribute. + pub file_name_flags: Option<FileAttributeFlags>, + pub file_name_last_modified: Option<DateTime<Utc>>, + pub file_name_last_access: Option<DateTime<Utc>>, + pub file_name_created: Option<DateTime<Utc>>, + + pub full_path: PathBuf, +} + +impl FlatMftEntryWithName { + pub fn from_entry( + entry: &MftEntry, + parser: &mut MftParser<impl ReadSeek>, + ) -> FlatMftEntryWithName { + let entry_attributes: Vec<MftAttribute> = entry + .iter_attributes_matching(Some(vec![ + MftAttributeType::FileName, + MftAttributeType::StandardInformation, + MftAttributeType::DATA, + ])) + .filter_map(Result::ok) + .collect(); + + let mut file_name = None; + let mut standard_info = None; + + for attr in entry_attributes.iter() { + if let MftAttributeContent::AttrX30(data) = &attr.data { + if [FileNamespace::Win32, FileNamespace::Win32AndDos].contains(&data.namespace) { + file_name = Some(data.clone()); + break; + } + } + } + for attr in entry_attributes.iter() { + if let MftAttributeContent::AttrX10(data) = &attr.data { + standard_info = Some(data.clone()); + break; + } + } + + let has_ads = entry_attributes + .iter() + .any(|a| a.header.type_code == MftAttributeType::DATA && a.header.name_size > 0); + + FlatMftEntryWithName { + entry_id: entry.header.record_number, + signature: String::from_utf8(entry.header.signature.to_ascii_uppercase()) + .expect("It should be either FILE or BAAD (valid utf-8)"), + sequence: entry.header.sequence, + hard_link_count: entry.header.hard_link_count, + flags: entry.header.flags, + used_entry_size: entry.header.used_entry_size, + total_entry_size: entry.header.total_entry_size, + base_entry_id: entry.header.base_reference.entry, + base_entry_sequence: entry.header.base_reference.sequence, + is_a_directory: entry.is_dir(), + has_alternate_data_streams: has_ads, + standard_info_flags: standard_info.as_ref().and_then(|i| Some(i.file_flags)), + standard_info_last_modified: standard_info.as_ref().and_then(|i| Some(i.modified)), + standard_info_last_access: standard_info.as_ref().and_then(|i| Some(i.accessed)), + standard_info_created: standard_info.as_ref().and_then(|i| Some(i.created)), + file_name_flags: file_name.as_ref().and_then(|i| Some(i.flags)), + file_name_last_modified: file_name.as_ref().and_then(|i| Some(i.modified)), + file_name_last_access: file_name.as_ref().and_then(|i| Some(i.accessed)), + file_name_created: file_name.as_ref().and_then(|i| Some(i.created)), + full_path: parser + .get_full_path_for_entry(entry) + .expect("I/O Err") + .unwrap_or_default(), + } + } +} diff --git a/src/entry.rs b/src/entry.rs index 98cab38..1ed3fec 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -12,12 +12,9 @@ use bitflags::bitflags; use serde::ser::{self, SerializeStruct, Serializer}; use serde::Serialize; -use crate::attribute::header::{AttributeHeader, ResidentialHeader}; -use crate::attribute::x10::StandardInfoAttr; -use crate::attribute::{Attribute, AttributeType, MftAttributeContent}; +use crate::attribute::header::{MftAttributeHeader, ResidentialHeader}; +use crate::attribute::{MftAttribute, MftAttributeContent, MftAttributeType}; -use crate::attribute::raw::RawAttribute; -use crate::attribute::x30::FileNameAttr; use std::io::Read; use std::io::SeekFrom; use std::io::{Cursor, Seek}; @@ -36,7 +33,7 @@ impl ser::Serialize for MftEntry { S: Serializer, { let mut state = serializer.serialize_struct("Color", 2)?; - let attributes: Vec<Attribute> = self.iter_attributes().filter_map(Result::ok).collect(); + let attributes: Vec<MftAttribute> = self.iter_attributes().filter_map(Result::ok).collect(); state.serialize_field("header", &self.header)?; state.serialize_field("attributes", &attributes)?; state.end() @@ -55,11 +52,13 @@ pub struct EntryHeader { /// The update sequence array must end before the last USHORT value in the first sector. pub usa_offset: u16, pub usa_size: u16, + /// Metadata transaction journal sequence number (Reserved1 in windows docs) + /// Contains a $LogFile Sequence Number (LSN) (metz) + pub metadata_transaction_journal: u64, /// The sequence number. /// This value is incremented each time that a file record segment is freed; it is 0 if the segment is not used. /// The SequenceNumber field of a file reference must match the contents of this field; /// if they do not match, the file reference is incorrect and probably obsolete. - pub logfile_sequence_number: u64, pub sequence: u16, pub hard_link_count: u16, /// The offset of the first attribute record, in bytes. @@ -71,9 +70,8 @@ pub struct EntryHeader { /// A file reference to the base file record segment for this file. /// If this is the base file record, the value is 0. See MFT_SEGMENT_REFERENCE. pub base_reference: MftReference, - pub next_attribute_id: u16, + pub first_attribute_id: u16, pub record_number: u64, - pub entry_reference: MftReference, } bitflags! { pub struct EntryFlags: u16 { @@ -87,18 +85,12 @@ bitflags! { impl_serialize_for_bitflags! {EntryFlags} impl EntryHeader { - pub fn from_reader<R: Read>(reader: &mut R) -> Result<EntryHeader> { + /// Reads an entry from a stream, will error if the entry is empty (zeroes) + /// Since the entry id is not present in the header, it should be provided by the caller. + pub fn from_reader<R: Read>(reader: &mut R, entry_id: u64) -> Result<EntryHeader> { let mut signature = [0; 4]; reader.read_exact(&mut signature)?; - // Corrupted entry - ensure!( - &signature != b"BAAD", - err::InvalidEntrySignature { - bad_sig: signature.to_vec() - } - ); - // Empty entry ensure!( &signature != b"\x00\x00\x00\x00", @@ -112,34 +104,30 @@ impl EntryHeader { let logfile_sequence_number = reader.read_u64::<LittleEndian>()?; let sequence = reader.read_u16::<LittleEndian>()?; let hard_link_count = reader.read_u16::<LittleEndian>()?; - let fst_attr_offset = reader.read_u16::<LittleEndian>()?; + let first_attribute_offset = reader.read_u16::<LittleEndian>()?; let flags = EntryFlags::from_bits_truncate(reader.read_u16::<LittleEndian>()?); let entry_size_real = reader.read_u32::<LittleEndian>()?; let entry_size_allocated = reader.read_u32::<LittleEndian>()?; + let base_reference = MftReference::from_reader(reader).context(err::FailedToReadMftReference)?; - let next_attribute_id = reader.read_u16::<LittleEndian>()?; - - let _padding = reader.read_u16::<LittleEndian>()?; - let record_number = u64::from(reader.read_u32::<LittleEndian>()?); - let entry_reference = MftReference::new(record_number as u64, sequence); + let first_attribute_id = reader.read_u16::<LittleEndian>()?; Ok(EntryHeader { signature, usa_offset, usa_size, - logfile_sequence_number, + metadata_transaction_journal: logfile_sequence_number, sequence, hard_link_count, - first_attribute_record_offset: fst_attr_offset, + first_attribute_record_offset: first_attribute_offset, flags, used_entry_size: entry_size_real, total_entry_size: entry_size_allocated, base_reference, - next_attribute_id, - record_number, - entry_reference, + first_attribute_id, + record_number: entry_id, }) } } @@ -148,10 +136,10 @@ impl MftEntry { /// Initializes an MFT Entry from a buffer. /// Since the parser is the entity responsible for knowing the entry size, /// we take ownership of the buffer instead of trying to read it from stream. - pub fn from_buffer(mut buffer: Vec<u8>) -> Result<MftEntry> { + pub fn from_buffer(mut buffer: Vec<u8>, entry_number: u64) -> Result<MftEntry> { let mut cursor = Cursor::new(&buffer); // Get Header - let entry_header = EntryHeader::from_reader(&mut cursor)?; + let entry_header = EntryHeader::from_reader(&mut cursor, entry_number)?; trace!("Number of sectors: {:#?}", entry_header); Self::apply_fixups(&entry_header, &mut buffer)?; @@ -215,81 +203,81 @@ impl MftEntry { self.header.flags.bits() & 0x02 != 0 } - /// Returns an iterator over the attributes of the entry. - pub fn iter_attributes(&self) -> impl Iterator<Item = Result<Attribute>> + '_ { + /// Returns an iterator over all the attributes of the entry. + pub fn iter_attributes(&self) -> impl Iterator<Item = Result<MftAttribute>> + '_ { + self.iter_attributes_matching(None) + } + + /// Returns an iterator over the attributes in the list given in `types`, skips other attributes. + pub fn iter_attributes_matching( + &self, + types: Option<Vec<MftAttributeType>>, + ) -> impl Iterator<Item = Result<MftAttribute>> + '_ { let mut cursor = Cursor::new(&self.data); let mut offset = u64::from(self.header.first_attribute_record_offset); let mut exhausted = false; std::iter::from_fn(move || { - if exhausted { - return None; - } + // We use a loop here to allow skipping filtered attributes. + loop { + if exhausted { + return None; + } - match cursor - .seek(SeekFrom::Start(offset)) - .context(err::IoError) - { - Ok(_) => {} - Err(e) => { - exhausted = true; - return Some(Err(e.into())); + match cursor.seek(SeekFrom::Start(offset)).context(err::IoError) { + Ok(_) => {} + Err(e) => { + exhausted = true; + return Some(Err(e.into())); + } + }; + + let header = MftAttributeHeader::from_stream(&mut cursor); + + // Unexpected I/O error, return err and stop iterating + let header = match header { + Ok(h) => h, + Err(e) => { + exhausted = true; + return Some(Err(e)); + } + }; + + let header = match header { + Some(attribute_header) => attribute_header, + // Header is 0xFFFF_FFFF, we are finished + None => return None, + }; + + // Increment offset before moving header. + offset += u64::from(header.record_length); + + // Skip attribute if filtered + if let Some(filter) = &types { + if !filter.contains(&header.type_code) { + continue; + } } - }; - - match AttributeHeader::from_stream(&mut cursor) { - Ok(maybe_header) => match maybe_header { - Some(header) => { - // Increment offset before moving header. - offset += u64::from(header.record_length); - - // Check if the header is resident, and if it is, read the attribute content. - match header.residential_header { - ResidentialHeader::Resident(ref resident) => match header.type_code { - AttributeType::StandardInformation => { - match StandardInfoAttr::from_reader(&mut cursor) { - Ok(content) => Some(Ok(Attribute { - header, - data: MftAttributeContent::AttrX10(content), - })), - Err(e) => Some(Err(e)), - } - } - AttributeType::FileName => { - match FileNameAttr::from_stream(&mut cursor) { - Ok(content) => Some(Ok(Attribute { - header, - data: MftAttributeContent::AttrX30(content), - })), - Err(e) => Some(Err(e.into())), - } - } - _ => { - let mut data = vec![0_u8; resident.data_size as usize]; - - match cursor.read_exact(&mut data).context(err::IoError) { - Ok(_) => {} - Err(err) => return Some(Err(err.into())), - }; - - Some(Ok(Attribute { - header, - data: MftAttributeContent::Raw(RawAttribute(data)), - })) - } - }, - ResidentialHeader::NonResident(_) => Some(Ok(Attribute { - header, - data: MftAttributeContent::None, - })), + + // Check if the header is resident, and if it is, read the attribute content. + let attribute_content = match header.residential_header { + ResidentialHeader::Resident(ref resident) => { + match MftAttributeContent::from_stream_resident( + &mut cursor, + &header, + resident, + ) { + Ok(content) => content, + Err(e) => return Some(Err(e)), } } - None => None, - }, - Err(e) => { - exhausted = true; - Some(Err(e)) - } + ResidentialHeader::NonResident(_) => MftAttributeContent::None, + }; + + return Some(Ok(MftAttribute { + header, + data: attribute_content, + })); } }) } @@ -309,12 +297,13 @@ mod tests { 0x00, 0x00, 0xD5, 0x95, 0x00, 0x00, 0x53, 0x57, 0x81, 0x37, 0x00, 0x00, 0x00, 0x00, ]; - let entry_header = EntryHeader::from_reader(&mut Cursor::new(header_buffer)).unwrap(); + let entry_header = + EntryHeader::from_reader(&mut Cursor::new(header_buffer), 38357).unwrap(); assert_eq!(&entry_header.signature, b"FILE"); assert_eq!(entry_header.usa_offset, 48); assert_eq!(entry_header.usa_size, 3); - assert_eq!(entry_header.logfile_sequence_number, 53_762_438_092); + assert_eq!(entry_header.metadata_transaction_journal, 53_762_438_092); assert_eq!(entry_header.sequence, 5); assert_eq!(entry_header.hard_link_count, 1); assert_eq!(entry_header.first_attribute_record_offset, 56); @@ -322,7 +311,7 @@ mod tests { assert_eq!(entry_header.used_entry_size, 840); assert_eq!(entry_header.total_entry_size, 1024); assert_eq!(entry_header.base_reference.entry, 0); - assert_eq!(entry_header.next_attribute_id, 6); + assert_eq!(entry_header.first_attribute_id, 6); assert_eq!(entry_header.record_number, 38357); } } @@ -23,6 +23,8 @@ pub enum Error { InvalidEntrySignature { bad_sig: Vec<u8> }, #[snafu(display("Unknown `AttributeType`: {:04X}", attribute_type))] UnknownAttributeType { attribute_type: u32 }, + #[snafu(display("Unknown filename namespace {}", namespace))] + UnknownNamespace { namespace: u8 }, #[snafu(display("Unhandled resident flag: {} (offset: {})", flag, offset))] UnhandledResidentFlag { flag: u8, offset: u64 }, #[snafu(display( @@ -40,6 +42,8 @@ pub enum Error { FailedToReadMftReference { source: winstructs::err::Error }, #[snafu(display("Failed to read WindowsTime: `{}`", source))] FailedToReadWindowsTime { source: winstructs::err::Error }, + #[snafu(display("Failed to read GUID: `{}`", source))] + FailedToReadGuid { source: winstructs::err::Error }, #[snafu(display("An unexpected error has occurred: {}", detail))] Any { detail: String }, } @@ -3,14 +3,15 @@ extern crate num_derive; pub use attribute::x10::StandardInfoAttr; pub use attribute::x30::FileNameAttr; -pub use attribute::Attribute; +pub use attribute::MftAttribute; +pub use crate::mft::MftParser; pub use entry::{EntryHeader, MftEntry}; -pub use mft::MftParser; use std::io::{self, Read, Seek, SeekFrom}; pub mod attribute; +pub mod csv; pub mod entry; pub mod err; pub mod mft; @@ -7,6 +7,7 @@ use snafu::ResultExt; use crate::attribute::MftAttributeContent::AttrX30; +use crate::attribute::x30::FileNamespace; use cached::stores::SizedCache; use cached::Cached; use std::fs::{self, File}; @@ -31,7 +32,7 @@ impl MftParser<BufReader<File>> { let mft_fh = File::open(f).context(err::FailedToOpenFile { path: f.to_owned() })?; let size = fs::metadata(f)?.len(); - Self::from_read_seek(BufReader::with_capacity(4096, mft_fh), size) + Self::from_read_seek(BufReader::with_capacity(4096, mft_fh), Some(size)) } } @@ -42,14 +43,20 @@ impl MftParser<Cursor<Vec<u8>>> { let size = buffer.len() as u64; let cursor = Cursor::new(buffer); - Self::from_read_seek(cursor, size) + Self::from_read_seek(cursor, Some(size)) } } impl<T: ReadSeek> MftParser<T> { - pub fn from_read_seek(mut data: T, size: u64) -> Result<Self> { + pub fn from_read_seek(mut data: T, size: Option<u64>) -> Result<Self> { // We use the first entry to guess the entry size for all the other records. - let first_entry = EntryHeader::from_reader(&mut data)?; + let first_entry = EntryHeader::from_reader(&mut data, 0)?; + + let size = match size { + Some(sz) => sz, + None => data.seek(SeekFrom::End(0))?, + }; + data.seek(SeekFrom::Start(0))?; Ok(Self { @@ -74,31 +81,29 @@ impl<T: ReadSeek> MftParser<T> { self.data.read_exact(&mut entry_buffer)?; - Ok(MftEntry::from_buffer(entry_buffer)?) + Ok(MftEntry::from_buffer(entry_buffer, entry_number)?) } /// Iterates over all the entries in the MFT. pub fn iter_entries(&mut self) -> impl Iterator<Item = Result<MftEntry>> + '_ { let total_entries = self.get_entry_count(); - let mut count = 0; - std::iter::from_fn(move || { - if count == total_entries { - None - } else { - count += 1; - Some(self.get_entry(count)) - } - }) + (0..total_entries).map(move |i| self.get_entry(i)) } /// Gets the full path for an entry. /// Caches computations. pub fn get_full_path_for_entry(&mut self, entry: &MftEntry) -> Result<Option<PathBuf>> { - let entry_id = entry.header.entry_reference.entry; + let entry_id = entry.header.record_number; for attribute in entry.iter_attributes().filter_map(|a| a.ok()) { if let AttrX30(filename_header) = attribute.data { + if ![FileNamespace::Win32, FileNamespace::Win32AndDos] + .contains(&filename_header.namespace) + { + continue; + } + let parent_entry_id = filename_header.parent.entry; // MFT entry 5 is the root path. @@ -125,13 +130,9 @@ impl<T: ReadSeek> MftParser<T> { let path = match self.get_entry(parent_entry_id).ok() { Some(parent) => match self.get_full_path_for_entry(&parent) { Ok(Some(path)) => path, - _ => { - return err::Any { - detail: "Unexpected missing parent.\ - This is a bug, please report it at report at https://github.com/omerbenamram/mft/issues", - } - .fail() - } + // I have a parent, which doesn't have a filename attribute. + // Default to root. + _ => PathBuf::new(), }, // Parent is maybe corrupted or incomplete, use a sentinel instead. None => PathBuf::from("[Unknown]"), @@ -146,7 +147,7 @@ impl<T: ReadSeek> MftParser<T> { let orphan = PathBuf::from("[Orphaned]").join(filename_header.name); self.entries_cache - .cache_set(entry.header.entry_reference.entry, orphan.clone()); + .cache_set(entry.header.record_number, orphan.clone()); return Ok(Some(orphan)); } } |