From 2efa10565ac395d2ce9a679ead46e70fb2f963eb Mon Sep 17 00:00:00 2001 From: Igor Pavlov Date: Sun, 30 Apr 2017 00:00:00 +0000 Subject: 17.00 --- CPP/7zip/Archive/Zip/ZipAddCommon.cpp | 197 ++- CPP/7zip/Archive/Zip/ZipAddCommon.h | 9 +- CPP/7zip/Archive/Zip/ZipCompressionMode.h | 20 +- CPP/7zip/Archive/Zip/ZipHandler.cpp | 555 +++++--- CPP/7zip/Archive/Zip/ZipHandler.h | 10 +- CPP/7zip/Archive/Zip/ZipHandlerOut.cpp | 281 ++-- CPP/7zip/Archive/Zip/ZipHeader.h | 48 +- CPP/7zip/Archive/Zip/ZipIn.cpp | 2112 ++++++++++++++++++++--------- CPP/7zip/Archive/Zip/ZipIn.h | 162 ++- CPP/7zip/Archive/Zip/ZipItem.cpp | 98 +- CPP/7zip/Archive/Zip/ZipItem.h | 27 +- CPP/7zip/Archive/Zip/ZipOut.cpp | 211 +-- CPP/7zip/Archive/Zip/ZipOut.h | 39 +- CPP/7zip/Archive/Zip/ZipRegister.cpp | 11 +- CPP/7zip/Archive/Zip/ZipUpdate.cpp | 303 +++-- CPP/7zip/Archive/Zip/ZipUpdate.h | 13 + 16 files changed, 2848 insertions(+), 1248 deletions(-) (limited to 'CPP/7zip/Archive/Zip') diff --git a/CPP/7zip/Archive/Zip/ZipAddCommon.cpp b/CPP/7zip/Archive/Zip/ZipAddCommon.cpp index 06fbe22f..dd83f87f 100644 --- a/CPP/7zip/Archive/Zip/ZipAddCommon.cpp +++ b/CPP/7zip/Archive/Zip/ZipAddCommon.cpp @@ -17,6 +17,7 @@ #include "../../Compress/LzmaEncoder.h" #include "../../Compress/PpmdZip.h" +#include "../../Compress/XzEncoder.h" #include "../Common/InStreamWithCRC.h" @@ -26,8 +27,8 @@ namespace NArchive { namespace NZip { -static const CMethodId kMethodId_ZipBase = 0x040100; -static const CMethodId kMethodId_BZip2 = 0x040202; +using namespace NFileHeader; + static const UInt32 kLzmaPropsSize = 5; static const UInt32 kLzmaHeaderSize = 4 + kLzmaPropsSize; @@ -37,10 +38,11 @@ class CLzmaEncoder: public ICompressSetCoderProperties, public CMyUnknownImp { +public: NCompress::NLzma::CEncoder *EncoderSpec; CMyComPtr Encoder; Byte Header[kLzmaHeaderSize]; -public: + STDMETHOD(Code)(ISequentialInStream *inStream, ISequentialOutStream *outStream, const UInt64 *inSize, const UInt64 *outSize, ICompressProgressInfo *progress); STDMETHOD(SetCoderProperties)(const PROPID *propIDs, const PROPVARIANT *props, UInt32 numProps); @@ -81,7 +83,8 @@ CAddCommon::CAddCommon(const CCompressionMethodMode &options): _options(options), _copyCoderSpec(NULL), _cryptoStreamSpec(NULL), - _buf(NULL) + _buf(NULL), + _isLzmaEos(false) {} CAddCommon::~CAddCommon() @@ -114,49 +117,100 @@ HRESULT CAddCommon::CalcStreamCRC(ISequentialInStream *inStream, UInt32 &resultC } } + +HRESULT CAddCommon::Set_Pre_CompressionResult(bool seqMode, UInt64 unpackSize, CCompressingResult &opRes) const +{ + // We use Zip64, if unPackSize size is larger than 0xF8000000 to support + // cases when compressed size can be about 3% larger than uncompressed size + + const UInt32 kUnpackZip64Limit = 0xF8000000; + + opRes.UnpackSize = unpackSize; + opRes.PackSize = (UInt64)1 << 60; // we use big value to force Zip64 mode. + + if (unpackSize < kUnpackZip64Limit) + opRes.PackSize = (UInt32)0xFFFFFFFF - 1; // it will not use Zip64 for that size + + if (opRes.PackSize < unpackSize) + opRes.PackSize = unpackSize; + + Byte method = _options.MethodSequence[0]; + + if (method == NCompressionMethod::kStore && !_options.PasswordIsDefined) + opRes.PackSize = unpackSize; + + opRes.CRC = 0; + + opRes.LzmaEos = false; + + opRes.ExtractVersion = NCompressionMethod::kExtractVersion_Default; + opRes.FileTimeWasUsed = false; + + if (_options.PasswordIsDefined) + { + opRes.ExtractVersion = NCompressionMethod::kExtractVersion_ZipCrypto; + if (_options.IsAesMode) + opRes.ExtractVersion = NCompressionMethod::kExtractVersion_Aes; + else + { + if (seqMode) + opRes.FileTimeWasUsed = true; + } + } + + opRes.Method = method; + Byte ver = 0; + + switch (method) + { + case NCompressionMethod::kStore: break; + case NCompressionMethod::kDeflate: ver = NCompressionMethod::kExtractVersion_Deflate; break; + case NCompressionMethod::kDeflate64: ver = NCompressionMethod::kExtractVersion_Deflate64; break; + case NCompressionMethod::kXz : ver = NCompressionMethod::kExtractVersion_Xz; break; + case NCompressionMethod::kPPMd : ver = NCompressionMethod::kExtractVersion_PPMd; break; + case NCompressionMethod::kBZip2: ver = NCompressionMethod::kExtractVersion_BZip2; break; + case NCompressionMethod::kLZMA : + { + ver = NCompressionMethod::kExtractVersion_LZMA; + const COneMethodInfo *oneMethodMain = &_options._methods[0]; + opRes.LzmaEos = oneMethodMain->Get_Lzma_Eos(); + break; + } + } + if (opRes.ExtractVersion < ver) + opRes.ExtractVersion = ver; + + return S_OK; +} + + HRESULT CAddCommon::Compress( DECL_EXTERNAL_CODECS_LOC_VARS ISequentialInStream *inStream, IOutStream *outStream, - UInt32 /* fileTime */, + bool seqMode, UInt32 fileTime, ICompressProgressInfo *progress, CCompressingResult &opRes) { + opRes.LzmaEos = false; + if (!inStream) { // We can create empty stream here. But it was already implemented in caller code in 9.33+ return E_INVALIDARG; } - // CSequentialInStreamWithCRC *inSecCrcStreamSpec = NULL; - CInStreamWithCRC *inCrcStreamSpec = NULL; - CMyComPtr inCrcStream; - { - CMyComPtr inStream2; - + CSequentialInStreamWithCRC *inSecCrcStreamSpec = new CSequentialInStreamWithCRC; + CMyComPtr inCrcStream = inSecCrcStreamSpec; + + CMyComPtr inStream2; + if (!seqMode) inStream->QueryInterface(IID_IInStream, (void **)&inStream2); - if (inStream2) - { - inCrcStreamSpec = new CInStreamWithCRC; - inCrcStream = inCrcStreamSpec; - inCrcStreamSpec->SetStream(inStream2); - inCrcStreamSpec->Init(); - } - else - { - // we don't support stdin, since stream from stdin can require 64-bit size header - return E_NOTIMPL; - /* - inSecCrcStreamSpec = new CSequentialInStreamWithCRC; - inCrcStream = inSecCrcStreamSpec; - inSecCrcStreamSpec->SetStream(inStream); - inSecCrcStreamSpec->Init(); - */ - } - } + inSecCrcStreamSpec->SetStream(inStream); + inSecCrcStreamSpec->Init(); unsigned numTestMethods = _options.MethodSequence.Size(); - if (numTestMethods > 1 && !inCrcStreamSpec) + if (seqMode || (numTestMethods > 1 && !inStream2)) numTestMethods = 1; UInt32 crc = 0; @@ -164,20 +218,24 @@ HRESULT CAddCommon::Compress( Byte method = 0; CFilterCoder::C_OutStream_Releaser outStreamReleaser; - opRes.ExtractVersion = NFileHeader::NCompressionMethod::kExtractVersion_Default; + opRes.ExtractVersion = NCompressionMethod::kExtractVersion_Default; opRes.FileTimeWasUsed = false; for (unsigned i = 0; i < numTestMethods; i++) { - opRes.ExtractVersion = NFileHeader::NCompressionMethod::kExtractVersion_Default; - if (inCrcStreamSpec) - RINOK(inCrcStreamSpec->Seek(0, STREAM_SEEK_SET, NULL)); + opRes.LzmaEos = false; + opRes.ExtractVersion = NCompressionMethod::kExtractVersion_Default; + if (inStream2 && i != 0) + { + inSecCrcStreamSpec->Init(); + RINOK(inStream2->Seek(0, STREAM_SEEK_SET, NULL)); + } RINOK(outStream->SetSize(0)); RINOK(outStream->Seek(0, STREAM_SEEK_SET, NULL)); if (_options.PasswordIsDefined) { - opRes.ExtractVersion = NFileHeader::NCompressionMethod::kExtractVersion_ZipCrypto; + opRes.ExtractVersion = NCompressionMethod::kExtractVersion_ZipCrypto; if (!_cryptoStream) { @@ -187,7 +245,7 @@ HRESULT CAddCommon::Compress( if (_options.IsAesMode) { - opRes.ExtractVersion = NFileHeader::NCompressionMethod::kExtractVersion_Aes; + opRes.ExtractVersion = NCompressionMethod::kExtractVersion_Aes; if (!_cryptoStreamSpec->Filter) { _cryptoStreamSpec->Filter = _filterAesSpec = new NCrypto::NWzAes::CEncoder; @@ -206,23 +264,22 @@ HRESULT CAddCommon::Compress( UInt32 check; - // if (inCrcStreamSpec) + if (inStream2) { if (!crc_IsCalculated) { RINOK(CalcStreamCRC(inStream, crc)); crc_IsCalculated = true; - RINOK(inCrcStreamSpec->Seek(0, STREAM_SEEK_SET, NULL)); + RINOK(inStream2->Seek(0, STREAM_SEEK_SET, NULL)); + inSecCrcStreamSpec->Init(); } check = (crc >> 16); } - /* else { opRes.FileTimeWasUsed = true; check = (fileTime & 0xFFFF); } - */ RINOK(_filterSpec->WriteHeader_Check16(outStream, (UInt16)check)); } @@ -236,7 +293,7 @@ HRESULT CAddCommon::Compress( switch (method) { - case NFileHeader::NCompressionMethod::kStored: + case NCompressionMethod::kStore: { if (_copyCoderSpec == NULL) { @@ -256,15 +313,22 @@ HRESULT CAddCommon::Compress( { if (!_compressEncoder) { - if (method == NFileHeader::NCompressionMethod::kLZMA) + CLzmaEncoder *_lzmaEncoder = NULL; + if (method == NCompressionMethod::kLZMA) { - _compressExtractVersion = NFileHeader::NCompressionMethod::kExtractVersion_LZMA; - CLzmaEncoder *_lzmaEncoder = new CLzmaEncoder(); + _compressExtractVersion = NCompressionMethod::kExtractVersion_LZMA; + _lzmaEncoder = new CLzmaEncoder(); _compressEncoder = _lzmaEncoder; } - else if (method == NFileHeader::NCompressionMethod::kPPMd) + else if (method == NCompressionMethod::kXz) + { + _compressExtractVersion = NCompressionMethod::kExtractVersion_Xz; + NCompress::NXz::CEncoder *encoder = new NCompress::NXz::CEncoder(); + _compressEncoder = encoder; + } + else if (method == NCompressionMethod::kPPMd) { - _compressExtractVersion = NFileHeader::NCompressionMethod::kExtractVersion_PPMd; + _compressExtractVersion = NCompressionMethod::kExtractVersion_PPMd; NCompress::NPpmdZip::CEncoder *encoder = new NCompress::NPpmdZip::CEncoder(); _compressEncoder = encoder; } @@ -273,14 +337,14 @@ HRESULT CAddCommon::Compress( CMethodId methodId; switch (method) { - case NFileHeader::NCompressionMethod::kBZip2: + case NCompressionMethod::kBZip2: methodId = kMethodId_BZip2; - _compressExtractVersion = NFileHeader::NCompressionMethod::kExtractVersion_BZip2; + _compressExtractVersion = NCompressionMethod::kExtractVersion_BZip2; break; default: - _compressExtractVersion = ((method == NFileHeader::NCompressionMethod::kDeflated64) ? - NFileHeader::NCompressionMethod::kExtractVersion_Deflate64 : - NFileHeader::NCompressionMethod::kExtractVersion_Deflate); + _compressExtractVersion = ((method == NCompressionMethod::kDeflate64) ? + NCompressionMethod::kExtractVersion_Deflate64 : + NCompressionMethod::kExtractVersion_Deflate); methodId = kMethodId_ZipBase + method; break; } @@ -290,11 +354,11 @@ HRESULT CAddCommon::Compress( if (!_compressEncoder) return E_NOTIMPL; - if (method == NFileHeader::NCompressionMethod::kDeflated || - method == NFileHeader::NCompressionMethod::kDeflated64) + if (method == NCompressionMethod::kDeflate || + method == NCompressionMethod::kDeflate64) { } - else if (method == NFileHeader::NCompressionMethod::kBZip2) + else if (method == NCompressionMethod::kBZip2) { } } @@ -303,11 +367,22 @@ HRESULT CAddCommon::Compress( _compressEncoder.QueryInterface(IID_ICompressSetCoderProperties, &setCoderProps); if (setCoderProps) { - RINOK(_options.MethodInfo.SetCoderProps(setCoderProps, - _options._dataSizeReduceDefined ? &_options._dataSizeReduce : NULL)); + if (!_options._methods.IsEmpty()) + { + COneMethodInfo *oneMethodMain = &_options._methods[0]; + + RINOK(oneMethodMain->SetCoderProps(setCoderProps, + _options._dataSizeReduceDefined ? &_options._dataSizeReduce : NULL)); + } } } + if (method == NCompressionMethod::kLZMA) + _isLzmaEos = _lzmaEncoder->EncoderSpec->IsWriteEndMark(); } + + if (method == NCompressionMethod::kLZMA) + opRes.LzmaEos = _isLzmaEos; + CMyComPtr outStreamNew; if (_options.PasswordIsDefined) outStreamNew = _cryptoStream; @@ -332,18 +407,10 @@ HRESULT CAddCommon::Compress( RINOK(outStream->Seek(0, STREAM_SEEK_CUR, &opRes.PackSize)); - // if (inCrcStreamSpec) - { - opRes.CRC = inCrcStreamSpec->GetCRC(); - opRes.UnpackSize = inCrcStreamSpec->GetSize(); - } - /* - else { opRes.CRC = inSecCrcStreamSpec->GetCRC(); opRes.UnpackSize = inSecCrcStreamSpec->GetSize(); } - */ if (_options.PasswordIsDefined) { diff --git a/CPP/7zip/Archive/Zip/ZipAddCommon.h b/CPP/7zip/Archive/Zip/ZipAddCommon.h index 1e0c3bfa..ea5f8180 100644 --- a/CPP/7zip/Archive/Zip/ZipAddCommon.h +++ b/CPP/7zip/Archive/Zip/ZipAddCommon.h @@ -27,6 +27,7 @@ struct CCompressingResult UInt16 Method; Byte ExtractVersion; bool FileTimeWasUsed; + bool LzmaEos; }; class CAddCommon @@ -37,6 +38,7 @@ class CAddCommon CMyComPtr _compressEncoder; Byte _compressExtractVersion; + bool _isLzmaEos; CFilterCoder *_cryptoStreamSpec; CMyComPtr _cryptoStream; @@ -50,11 +52,14 @@ class CAddCommon public: CAddCommon(const CCompressionMethodMode &options); ~CAddCommon(); + + HRESULT Set_Pre_CompressionResult(bool seqMode, UInt64 unpackSize, CCompressingResult &opRes) const; + HRESULT Compress( DECL_EXTERNAL_CODECS_LOC_VARS ISequentialInStream *inStream, IOutStream *outStream, - UInt32 fileTime, - ICompressProgressInfo *progress, CCompressingResult &operationResult); + bool seqMode, UInt32 fileTime, + ICompressProgressInfo *progress, CCompressingResult &opRes); }; }} diff --git a/CPP/7zip/Archive/Zip/ZipCompressionMode.h b/CPP/7zip/Archive/Zip/ZipCompressionMode.h index 86548d95..1125f6ed 100644 --- a/CPP/7zip/Archive/Zip/ZipCompressionMode.h +++ b/CPP/7zip/Archive/Zip/ZipCompressionMode.h @@ -14,26 +14,18 @@ namespace NArchive { namespace NZip { -struct CBaseProps -{ - CMethodProps MethodInfo; - Int32 Level; +const CMethodId kMethodId_ZipBase = 0x040100; +const CMethodId kMethodId_BZip2 = 0x040202; - #ifndef _7ZIP_ST - UInt32 NumThreads; - bool NumThreadsWasChanged; - #endif +struct CBaseProps: public CMultiMethodProps +{ bool IsAesMode; Byte AesKeyMode; void Init() { - MethodInfo.Clear(); - Level = -1; - #ifndef _7ZIP_ST - NumThreads = NWindows::NSystem::GetNumberOfProcessors();; - NumThreadsWasChanged = false; - #endif + CMultiMethodProps::Init(); + IsAesMode = false; AesKeyMode = 3; } diff --git a/CPP/7zip/Archive/Zip/ZipHandler.cpp b/CPP/7zip/Archive/Zip/ZipHandler.cpp index 75034de0..75fad760 100644 --- a/CPP/7zip/Archive/Zip/ZipHandler.cpp +++ b/CPP/7zip/Archive/Zip/ZipHandler.cpp @@ -3,10 +3,10 @@ #include "StdAfx.h" #include "../../../Common/ComTry.h" -#include "../../../Common/IntToString.h" #include "../../../Common/StringConvert.h" #include "../../../Windows/PropVariant.h" +#include "../../../Windows/PropVariantUtils.h" #include "../../../Windows/TimeUtils.h" #include "../../IPassword.h" @@ -22,6 +22,7 @@ #include "../../Compress/ImplodeDecoder.h" #include "../../Compress/PpmdZip.h" #include "../../Compress/ShrinkDecoder.h" +#include "../../Compress/XzDecoder.h" #include "../../Crypto/WzAes.h" #include "../../Crypto/ZipCrypto.h" @@ -30,7 +31,6 @@ #include "../Common/ItemNameUtils.h" #include "../Common/OutStreamWithCRC.h" -#include "../XzHandler.h" #include "ZipHandler.h" @@ -39,9 +39,6 @@ using namespace NWindows; namespace NArchive { namespace NZip { -static const CMethodId kMethodId_ZipBase = 0x040100; -static const CMethodId kMethodId_BZip2 = 0x040202; - static const char * const kHostOS[] = { "FAT" @@ -66,24 +63,57 @@ static const char * const kHostOS[] = , "OS/X" }; -static const char * const kMethods[] = + +const char * const kMethodNames1[kNumMethodNames1] = { "Store" , "Shrink" - , "Reduced1" - , "Reduced2" - , "Reduced3" - , "Reduced4" + , "Reduce1" + , "Reduce2" + , "Reduce3" + , "Reduce4" , "Implode" - , "Tokenizing" + , NULL // "Tokenize" , "Deflate" , "Deflate64" , "PKImploding" + , NULL + , "BZip2" + , NULL + , "LZMA" +}; + + +const char * const kMethodNames2[kNumMethodNames2] = +{ + "xz" + , "Jpeg" + , "WavPack" + , "PPMd" + , "WzAES" }; -static const char *kMethod_AES = "AES"; -static const char *kMethod_ZipCrypto = "ZipCrypto"; -static const char *kMethod_StrongCrypto = "StrongCrypto"; +#define kMethod_AES "AES" +#define kMethod_ZipCrypto "ZipCrypto" +#define kMethod_StrongCrypto "StrongCrypto" + +static const char * const kDeflateLevels[4] = +{ + "Normal" + , "Maximum" + , "Fast" + , "Fastest" +}; + + +static const CUInt32PCharPair g_HeaderCharacts[] = +{ + { 0, "Encrypt" }, + { 3, "Descriptor" }, + // { 5, "Patched" }, + { 6, kMethod_StrongCrypto }, + { 11, "UTF8" } +}; struct CIdToNamePair { @@ -91,15 +121,6 @@ struct CIdToNamePair const char *Name; }; -static const CIdToNamePair k_MethodIdNamePairs[] = -{ - { NFileHeader::NCompressionMethod::kBZip2, "BZip2" }, - { NFileHeader::NCompressionMethod::kLZMA, "LZMA" }, - { NFileHeader::NCompressionMethod::kXz, "xz" }, - { NFileHeader::NCompressionMethod::kJpeg, "Jpeg" }, - { NFileHeader::NCompressionMethod::kWavPack, "WavPack" }, - { NFileHeader::NCompressionMethod::kPPMd, "PPMd" } -}; static const CIdToNamePair k_StrongCryptoPairs[] = { @@ -116,7 +137,7 @@ static const CIdToNamePair k_StrongCryptoPairs[] = { NStrongCrypto_AlgId::kRC4, "RC4" } }; -const char *FindNameForId(const CIdToNamePair *pairs, unsigned num, unsigned id) +static const char *FindNameForId(const CIdToNamePair *pairs, unsigned num, unsigned id) { for (unsigned i = 0; i < num; i++) { @@ -127,6 +148,7 @@ const char *FindNameForId(const CIdToNamePair *pairs, unsigned num, unsigned id) return NULL; } + static const Byte kProps[] = { kpidPath, @@ -142,9 +164,11 @@ static const Byte kProps[] = kpidComment, kpidCRC, kpidMethod, + kpidCharacts, kpidHostOS, kpidUnpackVer, - kpidVolumeIndex + kpidVolumeIndex, + kpidOffset }; static const Byte kArcProps[] = @@ -152,6 +176,7 @@ static const Byte kArcProps[] = kpidEmbeddedStubSize, kpidBit64, kpidComment, + kpidCharacts, kpidTotalPhySize, kpidIsVolume, kpidVolumeIndex, @@ -193,11 +218,34 @@ STDMETHODIMP CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value) break; } - case kpidTotalPhySize: if (m_Archive.IsMultiVol) prop = m_Archive.Vols.GetTotalSize(); break; + case kpidTotalPhySize: if (m_Archive.IsMultiVol) prop = m_Archive.Vols.TotalBytesSize; break; case kpidVolumeIndex: if (m_Archive.IsMultiVol) prop = (UInt32)m_Archive.Vols.StartVolIndex; break; case kpidIsVolume: if (m_Archive.IsMultiVol) prop = true; break; case kpidNumVolumes: if (m_Archive.IsMultiVol) prop = (UInt32)m_Archive.Vols.Streams.Size(); break; + case kpidCharacts: + { + AString s; + + if (m_Archive.LocalsWereRead) + { + s.Add_OptSpaced("Local"); + + if (m_Archive.LocalsCenterMerged) + s.Add_OptSpaced("Central"); + } + + if (m_Archive.IsZip64) + s.Add_OptSpaced("Zip64"); + + if (m_Archive.ExtraMinorError) + s.Add_OptSpaced("Minor_Extra_ERROR"); + + if (!s.IsEmpty()) + prop = s; + break; + } + case kpidWarningFlags: { UInt32 v = 0; @@ -208,12 +256,23 @@ STDMETHODIMP CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value) break; } + case kpidWarning: + { + AString s; + if (m_Archive.Overflow32bit) + s.Add_OptSpaced("32-bit overflow in headers"); + if (m_Archive.Cd_NumEntries_Overflow_16bit) + s.Add_OptSpaced("16-bit overflow for number of files in headers"); + if (!s.IsEmpty()) + prop = s; + break; + } + case kpidError: { if (!m_Archive.Vols.MissingName.IsEmpty()) { - UString s; - s.SetFromAscii("Missing volume : "); + UString s("Missing volume : "); s += m_Archive.Vols.MissingName; prop = s; } @@ -273,13 +332,19 @@ STDMETHODIMP CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val { UString res; item.GetUnicodeString(res, item.Name, false, _forceCodePage, _specifiedCodePage); - NItemName::ConvertToOSName2(res); + NItemName::ReplaceToOsSlashes_Remove_TailSlash(res); prop = res; break; } case kpidIsDir: prop = item.IsDir(); break; - case kpidSize: prop = item.Size; break; + case kpidSize: + { + if (item.FromCentral || !item.FromLocal || !item.HasDescriptor() || item.DescriptorWasRead) + prop = item.Size; + break; + } + case kpidPackSize: prop = item.PackSize; break; case kpidTimeType: @@ -299,17 +364,36 @@ STDMETHODIMP CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val case kpidCTime: { - FILETIME ft; - if (extra.GetNtfsTime(NFileHeader::NNtfsExtra::kCTime, ft)) - prop = ft; + FILETIME utc; + bool defined = true; + if (!extra.GetNtfsTime(NFileHeader::NNtfsExtra::kCTime, utc)) + { + UInt32 unixTime = 0; + if (extra.GetUnixTime(true, NFileHeader::NUnixTime::kCTime, unixTime)) + NTime::UnixTimeToFileTime(unixTime, utc); + else + defined = false; + } + if (defined) + prop = utc; break; } case kpidATime: { - FILETIME ft; - if (extra.GetNtfsTime(NFileHeader::NNtfsExtra::kATime, ft)) - prop = ft; + FILETIME utc; + bool defined = true; + if (!extra.GetNtfsTime(NFileHeader::NNtfsExtra::kATime, utc)) + { + UInt32 unixTime = 0; + if (extra.GetUnixTime(true, NFileHeader::NUnixTime::kATime, unixTime)) + NTime::UnixTimeToFileTime(unixTime, utc); + else + defined = false; + } + if (defined) + prop = utc; + break; } @@ -375,10 +459,8 @@ STDMETHODIMP CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val CWzAesExtra aesField; if (extra.GetWzAes(aesField)) { - char s[16]; - s[0] = '-'; - ConvertUInt32ToString(((unsigned)aesField.Strength + 1) * 64 , s + 1); - m += s; + m += '-'; + m.Add_UInt32(((unsigned)aesField.Strength + 1) * 64); id = aesField.Method; } } @@ -394,10 +476,8 @@ STDMETHODIMP CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val else { m += kMethod_StrongCrypto; - char temp[16]; - temp[0] = ':'; - ConvertUInt32ToString(f.AlgId, temp + 1); - m += temp; + m += ':'; + m.Add_UInt32(f.AlgId); } if (f.CertificateIsUsed()) m += "-Cert"; @@ -411,41 +491,96 @@ STDMETHODIMP CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val } { - char temp[16]; const char *s = NULL; - if (id < ARRAY_SIZE(kMethods)) - s = kMethods[id]; + if (id < kNumMethodNames1) + s = kMethodNames1[id]; else { - s = FindNameForId(k_MethodIdNamePairs, ARRAY_SIZE(k_MethodIdNamePairs), id); - if (!s) + int id2 = (int)id - (int)kMethodNames2Start; + if (id2 >= 0 && id2 < kNumMethodNames2) + s = kMethodNames2[id2]; + } + if (s) + m += s; + else + m.Add_UInt32(id); + } + { + unsigned level = item.GetDeflateLevel(); + if (level != 0) + { + if (id == NFileHeader::NCompressionMethod::kLZMA) + { + if (level & 1) + m += ":eos"; + level &= ~1; + } + else if (id == NFileHeader::NCompressionMethod::kDeflate) { - ConvertUInt32ToString(id, temp); - s = temp; + m += ':'; + m += kDeflateLevels[level]; + level = 0; + } + + if (level != 0) + { + m += ":v"; + m.Add_UInt32(level); } } - m += s; - if (id == NFileHeader::NCompressionMethod::kLZMA && item.IsLzmaEOS()) - m += ":EOS"; } prop = m; break; } - case kpidHostOS: + case kpidCharacts: { - Byte hostOS = item.GetHostOS(); - char temp[16]; - const char *s = NULL; - if (hostOS < ARRAY_SIZE(kHostOS)) - s = kHostOS[hostOS]; - else + AString s; + + if (item.FromLocal) + { + s.Add_OptSpaced("Local"); + + item.LocalExtra.PrintInfo(s); + + if (item.FromCentral) + { + s.Add_OptSpaced(":"); + s.Add_OptSpaced("Central"); + } + } + + if (item.FromCentral) { - ConvertUInt32ToString(hostOS, temp); - s = temp; + item.CentralExtra.PrintInfo(s); } - prop = s; + + UInt32 flags = item.Flags; + flags &= ~(6); // we don't need compression related bits here. + + if (flags != 0) + { + AString s2 = FlagsToString(g_HeaderCharacts, ARRAY_SIZE(g_HeaderCharacts), flags); + if (!s2.IsEmpty()) + { + s.Add_OptSpaced(":"); + s.Add_OptSpaced(s2); + } + } + + if (!item.FromCentral && item.FromLocal && item.HasDescriptor() && !item.DescriptorWasRead) + s.Add_OptSpaced("Descriptor_ERROR"); + + if (!s.IsEmpty()) + prop = s; + break; + } + + case kpidHostOS: + { + const Byte hostOS = item.GetHostOS(); + TYPE_TO_PROP(kHostOS, hostOS, prop); break; } @@ -456,6 +591,10 @@ STDMETHODIMP CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val case kpidVolumeIndex: prop = item.Disk; break; + + case kpidOffset: + prop = item.LocalHeaderPos; + break; } prop.Detach(value); @@ -475,7 +614,7 @@ STDMETHODIMP CHandler::Open(IInStream *inStream, if (res != S_OK) { m_Items.Clear(); - m_Archive.ClearRefs(); + m_Archive.ClearRefs(); // we don't want to clear error flags } return res; } @@ -493,16 +632,24 @@ STDMETHODIMP CHandler::Close() class CLzmaDecoder: public ICompressCoder, + public ICompressSetFinishMode, + public ICompressGetInStreamProcessedSize, public CMyUnknownImp { +public: NCompress::NLzma::CDecoder *DecoderSpec; CMyComPtr Decoder; -public: - CLzmaDecoder(); + + MY_UNKNOWN_IMP2( + ICompressSetFinishMode, + ICompressGetInStreamProcessedSize) + STDMETHOD(Code)(ISequentialInStream *inStream, ISequentialOutStream *outStream, const UInt64 *inSize, const UInt64 *outSize, ICompressProgressInfo *progress); + STDMETHOD(SetFinishMode)(UInt32 finishMode); + STDMETHOD(GetInStreamProcessedSize)(UInt64 *value); - MY_UNKNOWN_IMP + CLzmaDecoder(); }; CLzmaDecoder::CLzmaDecoder() @@ -511,44 +658,45 @@ CLzmaDecoder::CLzmaDecoder() Decoder = DecoderSpec; } +static const unsigned kZipLzmaPropsSize = 4 + LZMA_PROPS_SIZE; + HRESULT CLzmaDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream *outStream, - const UInt64 * /* inSize */, const UInt64 *outSize, ICompressProgressInfo *progress) + const UInt64 *inSize, const UInt64 *outSize, ICompressProgressInfo *progress) { - Byte buf[9]; - RINOK(ReadStream_FALSE(inStream, buf, 9)); - if (buf[2] != 5 || buf[3] != 0) + Byte buf[kZipLzmaPropsSize]; + RINOK(ReadStream_FALSE(inStream, buf, kZipLzmaPropsSize)); + if (buf[2] != LZMA_PROPS_SIZE || buf[3] != 0) return E_NOTIMPL; - RINOK(DecoderSpec->SetDecoderProperties2(buf + 4, 5)); - return Decoder->Code(inStream, outStream, NULL, outSize, progress); + RINOK(DecoderSpec->SetDecoderProperties2(buf + 4, LZMA_PROPS_SIZE)); + UInt64 inSize2 = 0; + if (inSize) + { + inSize2 = *inSize; + if (inSize2 < kZipLzmaPropsSize) + return S_FALSE; + inSize2 -= kZipLzmaPropsSize; + } + return Decoder->Code(inStream, outStream, inSize ? &inSize2 : NULL, outSize, progress); } - -class CXzDecoder: - public ICompressCoder, - public CMyUnknownImp +STDMETHODIMP CLzmaDecoder::SetFinishMode(UInt32 finishMode) { - NArchive::NXz::CDecoder _decoder; -public: - - STDMETHOD(Code)(ISequentialInStream *inStream, ISequentialOutStream *outStream, - const UInt64 *inSize, const UInt64 *outSize, ICompressProgressInfo *progress); - - MY_UNKNOWN_IMP -}; + DecoderSpec->FinishStream = (finishMode != 0); + return S_OK; +} -HRESULT CXzDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream *outStream, - const UInt64 * /* inSize */, const UInt64 * /* outSize */, ICompressProgressInfo *progress) +STDMETHODIMP CLzmaDecoder::GetInStreamProcessedSize(UInt64 *value) { - RINOK(_decoder.Decode(inStream, outStream, progress)); - Int32 opRes = _decoder.Get_Extract_OperationResult(); - if (opRes == NExtract::NOperationResult::kUnsupportedMethod) - return E_NOTIMPL; - if (opRes != NExtract::NOperationResult::kOK) - return S_FALSE; + *value = DecoderSpec->GetInputProcessedSize() + kZipLzmaPropsSize; return S_OK; } + + + + + struct CMethodItem { unsigned ZipMethod; @@ -572,12 +720,15 @@ class CZipDecoder CMyComPtr getTextPassword; CObjectVector methodItems; + CLzmaDecoder *lzmaDecoderSpec; public: CZipDecoder(): _zipCryptoDecoderSpec(0), _pkAesDecoderSpec(0), _wzAesDecoderSpec(0), - filterStreamSpec(0) {} + filterStreamSpec(0), + lzmaDecoderSpec(0) + {} HRESULT Decode( DECL_EXTERNAL_CODECS_LOC_VARS @@ -592,19 +743,18 @@ public: }; -static HRESULT SkipStreamData(ISequentialInStream *stream, UInt64 size) +static HRESULT SkipStreamData(ISequentialInStream *stream, bool &thereAreData) { + thereAreData = false; const size_t kBufSize = 1 << 12; Byte buf[kBufSize]; for (;;) { + size_t size = kBufSize; + RINOK(ReadStream(stream, buf, &size)); if (size == 0) return S_OK; - size_t curSize = kBufSize; - if (curSize > size) - curSize = (size_t)size; - RINOK(ReadStream_FALSE(stream, buf, curSize)); - size -= curSize; + thereAreData = true; } } @@ -620,12 +770,15 @@ HRESULT CZipDecoder::Decode( #endif Int32 &res) { - res = NExtract::NOperationResult::kDataError; + res = NExtract::NOperationResult::kHeadersError; + CFilterCoder::C_InStream_Releaser inStreamReleaser; + CFilterCoder::C_Filter_Releaser filterReleaser; bool needCRC = true; bool wzAesMode = false; bool pkAesMode = false; + unsigned id = item.Method; if (item.IsEncrypted()) @@ -633,27 +786,23 @@ HRESULT CZipDecoder::Decode( if (item.IsStrongEncrypted()) { CStrongCryptoExtra f; - if (item.CentralExtra.GetStrongCrypto(f)) - { - pkAesMode = true; - } - if (!pkAesMode) + if (!item.CentralExtra.GetStrongCrypto(f)) { res = NExtract::NOperationResult::kUnsupportedMethod; return S_OK; } + pkAesMode = true; } - if (!pkAesMode && id == NFileHeader::NCompressionMethod::kWzAES) + else if (id == NFileHeader::NCompressionMethod::kWzAES) { CWzAesExtra aesField; - if (item.GetMainExtra().GetWzAes(aesField)) - { - wzAesMode = true; - needCRC = aesField.NeedCrc(); - } + if (!item.GetMainExtra().GetWzAes(aesField)) + return S_OK; + wzAesMode = true; + needCRC = aesField.NeedCrc(); } } - + COutStreamWithCRC *outStreamSpec = new COutStreamWithCRC; CMyComPtr outStream = outStreamSpec; outStreamSpec->SetStream(realOutStream); @@ -681,6 +830,9 @@ HRESULT CZipDecoder::Decode( limitedStreamSpec->SetStream(packStream); limitedStreamSpec->Init(packSize); } + + + res = NExtract::NOperationResult::kDataError; CMyComPtr cryptoFilter; @@ -725,6 +877,8 @@ HRESULT CZipDecoder::Decode( CMyComPtr cryptoSetPassword; RINOK(cryptoFilter.QueryInterface(IID_ICryptoSetPassword, &cryptoSetPassword)); + if (!cryptoSetPassword) + return E_FAIL; if (!getTextPassword) extractCallback->QueryInterface(IID_ICryptoGetTextPassword, (void **)&getTextPassword); @@ -736,39 +890,35 @@ HRESULT CZipDecoder::Decode( AString charPassword; if (password) { + UnicodeStringToMultiByte2(charPassword, (const wchar_t *)password, CP_ACP); + /* if (wzAesMode || pkAesMode) { - charPassword = UnicodeStringToMultiByte((const wchar_t *)password, CP_ACP); - /* - for (unsigned i = 0;; i++) - { - wchar_t c = password[i]; - if (c == 0) - break; - if (c >= 0x80) - { - res = NExtract::NOperationResult::kDataError; - return S_OK; - } - charPassword += (char)c; - } - */ } else { - /* pkzip25 / WinZip / Windows probably use ANSI for some files - We use OEM for compatibility with previous versions of 7-Zip? */ - charPassword = UnicodeStringToMultiByte((const wchar_t *)password, CP_OEMCP); + // PASSWORD encoding for ZipCrypto: + // pkzip25 / WinZip / Windows probably use ANSI + // 7-Zip < 4.43 creates ZIP archives with OEM encoding in password + // 7-Zip >= 4.43 creates ZIP archives only with ASCII characters in password + // 7-Zip < 17.00 uses CP_OEMCP for password decoding + // 7-Zip >= 17.00 uses CP_ACP for password decoding } + */ } HRESULT result = cryptoSetPassword->CryptoSetPassword( (const Byte *)(const char *)charPassword, charPassword.Len()); if (result != S_OK) + { + res = NExtract::NOperationResult::kWrongPassword; return S_OK; + } } else { - RINOK(cryptoSetPassword->CryptoSetPassword(0, 0)); + res = NExtract::NOperationResult::kWrongPassword; + return S_OK; + // RINOK(cryptoSetPassword->CryptoSetPassword(NULL, 0)); } } @@ -781,16 +931,19 @@ HRESULT CZipDecoder::Decode( { CMethodItem mi; mi.ZipMethod = id; - if (id == NFileHeader::NCompressionMethod::kStored) + if (id == NFileHeader::NCompressionMethod::kStore) mi.Coder = new NCompress::CCopyCoder; - else if (id == NFileHeader::NCompressionMethod::kShrunk) + else if (id == NFileHeader::NCompressionMethod::kShrink) mi.Coder = new NCompress::NShrink::CDecoder; - else if (id == NFileHeader::NCompressionMethod::kImploded) + else if (id == NFileHeader::NCompressionMethod::kImplode) mi.Coder = new NCompress::NImplode::NDecoder::CCoder; else if (id == NFileHeader::NCompressionMethod::kLZMA) - mi.Coder = new CLzmaDecoder; + { + lzmaDecoderSpec = new CLzmaDecoder; + mi.Coder = lzmaDecoderSpec; + } else if (id == NFileHeader::NCompressionMethod::kXz) - mi.Coder = new CXzDecoder; + mi.Coder = new NCompress::NXz::CComDecoder; else if (id == NFileHeader::NCompressionMethod::kPPMd) mi.Coder = new NCompress::NPpmdZip::CDecoder(true); else @@ -810,7 +963,7 @@ HRESULT CZipDecoder::Decode( RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS szMethodID, false, mi.Coder)); - if (mi.Coder == 0) + if (!mi.Coder) { res = NExtract::NOperationResult::kUnsupportedMethod; return S_OK; @@ -842,9 +995,17 @@ HRESULT CZipDecoder::Decode( } #endif + CMyComPtr inStreamNew; + + bool isFullStreamExpected = (!item.HasDescriptor() || item.PackSize != 0); + bool needReminderCheck = false; + + bool dataAfterEnd = false; + bool truncatedError = false; + bool lzmaEosError = false; + { HRESULT result = S_OK; - CMyComPtr inStreamNew; if (item.IsEncrypted()) { if (!filterStream) @@ -853,6 +1014,7 @@ HRESULT CZipDecoder::Decode( filterStream = filterStreamSpec; } + filterReleaser.FilterCoder = filterStreamSpec; filterStreamSpec->Filter = cryptoFilter; if (wzAesMode) @@ -869,6 +1031,7 @@ HRESULT CZipDecoder::Decode( } else if (pkAesMode) { + isFullStreamExpected = false; result =_pkAesDecoderSpec->ReadHeader(inStream, item.Crc, item.Size); if (result == S_OK) { @@ -926,7 +1089,70 @@ HRESULT CZipDecoder::Decode( inStreamNew = inStream; if (result == S_OK) - result = coder->Code(inStreamNew, outStream, NULL, &item.Size, compressProgress); + { + CMyComPtr setFinishMode; + coder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode); + if (setFinishMode) + { + RINOK(setFinishMode->SetFinishMode(BoolToInt(true))); + } + + const UInt64 coderPackSize = limitedStreamSpec->GetRem(); + + bool useUnpackLimit = (id == 0 + || !item.HasDescriptor() + || item.Size >= ((UInt64)1 << 32) + || item.LocalExtra.IsZip64 + || item.CentralExtra.IsZip64 + ); + + result = coder->Code(inStreamNew, outStream, + isFullStreamExpected ? &coderPackSize : NULL, + // NULL, + useUnpackLimit ? &item.Size : NULL, + compressProgress); + + if (result == S_OK) + { + CMyComPtr getInStreamProcessedSize; + coder->QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize); + if (getInStreamProcessedSize && setFinishMode) + { + UInt64 processed; + RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed)); + if (processed != (UInt64)(Int64)-1) + { + if (pkAesMode) + { + const UInt32 padSize = _pkAesDecoderSpec->GetPadSize((UInt32)processed); + if (processed + padSize > coderPackSize) + truncatedError = true; + else + { + if (processed + padSize < coderPackSize) + dataAfterEnd = true; + // also here we can check PKCS7 padding data from reminder (it can be inside stream buffer in coder). + } + } + else + { + if (processed < coderPackSize) + { + if (isFullStreamExpected) + dataAfterEnd = true; + } + else if (processed > coderPackSize) + truncatedError = true; + needReminderCheck = isFullStreamExpected; + } + } + } + } + + if (result == S_OK && id == NFileHeader::NCompressionMethod::kLZMA) + if (!lzmaDecoderSpec->DecoderSpec->CheckFinishStatus(item.IsLzmaEOS())) + lzmaEosError = true; + } if (result == S_FALSE) return S_OK; @@ -947,19 +1173,40 @@ HRESULT CZipDecoder::Decode( if (wzAesMode) { - const UInt64 rem = limitedStreamSpec->GetRem(); - if (rem != 0) - if (SkipStreamData(inStream, rem) != S_OK) - authOk = false; + bool thereAreData = false; + if (SkipStreamData(inStreamNew, thereAreData) != S_OK) + authOk = false; + if (needReminderCheck && thereAreData) + dataAfterEnd = true; + limitedStreamSpec->Init(NCrypto::NWzAes::kMacSize); if (_wzAesDecoderSpec->CheckMac(inStream, authOk) != S_OK) authOk = false; } - - res = ((crcOK && authOk) ? - NExtract::NOperationResult::kOK : - NExtract::NOperationResult::kCRCError); + + res = NExtract::NOperationResult::kCRCError; + + if (crcOK && authOk) + { + res = NExtract::NOperationResult::kOK; + + if (dataAfterEnd) + res = NExtract::NOperationResult::kDataAfterEnd; + else if (truncatedError) + res = NExtract::NOperationResult::kUnexpectedEnd; + else if (lzmaEosError) + res = NExtract::NOperationResult::kHeadersError; + + // CheckDescriptor() supports only data descriptor with signature and + // it doesn't support "old" pkzip's data descriptor without signature. + // So we disable that check. + /* + if (item.HasDescriptor() && archive.CheckDescriptor(item) != S_OK) + res = NExtract::NOperationResult::kHeadersError; + */ + } + return S_OK; } @@ -1026,11 +1273,13 @@ STDMETHODIMP CHandler::Extract(const UInt32 *indices, UInt32 numItems, RINOK(extractCallback->SetOperationResult(NExtract::NOperationResult::kUnavailable)); continue; } + + bool headersError = false; if (!item.FromLocal) { bool isAvail = true; - HRESULT res = m_Archive.ReadLocalItemAfterCdItem(item, isAvail); + HRESULT res = m_Archive.ReadLocalItemAfterCdItem(item, isAvail, headersError); if (res == S_FALSE) { if (item.IsDir() || realOutStream || testMode) @@ -1069,12 +1318,16 @@ STDMETHODIMP CHandler::Extract(const UInt32 *indices, UInt32 numItems, m_Archive, item, realOutStream, extractCallback, progress, #ifndef _7ZIP_ST - _props.NumThreads, + _props._numThreads, #endif res); + RINOK(hres); realOutStream.Release(); + if (res == NExtract::NOperationResult::kOK && headersError) + res = NExtract::NOperationResult::kHeadersError; + RINOK(extractCallback->SetOperationResult(res)) } diff --git a/CPP/7zip/Archive/Zip/ZipHandler.h b/CPP/7zip/Archive/Zip/ZipHandler.h index c2a362a7..53e6a460 100644 --- a/CPP/7zip/Archive/Zip/ZipHandler.h +++ b/CPP/7zip/Archive/Zip/ZipHandler.h @@ -9,12 +9,20 @@ #include "../../Common/CreateCoder.h" -#include "ZipIn.h" #include "ZipCompressionMode.h" +#include "ZipIn.h" namespace NArchive { namespace NZip { +const unsigned kNumMethodNames1 = NFileHeader::NCompressionMethod::kLZMA + 1; +const unsigned kMethodNames2Start = NFileHeader::NCompressionMethod::kXz; +const unsigned kNumMethodNames2 = NFileHeader::NCompressionMethod::kWzAES + 1 - kMethodNames2Start; + +extern const char * const kMethodNames1[kNumMethodNames1]; +extern const char * const kMethodNames2[kNumMethodNames2]; + + class CHandler: public IInArchive, public IOutArchive, diff --git a/CPP/7zip/Archive/Zip/ZipHandlerOut.cpp b/CPP/7zip/Archive/Zip/ZipHandlerOut.cpp index 8a8de511..8ecf7942 100644 --- a/CPP/7zip/Archive/Zip/ZipHandlerOut.cpp +++ b/CPP/7zip/Archive/Zip/ZipHandlerOut.cpp @@ -46,6 +46,30 @@ static bool IsSimpleAsciiString(const wchar_t *s) } } + +static int FindZipMethod(const char *s, const char * const *names, unsigned num) +{ + for (unsigned i = 0; i < num; i++) + { + const char *name = names[i]; + if (name && StringsAreEqualNoCase_Ascii(s, name)) + return i; + } + return -1; +} + +static int FindZipMethod(const char *s) +{ + int k = FindZipMethod(s, kMethodNames1, kNumMethodNames1); + if (k >= 0) + return k; + k = FindZipMethod(s, kMethodNames2, kNumMethodNames2); + if (k >= 0) + return kMethodNames2Start + k; + return -1; +} + + #define COM_TRY_BEGIN2 try { #define COM_TRY_END2 } \ catch(const CSystemException &e) { return e.ErrorCode; } \ @@ -63,6 +87,7 @@ static HRESULT GetTime(IArchiveUpdateCallback *callback, int index, PROPID propI return S_OK; } + STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numItems, IArchiveUpdateCallback *callback) { @@ -75,31 +100,46 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt } CObjectVector updateItems; + updateItems.ClearAndReserve(numItems); + bool thereAreAesUpdates = false; UInt64 largestSize = 0; bool largestSizeDefined = false; + UString name; + CUpdateItem ui; + for (UInt32 i = 0; i < numItems; i++) { - CUpdateItem ui; Int32 newData; Int32 newProps; - UInt32 indexInArchive; + UInt32 indexInArc; + if (!callback) return E_FAIL; - RINOK(callback->GetUpdateItemInfo(i, &newData, &newProps, &indexInArchive)); + + RINOK(callback->GetUpdateItemInfo(i, &newData, &newProps, &indexInArc)); + + name.Empty(); + ui.Clear(); + ui.NewProps = IntToBool(newProps); ui.NewData = IntToBool(newData); - ui.IndexInArc = indexInArchive; + ui.IndexInArc = indexInArc; ui.IndexInClient = i; - bool existInArchive = (indexInArchive != (UInt32)(Int32)-1); - if (existInArchive && newData) - if (m_Items[indexInArchive].IsAesEncrypted()) + + bool existInArchive = (indexInArc != (UInt32)(Int32)-1); + if (existInArchive) + { + const CItemEx &inputItem = m_Items[indexInArc]; + if (inputItem.IsAesEncrypted()) thereAreAesUpdates = true; + if (!IntToBool(newProps)) + ui.IsDir = inputItem.IsDir(); + } if (IntToBool(newProps)) { - UString name; { NCOM::CPropVariant prop; RINOK(callback->GetProperty(i, kpidAttrib, &prop)); @@ -115,12 +155,15 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt NCOM::CPropVariant prop; RINOK(callback->GetProperty(i, kpidPath, &prop)); if (prop.vt == VT_EMPTY) - name.Empty(); + { + // name.Empty(); + } else if (prop.vt != VT_BSTR) return E_INVALIDARG; else name = prop.bstrVal; } + { NCOM::CPropVariant prop; RINOK(callback->GetProperty(i, kpidIsDir, &prop)); @@ -153,7 +196,8 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt FileTimeToDosTime(localFileTime, ui.Time); } - name = NItemName::MakeLegalName(name); + NItemName::ReplaceSlashes_OsToUnix(name); + bool needSlash = ui.IsDir; const wchar_t kSlash = L'/'; if (!name.IsEmpty()) @@ -188,11 +232,37 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt if (ui.Name.Len() >= (1 << 16)) return E_INVALIDARG; - ui.IndexInClient = i; + { + NCOM::CPropVariant prop; + RINOK(callback->GetProperty(i, kpidComment, &prop)); + if (prop.vt == VT_EMPTY) + { + // ui.Comment.Free(); + } + else if (prop.vt != VT_BSTR) + return E_INVALIDARG; + else + { + UString s = prop.bstrVal; + AString a; + if (ui.IsUtf8) + ConvertUnicodeToUTF8(s, a); + else + { + bool defaultCharWasUsed; + a = UnicodeStringToMultiByte(s, codePage, '_', defaultCharWasUsed); + } + if (a.Len() >= (1 << 16)) + return E_INVALIDARG; + ui.Comment.CopyFrom((const Byte *)(const char *)a, a.Len()); + } + } + + /* if (existInArchive) { - const CItemEx &itemInfo = m_Items[indexInArchive]; + const CItemEx &itemInfo = m_Items[indexInArc]; // ui.Commented = itemInfo.IsCommented(); ui.Commented = false; if (ui.Commented) @@ -205,6 +275,8 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt ui.Commented = false; */ } + + if (IntToBool(newData)) { UInt64 size = 0; @@ -220,12 +292,12 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt largestSizeDefined = true; } ui.Size = size; - - // ui.Size -= ui.Size / 2; } + updateItems.Add(ui); } + CMyComPtr getTextPassword; { CMyComPtr udateCallBack2(callback); @@ -261,16 +333,52 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt } } - Byte mainMethod; - if (m_MainMethod < 0) - mainMethod = (Byte)(((_props.Level == 0) ? - NFileHeader::NCompressionMethod::kStored : - NFileHeader::NCompressionMethod::kDeflated)); + + int mainMethod = m_MainMethod; + + if (mainMethod < 0) + { + if (!_props._methods.IsEmpty()) + { + const AString &methodName = _props._methods.Front().MethodName; + if (!methodName.IsEmpty()) + { + mainMethod = FindZipMethod(methodName); + if (mainMethod < 0) + { + CMethodId methodId; + UInt32 numStreams; + if (!FindMethod(EXTERNAL_CODECS_VARS methodName, methodId, numStreams)) + return E_NOTIMPL; + if (numStreams != 1) + return E_NOTIMPL; + if (methodId == kMethodId_BZip2) + mainMethod = NFileHeader::NCompressionMethod::kBZip2; + else + { + if (methodId < kMethodId_ZipBase) + return E_NOTIMPL; + methodId -= kMethodId_ZipBase; + if (methodId > 0xFF) + return E_NOTIMPL; + mainMethod = (int)methodId; + } + } + } + } + } + + if (mainMethod < 0) + mainMethod = (Byte)(((_props.GetLevel() == 0) ? + NFileHeader::NCompressionMethod::kStore : + NFileHeader::NCompressionMethod::kDeflate)); else - mainMethod = (Byte)m_MainMethod; - options.MethodSequence.Add(mainMethod); - if (mainMethod != NFileHeader::NCompressionMethod::kStored) - options.MethodSequence.Add(NFileHeader::NCompressionMethod::kStored); + mainMethod = (Byte)mainMethod; + + options.MethodSequence.Add((Byte)mainMethod); + + if (mainMethod != NFileHeader::NCompressionMethod::kStore) + options.MethodSequence.Add(NFileHeader::NCompressionMethod::kStore); return Update( EXTERNAL_CODECS_VARS @@ -281,28 +389,11 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt COM_TRY_END2 } -struct CMethodIndexToName -{ - unsigned Method; - const char *Name; -}; -static const CMethodIndexToName k_SupportedMethods[] = -{ - { NFileHeader::NCompressionMethod::kStored, "copy" }, - { NFileHeader::NCompressionMethod::kDeflated, "deflate" }, - { NFileHeader::NCompressionMethod::kDeflated64, "deflate64" }, - { NFileHeader::NCompressionMethod::kBZip2, "bzip2" }, - { NFileHeader::NCompressionMethod::kLZMA, "lzma" }, - { NFileHeader::NCompressionMethod::kPPMd, "ppmd" } -}; STDMETHODIMP CHandler::SetProperties(const wchar_t * const *names, const PROPVARIANT *values, UInt32 numProps) { InitMethodProps(); - #ifndef _7ZIP_ST - const UInt32 numProcessors = _props.NumThreads; - #endif for (UInt32 i = 0; i < numProps; i++) { @@ -313,82 +404,27 @@ STDMETHODIMP CHandler::SetProperties(const wchar_t * const *names, const PROPVAR const PROPVARIANT &prop = values[i]; - if (name[0] == L'x') - { - UInt32 level = 9; - RINOK(ParsePropToUInt32(name.Ptr(1), prop, level)); - _props.Level = level; - _props.MethodInfo.AddProp_Level(level); - } - else if (name == L"m") - { - if (prop.vt == VT_BSTR) - { - UString m = prop.bstrVal, m2; - m.MakeLower_Ascii(); - int colonPos = m.Find(L':'); - if (colonPos >= 0) - { - m2 = m.Ptr(colonPos + 1); - m.DeleteFrom(colonPos); - } - unsigned k; - for (k = 0; k < ARRAY_SIZE(k_SupportedMethods); k++) - { - const CMethodIndexToName &pair = k_SupportedMethods[k]; - if (m.IsEqualTo(pair.Name)) - { - if (!m2.IsEmpty()) - { - RINOK(_props.MethodInfo.ParseParamsFromString(m2)); - } - m_MainMethod = pair.Method; - break; - } - } - if (k == ARRAY_SIZE(k_SupportedMethods)) - return E_INVALIDARG; - } - else if (prop.vt == VT_UI4) - { - unsigned k; - for (k = 0; k < ARRAY_SIZE(k_SupportedMethods); k++) - { - unsigned method = k_SupportedMethods[k].Method; - if (prop.ulVal == method) - { - m_MainMethod = method; - break; - } - } - if (k == ARRAY_SIZE(k_SupportedMethods)) - return E_INVALIDARG; - } - else - return E_INVALIDARG; - } - else if (name.IsPrefixedBy(L"em")) + if (name.IsEqualTo_Ascii_NoCase("em")) { if (prop.vt != VT_BSTR) return E_INVALIDARG; { - UString m = prop.bstrVal; - m.MakeLower_Ascii(); - if (m.IsPrefixedBy(L"aes")) + const wchar_t *m = prop.bstrVal; + if (IsString1PrefixedByString2_NoCase_Ascii(m, "aes")) { - m.DeleteFrontal(3); - if (m == L"128") + m += 3; + if (StringsAreEqual_Ascii(m, "128")) _props.AesKeyMode = 1; - else if (m == L"192") + else if (StringsAreEqual_Ascii(m, "192")) _props.AesKeyMode = 2; - else if (m == L"256" || m.IsEmpty()) + else if (StringsAreEqual_Ascii(m, "256") || m[0] == 0) _props.AesKeyMode = 3; else return E_INVALIDARG; _props.IsAesMode = true; m_ForceAesMode = true; } - else if (m == L"zipcrypto") + else if (StringsAreEqualNoCase_Ascii(m, "ZipCrypto")) { _props.IsAesMode = false; m_ForceAesMode = true; @@ -397,13 +433,6 @@ STDMETHODIMP CHandler::SetProperties(const wchar_t * const *names, const PROPVAR return E_INVALIDARG; } } - else if (name.IsPrefixedBy(L"mt")) - { - #ifndef _7ZIP_ST - RINOK(ParseMtProp(name.Ptr(2), prop, numProcessors, _props.NumThreads)); - _props.NumThreadsWasChanged = true; - #endif - } else if (name.IsEqualTo("tc")) { RINOK(PROPVARIANT_to_bool(prop, m_WriteNtfsTimeExtra)); @@ -433,9 +462,39 @@ STDMETHODIMP CHandler::SetProperties(const wchar_t * const *names, const PROPVAR } else { - RINOK(_props.MethodInfo.ParseParamsFromPROPVARIANT(name, prop)); + if (name.IsEqualTo_Ascii_NoCase("m") && prop.vt == VT_UI4) + { + UInt32 id = prop.ulVal; + if (id > 0xFF) + return E_INVALIDARG; + m_MainMethod = id; + } + else + { + RINOK(_props.SetProperty(name, prop)); + } + // RINOK(_props.MethodInfo.ParseParamsFromPROPVARIANT(name, prop)); } } + + _props._methods.DeleteFrontal(_props.GetNumEmptyMethods()); + if (_props._methods.Size() > 1) + return E_INVALIDARG; + if (_props._methods.Size() == 1) + { + const AString &methodName = _props._methods[0].MethodName; + + if (!methodName.IsEmpty()) + { + const char *end; + UInt32 id = ConvertStringToUInt32(methodName, &end); + if (*end == 0 && id <= 0xFF) + m_MainMethod = id; + else if (methodName.IsEqualTo_Ascii_NoCase("Copy")) // it's alias for "Store" + m_MainMethod = 0; + } + } + return S_OK; } diff --git a/CPP/7zip/Archive/Zip/ZipHeader.h b/CPP/7zip/Archive/Zip/ZipHeader.h index fead0192..61b4ea4b 100644 --- a/CPP/7zip/Archive/Zip/ZipHeader.h +++ b/CPP/7zip/Archive/Zip/ZipHeader.h @@ -23,7 +23,8 @@ namespace NSignature } const unsigned kLocalHeaderSize = 4 + 26; // including signature -const unsigned kDataDescriptorSize = 4 + 12; // including signature +const unsigned kDataDescriptorSize32 = 4 + 4 + 4 * 2; // including signature +const unsigned kDataDescriptorSize64 = 4 + 4 + 8 * 2; // including signature const unsigned kCentralHeaderSize = 4 + 42; // including signature const unsigned kEcdSize = 22; // including signature @@ -37,28 +38,30 @@ namespace NFileHeader { enum EType { - kStored = 0, - kShrunk = 1, - kReduced1 = 2, - kReduced2 = 3, - kReduced3 = 4, - kReduced4 = 5, - kImploded = 6, - kReservedTokenizing = 7, // reserved for tokenizing - kDeflated = 8, - kDeflated64 = 9, + kStore = 0, + kShrink = 1, + kReduce1 = 2, + kReduce2 = 3, + kReduce3 = 4, + kReduce4 = 5, + kImplode = 6, + kTokenize = 7, + kDeflate = 8, + kDeflate64 = 9, kPKImploding = 10, kBZip2 = 12, + kLZMA = 14, + kTerse = 18, kLz77 = 19, - kXz = 0x5F, - kJpeg = 0x60, - kWavPack = 0x61, - kPPMd = 0x62, - kWzAES = 0x63 + kXz = 95, + kJpeg = 96, + kWavPack = 97, + kPPMd = 98, + kWzAES = 99 }; const Byte kMadeByProgramVersion = 63; @@ -73,6 +76,7 @@ namespace NFileHeader const Byte kExtractVersion_Aes = 51; const Byte kExtractVersion_LZMA = 63; const Byte kExtractVersion_PPMd = 63; + const Byte kExtractVersion_Xz = 20; // test it } namespace NExtraID @@ -83,6 +87,7 @@ namespace NFileHeader kNTFS = 0x0A, kStrongEncrypt = 0x17, kUnixTime = 0x5455, + kUnixExtra = 0x5855, kIzUnicodeComment = 0x6375, kIzUnicodeName = 0x7075, kWzAES = 0x9901 @@ -110,6 +115,15 @@ namespace NFileHeader }; } + namespace NUnixExtra + { + enum + { + kATime = 0, + kMTime + }; + } + namespace NFlags { const unsigned kEncrypted = 1 << 0; @@ -121,10 +135,12 @@ namespace NFileHeader const unsigned kImplodeDictionarySizeMask = 1 << 1; const unsigned kImplodeLiteralsOnMask = 1 << 2; + /* const unsigned kDeflateTypeBitStart = 1; const unsigned kNumDeflateTypeBits = 2; const unsigned kNumDeflateTypes = (1 << kNumDeflateTypeBits); const unsigned kDeflateTypeMask = (1 << kNumDeflateTypeBits) - 1; + */ } namespace NHostOS diff --git a/CPP/7zip/Archive/Zip/ZipIn.cpp b/CPP/7zip/Archive/Zip/ZipIn.cpp index 6361dc5c..09443a61 100644 --- a/CPP/7zip/Archive/Zip/ZipIn.cpp +++ b/CPP/7zip/Archive/Zip/ZipIn.cpp @@ -6,6 +6,7 @@ #include "../../../Common/DynamicBuffer.h" #include "../../../Common/IntToString.h" +#include "../../../Common/MyException.h" #include "../../../Common/StringToInt.h" #include "../../../Windows/PropVariant.h" @@ -27,6 +28,19 @@ namespace NArchive { namespace NZip { +// (kBufferSize >= kDataDescriptorSize64 + 4) + +static const size_t kSeqBufferSize = (size_t)1 << 14; + +/* + if (not defined ZIP_SELF_CHECK) : it reads CD and if error in first pass CD reading, it reads LOCALS-CD-MODE + if ( defined ZIP_SELF_CHECK) : it always reads CD and LOCALS-CD-MODE + use ZIP_SELF_CHECK to check LOCALS-CD-MODE for any zip archive +*/ + +// #define ZIP_SELF_CHECK + + struct CEcd { UInt16 ThisDisk; @@ -66,6 +80,7 @@ void CEcd::Parse(const Byte *p) void CCdInfo::ParseEcd32(const Byte *p) { + IsFromEcd64 = false; // (p) includes signature p += 4; G16(0, ThisDisk); @@ -79,6 +94,7 @@ void CCdInfo::ParseEcd32(const Byte *p) void CCdInfo::ParseEcd64e(const Byte *p) { + IsFromEcd64 = true; // (p) exclude signature G16(0, VersionMade); G16(2, VersionNeedExtract); @@ -106,9 +122,14 @@ struct CLocator G64(4, Ecd64Offset); G32(12, NumDisks); } -}; + bool IsEmptyArc() const + { + return Ecd64Disk == 0 && NumDisks == 0 && Ecd64Offset == 0; + } +}; + void CInArchive::ClearRefs() @@ -123,27 +144,174 @@ void CInArchive::ClearRefs() void CInArchive::Close() { - _processedCnt = 0; - IsArc = false; + _cnt = 0; + DisableBufMode(); + IsArcOpen = false; - IsMultiVol = false; - UseDisk_in_SingleVol = false; - EcdVolIndex = 0; + + IsArc = false; + IsZip64 = false; + HeadersError = false; HeadersWarning = false; ExtraMinorError = false; UnexpectedEnd = false; + LocalsWereRead = false; + LocalsCenterMerged = false; NoCentralDir = false; - IsZip64 = false; + Overflow32bit = false; + Cd_NumEntries_Overflow_16bit = false; + MarkerIsFound = false; + MarkerIsSafe = false; + + IsMultiVol = false; + UseDisk_in_SingleVol = false; + EcdVolIndex = 0; + ArcInfo.Clear(); + ClearRefs(); } -HRESULT CInArchive::Seek(UInt64 offset) + +HRESULT CInArchive::Seek_SavePos(UInt64 offset) +{ + // InitBuf(); + // if (!Stream) return S_FALSE; + return Stream->Seek(offset, STREAM_SEEK_SET, &_streamPos); +} + +HRESULT CInArchive::SeekToVol(int volIndex, UInt64 offset) { - return Stream->Seek(offset, STREAM_SEEK_SET, NULL); + if (volIndex != Vols.StreamIndex) + { + InitBuf(); + if (IsMultiVol && volIndex >= 0) + { + if ((unsigned)volIndex >= Vols.Streams.Size()) + return S_FALSE; + if (!Vols.Streams[volIndex].Stream) + return S_FALSE; + Stream = Vols.Streams[volIndex].Stream; + } + else if (volIndex == -2) + { + if (!Vols.ZipStream) + return S_FALSE; + Stream = Vols.ZipStream; + } + else + Stream = StartStream; + Vols.StreamIndex = volIndex; + } + else + { + if (offset <= _streamPos) + { + const UInt64 back = _streamPos - offset; + if (back <= _bufCached) + { + _bufPos = _bufCached - (size_t)back; + return S_OK; + } + } + InitBuf(); + } + return Seek_SavePos(offset); +} + + +// ---------- ReadFromCache ---------- +// reads from cache and from Stream +// move to next volume can be allowed if (CanStartNewVol) and only before first byte reading + +HRESULT CInArchive::ReadFromCache(Byte *data, unsigned size, unsigned &processed) +{ + HRESULT result = S_OK; + processed = 0; + + for (;;) + { + if (size == 0) + return S_OK; + + const size_t avail = GetAvail(); + + if (avail != 0) + { + unsigned cur = size; + if (cur > avail) + cur = (unsigned)avail; + memcpy(data, (const Byte *)Buffer + _bufPos, cur); + + data += cur; + size -= cur; + processed += cur; + + _bufPos += cur; + _cnt += cur; + + CanStartNewVol = false; + + continue; + } + + InitBuf(); + + if (_inBufMode) + { + UInt32 cur = 0; + result = Stream->Read(Buffer, (UInt32)Buffer.Size(), &cur); + _bufPos = 0; + _bufCached = cur; + _streamPos += cur; + if (cur != 0) + CanStartNewVol = false; + if (result != S_OK) + break; + if (cur != 0) + continue; + } + else + { + UInt32 cur = 0; + result = Stream->Read(data, size, &cur); + data += cur; + size -= cur; + processed += cur; + _streamPos += cur; + _cnt += cur; + if (cur != 0) + { + CanStartNewVol = false; + break; + } + if (result != S_OK) + break; + } + + if ( !IsMultiVol + || !CanStartNewVol + || Vols.StreamIndex < 0 + || (unsigned)Vols.StreamIndex + 1 >= Vols.Streams.Size()) + break; + + const CVols::CSubStreamInfo &s = Vols.Streams[Vols.StreamIndex + 1]; + if (!s.Stream) + break; + result = s.SeekToStart(); + if (result != S_OK) + break; + Vols.StreamIndex++; + _streamPos = 0; + // Vols.NeedSeek = false; + + Stream = s.Stream; + } + + return result; } @@ -168,18 +336,33 @@ API_FUNC_IsArc IsArc_Zip(const Byte *p, size_t size) if (p[0] != 'P') return k_IsArc_Res_NO; - UInt32 value = Get32(p); + UInt32 sig = Get32(p); - if (value == NSignature::kNoSpan - || value == NSignature::kSpan) + if (sig == NSignature::kNoSpan || sig == NSignature::kSpan) { p += 4; size -= 4; } - value = Get32(p); + sig = Get32(p); - if (value == NSignature::kEcd) + if (sig == NSignature::kEcd64) + { + if (size < kEcd64_FullSize) + return k_IsArc_Res_NEED_MORE; + + const UInt64 recordSize = Get64(p + 4); + if ( recordSize < kEcd64_MainSize + || recordSize > kEcd64_MainSize + (1 << 20)) + return k_IsArc_Res_NO; + CCdInfo cdInfo; + cdInfo.ParseEcd64e(p + 12); + if (!cdInfo.IsEmptyArc()) + return k_IsArc_Res_NO; + return k_IsArc_Res_YES; // k_IsArc_Res_YES_2; + } + + if (sig == NSignature::kEcd) { if (size < kEcdSize) return k_IsArc_Res_NEED_MORE; @@ -190,8 +373,8 @@ API_FUNC_IsArc IsArc_Zip(const Byte *p, size_t size) return k_IsArc_Res_NO; return k_IsArc_Res_YES; // k_IsArc_Res_YES_2; } - - if (value != NSignature::kLocalFileHeader) + + if (sig != NSignature::kLocalFileHeader) return k_IsArc_Res_NO; if (size < kLocalHeaderSize) @@ -240,8 +423,17 @@ API_FUNC_IsArc IsArc_Zip(const Byte *p, size_t size) const Byte *p2 = p + kLocalHeaderSize; for (size_t i = 0; i < rem; i++) if (p2[i] == 0) + { + // we support some "bad" zip archives that contain zeros after name + for (size_t k = i + 1; k < rem; k++) + if (p2[k] != 0) + return k_IsArc_Res_NO; + break; + /* if (i != nameSize - 1) return k_IsArc_Res_NO; + */ + } } if (size < extraOffset) @@ -288,398 +480,562 @@ static UInt32 IsArc_Zip_2(const Byte *p, size_t size, bool isFinal) } + +MY_NO_INLINE +static const Byte *FindPK(const Byte *p, const Byte *limit) +{ + for (;;) + { + for (;;) + { + Byte b0 = p[0]; + if (p >= limit) + return p; + p++; + if (b0 == 0x50) + break; + } + if (p[0] == 0x4B) + return p - 1; + } +} + + +/* +---------- FindMarker ---------- +returns: + S_OK: + ArcInfo.MarkerVolIndex : volume of marker + ArcInfo.MarkerPos : Pos of first signature + ArcInfo.MarkerPos2 : Pos of main signature (local item signature in most cases) + _streamPos : stream pos + _cnt : The number of virtal Bytes after start of search to offset after signature + _signature : main signature + + S_FALSE: can't find marker, or there is some non-zip data after marker -HRESULT CInArchive::FindMarker(IInStream *stream, const UInt64 *searchLimit) + Error code: stream reading error. +*/ + +HRESULT CInArchive::FindMarker(const UInt64 *searchLimit) { - ArcInfo.MarkerPos = m_Position; - ArcInfo.MarkerPos2 = m_Position; + ArcInfo.MarkerPos = GetVirtStreamPos(); + ArcInfo.MarkerPos2 = ArcInfo.MarkerPos; + ArcInfo.MarkerVolIndex = Vols.StreamIndex; + + _cnt = 0; + + CanStartNewVol = false; if (searchLimit && *searchLimit == 0) { Byte startBuf[kMarkerSize]; - { - size_t processed = kMarkerSize; - RINOK(ReadStream(stream, startBuf, &processed)); - m_Position += processed; - if (processed != kMarkerSize) - return S_FALSE; - } + unsigned processed; + RINOK(ReadFromCache(startBuf, kMarkerSize, processed)); + if (processed != kMarkerSize) + return S_FALSE; - m_Signature = Get32(startBuf); + UInt32 marker = Get32(startBuf); + _signature = marker; - if (m_Signature != NSignature::kEcd && - m_Signature != NSignature::kLocalFileHeader) + if ( marker == NSignature::kNoSpan + || marker == NSignature::kSpan) { - if (m_Signature != NSignature::kNoSpan) - { - if (m_Signature != NSignature::kSpan) - return S_FALSE; - if (m_Position != 4) // we don't support multivol archives with sfx stub - return S_FALSE; - ArcInfo.IsSpanMode = true; - } - size_t processed = kMarkerSize; - RINOK(ReadStream(stream, startBuf, &processed)); - m_Position += processed; + RINOK(ReadFromCache(startBuf, kMarkerSize, processed)); if (processed != kMarkerSize) return S_FALSE; - m_Signature = Get32(startBuf); - if (m_Signature != NSignature::kEcd && - m_Signature != NSignature::kLocalFileHeader) - return S_FALSE; - ArcInfo.MarkerPos2 += 4; + _signature = Get32(startBuf); } + + if ( _signature != NSignature::kEcd + && _signature != NSignature::kEcd64 + && _signature != NSignature::kLocalFileHeader) + return S_FALSE; + + ArcInfo.MarkerPos2 = GetVirtStreamPos() - 4; + ArcInfo.IsSpanMode = (marker == NSignature::kSpan); // we use weak test in case of (*searchLimit == 0) // since error will be detected later in Open function - return S_OK; // maybe we need to search backward. + return S_OK; } - const size_t kBufSize = (size_t)1 << 18; // must be larger than kCheckSize const size_t kCheckSize = (size_t)1 << 16; // must be smaller than kBufSize - CByteArr buffer(kBufSize); - - size_t numBytesInBuffer = 0; - UInt64 curScanPos = 0; + const size_t kBufSize = (size_t)1 << 17; // must be larger than kCheckSize + + if (Buffer.Size() < kBufSize) + { + InitBuf(); + Buffer.AllocAtLeast(kBufSize); + if (!Buffer.IsAllocated()) + return E_OUTOFMEMORY; + } + + _inBufMode = true; + + UInt64 progressPrev = 0; for (;;) { - size_t numReadBytes = kBufSize - numBytesInBuffer; - RINOK(ReadStream(stream, buffer + numBytesInBuffer, &numReadBytes)); - m_Position += numReadBytes; - numBytesInBuffer += numReadBytes; - const bool isFinished = (numBytesInBuffer != kBufSize); + RINOK(LookAhead(kBufSize)); + + const size_t avail = GetAvail(); - size_t limit = numBytesInBuffer;; + size_t limitPos; + const bool isFinished = (avail != kBufSize); if (isFinished) { - if (limit == 0) - break; - limit--; + const unsigned kMinAllowed = 4; + if (avail <= kMinAllowed) + { + if ( !IsMultiVol + || Vols.StreamIndex < 0 + || (unsigned)Vols.StreamIndex + 1 >= Vols.Streams.Size()) + break; + + SkipLookahed(avail); + + const CVols::CSubStreamInfo &s = Vols.Streams[Vols.StreamIndex + 1]; + if (!s.Stream) + break; + + RINOK(s.SeekToStart()); + + InitBuf(); + Vols.StreamIndex++; + _streamPos = 0; + Stream = s.Stream; + continue; + } + limitPos = avail - kMinAllowed; } else - limit -= kCheckSize; + limitPos = (avail - kCheckSize); + + // we don't check at (limitPos) for good fast aligned operations - if (searchLimit && curScanPos + limit > *searchLimit) - limit = (size_t)(*searchLimit - curScanPos + 1); + if (searchLimit) + { + if (_cnt > *searchLimit) + break; + UInt64 rem = *searchLimit - _cnt; + if (limitPos > rem) + limitPos = (size_t)rem + 1; + } - if (limit < 1) + if (limitPos == 0) break; - const Byte *buf = buffer; - for (size_t pos = 0; pos < limit; pos++) + const Byte * const pStart = Buffer + _bufPos; + const Byte * p = pStart; + const Byte * const limit = pStart + limitPos; + + for (;; p++) { - if (buf[pos] != 0x50) - continue; - if (buf[pos + 1] != 0x4B) - continue; - size_t rem = numBytesInBuffer - pos; - UInt32 res = IsArc_Zip_2(buf + pos, rem, isFinished); + p = FindPK(p, limit); + if (p >= limit) + break; + const size_t rem = pStart + avail - p; + UInt32 res = IsArc_Zip_2(p, rem, isFinished); if (res != k_IsArc_Res_NO) { if (rem < kMarkerSize) return S_FALSE; - m_Signature = Get32(buf + pos); - ArcInfo.MarkerPos += curScanPos + pos; + _signature = Get32(p); + SkipLookahed(p - pStart); + ArcInfo.MarkerVolIndex = Vols.StreamIndex; + ArcInfo.MarkerPos = GetVirtStreamPos(); ArcInfo.MarkerPos2 = ArcInfo.MarkerPos; - if (m_Signature == NSignature::kNoSpan - || m_Signature == NSignature::kSpan) + SkipLookahed(4); + if ( _signature == NSignature::kNoSpan + || _signature == NSignature::kSpan) { - m_Signature = Get32(buf + pos + 4); + if (rem < kMarkerSize * 2) + return S_FALSE; + ArcInfo.IsSpanMode = (_signature == NSignature::kSpan); + _signature = Get32(p + 4); ArcInfo.MarkerPos2 += 4; + SkipLookahed(4); } - m_Position = ArcInfo.MarkerPos2 + kMarkerSize; return S_OK; } } - if (isFinished) + if (!IsMultiVol && isFinished) break; - curScanPos += limit; - numBytesInBuffer -= limit; - memmove(buffer, buffer + limit, numBytesInBuffer); + SkipLookahed(p - pStart); + + if (Callback && (_cnt - progressPrev) >= ((UInt32)1 << 23)) + { + progressPrev = _cnt; + // const UInt64 numFiles64 = 0; + RINOK(Callback->SetCompleted(NULL, &_cnt)); + } } return S_FALSE; } -HRESULT CInArchive::IncreaseRealPosition(Int64 addValue, bool &isFinished) +/* +---------- IncreaseRealPosition ---------- +moves virtual offset in virtual stream. +changing to new volumes is allowed +*/ + +HRESULT CInArchive::IncreaseRealPosition(UInt64 offset, bool &isFinished) { isFinished = false; + + for (;;) + { + const size_t avail = GetAvail(); + + if (offset <= avail) + { + _bufPos += (size_t)offset; + _cnt += offset; + return S_OK; + } + + _cnt += avail; + offset -= avail; + + _bufCached = 0; + _bufPos = 0; + + if (!_inBufMode) + break; + + CanStartNewVol = true; + LookAhead(1); + + if (GetAvail() == 0) + return S_OK; + } + if (!IsMultiVol) - return Stream->Seek(addValue, STREAM_SEEK_CUR, &m_Position); + { + _cnt += offset; + return Stream->Seek(offset, STREAM_SEEK_CUR, &_streamPos); + } for (;;) { - if (addValue == 0) + if (offset == 0) return S_OK; - if (addValue > 0) + + if (Vols.StreamIndex < 0) + return S_FALSE; + if ((unsigned)Vols.StreamIndex >= Vols.Streams.Size()) { - if (Vols.StreamIndex < 0) - return S_FALSE; - if ((unsigned)Vols.StreamIndex >= Vols.Streams.Size()) + isFinished = true; + return S_OK; + } + { + const CVols::CSubStreamInfo &s = Vols.Streams[Vols.StreamIndex]; + if (!s.Stream) { isFinished = true; return S_OK; } + if (_streamPos > s.Size) + return S_FALSE; + const UInt64 rem = s.Size - _streamPos; + if ((UInt64)offset <= rem) { - const CVols::CSubStreamInfo &s = Vols.Streams[Vols.StreamIndex]; - if (!s.Stream) - { - isFinished = true; - return S_OK; - } - if (m_Position > s.Size) - return S_FALSE; - UInt64 rem = s.Size - m_Position; - if ((UInt64)addValue <= rem) - return Stream->Seek(addValue, STREAM_SEEK_CUR, &m_Position); - RINOK(Stream->Seek(s.Size, STREAM_SEEK_SET, &m_Position)); - addValue -= rem; - Stream = NULL; - Vols.StreamIndex++; - if ((unsigned)Vols.StreamIndex >= Vols.Streams.Size()) - { - isFinished = true; - return S_OK; - } - } - const CVols::CSubStreamInfo &s2 = Vols.Streams[Vols.StreamIndex]; - if (!s2.Stream) - { - isFinished = true; - return S_OK; + _cnt += offset; + return Stream->Seek(offset, STREAM_SEEK_CUR, &_streamPos); } - Stream = s2.Stream; - m_Position = 0; - RINOK(Stream->Seek(0, STREAM_SEEK_SET, &m_Position)); + RINOK(Seek_SavePos(s.Size)); + offset -= rem; + _cnt += rem; } - else + + Stream = NULL; + _streamPos = 0; + Vols.StreamIndex++; + if ((unsigned)Vols.StreamIndex >= Vols.Streams.Size()) { - if (!Stream) - return S_FALSE; - { - if (m_Position >= (UInt64)(-addValue)) - return Stream->Seek(addValue, STREAM_SEEK_CUR, &m_Position); - addValue += m_Position; - RINOK(Stream->Seek(0, STREAM_SEEK_SET, &m_Position)); - m_Position = 0; - Stream = NULL; - if (--Vols.StreamIndex < 0) - return S_FALSE; - } - const CVols::CSubStreamInfo &s2 = Vols.Streams[Vols.StreamIndex]; - if (!s2.Stream) - return S_FALSE; - Stream = s2.Stream; - m_Position = s2.Size; - RINOK(Stream->Seek(s2.Size, STREAM_SEEK_SET, &m_Position)); + isFinished = true; + return S_OK; + } + const CVols::CSubStreamInfo &s2 = Vols.Streams[Vols.StreamIndex]; + if (!s2.Stream) + { + isFinished = true; + return S_OK; } + Stream = s2.Stream; + RINOK(Seek_SavePos(0)); } } -class CUnexpectEnd {}; +/* +---------- LookAhead ---------- +Reads data to buffer, if required. -HRESULT CInArchive::ReadBytes(void *data, UInt32 size, UInt32 *processedSize) -{ - size_t realProcessedSize = size; - HRESULT result = S_OK; - if (_inBufMode) - { - try { realProcessedSize = _inBuffer.ReadBytes((Byte *)data, size); } - catch (const CInBufferException &e) { return e.ErrorCode; } - } - else - result = ReadStream(Stream, data, &realProcessedSize); - if (processedSize) - *processedSize = (UInt32)realProcessedSize; - m_Position += realProcessedSize; - return result; -} +It can read from volumes as long as Buffer.Size(). +But it moves to new volume, only if it's required to provide minRequired bytes in buffer. -void CInArchive::SafeReadBytes(void *data, unsigned size) -{ - size_t processed = size; - - HRESULT result = S_OK; +in: + (minRequired <= Buffer.Size()) - if (!_inBufMode) - result = ReadStream(Stream, data, &processed); - else +return: + S_OK : if (GetAvail() < minRequired) after function return, it's end of stream(s) data, or no new volume stream. + Error codes: IInStream::Read() error or IInStream::Seek() error for multivol +*/ + +HRESULT CInArchive::LookAhead(size_t minRequired) +{ + for (;;) { - for (;;) + const size_t avail = GetAvail(); + + if (minRequired <= avail) + return S_OK; + + if (_bufPos != 0) { - processed = _inBuffer.ReadBytes((Byte *)data, size); - if (processed != 0 - || IsMultiVol - || !CanStartNewVol - || Vols.StreamIndex < 0 - || (unsigned)Vols.StreamIndex >= Vols.Streams.Size()) - break; - Vols.StreamIndex++; - const CVols::CSubStreamInfo &s = Vols.Streams[Vols.StreamIndex]; - if (!s.Stream) - break; - // if (Vols.NeedSeek) - { - result = s.Stream->Seek(0, STREAM_SEEK_SET, NULL); - m_Position = 0; - if (result != S_OK) - break; - Vols.NeedSeek = false; - } - _inBuffer.SetStream(s.Stream); - _inBuffer.Init(); + if (avail != 0) + memmove(Buffer, Buffer + _bufPos, avail); + _bufPos = 0; + _bufCached = avail; } - CanStartNewVol = false; + + const size_t pos = _bufCached; + UInt32 processed = 0; + HRESULT res = Stream->Read(Buffer + pos, (UInt32)(Buffer.Size() - pos), &processed); + _streamPos += processed; + _bufCached += processed; + + if (res != S_OK) + return res; + + if (processed != 0) + continue; + + if ( !IsMultiVol + || !CanStartNewVol + || Vols.StreamIndex < 0 + || (unsigned)Vols.StreamIndex + 1 >= Vols.Streams.Size()) + return S_OK; + + const CVols::CSubStreamInfo &s = Vols.Streams[Vols.StreamIndex + 1]; + if (!s.Stream) + return S_OK; + + RINOK(s.SeekToStart()); + + Vols.StreamIndex++; + _streamPos = 0; + Stream = s.Stream; + // Vols.NeedSeek = false; } +} + + +class CUnexpectEnd {}; + + +/* +---------- SafeRead ---------- + +reads data of exact size from stream(s) - m_Position += processed; - _processedCnt += processed; +in: + _inBufMode + if (CanStartNewVol) it can go to next volume before first byte reading, if there is end of volume data. +in, out: + _streamPos : position in Stream + Stream + Vols : if (IsMultiVol) + _cnt + +out: + (CanStartNewVol == false), if some data was read + +return: + S_OK : success reading of requested data + +exceptions: + CSystemException() - stream reading error + CUnexpectEnd() : could not read data of requested size +*/ + +void CInArchive::SafeRead(Byte *data, unsigned size) +{ + unsigned processed; + HRESULT result = ReadFromCache(data, size, processed); if (result != S_OK) throw CSystemException(result); - - if (processed != size) + if (size != processed) throw CUnexpectEnd(); } void CInArchive::ReadBuffer(CByteBuffer &buffer, unsigned size) { buffer.Alloc(size); - if (size > 0) - SafeReadBytes(buffer, size); + if (size != 0) + SafeRead(buffer, size); } -Byte CInArchive::ReadByte() +// Byte CInArchive::ReadByte () { Byte b; SafeRead(&b, 1); return b; } +// UInt16 CInArchive::ReadUInt16() { Byte buf[2]; SafeRead(buf, 2); return Get16(buf); } +UInt32 CInArchive::ReadUInt32() { Byte buf[4]; SafeRead(buf, 4); return Get32(buf); } +UInt64 CInArchive::ReadUInt64() { Byte buf[8]; SafeRead(buf, 8); return Get64(buf); } + +void CInArchive::ReadSignature() { - Byte b; - SafeReadBytes(&b, 1); - return b; + CanStartNewVol = true; + _signature = ReadUInt32(); + // CanStartNewVol = false; // it's already changed in SafeRead } -UInt16 CInArchive::ReadUInt16() { Byte buf[2]; SafeReadBytes(buf, 2); return Get16(buf); } -UInt32 CInArchive::ReadUInt32() { Byte buf[4]; SafeReadBytes(buf, 4); return Get32(buf); } -UInt64 CInArchive::ReadUInt64() { Byte buf[8]; SafeReadBytes(buf, 8); return Get64(buf); } -// we use Skip() inside headers only, so no need for stream change in multivol. +// we Skip() inside headers only, so no need for stream change in multivol. -void CInArchive::Skip(unsigned num) +void CInArchive::Skip(size_t num) { - if (_inBufMode) - { - size_t skip = _inBuffer.Skip(num); - m_Position += skip; - _processedCnt += skip; - if (skip != num) - throw CUnexpectEnd(); - } - else + while (num != 0) { - for (unsigned i = 0; i < num; i++) - ReadByte(); + const unsigned kBufSize = (size_t)1 << 10; + Byte buf[kBufSize]; + unsigned step = kBufSize; + if (step > num) + step = (unsigned)num; + SafeRead(buf, step); + num -= step; } } -void CInArchive::Skip64(UInt64 num) +/* +HRESULT CInArchive::Callback_Completed(unsigned numFiles) +{ + const UInt64 numFiles64 = numFiles; + return Callback->SetCompleted(&numFiles64, &_cnt); +} +*/ + +HRESULT CInArchive::Skip64(UInt64 num, unsigned numFiles) { - for (UInt64 i = 0; i < num; i++) - ReadByte(); + if (num == 0) + return S_OK; + + for (;;) + { + size_t step = (size_t)1 << 24; + if (step > num) + step = (size_t)num; + Skip(step); + num -= step; + if (num == 0) + return S_OK; + if (Callback) + { + const UInt64 numFiles64 = numFiles; + RINOK(Callback->SetCompleted(&numFiles64, &_cnt)); + } + } } -void CInArchive::ReadFileName(unsigned size, AString &s) +bool CInArchive::ReadFileName(unsigned size, AString &s) { if (size == 0) { s.Empty(); - return; + return true; + } + char *p = s.GetBuf(size); + SafeRead((Byte *)p, size); + unsigned i = size; + do + { + if (p[i - 1] != 0) + break; } - SafeReadBytes(s.GetBuf(size), size); + while (--i); s.ReleaseBuf_CalcLen(size); + return s.Len() == i; } -bool CInArchive::ReadExtra(unsigned extraSize, CExtraBlock &extraBlock, - UInt64 &unpackSize, UInt64 &packSize, UInt64 &localHeaderOffset, UInt32 &diskStartNumber) +#define ZIP64_IS_32_MAX(n) ((n) == 0xFFFFFFFF) +#define ZIP64_IS_16_MAX(n) ((n) == 0xFFFF) + + +bool CInArchive::ReadExtra(unsigned extraSize, CExtraBlock &extra, + UInt64 &unpackSize, UInt64 &packSize, UInt64 &localOffset, UInt32 &disk) { - extraBlock.Clear(); + extra.Clear(); - UInt32 remain = extraSize; - - while (remain >= 4) + while (extraSize >= 4) { CExtraSubBlock subBlock; - subBlock.ID = ReadUInt16(); - unsigned dataSize = ReadUInt16(); - remain -= 4; - if (dataSize > remain) // it's bug + const UInt32 pair = ReadUInt32(); + subBlock.ID = (pair & 0xFFFF); + unsigned size = (unsigned)(pair >> 16); + + extraSize -= 4; + + if (size > extraSize) { + // it's error in extra HeadersWarning = true; - Skip(remain); + extra.Error = true; + Skip(extraSize); return false; } + + extraSize -= size; + if (subBlock.ID == NFileHeader::NExtraID::kZip64) { - if (unpackSize == 0xFFFFFFFF) - { - if (dataSize < 8) - { - HeadersWarning = true; - Skip(remain); - return false; - } - unpackSize = ReadUInt64(); - remain -= 8; - dataSize -= 8; - } - if (packSize == 0xFFFFFFFF) - { - if (dataSize < 8) - break; - packSize = ReadUInt64(); - remain -= 8; - dataSize -= 8; - } - if (localHeaderOffset == 0xFFFFFFFF) - { - if (dataSize < 8) - break; - localHeaderOffset = ReadUInt64(); - remain -= 8; - dataSize -= 8; - } - if (diskStartNumber == 0xFFFF) + extra.IsZip64 = true; + bool isOK = true; + + if (ZIP64_IS_32_MAX(unpackSize)) + if (size < 8) isOK = false; else { size -= 8; unpackSize = ReadUInt64(); } + + if (isOK && ZIP64_IS_32_MAX(packSize)) + if (size < 8) isOK = false; else { size -= 8; packSize = ReadUInt64(); } + + if (isOK && ZIP64_IS_32_MAX(localOffset)) + if (size < 8) isOK = false; else { size -= 8; localOffset = ReadUInt64(); } + + if (isOK && ZIP64_IS_16_MAX(disk)) + if (size < 4) isOK = false; else { size -= 4; disk = ReadUInt32(); } + + if (!isOK || size != 0) { - if (dataSize < 4) - break; - diskStartNumber = ReadUInt32(); - remain -= 4; - dataSize -= 4; + HeadersWarning = true; + extra.Error = true; + extra.IsZip64_Error = true; + Skip(size); } - Skip(dataSize); } else { - ReadBuffer(subBlock.Data, dataSize); - extraBlock.SubBlocks.Add(subBlock); + ReadBuffer(subBlock.Data, size); + extra.SubBlocks.Add(subBlock); } - remain -= dataSize; } - if (remain != 0) + if (extraSize != 0) { ExtraMinorError = true; + extra.MinorError = true; // 7-Zip before 9.31 created incorrect WsAES Extra in folder's local headers. // so we don't return false, but just set warning flag // return false; + Skip(extraSize); } - - Skip(remain); + return true; } @@ -691,7 +1047,7 @@ bool CInArchive::ReadLocalItem(CItemEx &item) item.Disk = Vols.StreamIndex; const unsigned kPureHeaderSize = kLocalHeaderSize - 4; Byte p[kPureHeaderSize]; - SafeReadBytes(p, kPureHeaderSize); + SafeRead(p, kPureHeaderSize); { unsigned i; for (i = 0; i < kPureHeaderSize && p[i] == 0; i++); @@ -709,8 +1065,9 @@ bool CInArchive::ReadLocalItem(CItemEx &item) G32(18, item.Size); const unsigned nameSize = Get16(p + 22); const unsigned extraSize = Get16(p + 24); - ReadFileName(nameSize, item.Name); + bool isOkName = ReadFileName(nameSize, item.Name); item.LocalFullHeaderSize = kLocalHeaderSize + (UInt32)nameSize + extraSize; + item.DescriptorWasRead = false; /* if (item.IsDir()) @@ -719,10 +1076,9 @@ bool CInArchive::ReadLocalItem(CItemEx &item) if (extraSize > 0) { - UInt64 localHeaderOffset = 0; - UInt32 diskStartNumber = 0; - if (!ReadExtra(extraSize, item.LocalExtra, item.Size, item.PackSize, - localHeaderOffset, diskStartNumber)) + UInt64 localOffset = 0; + UInt32 disk = 0; + if (!ReadExtra(extraSize, item.LocalExtra, item.Size, item.PackSize, localOffset, disk)) { /* Most of archives are OK for Extra. But there are some rare cases that have error. And if error in first item, it can't open archive. @@ -739,8 +1095,8 @@ bool CInArchive::ReadLocalItem(CItemEx &item) if (item.Name.Len() != nameSize) { - // we support "bad" archives with null-terminated name. - if (item.Name.Len() + 1 != nameSize) + // we support some "bad" zip archives that contain zeros after name + if (!isOkName) return false; HeadersWarning = true; } @@ -758,11 +1114,11 @@ static bool FlagsAreSame(const CItem &i1, const CItem &i2) UInt32 mask = 0xFFFF; switch (i1.Method) { - case NFileHeader::NCompressionMethod::kDeflated: + case NFileHeader::NCompressionMethod::kDeflate: mask = 0x7FF9; break; default: - if (i1.Method <= NFileHeader::NCompressionMethod::kImploded) + if (i1.Method <= NFileHeader::NCompressionMethod::kImplode) mask = 0x7FFF; } @@ -805,9 +1161,9 @@ static bool AreItemsEqual(const CItemEx &localItem, const CItemEx &cdItem) return false; if (!localItem.HasDescriptor()) { - if (cdItem.Crc != localItem.Crc || - cdItem.PackSize != localItem.PackSize || - cdItem.Size != localItem.Size) + if (cdItem.PackSize != localItem.PackSize + || cdItem.Size != localItem.Size + || cdItem.Crc != localItem.Crc && cdItem.Crc != 0) // some program writes 0 to crc field in central directory return false; } /* pkzip 2.50 creates incorrect archives. It uses @@ -833,7 +1189,8 @@ static bool AreItemsEqual(const CItemEx &localItem, const CItemEx &cdItem) // pkzip 2.50 uses DOS encoding in central dir and WIN encoding in local header. // so we ignore that error if (hostOs != NFileHeader::NHostOS::kFAT - || cdItem.MadeByVersion.Version != 25) + || cdItem.MadeByVersion.Version < 25 + || cdItem.MadeByVersion.Version > 40) return false; } } @@ -847,9 +1204,13 @@ static bool AreItemsEqual(const CItemEx &localItem, const CItemEx &cdItem) } -HRESULT CInArchive::ReadLocalItemAfterCdItem(CItemEx &item, bool &isAvail) +HRESULT CInArchive::ReadLocalItemAfterCdItem(CItemEx &item, bool &isAvail, bool &headersError) { + InitBuf(); + _inBufMode = false; + isAvail = true; + headersError = false; if (item.FromLocal) return S_OK; try @@ -863,15 +1224,13 @@ HRESULT CInArchive::ReadLocalItemAfterCdItem(CItemEx &item, bool &isAvail) isAvail = false; return S_FALSE; } - IInStream *str2 = Vols.Streams[item.Disk].Stream; - if (!str2) + Stream = Vols.Streams[item.Disk].Stream; + Vols.StreamIndex = item.Disk; + if (!Stream) { isAvail = false; return S_FALSE; } - RINOK(str2->Seek(offset, STREAM_SEEK_SET, NULL)); - Stream = str2; - Vols.StreamIndex = item.Disk; } else { @@ -888,9 +1247,16 @@ HRESULT CInArchive::ReadLocalItemAfterCdItem(CItemEx &item, bool &isAvail) isAvail = false; return S_FALSE; } - RINOK(Seek(offset)); } + RINOK(Seek_SavePos(offset)); + + /* + // we can check buf mode + InitBuf(); + _inBufMode = true; + Buffer.AllocAtLeast(1 << 10); + */ CItemEx localItem; if (ReadUInt32() != NSignature::kLocalFileHeader) @@ -900,6 +1266,11 @@ HRESULT CInArchive::ReadLocalItemAfterCdItem(CItemEx &item, bool &isAvail) return S_FALSE; item.LocalFullHeaderSize = localItem.LocalFullHeaderSize; item.LocalExtra = localItem.LocalExtra; + if (item.Crc != localItem.Crc && !localItem.HasDescriptor()) + { + item.Crc = localItem.Crc; + headersError = true; + } item.FromLocal = true; } catch(...) { return S_FALSE; } @@ -907,50 +1278,199 @@ HRESULT CInArchive::ReadLocalItemAfterCdItem(CItemEx &item, bool &isAvail) } -HRESULT CInArchive::ReadLocalItemDescriptor(CItemEx &item) +/* +---------- FindDescriptor ---------- + +in: + _streamPos : position in Stream + Stream : + Vols : if (IsMultiVol) + +action: + searches descriptor in input stream(s). + sets + item.DescriptorWasRead = true; + item.Size + item.PackSize + item.Crc + if descriptor was found + +out: + S_OK: + if ( item.DescriptorWasRead) : if descriptor was found + if (!item.DescriptorWasRead) : if descriptor was not found : unexpected end of stream(s) + + S_FALSE: if no items or there is just one item with strange properies that doesn't look like real archive. + + another error code: Callback error. + +exceptions : + CSystemException() : stream reading error +*/ + +HRESULT CInArchive::FindDescriptor(CItemEx &item, unsigned numFiles) { - const unsigned kBufSize = (1 << 12); - Byte buf[kBufSize]; + // const size_t kBufSize = (size_t)1 << 5; // don't increase it too much. It reads data look ahead. + + // Buffer.Alloc(kBufSize); + // Byte *buf = Buffer; - UInt32 numBytesInBuffer = 0; - UInt32 packedSize = 0; + UInt64 packedSize = 0; + UInt64 progressPrev = _cnt; + for (;;) { - UInt32 processedSize; - RINOK(ReadBytes(buf + numBytesInBuffer, kBufSize - numBytesInBuffer, &processedSize)); - numBytesInBuffer += processedSize; - if (numBytesInBuffer < kDataDescriptorSize) - return S_FALSE; + /* appnote specification claims that we must use 64-bit descriptor, if there is zip64 extra. + But some old third-party xps archives used 64-bit descriptor without zip64 extra. */ + // unsigned descriptorSize = kDataDescriptorSize64 + kNextSignatureSize; + + // const unsigned kNextSignatureSize = 0; // we can disable check for next signatuire + const unsigned kNextSignatureSize = 4; // we check also for signature for next File headear + + const unsigned descriptorSize4 = item.GetDescriptorSize() + kNextSignatureSize; + + if (descriptorSize4 > Buffer.Size()) return E_FAIL; + + // size_t processedSize; + CanStartNewVol = true; + RINOK(LookAhead(descriptorSize4)); + const size_t avail = GetAvail(); + + if (avail < descriptorSize4) + { + // we write to packSize all these available bytes. + // later it's simpler to work with such value than with 0 + if (item.PackSize == 0) + item.PackSize = packedSize + avail; + return S_OK; + } + + const Byte * const pStart = Buffer + _bufPos; + const Byte * p = pStart; + const Byte * const limit = pStart + (avail - descriptorSize4); - UInt32 i; - for (i = 0; i <= numBytesInBuffer - kDataDescriptorSize; i++) + for (; p <= limit; p++) { // descriptor signature field is Info-ZIP's extension to pkware Zip specification. // New ZIP specification also allows descriptorSignature. - if (buf[i] != 0x50) + + p = FindPK(p, limit + 1); + if (p > limit) + break; + + /* + if (*p != 0x50) + continue; + */ + + if (Get32(p) != NSignature::kDataDescriptor) continue; - // !!!! It must be fixed for Zip64 archives - if (Get32(buf + i) == NSignature::kDataDescriptor) + + // we check next signatuire after descriptor + // maybe we need check only 2 bytes "PK" instead of 4 bytes, if some another type of header is possible after descriptor + const UInt32 sig = Get32(p + descriptorSize4 - kNextSignatureSize); + if ( sig != NSignature::kLocalFileHeader + && sig != NSignature::kCentralFileHeader) + continue; + + const UInt64 packSizeCur = packedSize + (p - pStart); + if (descriptorSize4 == kDataDescriptorSize64 + kNextSignatureSize) // if (item.LocalExtra.IsZip64) { - UInt32 descriptorPackSize = Get32(buf + i + 8); - if (descriptorPackSize == packedSize + i) - { - item.Crc = Get32(buf + i + 4); - item.PackSize = descriptorPackSize; - item.Size = Get32(buf + i + 12); - bool isFinished; - return IncreaseRealPosition((Int64)(Int32)(0 - (numBytesInBuffer - i - kDataDescriptorSize)), isFinished); - } + const UInt64 descriptorPackSize = Get64(p + 8); + if (descriptorPackSize != packSizeCur) + continue; + item.Size = Get64(p + 16); + } + else + { + const UInt32 descriptorPackSize = Get32(p + 8); + if (descriptorPackSize != (UInt32)packSizeCur) + continue; + item.Size = Get32(p + 12); + // that item.Size can be truncated to 32-bit value here } + // We write calculated 64-bit packSize, even if descriptor64 was not used + item.PackSize = packSizeCur; + + item.DescriptorWasRead = true; + item.Crc = Get32(p + 4); + + const size_t skip = (p - pStart) + descriptorSize4 - kNextSignatureSize; + + SkipLookahed(skip); + + return S_OK; } - packedSize += i; - unsigned j; - for (j = 0; i < numBytesInBuffer; i++, j++) - buf[j] = buf[i]; - numBytesInBuffer = j; + const size_t skip = (p - pStart); + SkipLookahed(skip); + + packedSize += skip; + + if (Callback) + if (_cnt - progressPrev >= ((UInt32)1 << 22)) + { + progressPrev = _cnt; + const UInt64 numFiles64 = numFiles; + RINOK(Callback->SetCompleted(&numFiles64, &_cnt)); + } + } +} + + +HRESULT CInArchive::CheckDescriptor(const CItemEx &item) +{ + if (!item.HasDescriptor()) + return S_OK; + + // pkzip's version without descriptor signature is not supported + + bool isFinished = false; + RINOK(IncreaseRealPosition(item.PackSize, isFinished)); + if (isFinished) + return S_FALSE; + + /* + if (!IsMultiVol) + { + RINOK(Seek_SavePos(ArcInfo.Base + item.GetDataPosition() + item.PackSize)); + } + */ + + Byte buf[kDataDescriptorSize64]; + try + { + CanStartNewVol = true; + SafeRead(buf, item.GetDescriptorSize()); + } + catch (const CSystemException &e) { return e.ErrorCode; } + // catch (const CUnexpectEnd &) + catch(...) + { + return S_FALSE; + } + // RINOK(ReadStream_FALSE(Stream, buf, item.GetDescriptorSize())); + + if (Get32(buf) != NSignature::kDataDescriptor) + return S_FALSE; + UInt32 crc = Get32(buf + 4); + UInt64 packSize, unpackSize; + + if (item.LocalExtra.IsZip64) + { + packSize = Get64(buf + 8); + unpackSize = Get64(buf + 16); + } + else + { + packSize = Get32(buf + 8); + unpackSize = Get32(buf + 12); } + + if (crc != item.Crc || item.PackSize != packSize || item.Size != unpackSize) + return S_FALSE; + return S_OK; } @@ -961,32 +1481,12 @@ HRESULT CInArchive::ReadLocalItemAfterCdItemFull(CItemEx &item) try { bool isAvail = true; - RINOK(ReadLocalItemAfterCdItem(item, isAvail)); + bool headersError = false; + RINOK(ReadLocalItemAfterCdItem(item, isAvail, headersError)); + if (headersError) + return S_FALSE; if (item.HasDescriptor()) - { - // pkzip's version without descriptor is not supported - RINOK(Seek(ArcInfo.Base + item.GetDataPosition() + item.PackSize)); - if (ReadUInt32() != NSignature::kDataDescriptor) - return S_FALSE; - UInt32 crc = ReadUInt32(); - UInt64 packSize, unpackSize; - - /* - if (IsZip64) - { - packSize = ReadUInt64(); - unpackSize = ReadUInt64(); - } - else - */ - { - packSize = ReadUInt32(); - unpackSize = ReadUInt32(); - } - - if (crc != item.Crc || item.PackSize != packSize || item.Size != unpackSize) - return S_FALSE; - } + return CheckDescriptor(item); } catch(...) { return S_FALSE; } return S_OK; @@ -997,7 +1497,7 @@ HRESULT CInArchive::ReadCdItem(CItemEx &item) { item.FromCentral = true; Byte p[kCentralHeaderSize - 4]; - SafeReadBytes(p, kCentralHeaderSize - 4); + SafeRead(p, kCentralHeaderSize - 4); item.MadeByVersion.Version = p[0]; item.MadeByVersion.HostOS = p[1]; @@ -1036,15 +1536,19 @@ HRESULT CInArchive::TryEcd64(UInt64 offset, CCdInfo &cdInfo) { if (offset >= ((UInt64)1 << 63)) return S_FALSE; - RINOK(Seek(offset)); Byte buf[kEcd64_FullSize]; - RINOK(ReadStream_FALSE(Stream, buf, kEcd64_FullSize)); + RINOK(SeekToVol(Vols.StreamIndex, offset)); + unsigned processed = 0; + ReadFromCache(buf, kEcd64_FullSize, processed); + + if (processed != kEcd64_FullSize) + return S_FALSE; if (Get32(buf) != NSignature::kEcd64) return S_FALSE; UInt64 mainSize = Get64(buf + 4); - if (mainSize < kEcd64_MainSize || mainSize > ((UInt64)1 << 32)) + if (mainSize < kEcd64_MainSize || mainSize > ((UInt64)1 << 40)) return S_FALSE; cdInfo.ParseEcd64e(buf + 12); return S_OK; @@ -1057,27 +1561,49 @@ HRESULT CInArchive::FindCd(bool checkOffsetMode) UInt64 endPos; + // There are no useful data in cache in most cases here. + // So here we don't use cache data from previous operations . + + InitBuf(); RINOK(Stream->Seek(0, STREAM_SEEK_END, &endPos)); + _streamPos = endPos; + + // const UInt32 kBufSizeMax2 = ((UInt32)1 << 16) + kEcdSize + kEcd64Locator_Size + kEcd64_FullSize; + const size_t kBufSizeMax = ((size_t)1 << 17); // must be larger than kBufSizeMax2 - const UInt32 kBufSizeMax = ((UInt32)1 << 16) + kEcdSize + kEcd64Locator_Size + kEcd64_FullSize; - const UInt32 bufSize = (endPos < kBufSizeMax) ? (UInt32)endPos : kBufSizeMax; + const size_t bufSize = (endPos < kBufSizeMax) ? (size_t)endPos : kBufSizeMax; if (bufSize < kEcdSize) return S_FALSE; - CByteArr byteBuffer(bufSize); + // CByteArr byteBuffer(bufSize); - const UInt64 startPos = endPos - bufSize; - RINOK(Stream->Seek(startPos, STREAM_SEEK_SET, &m_Position)); - if (m_Position != startPos) + if (Buffer.Size() < kBufSizeMax) + { + // InitBuf(); + Buffer.AllocAtLeast(kBufSizeMax); + if (!Buffer.IsAllocated()) + return E_OUTOFMEMORY; + } + + RINOK(Seek_SavePos(endPos - bufSize)); + + size_t processed = bufSize; + HRESULT res = ReadStream(Stream, Buffer, &processed); + _streamPos += processed; + _bufCached = processed; + _bufPos = 0; + _cnt += processed; + if (res != S_OK) + return res; + if (processed != bufSize) return S_FALSE; + - RINOK(ReadStream_FALSE(Stream, byteBuffer, bufSize)); - - for (UInt32 i = bufSize - kEcdSize + 1;;) + for (size_t i = bufSize - kEcdSize + 1;;) { if (i == 0) return S_FALSE; - const Byte *buf = byteBuffer; + const Byte *buf = Buffer; for (;;) { @@ -1095,24 +1621,26 @@ HRESULT CInArchive::FindCd(bool checkOffsetMode) if (i >= kEcd64Locator_Size) { - const Byte *locatorPtr = buf + i - kEcd64Locator_Size; - if (Get32(locatorPtr) == NSignature::kEcd64Locator) + const size_t locatorIndex = i - kEcd64Locator_Size; + if (Get32(buf + locatorIndex) == NSignature::kEcd64Locator) { CLocator locator; - locator.Parse(locatorPtr + 4); - if ((cdInfo.ThisDisk == locator.NumDisks - 1 || cdInfo.ThisDisk == 0xFFFF) + locator.Parse(buf + locatorIndex + 4); + if ((cdInfo.ThisDisk == locator.NumDisks - 1 || ZIP64_IS_16_MAX(cdInfo.ThisDisk)) && locator.Ecd64Disk < locator.NumDisks) { - if (locator.Ecd64Disk != cdInfo.ThisDisk && cdInfo.ThisDisk != 0xFFFF) + if (locator.Ecd64Disk != cdInfo.ThisDisk && !ZIP64_IS_16_MAX(cdInfo.ThisDisk)) return E_NOTIMPL; // Most of the zip64 use fixed size Zip64 ECD // we try relative backward reading. UInt64 absEcd64 = endPos - bufSize + i - (kEcd64Locator_Size + kEcd64_FullSize); + + if (locatorIndex >= kEcd64_FullSize) if (checkOffsetMode || absEcd64 == locator.Ecd64Offset) { - const Byte *ecd64 = locatorPtr - kEcd64_FullSize; + const Byte *ecd64 = buf + locatorIndex - kEcd64_FullSize; if (Get32(ecd64) == NSignature::kEcd64) { UInt64 mainEcd64Size = Get64(ecd64 + 4); @@ -1193,42 +1721,25 @@ HRESULT CInArchive::TryReadCd(CObjectVector &items, const CCdInfo &cdIn { items.Clear(); - ISequentialInStream *stream; - - if (!IsMultiVol) - { - stream = this->StartStream; - Vols.StreamIndex = -1; - RINOK(this->StartStream->Seek(cdOffset, STREAM_SEEK_SET, &m_Position)); - if (m_Position != cdOffset) - return S_FALSE; - } - else - { - if (cdInfo.CdDisk >= Vols.Streams.Size()) - return S_FALSE; - IInStream *str2 = Vols.Streams[cdInfo.CdDisk].Stream; - if (!str2) - return S_FALSE; - RINOK(str2->Seek(cdOffset, STREAM_SEEK_SET, NULL)); - stream = str2; - Vols.NeedSeek = false; - Vols.StreamIndex = cdInfo.CdDisk; - m_Position = cdOffset; - } - - _inBuffer.SetStream(stream); + RINOK(SeekToVol(IsMultiVol ? cdInfo.CdDisk : -1, cdOffset)); - _inBuffer.Init(); _inBufMode = true; + _cnt = 0; - _processedCnt = 0; + if (Callback) + { + RINOK(Callback->SetTotal(&cdInfo.NumEntries, IsMultiVol ? &Vols.TotalBytesSize : NULL)); + } + UInt64 numFileExpected = cdInfo.NumEntries; + const UInt64 *totalFilesPtr = &numFileExpected; + bool isCorrect_NumEntries = (cdInfo.IsFromEcd64 || numFileExpected >= ((UInt32)1 << 16)); - while (_processedCnt < cdSize) + while (_cnt < cdSize) { CanStartNewVol = true; if (ReadUInt32() != NSignature::kCentralFileHeader) return S_FALSE; + CanStartNewVol = false; { CItemEx cdItem; RINOK(ReadCdItem(cdItem)); @@ -1237,13 +1748,24 @@ HRESULT CInArchive::TryReadCd(CObjectVector &items, const CCdInfo &cdIn if (Callback && (items.Size() & 0xFFF) == 0) { const UInt64 numFiles = items.Size(); - RINOK(Callback->SetCompleted(&numFiles, NULL)); + + if (numFiles > numFileExpected && totalFilesPtr) + { + if (isCorrect_NumEntries) + totalFilesPtr = NULL; + else + while (numFiles > numFileExpected) + numFileExpected += (UInt32)1 << 16; + RINOK(Callback->SetTotal(totalFilesPtr, NULL)); + } + + RINOK(Callback->SetCompleted(&numFiles, &_cnt)); } } CanStartNewVol = true; - return (_processedCnt == cdSize) ? S_OK : S_FALSE; + return (_cnt == cdSize) ? S_OK : S_FALSE; } @@ -1275,11 +1797,12 @@ HRESULT CInArchive::ReadCd(CObjectVector &items, UInt32 &cdDisk, UInt64 cdOffset = cdInfo.Offset; cdDisk = cdInfo.CdDisk; - if (Callback) + if (!IsMultiVol) { - RINOK(Callback->SetTotal(&cdInfo.NumEntries, NULL)); + if (cdInfo.ThisDisk != cdInfo.CdDisk) + return S_FALSE; } - + const UInt64 base = (IsMultiVol ? 0 : ArcInfo.Base); res = TryReadCd(items, cdInfo, base + cdOffset, cdSize); @@ -1323,47 +1846,83 @@ static bool IsStrangeItem(const CItem &item) } + +/* + ---------- ReadLocals ---------- + +in: + (_signature == NSignature::kLocalFileHeader) + VirtStreamPos : after _signature : position in Stream + Stream : + Vols : if (IsMultiVol) + (_inBufMode == false) + +action: + it parses local items. + + if ( IsMultiVol) it writes absolute offsets to CItemEx::LocalHeaderPos + if (!IsMultiVol) it writes relative (from ArcInfo.Base) offsets to CItemEx::LocalHeaderPos + later we can correct CItemEx::LocalHeaderPos values, if + some new value for ArcInfo.Base will be detected +out: + S_OK: + (_signature != NSignature::kLocalFileHeade) + _streamPos : after _signature + + S_FALSE: if no items or there is just one item with strange properies that doesn't look like real archive. + + another error code: stream reading error or Callback error. + + CUnexpectEnd() exception : it's not fatal exception here. + It means that reading was interrupted by unexpected end of input stream, + but some CItemEx items were parsed OK. + We can stop further archive parsing. + But we can use all filled CItemEx items. +*/ + HRESULT CInArchive::ReadLocals(CObjectVector &items) { items.Clear(); + + UInt64 progressPrev = _cnt; - while (m_Signature == NSignature::kLocalFileHeader) + if (Callback) + { + RINOK(Callback->SetTotal(NULL, IsMultiVol ? &Vols.TotalBytesSize : NULL)); + } + + while (_signature == NSignature::kLocalFileHeader) { CItemEx item; - item.LocalHeaderPos = m_Position - 4; - if (!IsMultiVol) - item.LocalHeaderPos -= ArcInfo.MarkerPos; - // we write ralative LocalHeaderPos here. Later we can correct it to real Base. + item.LocalHeaderPos = GetVirtStreamPos() - 4; + if (!IsMultiVol) + item.LocalHeaderPos -= ArcInfo.Base; try { ReadLocalItem(item); item.FromLocal = true; bool isFinished = false; - + if (item.HasDescriptor()) - ReadLocalItemDescriptor(item); + { + RINOK(FindDescriptor(item, items.Size())); + isFinished = !item.DescriptorWasRead; + } else { - /* - if (IsMultiVol) - { - const int kStep = 10000; - RINOK(IncreaseRealPosition(-kStep, isFinished)); - RINOK(IncreaseRealPosition(item.PackSize + kStep, isFinished)); - } - else - */ + if (item.PackSize >= ((UInt64)1 << 62)) + throw CUnexpectEnd(); RINOK(IncreaseRealPosition(item.PackSize, isFinished)); } - + items.Add(item); if (isFinished) throw CUnexpectEnd(); - - m_Signature = ReadUInt32(); + + ReadSignature(); } catch (CUnexpectEnd &) { @@ -1372,17 +1931,18 @@ HRESULT CInArchive::ReadLocals(CObjectVector &items) throw; } - if (Callback && (items.Size() & 0xFF) == 0) + + if (Callback) + if ((items.Size() & 0xFF) == 0 + || _cnt - progressPrev >= ((UInt32)1 << 22)) { + progressPrev = _cnt; const UInt64 numFiles = items.Size(); - UInt64 numBytes = 0; - // if (!sMultiVol) - numBytes = item.LocalHeaderPos; - RINOK(Callback->SetCompleted(&numFiles, &numBytes)); + RINOK(Callback->SetCompleted(&numFiles, &_cnt)); } } - if (items.Size() == 1 && m_Signature != NSignature::kCentralFileHeader) + if (items.Size() == 1 && _signature != NSignature::kCentralFileHeader) if (IsStrangeItem(items[0])) return S_FALSE; @@ -1402,26 +1962,22 @@ HRESULT CVols::ParseArcName(IArchiveOpenVolumeCallback *volCallback) name = prop.bstrVal; } - UString base = name; int dotPos = name.ReverseFind_Dot(); - if (dotPos < 0) return S_OK; - - base.DeleteFrom(dotPos + 1); - const UString ext = name.Ptr(dotPos + 1); + name.DeleteFrom(dotPos + 1); + StartVolIndex = (Int32)(-1); if (ext.IsEmpty()) return S_OK; - else { wchar_t c = ext[0]; IsUpperCase = (c >= 'A' && c <= 'Z'); if (ext.IsEqualTo_Ascii_NoCase("zip")) { - BaseName = base; + BaseName = name; StartIsZ = true; StartIsZip = true; return S_OK; @@ -1429,8 +1985,13 @@ HRESULT CVols::ParseArcName(IArchiveOpenVolumeCallback *volCallback) else if (ext.IsEqualTo_Ascii_NoCase("exe")) { StartIsExe = true; - BaseName = base; + BaseName = name; StartVolIndex = 0; + /* sfx-zip can use both arc.exe and arc.zip + We can open arc.zip, if it was requesed to open arc.exe. + But it's possible that arc.exe and arc.zip are not parts of same archive. + So we can disable such operation */ + return S_FALSE; // don't open arc.zip instead of arc.exe } else if (ext[0] == 'z' || ext[0] == 'Z') { @@ -1441,7 +2002,7 @@ HRESULT CVols::ParseArcName(IArchiveOpenVolumeCallback *volCallback) if (*end != 0 || volNum < 1 || volNum > ((UInt32)1 << 30)) return S_OK; StartVolIndex = volNum - 1; - BaseName = base; + BaseName = name; StartIsZ = true; } else @@ -1449,9 +2010,11 @@ HRESULT CVols::ParseArcName(IArchiveOpenVolumeCallback *volCallback) } UString volName = BaseName; - volName.AddAscii(IsUpperCase ? "ZIP" : "zip"); - HRESULT result = volCallback->GetStream(volName, &ZipStream); - if (result == S_FALSE || !ZipStream) + volName += (IsUpperCase ? "ZIP" : "zip"); + + HRESULT res = volCallback->GetStream(volName, &ZipStream); + + if (res == S_FALSE || !ZipStream) { if (MissingName.IsEmpty()) { @@ -1461,7 +2024,7 @@ HRESULT CVols::ParseArcName(IArchiveOpenVolumeCallback *volCallback) return S_OK; } - return result; + return res; } @@ -1493,24 +2056,30 @@ HRESULT CInArchive::ReadVols2(IArchiveOpenVolumeCallback *volCallback, { UString volName = Vols.BaseName; { - volName += (wchar_t)(Vols.IsUpperCase ? 'Z' : 'z'); + volName += (char)(Vols.IsUpperCase ? 'Z' : 'z'); + unsigned v = i + 1; + if (v < 10) + volName += '0'; + volName.Add_UInt32(v); + } + + HRESULT res = volCallback->GetStream(volName, &stream); + if (res != S_OK && res != S_FALSE) + return res; + if (res == S_FALSE || !stream) + { + if (i == 0) { - char s[32]; - ConvertUInt32ToString(i + 1, s); - unsigned len = (unsigned)strlen(s); - while (len < 2) - { - volName += (wchar_t)'0'; - len++; - } - volName.AddAscii(s); + UString volName_exe = Vols.BaseName; + volName_exe += (Vols.IsUpperCase ? "EXE" : "exe"); + + HRESULT res2 = volCallback->GetStream(volName_exe, &stream); + if (res2 != S_OK && res2 != S_FALSE) + return res2; + res = res2; } } - - HRESULT result = volCallback->GetStream(volName, &stream); - if (result != S_OK && result != S_FALSE) - return result; - if (result == S_FALSE || !stream) + if (res == S_FALSE || !stream) { if (Vols.MissingName.IsEmpty()) Vols.MissingName = volName; @@ -1524,7 +2093,6 @@ HRESULT CInArchive::ReadVols2(IArchiveOpenVolumeCallback *volCallback, } UInt64 size; - UInt64 pos; RINOK(stream->Seek(0, STREAM_SEEK_CUR, &pos)); RINOK(stream->Seek(0, STREAM_SEEK_END, &size)); @@ -1535,6 +2103,8 @@ HRESULT CInArchive::ReadVols2(IArchiveOpenVolumeCallback *volCallback, CVols::CSubStreamInfo &ss = Vols.Streams[i]; Vols.NumVols++; + Vols.TotalBytesSize += size; + ss.Stream = stream; ss.Size = size; @@ -1559,11 +2129,11 @@ HRESULT CInArchive::ReadVols() RINOK(Vols.ParseArcName(volCallback)); - int startZIndex = Vols.StartVolIndex; + // const int startZIndex = Vols.StartVolIndex; if (!Vols.StartIsZ) { - // if (!Vols.StartIsExe) + if (!Vols.StartIsExe) return S_OK; } @@ -1573,35 +2143,46 @@ HRESULT CInArchive::ReadVols() if (Vols.StartIsZip) Vols.ZipStream = StartStream; - // bool cdOK = false; - if (Vols.ZipStream) { Stream = Vols.ZipStream; + + if (Vols.StartIsZip) + Vols.StreamIndex = -1; + else + { + Vols.StreamIndex = -2; + InitBuf(); + } + HRESULT res = FindCd(true); + CCdInfo &ecd = Vols.ecd; if (res == S_OK) { zipDisk = ecd.ThisDisk; Vols.ecd_wasRead = true; + + // if is not multivol or bad multivol, we return to main single stream code if (ecd.ThisDisk == 0 || ecd.ThisDisk >= ((UInt32)1 << 30) || ecd.ThisDisk < ecd.CdDisk) return S_OK; + cdDisk = ecd.CdDisk; if (Vols.StartVolIndex < 0) Vols.StartVolIndex = ecd.ThisDisk; + else if ((UInt32)Vols.StartVolIndex >= ecd.ThisDisk) + return S_OK; + // Vols.StartVolIndex = ecd.ThisDisk; // Vols.EndVolIndex = ecd.ThisDisk; unsigned numMissingVols; - if (cdDisk == zipDisk) - { - // cdOK = true; - } - else + if (cdDisk != zipDisk) { + // get volumes required for cd. RINOK(ReadVols2(volCallback, cdDisk, zipDisk, zipDisk, 0, numMissingVols)); - if (numMissingVols == 0) + if (numMissingVols != 0) { // cdOK = false; } @@ -1611,25 +2192,50 @@ HRESULT CInArchive::ReadVols() return res; } - if (Vols.Streams.Size() > 0) - IsMultiVol = true; - if (Vols.StartVolIndex < 0) + { + // is not mutivol; return S_OK; + } + /* + if (!Vols.Streams.IsEmpty()) + IsMultiVol = true; + */ + unsigned numMissingVols; if (cdDisk != 0) { - RINOK(ReadVols2(volCallback, 0, cdDisk < 0 ? -1 : cdDisk, zipDisk, 1 << 10, numMissingVols)); + // get volumes that were no requested still + const unsigned kNumMissingVolsMax = 1 << 12; + RINOK(ReadVols2(volCallback, 0, cdDisk < 0 ? -1 : cdDisk, zipDisk, kNumMissingVolsMax, numMissingVols)); + } + + // if (Vols.StartVolIndex >= 0) + { + if (Vols.Streams.IsEmpty()) + if (Vols.StartVolIndex > (1 << 20)) + return S_OK; + if ((unsigned)Vols.StartVolIndex >= Vols.Streams.Size() + || !Vols.Streams[Vols.StartVolIndex].Stream) + { + // we get volumes starting from StartVolIndex, if they we not requested before know the volume index (if FindCd() was ok) + RINOK(ReadVols2(volCallback, Vols.StartVolIndex, zipDisk, zipDisk, 0, numMissingVols)); + } } if (Vols.ZipStream) { + // if there is no another volumes and volumeIndex is too big, we don't use multivol mode if (Vols.Streams.IsEmpty()) if (zipDisk > (1 << 10)) return S_OK; - RINOK(ReadVols2(volCallback, zipDisk, zipDisk + 1, zipDisk, 0, numMissingVols)); + if (zipDisk >= 0) + { + // we create item in Streams for ZipStream, if we know the volume index (if FindCd() was ok) + RINOK(ReadVols2(volCallback, zipDisk, zipDisk + 1, zipDisk, 0, numMissingVols)); + } } if (!Vols.Streams.IsEmpty()) @@ -1639,11 +2245,14 @@ HRESULT CInArchive::ReadVols() if (cdDisk) IsMultiVol = true; */ + const int startZIndex = Vols.StartVolIndex; if (startZIndex >= 0) { - if (Vols.Streams.Size() >= (unsigned)startZIndex) + // if all volumes before start volume are OK, we can start parsing from 0 + // if there are missing volumes before startZIndex, we start parsing in current startZIndex + if ((unsigned)startZIndex < Vols.Streams.Size()) { - for (unsigned i = 0; i < (unsigned)startZIndex; i++) + for (unsigned i = 0; i <= (unsigned)startZIndex; i++) if (!Vols.Streams[i].Stream) { Vols.StartParsingVol = startZIndex; @@ -1658,10 +2267,6 @@ HRESULT CInArchive::ReadVols() - - - - HRESULT CVols::Read(void *data, UInt32 size, UInt32 *processedSize) { if (processedSize) @@ -1680,7 +2285,7 @@ HRESULT CVols::Read(void *data, UInt32 size, UInt32 *processedSize) return S_FALSE; if (NeedSeek) { - RINOK(s.Stream->Seek(0, STREAM_SEEK_SET, NULL)); + RINOK(s.SeekToStart()); NeedSeek = false; } UInt32 realProcessedSize = 0; @@ -1704,47 +2309,112 @@ STDMETHODIMP CVolStream::Read(void *data, UInt32 size, UInt32 *processedSize) -#define COPY_ECD_ITEM_16(n) if (!isZip64 || ecd. n != 0xFFFF) ecd64. n = ecd. n; -#define COPY_ECD_ITEM_32(n) if (!isZip64 || ecd. n != 0xFFFFFFFF) ecd64. n = ecd. n; +#define COPY_ECD_ITEM_16(n) if (!isZip64 || !ZIP64_IS_16_MAX(ecd. n)) cdInfo. n = ecd. n; +#define COPY_ECD_ITEM_32(n) if (!isZip64 || !ZIP64_IS_32_MAX(ecd. n)) cdInfo. n = ecd. n; -HRESULT CInArchive::ReadHeaders2(CObjectVector &items) +HRESULT CInArchive::ReadHeaders(CObjectVector &items) { + if (Buffer.Size() < kSeqBufferSize) + { + InitBuf(); + Buffer.AllocAtLeast(kSeqBufferSize); + if (!Buffer.IsAllocated()) + return E_OUTOFMEMORY; + } + + _inBufMode = false; + HRESULT res = S_OK; bool localsWereRead = false; - UInt64 cdSize = 0, cdRelatOffset = 0, cdAbsOffset = 0; + + /* we try to open archive with the following modes: + 1) CD-MODE : fast mode : we read backward ECD and CD, compare CD items with first Local item. + 2) LOCALS-CD-MODE : slow mode, if CD-MODE fails : we sequentially read all Locals and then CD. + Then we read sequentially ECD64, Locator, ECD again at the end. + + - in LOCALS-CD-MODE we use use the following + variables (with real cd properties) to set Base archive offset + and check real cd properties with values from ECD/ECD64. + */ + + UInt64 cdSize = 0; + UInt64 cdRelatOffset = 0; UInt32 cdDisk = 0; - if (!_inBuffer.Create(1 << 15)) - return E_OUTOFMEMORY; + UInt64 cdAbsOffset = 0; // absolute cd offset, for LOCALS-CD-MODE only. - if (!MarkerIsFound) + if (!MarkerIsFound || !MarkerIsSafe) { IsArc = true; res = ReadCd(items, cdDisk, cdRelatOffset, cdSize); if (res == S_OK) - m_Signature = ReadUInt32(); + ReadSignature(); + else if (res != S_FALSE) + return res; } else { - // m_Signature must be kLocalFileHeader or kEcd - // m_Position points to next byte after signature - RINOK(Stream->Seek(m_Position, STREAM_SEEK_SET, NULL)); + // _signature must be kLocalFileHeader or kEcd or kEcd64 + + SeekToVol(ArcInfo.MarkerVolIndex, ArcInfo.MarkerPos2 + 4); + + CanStartNewVol = false; + + if (_signature == NSignature::kEcd64) + { + // UInt64 ecd64Offset = GetVirtStreamPos() - 4; + IsZip64 = true; + + { + const UInt64 recordSize = ReadUInt64(); + if (recordSize < kEcd64_MainSize) + return S_FALSE; + if (recordSize >= ((UInt64)1 << 62)) + return S_FALSE; + + { + const unsigned kBufSize = kEcd64_MainSize; + Byte buf[kBufSize]; + SafeRead(buf, kBufSize); + CCdInfo cdInfo; + cdInfo.ParseEcd64e(buf); + if (!cdInfo.IsEmptyArc()) + return S_FALSE; + } + + RINOK(Skip64(recordSize - kEcd64_MainSize, 0)); + } - _inBuffer.SetStream(Stream); + ReadSignature(); + if (_signature != NSignature::kEcd64Locator) + return S_FALSE; - bool needReadCd = true; + { + const unsigned kBufSize = 16; + Byte buf[kBufSize]; + SafeRead(buf, kBufSize); + CLocator locator; + locator.Parse(buf); + if (!locator.IsEmptyArc()) + return S_FALSE; + } - if (m_Signature == NSignature::kEcd) + ReadSignature(); + if (_signature != NSignature::kEcd) + return S_FALSE; + } + + if (_signature == NSignature::kEcd) { // It must be empty archive or backware archive // we don't support backware archive still const unsigned kBufSize = kEcdSize - 4; Byte buf[kBufSize]; - SafeReadBytes(buf, kBufSize); + SafeRead(buf, kBufSize); CEcd ecd; ecd.Parse(buf); // if (ecd.cdSize != 0) @@ -1753,15 +2423,15 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) return S_FALSE; ArcInfo.Base = ArcInfo.MarkerPos; - needReadCd = false; IsArc = true; // check it: we need more tests? - RINOK(Stream->Seek(ArcInfo.MarkerPos2 + 4, STREAM_SEEK_SET, &m_Position)); - } - if (needReadCd) + RINOK(SeekToVol(ArcInfo.MarkerVolIndex, ArcInfo.MarkerPos2)); + ReadSignature(); + } + else { CItemEx firstItem; - // try + try { try { @@ -1776,9 +2446,10 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) IsArc = true; res = ReadCd(items, cdDisk, cdRelatOffset, cdSize); if (res == S_OK) - m_Signature = ReadUInt32(); + ReadSignature(); } - // catch() { res = S_FALSE; } + catch(CUnexpectEnd &) { res = S_FALSE; } + if (res != S_FALSE && res != S_OK) return res; @@ -1812,52 +2483,93 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) CObjectVector cdItems; - bool needSetBase = false; + bool needSetBase = false; // we set needSetBase only for LOCALS_CD_MODE unsigned numCdItems = items.Size(); - if (res == S_FALSE) + #ifdef ZIP_SELF_CHECK + res = S_FALSE; // if uncommented, it uses additional LOCALS-CD-MODE mode to check the code + #endif + + if (res != S_OK) { + // ---------- LOCALS-CD-MODE ---------- // CD doesn't match firstItem, - // so we clear items and read Locals. + // so we clear items and read Locals and CD. + items.Clear(); localsWereRead = true; + + // we can use any mode: with buffer and without buffer + // without buffer : skips packed data : fast for big files : slow for small files + // with buffer : reads packed data : slow for big files : fast for small files + _inBufMode = false; - ArcInfo.Base = ArcInfo.MarkerPos; + // _inBufMode = true; - if (IsMultiVol) + InitBuf(); + + ArcInfo.Base = 0; + + if (!MarkerIsFound) { - Vols.StreamIndex = Vols.StartParsingVol; - if (Vols.StartParsingVol >= (int)Vols.Streams.Size()) + if (!IsMultiVol) return S_FALSE; - Stream = Vols.Streams[Vols.StartParsingVol].Stream; - if (!Stream) + if (Vols.StartParsingVol != 0) return S_FALSE; + // if (StartParsingVol == 0) and we didn't find marker, we use default zero marker. + // so we suppose that there is no sfx stub + RINOK(SeekToVol(0, ArcInfo.MarkerPos2)); + } + else + { + if (ArcInfo.MarkerPos != 0) + { + /* + If multi-vol or there is (No)Span-marker at start of stream, we set (Base) as 0. + In another caes: + (No)Span-marker is supposed as false positive. So we set (Base) as main marker (MarkerPos2). + The (Base) can be corrected later after ECD reading. + But sfx volume with stub and (No)Span-marker in (!IsMultiVol) mode will have incorrect (Base) here. + */ + ArcInfo.Base = ArcInfo.MarkerPos2; + } + + RINOK(SeekToVol(ArcInfo.MarkerVolIndex, ArcInfo.MarkerPos2)); } - RINOK(Stream->Seek(ArcInfo.MarkerPos2, STREAM_SEEK_SET, &m_Position)); - m_Signature = ReadUInt32(); + _cnt = 0; + + ReadSignature(); + LocalsWereRead = true; + RINOK(ReadLocals(items)); - if (m_Signature != NSignature::kCentralFileHeader) + if (_signature != NSignature::kCentralFileHeader) { - // if (!UnexpectedEnd) - m_Position -= 4; + // GetVirtStreamPos() - 4 + if (items.IsEmpty()) + return S_FALSE; NoCentralDir = true; HeadersError = true; return S_OK; } _inBufMode = true; - _inBuffer.Init(); - - cdAbsOffset = m_Position - 4; + + cdAbsOffset = GetVirtStreamPos() - 4; cdDisk = Vols.StreamIndex; + #ifdef ZIP_SELF_CHECK + if (!IsMultiVol && _cnt != GetVirtStreamPos() - ArcInfo.MarkerPos2) + return E_FAIL; + #endif + + const UInt64 processedCnt_start = _cnt; + for (;;) { CItemEx cdItem; - CanStartNewVol = true; RINOK(ReadCdItem(cdItem)); @@ -1865,17 +2577,29 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) if (Callback && (cdItems.Size() & 0xFFF) == 0) { const UInt64 numFiles = items.Size(); - RINOK(Callback->SetCompleted(&numFiles, NULL)); + const UInt64 numBytes = _cnt; + RINOK(Callback->SetCompleted(&numFiles, &numBytes)); } - CanStartNewVol = true; - m_Signature = ReadUInt32(); - if (m_Signature != NSignature::kCentralFileHeader) + ReadSignature(); + if (_signature != NSignature::kCentralFileHeader) break; } - cdSize = (m_Position - 4) - cdAbsOffset; + cdSize = _cnt - processedCnt_start; + + #ifdef ZIP_SELF_CHECK + if (!IsMultiVol) + { + if (_cnt != GetVirtStreamPos() - ArcInfo.MarkerPos2) + return E_FAIL; + if (cdSize != (GetVirtStreamPos() - 4) - cdAbsOffset) + return E_FAIL; + } + #endif + needSetBase = true; numCdItems = cdItems.Size(); + cdRelatOffset = cdAbsOffset - ArcInfo.Base; if (!cdItems.IsEmpty()) { @@ -1886,13 +2610,13 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) - CCdInfo ecd64; + CCdInfo cdInfo; CLocator locator; bool isZip64 = false; - const UInt64 ecd64AbsOffset = m_Position - 4; + const UInt64 ecd64AbsOffset = GetVirtStreamPos() - 4; int ecd64Disk = -1; - if (m_Signature == NSignature::kEcd64) + if (_signature == NSignature::kEcd64) { ecd64Disk = Vols.StreamIndex; @@ -1900,26 +2624,27 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) { const UInt64 recordSize = ReadUInt64(); - if (recordSize < kEcd64_MainSize) + if (recordSize < kEcd64_MainSize + || recordSize >= ((UInt64)1 << 62)) { HeadersError = true; return S_OK; } - + { const unsigned kBufSize = kEcd64_MainSize; Byte buf[kBufSize]; - SafeReadBytes(buf, kBufSize); - ecd64.ParseEcd64e(buf); + SafeRead(buf, kBufSize); + cdInfo.ParseEcd64e(buf); } - Skip64(recordSize - kEcd64_MainSize); + RINOK(Skip64(recordSize - kEcd64_MainSize, items.Size())); } - m_Signature = ReadUInt32(); + ReadSignature(); - if (m_Signature != NSignature::kEcd64Locator) + if (_signature != NSignature::kEcd64Locator) { HeadersError = true; return S_OK; @@ -1928,28 +2653,30 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) { const unsigned kBufSize = 16; Byte buf[kBufSize]; - SafeReadBytes(buf, kBufSize); + SafeRead(buf, kBufSize); locator.Parse(buf); } - m_Signature = ReadUInt32(); + ReadSignature(); } - if (m_Signature != NSignature::kEcd) + if (_signature != NSignature::kEcd) { HeadersError = true; return S_OK; } + CanStartNewVol = false; + // ---------- ECD ---------- CEcd ecd; { const unsigned kBufSize = kEcdSize - 4; Byte buf[kBufSize]; - SafeReadBytes(buf, kBufSize); + SafeRead(buf, kBufSize); ecd.Parse(buf); } @@ -1960,34 +2687,103 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) COPY_ECD_ITEM_32(Size); COPY_ECD_ITEM_32(Offset); + bool cdOK = true; + + if ((UInt32)cdInfo.Size != (UInt32)cdSize) + { + // return S_FALSE; + cdOK = false; + } + + if (isZip64) + { + if (cdInfo.NumEntries != numCdItems + || cdInfo.Size != cdSize) + { + cdOK = false; + } + } + + if (IsMultiVol) { - if (cdDisk != (int)ecd64.CdDisk) + if (cdDisk != (int)cdInfo.CdDisk) HeadersError = true; } - else if (needSetBase) + else if (needSetBase && cdOK) { + const UInt64 oldBase = ArcInfo.Base; + // localsWereRead == true + // ArcInfo.Base == ArcInfo.MarkerPos2 + // cdRelatOffset == (cdAbsOffset - ArcInfo.Base) + if (isZip64) { if (ecd64Disk == Vols.StartVolIndex) { - ArcInfo.Base = ecd64AbsOffset - locator.Ecd64Offset; - // cdRelatOffset = ecd64.Offset; - needSetBase = false; + const Int64 newBase = (Int64)ecd64AbsOffset - locator.Ecd64Offset; + if (newBase <= (Int64)ecd64AbsOffset) + { + if (!localsWereRead || newBase <= (Int64)ArcInfo.MarkerPos2) + { + ArcInfo.Base = newBase; + cdRelatOffset = cdAbsOffset - newBase; + } + else + cdOK = false; + } } } - else + else if (numCdItems != 0) // we can't use ecd.Offset in empty archive? { if ((int)cdDisk == Vols.StartVolIndex) { - ArcInfo.Base = cdAbsOffset - ecd64.Offset; - cdRelatOffset = ecd64.Offset; - needSetBase = false; + const Int64 newBase = (Int64)cdAbsOffset - cdInfo.Offset; + if (newBase <= (Int64)cdAbsOffset) + { + if (!localsWereRead || newBase <= (Int64)ArcInfo.MarkerPos2) + { + // cd can be more accurate, when it points before Locals + // so we change Base and cdRelatOffset + ArcInfo.Base = newBase; + cdRelatOffset = cdInfo.Offset; + } + else + { + // const UInt64 delta = ((UInt64)cdRelatOffset - cdInfo.Offset); + const UInt64 delta = ((UInt64)(newBase - ArcInfo.Base)); + if ((UInt32)delta == 0) + { + // we set Overflow32bit mode, only if there is (x<<32) offset + // between real_CD_offset_from_MarkerPos and CD_Offset_in_ECD. + // Base and cdRelatOffset unchanged + Overflow32bit = true; + } + else + cdOK = false; + } + } + else + cdOK = false; + } + } + // cdRelatOffset = cdAbsOffset - ArcInfo.Base; + + if (localsWereRead) + { + const UInt64 delta = oldBase - ArcInfo.Base; + if (delta != 0) + { + FOR_VECTOR (i, items) + items[i].LocalHeaderPos += delta; } } } - EcdVolIndex = ecd64.ThisDisk; + if (!cdOK) + HeadersError = true; + + EcdVolIndex = cdInfo.ThisDisk; if (!IsMultiVol) { @@ -1997,54 +2793,80 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) Vols.MissingZip = false; } - UseDisk_in_SingleVol = true; - if (localsWereRead) { - if ((UInt64)ArcInfo.Base != ArcInfo.MarkerPos) - { - const UInt64 delta = ArcInfo.MarkerPos - ArcInfo.Base; - FOR_VECTOR (i, items) - items[i].LocalHeaderPos += delta; - } - if (EcdVolIndex != 0) { FOR_VECTOR (i, items) items[i].Disk = EcdVolIndex; } } + + UseDisk_in_SingleVol = true; } if (isZip64) { - if (ecd64.ThisDisk == 0 && ecd64AbsOffset != ArcInfo.Base + locator.Ecd64Offset - // || ecd64.NumEntries_in_ThisDisk != numCdItems - || ecd64.NumEntries != numCdItems - || ecd64.Size != cdSize - || (ecd64.Offset != cdRelatOffset && !items.IsEmpty())) + if (cdInfo.ThisDisk == 0 && ecd64AbsOffset != ArcInfo.Base + locator.Ecd64Offset + // || cdInfo.NumEntries_in_ThisDisk != numCdItems + || cdInfo.NumEntries != numCdItems + || cdInfo.Size != cdSize + || (cdInfo.Offset != cdRelatOffset && !items.IsEmpty())) { HeadersError = true; return S_OK; } } - // ---------- merge Central Directory Items ---------- - - if (!cdItems.IsEmpty()) + if (cdOK && !cdItems.IsEmpty()) { - CObjectVector items2; + // ---------- merge Central Directory Items ---------- + + CRecordVector items2; + + int nextLocalIndex = 0; + + LocalsCenterMerged = true; FOR_VECTOR (i, cdItems) { + if (Callback) + if ((i & 0x3FFF) == 0) + { + const UInt64 numFiles64 = items.Size() + items2.Size(); + RINOK(Callback->SetCompleted(&numFiles64, &_cnt)); + } + const CItemEx &cdItem = cdItems[i]; - int index = FindItem(items, cdItem); + + int index = -1; + + if (nextLocalIndex != -1) + { + if ((unsigned)nextLocalIndex < items.Size()) + { + CItemEx &item = items[nextLocalIndex]; + if (item.Disk == cdItem.Disk && + (item.LocalHeaderPos == cdItem.LocalHeaderPos + || Overflow32bit && (UInt32)item.LocalHeaderPos == cdItem.LocalHeaderPos)) + index = nextLocalIndex++; + else + nextLocalIndex = -1; + } + } + + if (index == -1) + index = FindItem(items, cdItem); + + // index = -1; + if (index == -1) { - items2.Add(cdItem); + items2.Add(i); HeadersError = true; continue; } + CItemEx &item = items[index]; if (item.Name != cdItem.Name // || item.Name.Len() != cdItem.Name.Len() @@ -2067,10 +2889,10 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) item.FromCentral = cdItem.FromCentral; } - items += items2; + FOR_VECTOR (k, items2) + items.Add(cdItems[items2[k]]); } - if (ecd.NumEntries < ecd.NumEntries_in_ThisDisk) HeadersError = true; @@ -2083,35 +2905,56 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) } } - if (ecd.NumEntries > items.Size()) - HeadersError = true; - if (isZip64) { - if (ecd64.NumEntries != items.Size()) + if (cdInfo.NumEntries != items.Size() + || ecd.NumEntries != items.Size() && ecd.NumEntries != 0xFFFF) HeadersError = true; } else { // old 7-zip could store 32-bit number of CD items to 16-bit field. - /* - if ((UInt16)ecd64.NumEntries == (UInt16)items.Size()) + // if (ecd.NumEntries != items.Size()) + if (ecd.NumEntries > items.Size()) HeadersError = true; - */ + + if (cdInfo.NumEntries != numCdItems) + { + if ((UInt16)cdInfo.NumEntries != (UInt16)numCdItems) + HeadersError = true; + else + Cd_NumEntries_Overflow_16bit = true; + } } ReadBuffer(ArcInfo.Comment, ecd.CommentSize); + _inBufMode = false; - _inBuffer.Free(); - if ((UInt16)ecd64.NumEntries != (UInt16)numCdItems - || (UInt32)ecd64.Size != (UInt32)cdSize - || ((UInt32)ecd64.Offset != (UInt32)cdRelatOffset && !items.IsEmpty())) + // DisableBufMode(); + // Buffer.Free(); + /* we can't clear buf varibles. we need them to calculate PhySize of archive */ + + if ((UInt16)cdInfo.NumEntries != (UInt16)numCdItems + || (UInt32)cdInfo.Size != (UInt32)cdSize + || ((UInt32)cdInfo.Offset != (UInt32)cdRelatOffset && !items.IsEmpty())) { // return S_FALSE; HeadersError = true; } - + + #ifdef ZIP_SELF_CHECK + if (localsWereRead) + { + const UInt64 endPos = ArcInfo.MarkerPos2 + _cnt; + if (endPos != (IsMultiVol ? Vols.TotalBytesSize : ArcInfo.FileEndPos)) + { + // there are some data after the end of archive or error in code; + return E_FAIL; + } + } + #endif + // printf("\nOpen OK"); return S_OK; } @@ -2121,40 +2964,47 @@ HRESULT CInArchive::ReadHeaders2(CObjectVector &items) HRESULT CInArchive::Open(IInStream *stream, const UInt64 *searchLimit, IArchiveOpenCallback *callback, CObjectVector &items) { - _inBufMode = false; items.Clear(); Close(); - ArcInfo.Clear(); UInt64 startPos; RINOK(stream->Seek(0, STREAM_SEEK_CUR, &startPos)); RINOK(stream->Seek(0, STREAM_SEEK_END, &ArcInfo.FileEndPos)); - m_Position = ArcInfo.FileEndPos; + _streamPos = ArcInfo.FileEndPos; StartStream = stream; + Stream = stream; Callback = callback; + + DisableBufMode(); bool volWasRequested = false; if (callback && (startPos == 0 || !searchLimit || *searchLimit != 0)) { + // we try to read volumes only if it's first call (offset == 0) or scan is allowed. volWasRequested = true; RINOK(ReadVols()); } - if (IsMultiVol && Vols.StartVolIndex != 0) + if (IsMultiVol && Vols.StartParsingVol == 0 && (unsigned)Vols.StartParsingVol < Vols.Streams.Size()) { - Stream = Vols.Streams[0].Stream; - if (Stream) + // only StartParsingVol = 0 is safe search. + RINOK(SeekToVol(0, 0)); + // if (Stream) { - m_Position = 0; - RINOK(Stream->Seek(0, STREAM_SEEK_SET, NULL)); - UInt64 limit = 0; - HRESULT res = FindMarker(Stream, &limit); + // UInt64 limit = 1 << 22; // for sfx + UInt64 limit = 0; // without sfx + + HRESULT res = FindMarker(&limit); + if (res == S_OK) + { MarkerIsFound = true; + MarkerIsSafe = true; + } else if (res != S_FALSE) return res; } @@ -2162,56 +3012,93 @@ HRESULT CInArchive::Open(IInStream *stream, const UInt64 *searchLimit, else { // printf("\nOpen offset = %u\n", (unsigned)startPos); - RINOK(stream->Seek(startPos, STREAM_SEEK_SET, NULL)); - m_Position = startPos; - HRESULT res = FindMarker(stream, searchLimit); - UInt64 curPos = m_Position; + if (IsMultiVol && (unsigned)Vols.StartParsingVol < Vols.Streams.Size() && Vols.Streams[Vols.StartParsingVol].Stream) + { + RINOK(SeekToVol(Vols.StartParsingVol, Vols.StreamIndex == Vols.StartVolIndex ? startPos : 0)); + } + else + { + RINOK(SeekToVol(-1, startPos)); + } + + // UInt64 limit = 1 << 22; + // HRESULT res = FindMarker(&limit); + + HRESULT res = FindMarker(searchLimit); + + // const UInt64 curPos = GetVirtStreamPos(); + const UInt64 curPos = ArcInfo.MarkerPos2 + 4; + if (res == S_OK) MarkerIsFound = true; - else + else if (!IsMultiVol) { - // if (res != S_FALSE) + /* + // if (startPos != 0), probably CD copuld be already tested with another call with (startPos == 0). + // so we don't want to try to open CD again in that ase. + if (startPos != 0) + return res; + // we can try to open CD, if there is no Marker and (startPos == 0). + // is it OK to open such files as ZIP, or big number of false positive, when CD can be find in end of file ? + */ return res; } - - MarkerIsFound = true; if (ArcInfo.IsSpanMode && !volWasRequested) { RINOK(ReadVols()); + if (IsMultiVol && MarkerIsFound && ArcInfo.MarkerVolIndex < 0) + ArcInfo.MarkerVolIndex = Vols.StartVolIndex; } + + MarkerIsSafe = !IsMultiVol + || (ArcInfo.MarkerVolIndex == 0 && ArcInfo.MarkerPos == 0) + ; - if (IsMultiVol && (unsigned)Vols.StartVolIndex < Vols.Streams.Size()) + + if (IsMultiVol) { - Stream = Vols.Streams[Vols.StartVolIndex].Stream; - if (!Stream) - IsMultiVol = false; - else + if ((unsigned)Vols.StartVolIndex < Vols.Streams.Size()) { - RINOK(Stream->Seek(curPos, STREAM_SEEK_SET, NULL)); - m_Position = curPos; + Stream = Vols.Streams[Vols.StartVolIndex].Stream; + if (Stream) + { + RINOK(Seek_SavePos(curPos)); + } + else + IsMultiVol = false; } + else + IsMultiVol = false; } - else - IsMultiVol = false; if (!IsMultiVol) { - RINOK(stream->Seek(curPos, STREAM_SEEK_SET, NULL)); - m_Position = curPos; + if (Vols.StreamIndex != -1) + { + Stream = StartStream; + Vols.StreamIndex = -1; + InitBuf(); + RINOK(Seek_SavePos(curPos)); + } + + ArcInfo.MarkerVolIndex = -1; StreamRef = stream; Stream = stream; } } + if (!IsMultiVol) + Vols.ClearRefs(); + { HRESULT res; try { - res = ReadHeaders2(items); + res = ReadHeaders(items); } - catch (const CInBufferException &e) { res = e.ErrorCode; } + catch (const CSystemException &e) { res = e.ErrorCode; } catch (const CUnexpectEnd &) { if (items.IsEmpty()) @@ -2221,7 +3108,7 @@ HRESULT CInArchive::Open(IInStream *stream, const UInt64 *searchLimit, } catch (...) { - _inBufMode = false; + DisableBufMode(); throw; } @@ -2229,16 +3116,17 @@ HRESULT CInArchive::Open(IInStream *stream, const UInt64 *searchLimit, { ArcInfo.FinishPos = ArcInfo.FileEndPos; if ((unsigned)Vols.StreamIndex < Vols.Streams.Size()) - if (m_Position < Vols.Streams[Vols.StreamIndex].Size) + if (GetVirtStreamPos() < Vols.Streams[Vols.StreamIndex].Size) ArcInfo.ThereIsTail = true; } else { - ArcInfo.FinishPos = m_Position; - ArcInfo.ThereIsTail = (ArcInfo.FileEndPos > m_Position); + ArcInfo.FinishPos = GetVirtStreamPos(); + ArcInfo.ThereIsTail = (ArcInfo.FileEndPos > ArcInfo.FinishPos); } - _inBufMode = false; + DisableBufMode(); + IsArcOpen = true; if (!IsMultiVol) Vols.Streams.Clear(); diff --git a/CPP/7zip/Archive/Zip/ZipIn.h b/CPP/7zip/Archive/Zip/ZipIn.h index 9b0afe28..a312c36a 100644 --- a/CPP/7zip/Archive/Zip/ZipIn.h +++ b/CPP/7zip/Archive/Zip/ZipIn.h @@ -3,12 +3,11 @@ #ifndef __ZIP_IN_H #define __ZIP_IN_H +#include "../../../Common/MyBuffer2.h" #include "../../../Common/MyCom.h" #include "../../IStream.h" -#include "../../Common/InBuffer.h" - #include "ZipHeader.h" #include "ZipItem.h" @@ -22,8 +21,12 @@ class CItemEx: public CItem public: UInt32 LocalFullHeaderSize; // including Name and Extra + bool DescriptorWasRead; + + CItemEx(): DescriptorWasRead(false) {} + UInt64 GetLocalFullSize() const - { return LocalFullHeaderSize + PackSize + (HasDescriptor() ? kDataDescriptorSize : 0); } + { return LocalFullHeaderSize + GetPackSizeWithDescriptor(); } UInt64 GetDataPosition() const { return LocalHeaderPos + LocalFullHeaderSize; } }; @@ -52,6 +55,10 @@ struct CInArchiveInfo UInt64 FirstItemRelatOffset; /* Relative offset of first local (read from cd) (relative to Base). = 0 in most archives = size of stub for some SFXs */ + + + int MarkerVolIndex; + bool CdWasRead; bool IsSpanMode; bool ThereIsTail; @@ -68,6 +75,7 @@ struct CInArchiveInfo FinishPos(0), FileEndPos(0), FirstItemRelatOffset(0), + MarkerVolIndex(-1), CdWasRead(false), IsSpanMode(false), ThereIsTail(false) @@ -82,6 +90,7 @@ struct CInArchiveInfo MarkerPos2 = 0; FinishPos = 0; FileEndPos = 0; + MarkerVolIndex = -1; ThereIsTail = false; FirstItemRelatOffset = 0; @@ -96,6 +105,10 @@ struct CInArchiveInfo struct CCdInfo { + bool IsFromEcd64; + + UInt16 CommentSize; + // 64 UInt16 VersionMade; UInt16 VersionNeedExtract; @@ -108,39 +121,55 @@ struct CCdInfo UInt64 Size; UInt64 Offset; - UInt16 CommentSize; - - CCdInfo() { memset(this, 0, sizeof(*this)); } + CCdInfo() { memset(this, 0, sizeof(*this)); IsFromEcd64 = false; } void ParseEcd32(const Byte *p); // (p) includes signature void ParseEcd64e(const Byte *p); // (p) exclude signature + + bool IsEmptyArc() const + { + return ThisDisk == 0 + && CdDisk == 0 + && NumEntries_in_ThisDisk == 0 + && NumEntries == 0 + && Size == 0 + && Offset == 0 // test it + ; + } }; -class CVols +struct CVols { -public: - struct CSubStreamInfo { CMyComPtr Stream; UInt64 Size; + HRESULT SeekToStart() const { return Stream->Seek(0, STREAM_SEEK_SET, NULL); } + CSubStreamInfo(): Size(0) {} }; CObjectVector Streams; - int StreamIndex; + + int StreamIndex; // -1 for StartStream + // -2 for ZipStream at multivol detection code + // >=0 volume index in multivol + bool NeedSeek; - CMyComPtr ZipStream; - bool StartIsExe; // is .exe bool StartIsZ; // is .zip or .zNN bool StartIsZip; // is .zip bool IsUpperCase; bool MissingZip; - Int32 StartVolIndex; // = (NN - 1), if StartStream is .zNN + + bool ecd_wasRead; + + Int32 StartVolIndex; // -1, if unknown vol index + // = (NN - 1), if StartStream is .zNN + // = 0, if start vol is exe Int32 StartParsingVol; // if we need local parsing, we must use that stream unsigned NumVols; @@ -148,19 +177,27 @@ public: int EndVolIndex; // index of last volume (ecd volume), // -1, if is not multivol - UString BaseName; // including '.' - + UString BaseName; // name of archive including '.' UString MissingName; + CMyComPtr ZipStream; + CCdInfo ecd; - bool ecd_wasRead; + + UInt64 TotalBytesSize; // for MultiVol only + + void ClearRefs() + { + Streams.Clear(); + ZipStream.Release(); + TotalBytesSize = 0; + } void Clear() { StreamIndex = -1; NeedSeek = false; - StartIsExe = false; StartIsZ = false; StartIsZip = false; @@ -177,21 +214,12 @@ public: MissingZip = false; ecd_wasRead = false; - Streams.Clear(); - ZipStream.Release(); + ClearRefs(); } HRESULT ParseArcName(IArchiveOpenVolumeCallback *volCallback); HRESULT Read(void *data, UInt32 size, UInt32 *processedSize); - - UInt64 GetTotalSize() const - { - UInt64 total = 0; - FOR_VECTOR (i, Streams) - total += Streams[i].Size; - return total; - } }; @@ -210,44 +238,69 @@ public: class CInArchive { - CInBuffer _inBuffer; + CMidBuffer Buffer; + size_t _bufPos; + size_t _bufCached; + + UInt64 _streamPos; + UInt64 _cnt; + + size_t GetAvail() const { return _bufCached - _bufPos; } + + void InitBuf() { _bufPos = 0; _bufCached = 0; } + void DisableBufMode() { InitBuf(); _inBufMode = false; } + + void SkipLookahed(size_t skip) + { + _bufPos += skip; + _cnt += skip; + } + + UInt64 GetVirtStreamPos() { return _streamPos - _bufCached + _bufPos; } + bool _inBufMode; - UInt32 m_Signature; - UInt64 m_Position; - UInt64 _processedCnt; - + bool IsArcOpen; bool CanStartNewVol; + UInt32 _signature; + CMyComPtr StreamRef; IInStream *Stream; IInStream *StartStream; + IArchiveOpenCallback *Callback; - bool IsArcOpen; + HRESULT Seek_SavePos(UInt64 offset); + HRESULT SeekToVol(int volIndex, UInt64 offset); + + HRESULT ReadFromCache(Byte *data, unsigned size, unsigned &processed); HRESULT ReadVols2(IArchiveOpenVolumeCallback *volCallback, unsigned start, int lastDisk, int zipDisk, unsigned numMissingVolsMax, unsigned &numMissingVols); HRESULT ReadVols(); - HRESULT Seek(UInt64 offset); - HRESULT FindMarker(IInStream *stream, const UInt64 *searchLimit); - HRESULT IncreaseRealPosition(Int64 addValue, bool &isFinished); + HRESULT FindMarker(const UInt64 *searchLimit); + HRESULT IncreaseRealPosition(UInt64 addValue, bool &isFinished); - HRESULT ReadBytes(void *data, UInt32 size, UInt32 *processedSize); - void SafeReadBytes(void *data, unsigned size); + HRESULT LookAhead(size_t minRequiredInBuffer); + void SafeRead(Byte *data, unsigned size); void ReadBuffer(CByteBuffer &buffer, unsigned size); - Byte ReadByte(); - UInt16 ReadUInt16(); + // Byte ReadByte(); + // UInt16 ReadUInt16(); UInt32 ReadUInt32(); UInt64 ReadUInt64(); - void Skip(unsigned num); - void Skip64(UInt64 num); - void ReadFileName(unsigned nameSize, AString &dest); - bool ReadExtra(unsigned extraSize, CExtraBlock &extraBlock, - UInt64 &unpackSize, UInt64 &packSize, UInt64 &localHeaderOffset, UInt32 &diskStartNumber); + void ReadSignature(); + + void Skip(size_t num); + HRESULT Skip64(UInt64 num, unsigned numFiles); + + bool ReadFileName(unsigned nameSize, AString &dest); + + bool ReadExtra(unsigned extraSize, CExtraBlock &extra, + UInt64 &unpackSize, UInt64 &packSize, UInt64 &localOffset, UInt32 &disk); bool ReadLocalItem(CItemEx &item); - HRESULT ReadLocalItemDescriptor(CItemEx &item); + HRESULT FindDescriptor(CItemEx &item, unsigned numFiles); HRESULT ReadCdItem(CItemEx &item); HRESULT TryEcd64(UInt64 offset, CCdInfo &cdInfo); HRESULT FindCd(bool checkOffsetMode); @@ -255,21 +308,28 @@ class CInArchive HRESULT ReadCd(CObjectVector &items, UInt32 &cdDisk, UInt64 &cdOffset, UInt64 &cdSize); HRESULT ReadLocals(CObjectVector &localItems); - HRESULT ReadHeaders2(CObjectVector &items); + HRESULT ReadHeaders(CObjectVector &items); HRESULT GetVolStream(unsigned vol, UInt64 pos, CMyComPtr &stream); + public: CInArchiveInfo ArcInfo; bool IsArc; bool IsZip64; + bool HeadersError; bool HeadersWarning; bool ExtraMinorError; bool UnexpectedEnd; + bool LocalsWereRead; + bool LocalsCenterMerged; bool NoCentralDir; + bool Overflow32bit; // = true, if zip without Zip64 extension support and it has some fields values truncated to 32-bits. + bool Cd_NumEntries_Overflow_16bit; // = true, if no Zip64 and 16-bit ecd:NumEntries was overflowed. bool MarkerIsFound; + bool MarkerIsSafe; bool IsMultiVol; bool UseDisk_in_SingleVol; @@ -277,9 +337,7 @@ public: CVols Vols; - IArchiveOpenCallback *Callback; - - CInArchive(): Stream(NULL), Callback(NULL), IsArcOpen(false) {} + CInArchive(): Stream(NULL), StartStream(NULL), Callback(NULL), IsArcOpen(false) {} UInt64 GetPhySize() const { @@ -301,7 +359,6 @@ public: void ClearRefs(); void Close(); HRESULT Open(IInStream *stream, const UInt64 *searchLimit, IArchiveOpenCallback *callback, CObjectVector &items); - HRESULT ReadHeaders(CObjectVector &items); bool IsOpen() const { return IsArcOpen; } @@ -329,7 +386,8 @@ public: } - HRESULT ReadLocalItemAfterCdItem(CItemEx &item, bool &isAvail); + HRESULT CheckDescriptor(const CItemEx &item); + HRESULT ReadLocalItemAfterCdItem(CItemEx &item, bool &isAvail, bool &headersError); HRESULT ReadLocalItemAfterCdItemFull(CItemEx &item); HRESULT GetItemStream(const CItemEx &item, bool seekPackData, CMyComPtr &stream); diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index e732df7c..4fc59f79 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -5,9 +5,12 @@ #include "../../../../C/CpuArch.h" #include "../../../../C/7zCrc.h" +#include "../../../Common/IntToString.h" #include "../../../Common/MyLinux.h" #include "../../../Common/StringConvert.h" +#include "../../../Windows/PropVariantUtils.h" + #include "../Common/ItemNameUtils.h" #include "ZipItem.h" @@ -17,6 +20,62 @@ namespace NZip { using namespace NFileHeader; +static const CUInt32PCharPair g_ExtraTypes[] = +{ + { NExtraID::kZip64, "Zip64" }, + { NExtraID::kNTFS, "NTFS" }, + { NExtraID::kStrongEncrypt, "StrongCrypto" }, + { NExtraID::kUnixTime, "UT" }, + { NExtraID::kUnixExtra, "UX" }, + { NExtraID::kIzUnicodeComment, "uc" }, + { NExtraID::kIzUnicodeName, "up" }, + { NExtraID::kWzAES, "WzAES" } +}; + +void CExtraSubBlock::PrintInfo(AString &s) const +{ + for (unsigned i = 0; i < ARRAY_SIZE(g_ExtraTypes); i++) + { + const CUInt32PCharPair &pair = g_ExtraTypes[i]; + if (pair.Value == ID) + { + s += pair.Name; + return; + } + } + { + char sz[32]; + sz[0] = '0'; + sz[1] = 'x'; + ConvertUInt32ToHex(ID, sz + 2); + s += sz; + } +} + + +void CExtraBlock::PrintInfo(AString &s) const +{ + if (Error) + s.Add_OptSpaced("Extra_ERROR"); + + if (MinorError) + s.Add_OptSpaced("Minor_Extra_ERROR"); + + if (IsZip64 || IsZip64_Error) + { + s.Add_OptSpaced("Zip64"); + if (IsZip64_Error) + s += "_ERROR"; + } + + FOR_VECTOR (i, SubBlocks) + { + s.Add_Space_if_NotEmpty(); + SubBlocks[i].PrintInfo(s); + } +} + + bool CExtraSubBlock::ExtractNtfsTime(unsigned index, FILETIME &ft) const { ft.dwHighDateTime = ft.dwLowDateTime = 0; @@ -83,6 +142,19 @@ bool CExtraSubBlock::ExtractUnixTime(bool isCentral, unsigned index, UInt32 &res } +bool CExtraSubBlock::ExtractUnixExtraTime(unsigned index, UInt32 &res) const +{ + res = 0; + const size_t size = Data.Size(); + unsigned offset = index * 4; + if (ID != NExtraID::kUnixExtra || size < offset + 4) + return false; + const Byte *p = (const Byte *)Data + offset; + res = GetUi32(p); + return true; +} + + bool CExtraBlock::GetNtfsTime(unsigned index, FILETIME &ft) const { FOR_VECTOR (i, SubBlocks) @@ -96,11 +168,29 @@ bool CExtraBlock::GetNtfsTime(unsigned index, FILETIME &ft) const bool CExtraBlock::GetUnixTime(bool isCentral, unsigned index, UInt32 &res) const { - FOR_VECTOR (i, SubBlocks) { - const CExtraSubBlock &sb = SubBlocks[i]; - if (sb.ID == NFileHeader::NExtraID::kUnixTime) - return sb.ExtractUnixTime(isCentral, index, res); + FOR_VECTOR (i, SubBlocks) + { + const CExtraSubBlock &sb = SubBlocks[i]; + if (sb.ID == NFileHeader::NExtraID::kUnixTime) + return sb.ExtractUnixTime(isCentral, index, res); + } + } + + switch (index) + { + case NUnixTime::kMTime: index = NUnixExtra::kMTime; break; + case NUnixTime::kATime: index = NUnixExtra::kATime; break; + default: return false; + } + + { + FOR_VECTOR (i, SubBlocks) + { + const CExtraSubBlock &sb = SubBlocks[i]; + if (sb.ID == NFileHeader::NExtraID::kUnixExtra) + return sb.ExtractUnixExtraTime(index, res); + } } return false; } diff --git a/CPP/7zip/Archive/Zip/ZipItem.h b/CPP/7zip/Archive/Zip/ZipItem.h index c134ec79..0cf9bd09 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.h +++ b/CPP/7zip/Archive/Zip/ZipItem.h @@ -22,11 +22,12 @@ struct CVersion struct CExtraSubBlock { - UInt16 ID; + UInt32 ID; CByteBuffer Data; bool ExtractNtfsTime(unsigned index, FILETIME &ft) const; bool ExtractUnixTime(bool isCentral, unsigned index, UInt32 &res) const; + bool ExtractUnixExtraTime(unsigned index, UInt32 &res) const; bool ExtractIzUnicode(UInt32 crc, AString &name) const { @@ -44,6 +45,8 @@ struct CExtraSubBlock return false; return CheckUTF8(name, false); } + + void PrintInfo(AString &s) const; }; const unsigned k_WzAesExtra_Size = 7; @@ -129,11 +132,22 @@ struct CStrongCryptoExtra bool CertificateIsUsed() const { return (Flags > 0x0001); } }; + struct CExtraBlock { CObjectVector SubBlocks; + bool Error; + bool MinorError; + bool IsZip64; + bool IsZip64_Error; - void Clear() { SubBlocks.Clear(); } + CExtraBlock(): Error(false), MinorError(false), IsZip64(false), IsZip64_Error(false) {} + + void Clear() + { + SubBlocks.Clear(); + IsZip64 = false; + } size_t GetSize() const { @@ -176,6 +190,8 @@ struct CExtraBlock bool GetNtfsTime(unsigned index, FILETIME &ft) const; bool GetUnixTime(bool isCentral, unsigned index, UInt32 &res) const; + void PrintInfo(AString &s) const; + void RemoveUnknownSubBlocks() { for (unsigned i = SubBlocks.Size(); i != 0;) @@ -206,12 +222,19 @@ public: CExtraBlock LocalExtra; + unsigned GetDescriptorSize() const { return LocalExtra.IsZip64 ? kDataDescriptorSize64 : kDataDescriptorSize32; } + + UInt64 GetPackSizeWithDescriptor() const + { return PackSize + (HasDescriptor() ? GetDescriptorSize() : 0); } + bool IsUtf8() const { return (Flags & NFileHeader::NFlags::kUtf8) != 0; } bool IsEncrypted() const { return (Flags & NFileHeader::NFlags::kEncrypted) != 0; } bool IsStrongEncrypted() const { return IsEncrypted() && (Flags & NFileHeader::NFlags::kStrongEncrypted) != 0; } bool IsAesEncrypted() const { return IsEncrypted() && (IsStrongEncrypted() || Method == NFileHeader::NCompressionMethod::kWzAES); } bool IsLzmaEOS() const { return (Flags & NFileHeader::NFlags::kLzmaEOS) != 0; } bool HasDescriptor() const { return (Flags & NFileHeader::NFlags::kDescriptorUsedMask) != 0; } + + unsigned GetDeflateLevel() const { return (Flags >> 1) & 3; } bool IsDir() const; diff --git a/CPP/7zip/Archive/Zip/ZipOut.cpp b/CPP/7zip/Archive/Zip/ZipOut.cpp index 2a1ba2c4..1fdc24f8 100644 --- a/CPP/7zip/Archive/Zip/ZipOut.cpp +++ b/CPP/7zip/Archive/Zip/ZipOut.cpp @@ -21,48 +21,20 @@ HRESULT COutArchive::Create(IOutStream *outStream) return m_Stream->Seek(0, STREAM_SEEK_CUR, &m_Base); } -void COutArchive::MoveCurPos(UInt64 distanceToMove) -{ - m_CurPos += distanceToMove; // test overflow -} - -void COutArchive::SeekToRelatPos(UInt64 offset) +void COutArchive::SeekToCurPos() { - HRESULT res = m_Stream->Seek(m_Base + offset, STREAM_SEEK_SET, NULL); + HRESULT res = m_Stream->Seek(m_Base + m_CurPos, STREAM_SEEK_SET, NULL); if (res != S_OK) throw CSystemException(res); } -void COutArchive::PrepareWriteCompressedDataZip64(unsigned fileNameLen, bool isZip64, bool aesEncryption) -{ - m_IsZip64 = isZip64; - m_ExtraSize = isZip64 ? (4 + 8 + 8) : 0; - if (aesEncryption) - m_ExtraSize += 4 + k_WzAesExtra_Size; - m_LocalFileHeaderSize = kLocalHeaderSize + fileNameLen + m_ExtraSize; -} - -void COutArchive::PrepareWriteCompressedData(unsigned fileNameLen, UInt64 unPackSize, bool aesEncryption) -{ - // We use Zip64, if unPackSize size is larger than 0xF8000000 to support - // cases when compressed size can be about 3% larger than uncompressed size - - PrepareWriteCompressedDataZip64(fileNameLen, unPackSize >= (UInt32)0xF8000000, aesEncryption); -} - #define DOES_NEED_ZIP64(v) (v >= (UInt32)0xFFFFFFFF) +// #define DOES_NEED_ZIP64(v) (v >= 0) -void COutArchive::PrepareWriteCompressedData2(unsigned fileNameLen, UInt64 unPackSize, UInt64 packSize, bool aesEncryption) -{ - bool isZip64 = - DOES_NEED_ZIP64(unPackSize) || - DOES_NEED_ZIP64(packSize); - PrepareWriteCompressedDataZip64(fileNameLen, isZip64, aesEncryption); -} -void COutArchive::WriteBytes(const void *buffer, UInt32 size) +void COutArchive::WriteBytes(const void *data, size_t size) { - m_OutBuffer.WriteBytes(buffer, size); + m_OutBuffer.WriteBytes(data, size); m_CurPos += size; } @@ -74,11 +46,8 @@ void COutArchive::Write8(Byte b) void COutArchive::Write16(UInt16 val) { - for (int i = 0; i < 2; i++) - { - Write8((Byte)val); - val >>= 8; - } + Write8((Byte)val); + Write8((Byte)(val >> 8)); } void COutArchive::Write32(UInt32 val) @@ -101,15 +70,12 @@ void COutArchive::Write64(UInt64 val) void COutArchive::WriteExtra(const CExtraBlock &extra) { - if (extra.SubBlocks.Size() != 0) + FOR_VECTOR (i, extra.SubBlocks) { - FOR_VECTOR (i, extra.SubBlocks) - { - const CExtraSubBlock &subBlock = extra.SubBlocks[i]; - Write16(subBlock.ID); - Write16((UInt16)subBlock.Data.Size()); - WriteBytes(subBlock.Data, (UInt32)subBlock.Data.Size()); - } + const CExtraSubBlock &subBlock = extra.SubBlocks[i]; + Write16((UInt16)subBlock.ID); + Write16((UInt16)subBlock.Data.Size()); + WriteBytes(subBlock.Data, (UInt16)subBlock.Data.Size()); } } @@ -125,40 +91,65 @@ void COutArchive::WriteCommonItemInfo(const CLocalItem &item, bool isZip64) Write16(item.Flags); Write16(item.Method); Write32(item.Time); - Write32(item.Crc); } + #define WRITE_32_VAL_SPEC(__v, __isZip64) Write32((__isZip64) ? 0xFFFFFFFF : (UInt32)(__v)); -void COutArchive::WriteLocalHeader(const CLocalItem &item) + +void COutArchive::WriteLocalHeader(CItemOut &item, bool needCheck) { - SeekToCurPos(); + m_LocalHeaderPos = m_CurPos; + item.LocalHeaderPos = m_CurPos; - bool isZip64 = m_IsZip64 || + bool isZip64 = DOES_NEED_ZIP64(item.PackSize) || DOES_NEED_ZIP64(item.Size); - + + if (needCheck && m_IsZip64) + isZip64 = true; + + const UInt32 localExtraSize = (UInt32)((isZip64 ? (4 + 8 + 8): 0) + item.LocalExtra.GetSize()); + if ((UInt16)localExtraSize != localExtraSize) + throw CSystemException(E_FAIL); + if (needCheck && m_ExtraSize != localExtraSize) + throw CSystemException(E_FAIL); + + m_IsZip64 = isZip64; + m_ExtraSize = localExtraSize; + + item.LocalExtra.IsZip64 = isZip64; + Write32(NSignature::kLocalFileHeader); + WriteCommonItemInfo(item, isZip64); + + Write32(item.HasDescriptor() ? 0 : item.Crc); - WRITE_32_VAL_SPEC(item.PackSize, isZip64); - WRITE_32_VAL_SPEC(item.Size, isZip64); - - Write16((UInt16)item.Name.Len()); + UInt64 packSize = item.PackSize; + UInt64 size = item.Size; + + if (item.HasDescriptor()) { - UInt16 localExtraSize = (UInt16)((isZip64 ? (4 + 8 + 8): 0) + item.LocalExtra.GetSize()); - if (localExtraSize != m_ExtraSize) - throw CSystemException(E_FAIL); + packSize = 0; + size = 0; } - Write16((UInt16)m_ExtraSize); - WriteBytes((const char *)item.Name, item.Name.Len()); + + WRITE_32_VAL_SPEC(packSize, isZip64); + WRITE_32_VAL_SPEC(size, isZip64); + + Write16((UInt16)item.Name.Len()); + + Write16((UInt16)localExtraSize); + + WriteBytes((const char *)item.Name, (UInt16)item.Name.Len()); if (isZip64) { Write16(NFileHeader::NExtraID::kZip64); Write16(8 + 8); - Write64(item.Size); - Write64(item.PackSize); + Write64(size); + Write64(packSize); } WriteExtra(item.LocalExtra); @@ -166,10 +157,57 @@ void COutArchive::WriteLocalHeader(const CLocalItem &item) // Why don't we write NTFS timestamps to local header? // Probably we want to reduce size of archive? + const UInt32 localFileHeaderSize = (UInt32)(m_CurPos - m_LocalHeaderPos); + if (needCheck && m_LocalFileHeaderSize != localFileHeaderSize) + throw CSystemException(E_FAIL); + m_LocalFileHeaderSize = localFileHeaderSize; + m_OutBuffer.FlushWithCheck(); - MoveCurPos(item.PackSize); } + +void COutArchive::WriteLocalHeader_Replace(CItemOut &item) +{ + m_CurPos = m_LocalHeaderPos + m_LocalFileHeaderSize + item.PackSize; + + if (item.HasDescriptor()) + { + WriteDescriptor(item); + m_OutBuffer.FlushWithCheck(); + } + + const UInt64 nextPos = m_CurPos; + m_CurPos = m_LocalHeaderPos; + SeekToCurPos(); + WriteLocalHeader(item, true); + m_CurPos = nextPos; + SeekToCurPos(); +} + + +void COutArchive::WriteDescriptor(const CItemOut &item) +{ + Byte buf[kDataDescriptorSize64]; + SetUi32(buf, NSignature::kDataDescriptor); + SetUi32(buf + 4, item.Crc); + unsigned descriptorSize; + if (m_IsZip64) + { + SetUi64(buf + 8, item.PackSize); + SetUi64(buf + 16, item.Size); + descriptorSize = kDataDescriptorSize64; + } + else + { + SetUi32(buf + 8, (UInt32)item.PackSize); + SetUi32(buf + 12, (UInt32)item.Size); + descriptorSize = kDataDescriptorSize32; + } + WriteBytes(buf, descriptorSize); +} + + + void COutArchive::WriteCentralHeader(const CItemOut &item) { bool isUnPack64 = DOES_NEED_ZIP64(item.Size); @@ -182,6 +220,7 @@ void COutArchive::WriteCentralHeader(const CItemOut &item) Write8(item.MadeByVersion.HostOS); WriteCommonItemInfo(item, isZip64); + Write32(item.Crc); WRITE_32_VAL_SPEC(item.PackSize, isPack64); WRITE_32_VAL_SPEC(item.Size, isUnPack64); @@ -196,7 +235,10 @@ void COutArchive::WriteCentralHeader(const CItemOut &item) item.CentralExtra.GetSize()); Write16(centralExtraSize); // test it; - Write16((UInt16)item.Comment.Size()); + + const UInt16 commentSize = (UInt16)item.Comment.Size(); + + Write16(commentSize); Write16(0); // DiskNumberStart; Write16(item.InternalAttrib); Write32(item.ExternalAttrib); @@ -228,14 +270,12 @@ void COutArchive::WriteCentralHeader(const CItemOut &item) } WriteExtra(item.CentralExtra); - if (item.Comment.Size() > 0) - WriteBytes(item.Comment, (UInt32)item.Comment.Size()); + if (commentSize != 0) + WriteBytes(item.Comment, commentSize); } void COutArchive::WriteCentralDir(const CObjectVector &items, const CByteBuffer *comment) { - SeekToCurPos(); - UInt64 cdOffset = GetCurPos(); FOR_VECTOR (i, items) WriteCentralHeader(items[i]); @@ -252,6 +292,11 @@ void COutArchive::WriteCentralDir(const CObjectVector &items, const CB { Write32(NSignature::kEcd64); Write64(kEcd64_MainSize); + + // to test extra block: + // const UInt32 extraSize = 1 << 26; + // Write64(kEcd64_MainSize + extraSize); + Write16(45); // made by version Write16(45); // extract version Write32(0); // ThisDiskNumber = 0; @@ -261,6 +306,8 @@ void COutArchive::WriteCentralDir(const CObjectVector &items, const CB Write64((UInt64)cdSize); Write64((UInt64)cdOffset); + // for (UInt32 iii = 0; iii < extraSize; iii++) Write8(1); + Write32(NSignature::kEcd64Locator); Write32(0); // number of the disk with the start of the zip64 end of central directory Write64(cd64EndOffset); @@ -276,37 +323,23 @@ void COutArchive::WriteCentralDir(const CObjectVector &items, const CB WRITE_32_VAL_SPEC(cdSize, cdSize64); WRITE_32_VAL_SPEC(cdOffset, cdOffset64); - UInt32 commentSize = (UInt32)(comment ? comment->Size() : 0); + const UInt16 commentSize = (UInt16)(comment ? comment->Size() : 0); Write16((UInt16)commentSize); - if (commentSize > 0) + if (commentSize != 0) WriteBytes((const Byte *)*comment, commentSize); m_OutBuffer.FlushWithCheck(); } -void COutArchive::CreateStreamForCompressing(IOutStream **outStream) +void COutArchive::CreateStreamForCompressing(CMyComPtr &outStream) { COffsetOutStream *streamSpec = new COffsetOutStream; - CMyComPtr tempStream(streamSpec); - streamSpec->Init(m_Stream, m_Base + m_CurPos + m_LocalFileHeaderSize); - *outStream = tempStream.Detach(); -} - -/* -void COutArchive::SeekToPackedDataPosition() -{ - SeekTo(m_BasePosition + m_LocalFileHeaderSize); -} -*/ - -void COutArchive::SeekToCurPos() -{ - SeekToRelatPos(m_CurPos); + outStream = streamSpec; + streamSpec->Init(m_Stream, m_Base + m_CurPos); } -void COutArchive::CreateStreamForCopying(ISequentialOutStream **outStream) +void COutArchive::CreateStreamForCopying(CMyComPtr &outStream) { - CMyComPtr tempStream(m_Stream); - *outStream = tempStream.Detach(); + outStream = m_Stream; } }} diff --git a/CPP/7zip/Archive/Zip/ZipOut.h b/CPP/7zip/Archive/Zip/ZipOut.h index 056d0d09..0a0ac0c8 100644 --- a/CPP/7zip/Archive/Zip/ZipOut.h +++ b/CPP/7zip/Archive/Zip/ZipOut.h @@ -5,7 +5,6 @@ #include "../../../Common/MyCom.h" -#include "../../IStream.h" #include "../../Common/OutBuffer.h" #include "ZipItem.h" @@ -13,8 +12,6 @@ namespace NArchive { namespace NZip { -// can throw CSystemException and COutBufferException - class CItemOut: public CItem { public: @@ -28,21 +25,23 @@ public: CItemOut(): NtfsTimeIsDefined(false) {} }; + +// COutArchive can throw CSystemException and COutBufferException + class COutArchive { - CMyComPtr m_Stream; COutBuffer m_OutBuffer; + CMyComPtr m_Stream; - UInt64 m_Base; // Base of arc (offset in output Stream) + UInt64 m_Base; // Base of archive (offset in output Stream) UInt64 m_CurPos; // Curent position in archive (relative from m_Base) + UInt64 m_LocalHeaderPos; // LocalHeaderPos (relative from m_Base) for last WriteLocalHeader() call UInt32 m_LocalFileHeaderSize; UInt32 m_ExtraSize; bool m_IsZip64; - void SeekToRelatPos(UInt64 offset); - - void WriteBytes(const void *buffer, UInt32 size); + void WriteBytes(const void *data, size_t size); void Write8(Byte b); void Write16(UInt16 val); void Write32(UInt32 val); @@ -57,30 +56,26 @@ class COutArchive void WriteCommonItemInfo(const CLocalItem &item, bool isZip64); void WriteCentralHeader(const CItemOut &item); - void PrepareWriteCompressedDataZip64(unsigned fileNameLen, bool isZip64, bool aesEncryption); - + void SeekToCurPos(); public: HRESULT Create(IOutStream *outStream); - void MoveCurPos(UInt64 distanceToMove); UInt64 GetCurPos() const { return m_CurPos; } - void SeekToCurPos(); - - void PrepareWriteCompressedData(unsigned fileNameLen, UInt64 unPackSize, bool aesEncryption); - void PrepareWriteCompressedData2(unsigned fileNameLen, UInt64 unPackSize, UInt64 packSize, bool aesEncryption); - void WriteLocalHeader(const CLocalItem &item); - - void WriteLocalHeader_And_SeekToNextFile(const CLocalItem &item) + void MoveCurPos(UInt64 distanceToMove) { - WriteLocalHeader(item); - SeekToCurPos(); + m_CurPos += distanceToMove; } + void WriteLocalHeader(CItemOut &item, bool needCheck = false); + void WriteLocalHeader_Replace(CItemOut &item); + + void WriteDescriptor(const CItemOut &item); + void WriteCentralDir(const CObjectVector &items, const CByteBuffer *comment); - void CreateStreamForCompressing(IOutStream **outStream); - void CreateStreamForCopying(ISequentialOutStream **outStream); + void CreateStreamForCompressing(CMyComPtr &outStream); + void CreateStreamForCopying(CMyComPtr &outStream); }; }} diff --git a/CPP/7zip/Archive/Zip/ZipRegister.cpp b/CPP/7zip/Archive/Zip/ZipRegister.cpp index 6674189f..e6929f1b 100644 --- a/CPP/7zip/Archive/Zip/ZipRegister.cpp +++ b/CPP/7zip/Archive/Zip/ZipRegister.cpp @@ -10,13 +10,14 @@ namespace NArchive { namespace NZip { static const Byte k_Signature[] = { - 4, 0x50, 0x4B, 0x03, 0x04, - 4, 0x50, 0x4B, 0x05, 0x06, - 6, 0x50, 0x4B, 0x07, 0x08, 0x50, 0x4B, - 6, 0x50, 0x4B, 0x30, 0x30, 0x50, 0x4B }; + 4, 0x50, 0x4B, 0x03, 0x04, // Local + 4, 0x50, 0x4B, 0x05, 0x06, // Ecd + 4, 0x50, 0x4B, 0x06, 0x06, // Ecd64 + 6, 0x50, 0x4B, 0x07, 0x08, 0x50, 0x4B, // Span / Descriptor + 6, 0x50, 0x4B, 0x30, 0x30, 0x50, 0x4B }; // NoSpan REGISTER_ARC_IO( - "zip", "zip z01 zipx jar xpi odt ods docx xlsx epub", 0, 1, + "zip", "zip z01 zipx jar xpi odt ods docx xlsx epub ipa apk appx", 0, 1, k_Signature, 0, NArcInfoFlags::kFindSignature | diff --git a/CPP/7zip/Archive/Zip/ZipUpdate.cpp b/CPP/7zip/Archive/Zip/ZipUpdate.cpp index bc50c1d7..81f48a2a 100644 --- a/CPP/7zip/Archive/Zip/ZipUpdate.cpp +++ b/CPP/7zip/Archive/Zip/ZipUpdate.cpp @@ -42,32 +42,38 @@ static const Byte kHostOS = static const Byte kMadeByHostOS = kHostOS; static const Byte kExtractHostOS = kHostOS; -static const Byte kMethodForDirectory = NFileHeader::NCompressionMethod::kStored; +static const Byte kMethodForDirectory = NFileHeader::NCompressionMethod::kStore; -static HRESULT CopyBlockToArchive(ISequentialInStream *inStream, UInt64 size, - COutArchive &outArchive, ICompressProgressInfo *progress) + +static void AddAesExtra(CItem &item, Byte aesKeyMode, UInt16 method) { - CMyComPtr outStream; - outArchive.CreateStreamForCopying(&outStream); - return NCompress::CopyStream_ExactSize(inStream, outStream, size, progress); + CWzAesExtra wzAesField; + wzAesField.Strength = aesKeyMode; + wzAesField.Method = method; + item.Method = NFileHeader::NCompressionMethod::kWzAES; + item.Crc = 0; + CExtraSubBlock sb; + wzAesField.SetSubBlock(sb); + item.LocalExtra.SubBlocks.Add(sb); + item.CentralExtra.SubBlocks.Add(sb); } + static void SetFileHeader( - COutArchive &archive, const CCompressionMethodMode &options, const CUpdateItem &ui, // bool isSeqMode, CItemOut &item) { item.Size = ui.Size; - bool isDir; + bool isDir = ui.IsDir; item.ClearFlags(); if (ui.NewProps) { - isDir = ui.IsDir; item.Name = ui.Name; + item.Comment = ui.Comment; item.SetUtf8(ui.IsUtf8); item.ExternalAttrib = ui.Attrib; item.Time = ui.Time; @@ -76,10 +82,11 @@ static void SetFileHeader( item.Ntfs_CTime = ui.Ntfs_CTime; item.NtfsTimeIsDefined = ui.NtfsTimeIsDefined; } + /* else isDir = item.IsDir(); + */ - item.LocalHeaderPos = archive.GetCurPos(); item.MadeByVersion.HostOS = kMadeByHostOS; item.MadeByVersion.Version = NFileHeader::NCompressionMethod::kMadeByProgramVersion; @@ -97,14 +104,32 @@ static void SetFileHeader( item.Size = 0; item.Crc = 0; } + + item.LocalExtra.Clear(); + item.CentralExtra.Clear(); + + if (isDir) + { + item.ExtractVersion.Version = NFileHeader::NCompressionMethod::kExtractVersion_Dir; + item.Method = kMethodForDirectory; + item.PackSize = 0; + item.Size = 0; + item.Crc = 0; + } + else if (options.IsRealAesMode()) + AddAesExtra(item, options.AesKeyMode, (Byte)(options.MethodSequence.IsEmpty() ? 8 : options.MethodSequence[0])); } +// we call SetItemInfoFromCompressingResult() after SetFileHeader() + static void SetItemInfoFromCompressingResult(const CCompressingResult &compressingResult, bool isAesMode, Byte aesKeyMode, CItem &item) { item.ExtractVersion.Version = compressingResult.ExtractVersion; item.Method = compressingResult.Method; + if (compressingResult.Method == NFileHeader::NCompressionMethod::kLZMA && compressingResult.LzmaEos) + item.Flags |= NFileHeader::NFlags::kLzmaEOS; item.Crc = compressingResult.CRC; item.Size = compressingResult.UnpackSize; item.PackSize = compressingResult.PackSize; @@ -113,17 +138,7 @@ static void SetItemInfoFromCompressingResult(const CCompressingResult &compressi item.CentralExtra.Clear(); if (isAesMode) - { - CWzAesExtra wzAesField; - wzAesField.Strength = aesKeyMode; - wzAesField.Method = compressingResult.Method; - item.Method = NFileHeader::NCompressionMethod::kWzAES; - item.Crc = 0; - CExtraSubBlock sb; - wzAesField.SetSubBlock(sb); - item.LocalExtra.SubBlocks.Add(sb); - item.CentralExtra.SubBlocks.Add(sb); - } + AddAesExtra(item, aesKeyMode, compressingResult.Method); } @@ -151,6 +166,7 @@ struct CThreadInfo HRESULT Result; CCompressingResult CompressingResult; + bool SeqMode; bool IsFree; UInt32 UpdateIndex; UInt32 FileTime; @@ -160,6 +176,7 @@ struct CThreadInfo ProgressSpec(0), OutStreamSpec(0), Coder(options), + SeqMode(false), FileTime(0) {} @@ -193,7 +210,7 @@ void CThreadInfo::WaitAndCode() Result = Coder.Compress( EXTERNAL_CODECS_LOC_VARS - InStream, OutStream, FileTime, Progress, CompressingResult); + InStream, OutStream, SeqMode, FileTime, Progress, CompressingResult); if (Result == S_OK && Progress) Result = Progress->SetRatioInfo(&CompressingResult.UnpackSize, &CompressingResult.PackSize); @@ -342,6 +359,8 @@ static HRESULT UpdateItemOldData( NUpdateNotifyOp::kReplicate)) } + UInt64 rangeSize; + if (ui.NewProps) { if (item.HasDescriptor()) @@ -349,14 +368,11 @@ static HRESULT UpdateItemOldData( // use old name size. - CMyComPtr packStream; - RINOK(inArchive->GetItemStream(itemEx, true, packStream)); - if (!packStream) - return E_NOTIMPL; - // we keep ExternalAttrib and some another properties from old archive // item.ExternalAttrib = ui.Attrib; + // if we don't change Comment, we keep Comment from OldProperties + item.Comment = ui.Comment; item.Name = ui.Name; item.SetUtf8(ui.IsUtf8); item.Time = ui.Time; @@ -367,46 +383,37 @@ static HRESULT UpdateItemOldData( item.CentralExtra.RemoveUnknownSubBlocks(); item.LocalExtra.RemoveUnknownSubBlocks(); - item.LocalHeaderPos = archive.GetCurPos(); - archive.PrepareWriteCompressedData2(item.Name.Len(), item.Size, item.PackSize, item.LocalExtra.HasWzAes()); archive.WriteLocalHeader(item); - - RINOK(CopyBlockToArchive(packStream, itemEx.PackSize, archive, progress)); - - complexity += itemEx.PackSize; + rangeSize = item.GetPackSizeWithDescriptor(); } else { - CMyComPtr packStream; - RINOK(inArchive->GetItemStream(itemEx, false, packStream)); - if (!packStream) - return E_NOTIMPL; - - // set new header position item.LocalHeaderPos = archive.GetCurPos(); - - const UInt64 rangeSize = itemEx.GetLocalFullSize(); - - RINOK(CopyBlockToArchive(packStream, rangeSize, archive, progress)); - - complexity += rangeSize; - archive.MoveCurPos(rangeSize); + rangeSize = itemEx.GetLocalFullSize(); } - return S_OK; + CMyComPtr packStream; + + RINOK(inArchive->GetItemStream(itemEx, ui.NewProps, packStream)); + if (!packStream) + return E_NOTIMPL; + + complexity += rangeSize; + + CMyComPtr outStream; + archive.CreateStreamForCopying(outStream); + HRESULT res = NCompress::CopyStream_ExactSize(packStream, outStream, rangeSize, progress); + archive.MoveCurPos(rangeSize); + return res; } static void WriteDirHeader(COutArchive &archive, const CCompressionMethodMode *options, const CUpdateItem &ui, CItemOut &item) { - SetFileHeader(archive, *options, ui, item); - archive.PrepareWriteCompressedData(item.Name.Len(), ui.Size, - // options->IsRealAesMode() - false // fixed 9.31 - ); - archive.WriteLocalHeader_And_SeekToNextFile(item); + SetFileHeader(*options, ui, item); + archive.WriteLocalHeader(item); } @@ -490,6 +497,8 @@ static HRESULT Update2St( if (!ui.NewProps || !ui.NewData) { + // Note: for (ui.NewProps && !ui.NewData) it copies Props from old archive, + // But we will rewrite all important properties later. But we can keep some properties like Comment itemEx = inputItems[ui.IndexInArc]; if (inArchive->ReadLocalItemAfterCdItemFull(itemEx) != S_OK) return E_NOTIMPL; @@ -498,7 +507,8 @@ static HRESULT Update2St( if (ui.NewData) { - bool isDir = ((ui.NewProps) ? ui.IsDir : item.IsDir()); + // bool isDir = ((ui.NewProps) ? ui.IsDir : item.IsDir()); + bool isDir = ui.IsDir; if (isDir) { WriteDirHeader(archive, options, ui, item); @@ -517,28 +527,39 @@ static HRESULT Update2St( if (!fileInStream) return E_INVALIDARG; - // bool isSeqMode = false; - /* + bool seqMode; { CMyComPtr inStream2; fileInStream->QueryInterface(IID_IInStream, (void **)&inStream2); - isSeqMode = (inStream2 == NULL); + seqMode = (inStream2 == NULL); } - */ + // seqMode = true; // to test seqMode UpdatePropsFromStream(ui, fileInStream, updateCallback, totalComplexity); - SetFileHeader(archive, *options, ui, item); + SetFileHeader(*options, ui, item); + + item.SetDescriptorMode(seqMode); // file Size can be 64-bit !!! - archive.PrepareWriteCompressedData(item.Name.Len(), ui.Size, options->IsRealAesMode()); + CCompressingResult compressingResult; + + RINOK(compressor.Set_Pre_CompressionResult( + seqMode, + ui.Size, + compressingResult)); + + SetItemInfoFromCompressingResult(compressingResult, options->IsRealAesMode(), options->AesKeyMode, item); + + archive.WriteLocalHeader(item); + CMyComPtr outStream; - archive.CreateStreamForCompressing(&outStream); + archive.CreateStreamForCompressing(outStream); RINOK(compressor.Compress( EXTERNAL_CODECS_LOC_VARS fileInStream, outStream, - ui.Time, + seqMode, ui.Time, progress, compressingResult)); if (compressingResult.FileTimeWasUsed) @@ -551,7 +572,9 @@ static HRESULT Update2St( } SetItemInfoFromCompressingResult(compressingResult, options->IsRealAesMode(), options->AesKeyMode, item); - archive.WriteLocalHeader_And_SeekToNextFile(item); + + archive.WriteLocalHeader_Replace(item); + RINOK(updateCallback->SetOperationResult(NArchive::NUpdate::NOperationResult::kOK)); unpackSizeTotal += item.Size; packSizeTotal += item.PackSize; @@ -561,7 +584,9 @@ static HRESULT Update2St( { UInt64 complexity = 0; lps->SendRatio = false; + RINOK(UpdateItemOldData(archive, inArchive, itemEx, ui, item, progress, opCallback, complexity)); + lps->SendRatio = true; lps->ProgressOffset += complexity; } @@ -591,6 +616,7 @@ static HRESULT Update2( CMyComPtr opCallback; updateCallback->QueryInterface(IID_IArchiveUpdateCallbackFile, (void **)&opCallback); + bool unknownComplexity = false; UInt64 complexity = 0; UInt64 numFilesToCompress = 0; UInt64 numBytesToCompress = 0; @@ -602,7 +628,10 @@ static HRESULT Update2( const CUpdateItem &ui = updateItems[i]; if (ui.NewData) { - complexity += ui.Size; + if (ui.Size == (UInt64)(Int64)-1) + unknownComplexity = true; + else + complexity += ui.Size; numBytesToCompress += ui.Size; numFilesToCompress++; /* @@ -625,19 +654,49 @@ static HRESULT Update2( if (comment) complexity += comment->Size(); complexity++; // end of central - updateCallback->SetTotal(complexity); + + if (!unknownComplexity) + updateCallback->SetTotal(complexity); UInt64 totalComplexity = complexity; - CAddCommon compressor(options); + CCompressionMethodMode options2 = options; + + if (options2._methods.IsEmpty()) + { + // we need method item, if default method was used + options2._methods.AddNew(); + } + + CAddCommon compressor(options2); complexity = 0; - CCompressionMethodMode options2 = options; + const Byte method = options.MethodSequence.Front(); + + COneMethodInfo *oneMethodMain = NULL; + if (!options2._methods.IsEmpty()) + oneMethodMain = &options2._methods[0]; + + { + FOR_VECTOR (mi, options2._methods) + { + options2.SetGlobalLevelTo(options2._methods[mi]); + } + } + + if (oneMethodMain) + { + // appnote recommends to use EOS marker for LZMA. + if (method == NFileHeader::NCompressionMethod::kLZMA) + oneMethodMain->AddProp_EndMarker_if_NotFound(true); + } + #ifndef _7ZIP_ST - UInt32 numThreads = options.NumThreads; + UInt32 numThreads = options._numThreads; + const UInt32 kNumMaxThreads = 64; if (numThreads > kNumMaxThreads) numThreads = kNumMaxThreads; @@ -646,7 +705,6 @@ static HRESULT Update2( if (numThreads < 1) numThreads = 1; - const size_t kMemPerThread = (1 << 25); const size_t kBlockSize = 1 << 16; @@ -655,44 +713,69 @@ static HRESULT Update2( if (numFilesToCompress <= 1) mtMode = false; - Byte method = options.MethodSequence.Front(); - if (!mtMode) { - if (options2.MethodInfo.FindProp(NCoderPropID::kNumThreads) < 0) + FOR_VECTOR (mi, options2._methods) { - // fixed for 9.31. bzip2 default is just one thread. - if (options2.NumThreadsWasChanged || method == NFileHeader::NCompressionMethod::kBZip2) - options2.MethodInfo.AddProp_NumThreads(numThreads); + COneMethodInfo &onem = options2._methods[mi]; + + if (onem.FindProp(NCoderPropID::kNumThreads) < 0) + { + // fixed for 9.31. bzip2 default is just one thread. + onem.AddProp_NumThreads(numThreads); + } } } else { - if (method == NFileHeader::NCompressionMethod::kStored && !options.PasswordIsDefined) + if (method == NFileHeader::NCompressionMethod::kStore && !options.PasswordIsDefined) numThreads = 1; + + if (oneMethodMain) + { + if (method == NFileHeader::NCompressionMethod::kBZip2) { bool fixedNumber; - UInt32 numBZip2Threads = options2.MethodInfo.Get_BZip2_NumThreads(fixedNumber); + UInt32 numBZip2Threads = oneMethodMain->Get_BZip2_NumThreads(fixedNumber); if (!fixedNumber) { - UInt64 averageSize = numBytesToCompress / numFilesToCompress; - UInt32 blockSize = options2.MethodInfo.Get_BZip2_BlockSize(); - UInt64 averageNumberOfBlocks = averageSize / blockSize + 1; + const UInt64 averageSize = numBytesToCompress / numFilesToCompress; + const UInt32 blockSize = oneMethodMain->Get_BZip2_BlockSize(); + const UInt64 averageNumberOfBlocks = averageSize / blockSize + 1; numBZip2Threads = 32; - if (averageNumberOfBlocks < numBZip2Threads) + if (numBZip2Threads > averageNumberOfBlocks) numBZip2Threads = (UInt32)averageNumberOfBlocks; - options2.MethodInfo.AddProp_NumThreads(numBZip2Threads); + oneMethodMain->AddProp_NumThreads(numBZip2Threads); } numThreads /= numBZip2Threads; } - if (method == NFileHeader::NCompressionMethod::kLZMA) + + if (method == NFileHeader::NCompressionMethod::kXz) { bool fixedNumber; + UInt32 numLzma2Threads = oneMethodMain->Get_Lzma2_NumThreads(fixedNumber); + if (!fixedNumber) + { + const UInt64 averageSize = numBytesToCompress / numFilesToCompress; + const UInt64 blockSize = oneMethodMain->Get_Lzma2_BlockSize(); + const UInt64 averageNumberOfBlocks = averageSize / blockSize + 1; + numLzma2Threads = 2; + if (numLzma2Threads > averageNumberOfBlocks) + numLzma2Threads = (UInt32)averageNumberOfBlocks; + oneMethodMain->AddProp_NumThreads(numLzma2Threads); + } + numThreads /= numLzma2Threads; + } + + if (method == NFileHeader::NCompressionMethod::kLZMA) + { // we suppose that default LZMA is 2 thread. So we don't change it - UInt32 numLZMAThreads = options2.MethodInfo.Get_Lzma_NumThreads(fixedNumber); + UInt32 numLZMAThreads = oneMethodMain->Get_Lzma_NumThreads(); numThreads /= numLZMAThreads; } + } + if (numThreads > numFilesToCompress) numThreads = (UInt32)numFilesToCompress; if (numThreads <= 1) @@ -747,6 +830,7 @@ static HRESULT Update2( threadInfo.ProgressSpec = new CMtCompressProgress(); threadInfo.Progress = threadInfo.ProgressSpec; threadInfo.ProgressSpec->Init(&mtCompressProgressMixer, (int)i); + threadInfo.SeqMode = false; // fix it ! threadInfo.FileTime = 0; // fix it ! RINOK(threadInfo.CreateThread()); } @@ -777,7 +861,9 @@ static HRESULT Update2( if (inArchive->ReadLocalItemAfterCdItemFull(itemEx) != S_OK) return E_NOTIMPL; (CItem &)item = itemEx; - if (item.IsDir()) + if (item.IsDir() != ui.IsDir) + return E_NOTIMPL; + if (ui.IsDir) continue; } @@ -849,7 +935,8 @@ static HRESULT Update2( if (ui.NewData) { - bool isDir = ((ui.NewProps) ? ui.IsDir : item.IsDir()); + // bool isDir = ((ui.NewProps) ? ui.IsDir : item.IsDir()); + bool isDir = ui.IsDir; if (isDir) { @@ -857,39 +944,51 @@ static HRESULT Update2( } else { - if (lastRealStreamItemIndex < (int)itemIndex) - { - lastRealStreamItemIndex = itemIndex; - SetFileHeader(archive, options, ui, item); - // file Size can be 64-bit !!! - archive.PrepareWriteCompressedData(item.Name.Len(), ui.Size, options.IsRealAesMode()); - } - CMemBlocks2 &memRef = refs.Refs[itemIndex]; if (memRef.Defined) { - CMyComPtr outStream; - archive.CreateStreamForCompressing(&outStream); - memRef.WriteToStream(memManager.GetBlockSize(), outStream); - SetFileHeader(archive, options, ui, item); + if (lastRealStreamItemIndex < (int)itemIndex) + lastRealStreamItemIndex = itemIndex; + + SetFileHeader(options, ui, item); // the BUG was fixed in 9.26: // SetItemInfoFromCompressingResult must be after SetFileHeader // to write correct Size. SetItemInfoFromCompressingResult(memRef.CompressingResult, options.IsRealAesMode(), options.AesKeyMode, item); - archive.WriteLocalHeader_And_SeekToNextFile(item); + archive.WriteLocalHeader(item); // RINOK(updateCallback->SetOperationResult(NArchive::NUpdate::NOperationResult::kOK)); + CMyComPtr outStream; + archive.CreateStreamForCopying(outStream); + memRef.WriteToStream(memManager.GetBlockSize(), outStream); + archive.MoveCurPos(item.PackSize); memRef.FreeOpt(&memManager); } else { + if (lastRealStreamItemIndex < (int)itemIndex) + { + lastRealStreamItemIndex = itemIndex; + SetFileHeader(options, ui, item); + + CCompressingResult compressingResult; + RINOK(compressor.Set_Pre_CompressionResult( + false, // seqMode + ui.Size, + compressingResult)); + SetItemInfoFromCompressingResult(compressingResult, options.IsRealAesMode(), options.AesKeyMode, item); + + // file Size can be 64-bit !!! + archive.WriteLocalHeader(item); + } + { CThreadInfo &thread = threads.Threads[threadIndices.Front()]; if (!thread.OutStreamSpec->WasUnlockEventSent()) { CMyComPtr outStream; - archive.CreateStreamForCompressing(&outStream); + archive.CreateStreamForCompressing(outStream); thread.OutStreamSpec->SetOutStream(outStream); thread.OutStreamSpec->SetRealStreamMode(); } @@ -918,10 +1017,10 @@ static HRESULT Update2( { RINOK(threadInfo.OutStreamSpec->WriteToRealStream()); threadInfo.OutStreamSpec->ReleaseOutStream(); - SetFileHeader(archive, options, ui, item); + SetFileHeader(options, ui, item); SetItemInfoFromCompressingResult(threadInfo.CompressingResult, options.IsRealAesMode(), options.AesKeyMode, item); - archive.WriteLocalHeader_And_SeekToNextFile(item); + archive.WriteLocalHeader_Replace(item); } else { diff --git a/CPP/7zip/Archive/Zip/ZipUpdate.h b/CPP/7zip/Archive/Zip/ZipUpdate.h index 15cbf69d..d5fda855 100644 --- a/CPP/7zip/Archive/Zip/ZipUpdate.h +++ b/CPP/7zip/Archive/Zip/ZipUpdate.h @@ -14,6 +14,7 @@ namespace NArchive { namespace NZip { +/* struct CUpdateRange { UInt64 Position; @@ -22,6 +23,7 @@ struct CUpdateRange // CUpdateRange() {}; CUpdateRange(UInt64 position, UInt64 size): Position(position), Size(size) {}; }; +*/ struct CUpdateItem { @@ -36,12 +38,23 @@ struct CUpdateItem UInt32 Time; UInt64 Size; AString Name; + CByteBuffer Comment; // bool Commented; // CUpdateRange CommentRange; FILETIME Ntfs_MTime; FILETIME Ntfs_ATime; FILETIME Ntfs_CTime; + void Clear() + { + IsDir = false; + NtfsTimeIsDefined = false; + IsUtf8 = false; + Size = 0; + Name.Empty(); + Comment.Free(); + } + CUpdateItem(): NtfsTimeIsDefined(false), IsUtf8(false), Size(0) {} }; -- cgit v1.2.3