// ArHandler.cpp #include "StdAfx.h" #include "../../../C/CpuArch.h" #include "../../Common/ComTry.h" #include "../../Common/IntToString.h" #include "../../Common/StringConvert.h" #include "../../Common/StringToInt.h" #include "../../Windows/PropVariant.h" #include "../../Windows/TimeUtils.h" #include "../Common/LimitedStreams.h" #include "../Common/ProgressUtils.h" #include "../Common/RegisterArc.h" #include "../Common/StreamObjects.h" #include "../Common/StreamUtils.h" #include "../Compress/CopyCoder.h" #include "Common/ItemNameUtils.h" using namespace NWindows; using namespace NTime; namespace NArchive { namespace NAr { /* The end of each file member (including last file in archive) is 2-bytes aligned. It uses 0xA padding if required. File Names: GNU/SVR4 variant (.a static library): / - archive symbol table // - the list of the long filenames, separated by one or more LF characters. /N - the reference to name string in long filenames list name/ - the name Microsoft variant (.lib static library): / - First linker file (archive symbol table) / - Second linker file // - the list of the long filenames, null-terminated. Each string begins immediately after the null byte in the previous string. /N - the reference to name string in long filenames list name/ - the name BSD (Mac OS X) variant: "__.SYMDEF" - archive symbol table or "__.SYMDEF SORTED" - archive symbol table #1/N - the real filename of length N is appended to the file header. */ static const unsigned kSignatureLen = 8; #define SIGNATURE { '!', '<', 'a', 'r', 'c', 'h', '>', 0x0A } static const Byte kSignature[kSignatureLen] = SIGNATURE; static const unsigned kNameSize = 16; static const unsigned kTimeSize = 12; static const unsigned kUserSize = 6; static const unsigned kModeSize = 8; static const unsigned kSizeSize = 10; static const unsigned kHeaderSize = kNameSize + kTimeSize + kUserSize * 2 + kModeSize + kSizeSize + 1 + 1; enum EType { kType_Ar, kType_ALib, kType_Deb, kType_Lib }; static const char * const k_TypeExtionsions[] = { "ar" , "a" , "deb" , "lib" }; enum ESubType { kSubType_None, kSubType_BSD }; /* struct CHeader { char Name[kNameSize]; char MTime[kTimeSize]; char User[kUserSize]; char Group[kUserSize]; char Mode[kModeSize]; char Size[kSizeSize]; char Quote; char NewLine; }; */ struct CItem { AString Name; UInt64 Size; UInt32 MTime; UInt32 User; UInt32 Group; UInt32 Mode; UInt64 HeaderPos; UInt64 HeaderSize; int TextFileIndex; int SameNameIndex; CItem(): TextFileIndex(-1), SameNameIndex(-1) {} UInt64 GetDataPos() const { return HeaderPos + HeaderSize; } }; class CInArchive { CMyComPtr m_Stream; public: UInt64 Position; ESubType SubType; HRESULT GetNextItem(CItem &itemInfo, bool &filled); HRESULT Open(IInStream *inStream); HRESULT SkipData(UInt64 dataSize) { return m_Stream->Seek(dataSize + (dataSize & 1), STREAM_SEEK_CUR, &Position); } }; HRESULT CInArchive::Open(IInStream *inStream) { SubType = kSubType_None; RINOK(inStream->Seek(0, STREAM_SEEK_CUR, &Position)); char signature[kSignatureLen]; RINOK(ReadStream_FALSE(inStream, signature, kSignatureLen)); Position += kSignatureLen; if (memcmp(signature, kSignature, kSignatureLen) != 0) return S_FALSE; m_Stream = inStream; return S_OK; } static unsigned RemoveTailSpaces(char *dest, const char *s, unsigned size) { memcpy(dest, s, size); for (; size != 0; size--) { if (dest[size - 1] != ' ') break; } dest[size] = 0; return size; } static bool OctalToNumber32(const char *s, unsigned size, UInt32 &res) { res = 0; char sz[32]; size = RemoveTailSpaces(sz, s, size); if (size == 0) return true; // some items doesn't contaion any numbers const char *end; UInt64 res64 = ConvertOctStringToUInt64(sz, &end); if ((unsigned)(end - sz) != size) return false; res = (UInt32)res64; return (res64 <= 0xFFFFFFFF); } static bool DecimalToNumber(const char *s, unsigned size, UInt64 &res) { res = 0; char sz[32]; size = RemoveTailSpaces(sz, s, size); if (size == 0) return true; // some items doesn't contaion any numbers const char *end; res = ConvertStringToUInt64(sz, &end); return ((unsigned)(end - sz) == size); } static bool DecimalToNumber32(const char *s, unsigned size, UInt32 &res) { UInt64 res64; if (!DecimalToNumber(s, size, res64)) return false; res = (UInt32)res64; return (res64 <= 0xFFFFFFFF); } #define RIF(x) { if (!(x)) return S_FALSE; } HRESULT CInArchive::GetNextItem(CItem &item, bool &filled) { filled = false; char header[kHeaderSize]; const char *cur = header; { size_t processedSize = sizeof(header); item.HeaderPos = Position; item.HeaderSize = kHeaderSize; RINOK(ReadStream(m_Stream, header, &processedSize)); if (processedSize != sizeof(header)) return S_OK; if (header[kHeaderSize - 2] != 0x60 || header[kHeaderSize - 1] != 0x0A) return S_OK; for (unsigned i = 0; i < kHeaderSize - 2; i++) // if (header[i] < 0x20) if (header[i] == 0) return S_OK; Position += processedSize; } UInt32 longNameLen = 0; if (cur[0] == '#' && cur[1] == '1' && cur[2] == '/' && cur[3] != 0) { // BSD variant RIF(DecimalToNumber32(cur + 3, kNameSize - 3 , longNameLen)); if (longNameLen >= (1 << 12)) longNameLen = 0; } else { char tempString[kNameSize + 1]; RemoveTailSpaces(tempString, cur, kNameSize); item.Name = tempString; } cur += kNameSize; RIF(DecimalToNumber32(cur, kTimeSize, item.MTime)); cur += kTimeSize; RIF(DecimalToNumber32(cur, kUserSize, item.User)); cur += kUserSize; RIF(DecimalToNumber32(cur, kUserSize, item.Group)); cur += kUserSize; RIF(OctalToNumber32(cur, kModeSize, item.Mode)); cur += kModeSize; RIF(DecimalToNumber(cur, kSizeSize, item.Size)); cur += kSizeSize; if (longNameLen != 0 && longNameLen <= item.Size) { SubType = kSubType_BSD; size_t processedSize = longNameLen; char *s = item.Name.GetBuf(longNameLen); HRESULT res = ReadStream(m_Stream, s, &processedSize); item.Name.ReleaseBuf_CalcLen(longNameLen); RINOK(res); if (processedSize != longNameLen) return S_OK; item.Size -= longNameLen; item.HeaderSize += longNameLen; Position += processedSize; } filled = true; return S_OK; } class CHandler: public IInArchive, public IInArchiveGetStream, public CMyUnknownImp { CObjectVector _items; CMyComPtr _stream; Int32 _mainSubfile; UInt64 _phySize; EType _type; ESubType _subType; int _longNames_FileIndex; AString _libFiles[2]; unsigned _numLibFiles; AString _errorMessage; bool _isArc; void UpdateErrorMessage(const char *s); HRESULT ParseLongNames(IInStream *stream); void ChangeDuplicateNames(); int FindItem(UInt32 offset) const; HRESULT AddFunc(UInt32 offset, const Byte *data, size_t size, size_t &pos); HRESULT ParseLibSymbols(IInStream *stream, unsigned fileIndex); public: MY_UNKNOWN_IMP2(IInArchive, IInArchiveGetStream) INTERFACE_IInArchive(;) STDMETHOD(GetStream)(UInt32 index, ISequentialInStream **stream); }; void CHandler::UpdateErrorMessage(const char *s) { if (!_errorMessage.IsEmpty()) _errorMessage += '\n'; _errorMessage += s; } static const Byte kArcProps[] = { kpidSubType }; static const Byte kProps[] = { kpidPath, kpidSize, kpidMTime, kpidPosixAttrib, kpidUser, kpidGroup }; IMP_IInArchive_Props IMP_IInArchive_ArcProps HRESULT CHandler::ParseLongNames(IInStream *stream) { unsigned i; for (i = 0; i < _items.Size(); i++) if (_items[i].Name == "//") break; if (i == _items.Size()) return S_OK; unsigned fileIndex = i; const CItem &item = _items[fileIndex]; if (item.Size > ((UInt32)1 << 30)) return S_FALSE; RINOK(stream->Seek(item.GetDataPos(), STREAM_SEEK_SET, NULL)); const size_t size = (size_t)item.Size; CByteArr p(size); RINOK(ReadStream_FALSE(stream, p, size)); for (i = 0; i < _items.Size(); i++) { CItem &item2 = _items[i]; if (item2.Name[0] != '/') continue; const char *ptr = item2.Name.Ptr(1); const char *end; UInt32 pos = ConvertStringToUInt32(ptr, &end); if (*end != 0 || end == ptr) continue; if (pos >= size) continue; UInt32 start = pos; for (;;) { if (pos >= size) return S_FALSE; char c = p[pos]; if (c == 0 || c == 0x0A) break; pos++; } item2.Name.SetFrom((const char *)(p + start), pos - start); } _longNames_FileIndex = fileIndex; return S_OK; } void CHandler::ChangeDuplicateNames() { unsigned i; for (i = 1; i < _items.Size(); i++) { CItem &item = _items[i]; if (item.Name[0] == '/') continue; CItem &prev = _items[i - 1]; if (item.Name == prev.Name) { if (prev.SameNameIndex < 0) prev.SameNameIndex = 0; item.SameNameIndex = prev.SameNameIndex + 1; } } for (i = 0; i < _items.Size(); i++) { CItem &item = _items[i]; if (item.SameNameIndex < 0) continue; char sz[32]; ConvertUInt32ToString(item.SameNameIndex + 1, sz); unsigned len = MyStringLen(sz); sz[len++] = '.'; sz[len] = 0; item.Name.Insert(0, sz); } } int CHandler::FindItem(UInt32 offset) const { unsigned left = 0, right = _items.Size(); while (left != right) { unsigned mid = (left + right) / 2; UInt64 midVal = _items[mid].HeaderPos; if (offset == midVal) return mid; if (offset < midVal) right = mid; else left = mid + 1; } return -1; } HRESULT CHandler::AddFunc(UInt32 offset, const Byte *data, size_t size, size_t &pos) { int fileIndex = FindItem(offset); if (fileIndex < (int)0) return S_FALSE; size_t i = pos; do { if (i >= size) return S_FALSE; } while (data[i++] != 0); AString &s = _libFiles[_numLibFiles]; const AString &name = _items[fileIndex].Name; s += name; if (!name.IsEmpty() && name.Back() == '/') s.DeleteBack(); s += " "; s += (const char *)(data + pos); s += (char)0xD; s += (char)0xA; pos = i; return S_OK; } static UInt32 Get32(const Byte *p, unsigned be) { if (be) return GetBe32(p); return GetUi32(p); } HRESULT CHandler::ParseLibSymbols(IInStream *stream, unsigned fileIndex) { CItem &item = _items[fileIndex]; if (item.Name != "/" && item.Name != "__.SYMDEF" && item.Name != "__.SYMDEF SORTED") return S_OK; if (item.Size > ((UInt32)1 << 30) || item.Size < 4) return S_OK; RINOK(stream->Seek(item.GetDataPos(), STREAM_SEEK_SET, NULL)); size_t size = (size_t)item.Size; CByteArr p(size); RINOK(ReadStream_FALSE(stream, p, size)); size_t pos = 0; if (item.Name != "/") { // __.SYMDEF parsing (BSD) unsigned be; for (be = 0; be < 2; be++) { UInt32 tableSize = Get32(p, be); pos = 4; if (size - pos < tableSize || (tableSize & 7) != 0) continue; size_t namesStart = pos + tableSize; UInt32 namesSize = Get32(p + namesStart, be); namesStart += 4; if (namesStart > size || namesStart + namesSize != size) continue; UInt32 numSymbols = tableSize >> 3; UInt32 i; for (i = 0; i < numSymbols; i++, pos += 8) { size_t namePos = Get32(p + pos, be); UInt32 offset = Get32(p + pos + 4, be); if (AddFunc(offset, p + namesStart, namesSize, namePos) != S_OK) break; } if (i == numSymbols) { pos = size; _type = kType_ALib; _subType = kSubType_BSD; break; } } if (be == 2) return S_FALSE; } else if (_numLibFiles == 0) { // archive symbol table (GNU) UInt32 numSymbols = GetBe32(p); pos = 4; if (numSymbols > (size - pos) / 4) return S_FALSE; pos += 4 * numSymbols; for (UInt32 i = 0; i < numSymbols; i++) { UInt32 offset = GetBe32(p + 4 + i * 4); RINOK(AddFunc(offset, p, size, pos)); } _type = kType_ALib; } else { // Second linker file (Microsoft .lib) UInt32 numMembers = GetUi32(p); pos = 4; if (numMembers > (size - pos) / 4) return S_FALSE; pos += 4 * numMembers; if (size - pos < 4) return S_FALSE; UInt32 numSymbols = GetUi32(p + pos); pos += 4; if (numSymbols > (size - pos) / 2) return S_FALSE; size_t indexStart = pos; pos += 2 * numSymbols; for (UInt32 i = 0; i < numSymbols; i++) { // index is 1-based. So 32-bit numSymbols field works as item[0] UInt32 index = GetUi16(p + indexStart + i * 2); if (index == 0 || index > numMembers) return S_FALSE; UInt32 offset = GetUi32(p + index * 4); RINOK(AddFunc(offset, p, size, pos)); } _type = kType_Lib; } // size can be 2-byte aligned in linux files if (pos != size && pos + (pos & 1) != size) return S_FALSE; item.TextFileIndex = _numLibFiles++; return S_OK; } STDMETHODIMP CHandler::Open(IInStream *stream, const UInt64 * /* maxCheckStartPosition */, IArchiveOpenCallback *callback) { COM_TRY_BEGIN { Close(); UInt64 fileSize = 0; RINOK(stream->Seek(0, STREAM_SEEK_END, &fileSize)); RINOK(stream->Seek(0, STREAM_SEEK_SET, NULL)); CInArchive arc; RINOK(arc.Open(stream)); if (callback) { RINOK(callback->SetTotal(NULL, &fileSize)); UInt64 numFiles = _items.Size(); RINOK(callback->SetCompleted(&numFiles, &arc.Position)); } CItem item; for (;;) { bool filled; RINOK(arc.GetNextItem(item, filled)); if (!filled) break; _items.Add(item); arc.SkipData(item.Size); if (callback && (_items.Size() & 0xFF) == 0) { UInt64 numFiles = _items.Size(); RINOK(callback->SetCompleted(&numFiles, &arc.Position)); } } if (_items.IsEmpty()) { // we don't need false empty archives (8-bytes signature only) if (arc.Position != fileSize) return S_FALSE; } _isArc = true; _subType = arc.SubType; if (ParseLongNames(stream) != S_OK) UpdateErrorMessage("Long file names parsing error"); if (_longNames_FileIndex >= 0) _items.Delete(_longNames_FileIndex); if (!_items.IsEmpty() && _items[0].Name == "debian-binary") { _type = kType_Deb; _items.DeleteFrontal(1); for (unsigned i = 0; i < _items.Size(); i++) if (_items[i].Name.IsPrefixedBy("data.tar.")) if (_mainSubfile < 0) _mainSubfile = i; else { _mainSubfile = -1; break; } } else { ChangeDuplicateNames(); bool error = false; for (unsigned li = 0; li < 2 && li < _items.Size(); li++) if (ParseLibSymbols(stream, li) != S_OK) error = true; if (error) UpdateErrorMessage("Library symbols information error"); } _stream = stream; _phySize = arc.Position; /* if (fileSize < _phySize) UpdateErrorMessage("Unexpected end of archive"); */ } return S_OK; COM_TRY_END } STDMETHODIMP CHandler::Close() { _isArc = false; _phySize = 0; _errorMessage.Empty(); _stream.Release(); _items.Clear(); _type = kType_Ar; _subType = kSubType_None; _mainSubfile = -1; _longNames_FileIndex = -1; _numLibFiles = 0; _libFiles[0].Empty(); _libFiles[1].Empty(); return S_OK; } STDMETHODIMP CHandler::GetNumberOfItems(UInt32 *numItems) { *numItems = _items.Size(); return S_OK; } STDMETHODIMP CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value) { COM_TRY_BEGIN NCOM::CPropVariant prop; switch (propID) { case kpidPhySize: prop = _phySize; break; case kpidMainSubfile: if (_mainSubfile >= 0) prop = (UInt32)_mainSubfile; break; case kpidExtension: prop = k_TypeExtionsions[(unsigned)_type]; break; case kpidShortComment: case kpidSubType: { AString s = k_TypeExtionsions[(unsigned)_type]; if (_subType == kSubType_BSD) s += ":BSD"; prop = s; break; } case kpidErrorFlags: { UInt32 v = 0; if (!_isArc) v |= kpv_ErrorFlags_IsNotArc; prop = v; break; } case kpidWarning: if (!_errorMessage.IsEmpty()) prop = _errorMessage; break; case kpidIsNotArcType: if (_type != kType_Deb) prop = true; break; } prop.Detach(value); return S_OK; COM_TRY_END } STDMETHODIMP CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *value) { COM_TRY_BEGIN NWindows::NCOM::CPropVariant prop; const CItem &item = _items[index]; switch (propID) { case kpidPath: if (item.TextFileIndex >= 0) prop = (item.TextFileIndex == 0) ? "1.txt" : "2.txt"; else prop = (const wchar_t *)NItemName::GetOSName2(MultiByteToUnicodeString(item.Name, CP_OEMCP)); break; case kpidSize: case kpidPackSize: if (item.TextFileIndex >= 0) prop = (UInt64)_libFiles[(unsigned)item.TextFileIndex].Len(); else prop = item.Size; break; case kpidMTime: { if (item.MTime != 0) { FILETIME fileTime; NTime::UnixTimeToFileTime(item.MTime, fileTime); prop = fileTime; } break; } case kpidUser: if (item.User != 0) prop = item.User; break; case kpidGroup: if (item.Group != 0) prop = item.Group; break; case kpidPosixAttrib: if (item.TextFileIndex < 0) prop = item.Mode; break; } prop.Detach(value); return S_OK; COM_TRY_END } STDMETHODIMP CHandler::Extract(const UInt32 *indices, UInt32 numItems, Int32 testMode, IArchiveExtractCallback *extractCallback) { COM_TRY_BEGIN bool allFilesMode = (numItems == (UInt32)(Int32)-1); if (allFilesMode) numItems = _items.Size(); if (numItems == 0) return S_OK; UInt64 totalSize = 0; UInt32 i; for (i = 0; i < numItems; i++) { const CItem &item = _items[allFilesMode ? i : indices[i]]; totalSize += (item.TextFileIndex >= 0) ? (UInt64)_libFiles[(unsigned)item.TextFileIndex].Len() : item.Size; } extractCallback->SetTotal(totalSize); UInt64 currentTotalSize = 0; NCompress::CCopyCoder *copyCoderSpec = new NCompress::CCopyCoder(); CMyComPtr copyCoder = copyCoderSpec; CLocalProgress *lps = new CLocalProgress; CMyComPtr progress = lps; lps->Init(extractCallback, false); CLimitedSequentialInStream *streamSpec = new CLimitedSequentialInStream; CMyComPtr inStream(streamSpec); streamSpec->SetStream(_stream); for (i = 0; i < numItems; i++) { lps->InSize = lps->OutSize = currentTotalSize; RINOK(lps->SetCur()); CMyComPtr realOutStream; Int32 askMode = testMode ? NExtract::NAskMode::kTest : NExtract::NAskMode::kExtract; Int32 index = allFilesMode ? i : indices[i]; const CItem &item = _items[index]; RINOK(extractCallback->GetStream(index, &realOutStream, askMode)); currentTotalSize += (item.TextFileIndex >= 0) ? (UInt64)_libFiles[(unsigned)item.TextFileIndex].Len() : item.Size; if (!testMode && !realOutStream) continue; RINOK(extractCallback->PrepareOperation(askMode)); if (testMode) { RINOK(extractCallback->SetOperationResult(NExtract::NOperationResult::kOK)); continue; } bool isOk = true; if (item.TextFileIndex >= 0) { const AString &f = _libFiles[(unsigned)item.TextFileIndex]; if (realOutStream) RINOK(WriteStream(realOutStream, f, f.Len())); } else { RINOK(_stream->Seek(item.GetDataPos(), STREAM_SEEK_SET, NULL)); streamSpec->Init(item.Size); RINOK(copyCoder->Code(inStream, realOutStream, NULL, NULL, progress)); isOk = (copyCoderSpec->TotalSize == item.Size); } realOutStream.Release(); RINOK(extractCallback->SetOperationResult(isOk ? NExtract::NOperationResult::kOK: NExtract::NOperationResult::kDataError)); } return S_OK; COM_TRY_END } STDMETHODIMP CHandler::GetStream(UInt32 index, ISequentialInStream **stream) { COM_TRY_BEGIN const CItem &item = _items[index]; if (item.TextFileIndex >= 0) { const AString &f = _libFiles[(unsigned)item.TextFileIndex]; Create_BufInStream_WithNewBuffer((const void *)(const char *)f, f.Len(), stream); return S_OK; } else return CreateLimitedInStream(_stream, item.GetDataPos(), item.Size, stream); COM_TRY_END } REGISTER_ARC_I( "Ar", "ar a deb lib", 0, 0xEC, kSignature, 0, 0, NULL) }}